Index: projects/clang380-import/Makefile.inc1 =================================================================== --- projects/clang380-import/Makefile.inc1 (revision 293686) +++ projects/clang380-import/Makefile.inc1 (revision 293687) @@ -1,2419 +1,2414 @@ # # $FreeBSD$ # # Make command line options: # -DNO_CLEANDIR run ${MAKE} clean, instead of ${MAKE} cleandir # -DNO_CLEAN do not clean at all # -DDB_FROM_SRC use the user/group databases in src/etc instead of # the system database when installing. # -DNO_SHARE do not go into share subdir # -DKERNFAST define NO_KERNEL{CONFIG,CLEAN,DEPEND,OBJ} # -DNO_KERNELCONFIG do not run config in ${MAKE} buildkernel # -DNO_KERNELCLEAN do not run ${MAKE} clean in ${MAKE} buildkernel # -DNO_KERNELDEPEND do not run ${MAKE} depend in ${MAKE} buildkernel # -DNO_KERNELOBJ do not run ${MAKE} obj in ${MAKE} buildkernel # -DNO_PORTSUPDATE do not update ports in ${MAKE} update # -DNO_ROOT install without using root privilege # -DNO_DOCUPDATE do not update doc in ${MAKE} update # -DWITHOUT_CTF do not run the DTrace CTF conversion tools on built objects # LOCAL_DIRS="list of dirs" to add additional dirs to the SUBDIR list # LOCAL_ITOOLS="list of tools" to add additional tools to the ITOOLS list # LOCAL_LIB_DIRS="list of dirs" to add additional dirs to libraries target # LOCAL_MTREE="list of mtree files" to process to allow local directories # to be created before files are installed # LOCAL_TOOL_DIRS="list of dirs" to add additional dirs to the build-tools # list # METALOG="path to metadata log" to write permission and ownership # when NO_ROOT is set. (default: ${DESTDIR}/METALOG) # TARGET="machine" to crossbuild world for a different machine type # TARGET_ARCH= may be required when a TARGET supports multiple endians # BUILDENV_SHELL= shell to launch for the buildenv target (def:${SHELL}) # WORLD_FLAGS= additional flags to pass to make(1) during buildworld # KERNEL_FLAGS= additional flags to pass to make(1) during buildkernel # SUBDIR_OVERRIDE="list of dirs" to build rather than everything. # All libraries and includes, and some build tools will still build. # # The intended user-driven targets are: # buildworld - rebuild *everything*, including glue to help do upgrades # installworld- install everything built by "buildworld" # doxygen - build API documentation of the kernel # update - convenient way to update your source tree (eg: svn/svnup) # # Standard targets (not defined here) are documented in the makefiles in # /usr/share/mk. These include: # obj depend all install clean cleandepend cleanobj .if !defined(TARGET) || !defined(TARGET_ARCH) .error "Both TARGET and TARGET_ARCH must be defined." .endif LOCALBASE?= /usr/local # Cross toolchain changes must be in effect before bsd.compiler.mk # so that gets the right CC, and pass CROSS_TOOLCHAIN to submakes. .if defined(CROSS_TOOLCHAIN) .include "${LOCALBASE}/share/toolchains/${CROSS_TOOLCHAIN}.mk" CROSSENV+=CROSS_TOOLCHAIN="${CROSS_TOOLCHAIN}" .endif .include # don't depend on src.opts.mk doing it .include "share/mk/src.opts.mk" # We must do lib/ and libexec/ before bin/ in case of a mid-install error to # keep the users system reasonably usable. For static->dynamic root upgrades, # we don't want to install a dynamic binary without rtld and the needed # libraries. More commonly, for dynamic root, we don't want to install a # binary that requires a newer library version that hasn't been installed yet. # This ordering is not a guarantee though. The only guarantee of a working # system here would require fine-grained ordering of all components based # on their dependencies. SRCDIR?= ${.CURDIR} .if !empty(SUBDIR_OVERRIDE) SUBDIR= ${SUBDIR_OVERRIDE} .else SUBDIR= lib libexec .if make(install*) # Ensure libraries are installed before progressing. SUBDIR+=.WAIT .endif SUBDIR+=bin .if ${MK_CDDL} != "no" SUBDIR+=cddl .endif SUBDIR+=gnu include .if ${MK_KERBEROS} != "no" SUBDIR+=kerberos5 .endif .if ${MK_RESCUE} != "no" SUBDIR+=rescue .endif SUBDIR+=sbin .if ${MK_CRYPT} != "no" SUBDIR+=secure .endif .if !defined(NO_SHARE) SUBDIR+=share .endif SUBDIR+=sys usr.bin usr.sbin .if ${MK_TESTS} != "no" SUBDIR+= tests .endif .if ${MK_OFED} != "no" SUBDIR+=contrib/ofed .endif # Local directories are last, since it is nice to at least get the base # system rebuilt before you do them. .for _DIR in ${LOCAL_DIRS} .if exists(${.CURDIR}/${_DIR}/Makefile) SUBDIR+= ${_DIR} .endif .endfor # Add LOCAL_LIB_DIRS, but only if they will not be picked up as a SUBDIR # of a LOCAL_DIRS directory. This allows LOCAL_DIRS=foo and # LOCAL_LIB_DIRS=foo/lib to behave as expected. .for _DIR in ${LOCAL_DIRS:M*/} ${LOCAL_DIRS:N*/:S|$|/|} _REDUNDENT_LIB_DIRS+= ${LOCAL_LIB_DIRS:M${_DIR}*} .endfor .for _DIR in ${LOCAL_LIB_DIRS} .if empty(_REDUNDENT_LIB_DIRS:M${_DIR}) && exists(${.CURDIR}/${_DIR}/Makefile) SUBDIR+= ${_DIR} .else .warning ${_DIR} not added to SUBDIR list. See UPDATING 20141121. .endif .endfor # We must do etc/ last as it hooks into building the man whatis file # by calling 'makedb' in share/man. This is only relevant for # install/distribute so they build the whatis file after every manpage is # installed. .if make(install*) SUBDIR+=.WAIT .endif SUBDIR+=etc .endif # !empty(SUBDIR_OVERRIDE) .if defined(NOCLEAN) .warning NOCLEAN option is deprecated. Use NO_CLEAN instead. NO_CLEAN= ${NOCLEAN} .endif .if defined(NO_CLEANDIR) CLEANDIR= clean cleandepend .else CLEANDIR= cleandir .endif LOCAL_TOOL_DIRS?= PACKAGEDIR?= ${DESTDIR}/${DISTDIR} .if empty(SHELL:M*csh*) BUILDENV_SHELL?=${SHELL} .else BUILDENV_SHELL?=/bin/sh .endif SVN?= /usr/local/bin/svn SVNFLAGS?= -r HEAD MAKEOBJDIRPREFIX?= /usr/obj .if !defined(OSRELDATE) .if exists(/usr/include/osreldate.h) OSRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ /usr/include/osreldate.h .else OSRELDATE= 0 .endif .export OSRELDATE .endif # Set VERSION for CTFMERGE to use via the default CTFFLAGS=-L VERSION. .if !defined(VERSION) && !make(showconfig) REVISION!= ${MAKE} -C ${SRCDIR}/release -V REVISION BRANCH!= ${MAKE} -C ${SRCDIR}/release -V BRANCH SRCRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ ${SRCDIR}/sys/sys/param.h VERSION= FreeBSD ${REVISION}-${BRANCH:C/-p[0-9]+$//} ${TARGET_ARCH} ${SRCRELDATE} .export VERSION .endif KNOWN_ARCHES?= aarch64/arm64 \ amd64 \ arm \ armeb/arm \ armv6/arm \ armv6hf/arm \ i386 \ i386/pc98 \ mips \ mipsel/mips \ mips64el/mips \ mips64/mips \ mipsn32el/mips \ mipsn32/mips \ powerpc \ powerpc64/powerpc \ riscv64/riscv \ sparc64 .if ${TARGET} == ${TARGET_ARCH} _t= ${TARGET} .else _t= ${TARGET_ARCH}/${TARGET} .endif .for _t in ${_t} .if empty(KNOWN_ARCHES:M${_t}) .error Unknown target ${TARGET_ARCH}:${TARGET}. .endif .endfor .if ${TARGET} == ${MACHINE} TARGET_CPUTYPE?=${CPUTYPE} .else TARGET_CPUTYPE?= .endif .if !empty(TARGET_CPUTYPE) _TARGET_CPUTYPE=${TARGET_CPUTYPE} .else _TARGET_CPUTYPE=dummy .endif # Skip for showconfig as it is just wasted time and may invoke auto.obj.mk. .if !make(showconfig) _CPUTYPE!= MAKEFLAGS= CPUTYPE=${_TARGET_CPUTYPE} ${MAKE} \ -f /dev/null -m ${.CURDIR}/share/mk -V CPUTYPE .if ${_CPUTYPE} != ${_TARGET_CPUTYPE} .error CPUTYPE global should be set with ?=. .endif .endif .if make(buildworld) BUILD_ARCH!= uname -p .if ${MACHINE_ARCH} != ${BUILD_ARCH} .error To cross-build, set TARGET_ARCH. .endif .endif .if ${MACHINE} == ${TARGET} && ${MACHINE_ARCH} == ${TARGET_ARCH} && !defined(CROSS_BUILD_TESTING) OBJTREE= ${MAKEOBJDIRPREFIX} .else OBJTREE= ${MAKEOBJDIRPREFIX}/${TARGET}.${TARGET_ARCH} .endif WORLDTMP= ${OBJTREE}${.CURDIR}/tmp BPATH= ${WORLDTMP}/legacy/usr/sbin:${WORLDTMP}/legacy/usr/bin:${WORLDTMP}/legacy/bin XPATH= ${WORLDTMP}/usr/sbin:${WORLDTMP}/usr/bin STRICTTMPPATH= ${BPATH}:${XPATH} TMPPATH= ${STRICTTMPPATH}:${PATH} # # Avoid running mktemp(1) unless actually needed. # It may not be functional, e.g., due to new ABI # when in the middle of installing over this system. # .if make(distributeworld) || make(installworld) INSTALLTMP!= /usr/bin/mktemp -d -u -t install .endif # # Building a world goes through the following stages # # 1. legacy stage [BMAKE] # This stage is responsible for creating compatibility # shims that are needed by the bootstrap-tools, # build-tools and cross-tools stages. These are generally # APIs that tools from one of those three stages need to # build that aren't present on the host. # 1. bootstrap-tools stage [BMAKE] # This stage is responsible for creating programs that # are needed for backward compatibility reasons. They # are not built as cross-tools. # 2. build-tools stage [TMAKE] # This stage is responsible for creating the object # tree and building any tools that are needed during # the build process. Some programs are listed during # this phase because they build binaries to generate # files needed to build these programs. This stage also # builds the 'build-tools' target rather than 'all'. # 3. cross-tools stage [XMAKE] # This stage is responsible for creating any tools that # are needed for building the system. A cross-compiler is one # of them. This differs from build tools in two ways: # 1. the 'all' target is built rather than 'build-tools' # 2. these tools are installed into TMPPATH for stage 4. # 4. world stage [WMAKE] # This stage actually builds the world. # 5. install stage (optional) [IMAKE] # This stage installs a previously built world. # BOOTSTRAPPING?= 0 # Common environment for world related stages CROSSENV+= MAKEOBJDIRPREFIX=${OBJTREE} \ MACHINE_ARCH=${TARGET_ARCH} \ MACHINE=${TARGET} \ CPUTYPE=${TARGET_CPUTYPE} .if ${MK_GROFF} != "no" CROSSENV+= GROFF_BIN_PATH=${WORLDTMP}/legacy/usr/bin \ GROFF_FONT_PATH=${WORLDTMP}/legacy/usr/share/groff_font \ GROFF_TMAC_PATH=${WORLDTMP}/legacy/usr/share/tmac .endif .if defined(TARGET_CFLAGS) CROSSENV+= ${TARGET_CFLAGS} .endif # bootstrap-tools stage BMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${BPATH}:${PATH} \ WORLDTMP=${WORLDTMP} \ MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}" # need to keep this in sync with targets/pseudo/bootstrap-tools/Makefile BSARGS= DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ MK_HTML=no NO_LINT=yes MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_TESTS=no \ MK_INCLUDES=yes BMAKE= MAKEOBJDIRPREFIX=${WORLDTMP} \ ${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ ${BSARGS} # build-tools stage TMAKE= MAKEOBJDIRPREFIX=${OBJTREE} \ ${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ -DNO_LINT \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_TESTS=no # cross-tools stage XMAKE= TOOLS_PREFIX=${WORLDTMP} ${BMAKE} \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ MK_GDB=no MK_TESTS=no # kernel-tools stage KTMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${BPATH}:${PATH} \ WORLDTMP=${WORLDTMP} KTMAKE= TOOLS_PREFIX=${WORLDTMP} MAKEOBJDIRPREFIX=${WORLDTMP} \ ${KTMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ MK_HTML=no -DNO_LINT MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no # world stage WMAKEENV= ${CROSSENV} \ _LDSCRIPTROOT= \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${TMPPATH} # make hierarchy HMAKE= PATH=${TMPPATH} ${MAKE} LOCAL_MTREE=${LOCAL_MTREE:Q} .if defined(NO_ROOT) HMAKE+= PATH=${TMPPATH} METALOG=${METALOG} -DNO_ROOT .endif .if defined(CROSS_TOOLCHAIN_PREFIX) CROSS_COMPILER_PREFIX?=${CROSS_TOOLCHAIN_PREFIX} CROSS_BINUTILS_PREFIX?=${CROSS_TOOLCHAIN_PREFIX} .endif # If we do not have a bootstrap binutils (because the in-tree one does not # support the target architecture), provide a default cross-binutils prefix. # This allows aarch64 builds, for example, to automatically use the # aarch64-binutils port or package. .if !make(showconfig) .if !empty(BROKEN_OPTIONS:MBINUTILS_BOOTSTRAP) && \ !defined(CROSS_BINUTILS_PREFIX) CROSS_BINUTILS_PREFIX=/usr/local/${TARGET_ARCH}-freebsd/bin/ .if !exists(${CROSS_BINUTILS_PREFIX}) .error In-tree binutils does not support the ${TARGET_ARCH} architecture. Install the ${TARGET_ARCH}-binutils port or package or set CROSS_BINUTILS_PREFIX. .endif .endif .endif XCOMPILERS= CC CXX CPP .for COMPILER in ${XCOMPILERS} .if defined(CROSS_COMPILER_PREFIX) X${COMPILER}?= ${CROSS_COMPILER_PREFIX}${${COMPILER}} .else X${COMPILER}?= ${${COMPILER}} .endif .endfor XBINUTILS= AS AR LD NM OBJCOPY OBJDUMP RANLIB SIZE STRINGS .for BINUTIL in ${XBINUTILS} .if defined(CROSS_BINUTILS_PREFIX) && \ exists(${CROSS_BINUTILS_PREFIX}${${BINUTIL}}) X${BINUTIL}?= ${CROSS_BINUTILS_PREFIX}${${BINUTIL}} .else X${BINUTIL}?= ${${BINUTIL}} .endif .endfor CROSSENV+= CC="${XCC} ${XCFLAGS}" CXX="${XCXX} ${XCFLAGS} ${XCXXFLAGS}" \ DEPFLAGS="${DEPFLAGS}" \ CPP="${XCPP} ${XCFLAGS}" \ AS="${XAS}" AR="${XAR}" LD="${XLD}" NM=${XNM} \ OBJDUMP=${XOBJDUMP} OBJCOPY="${XOBJCOPY}" \ RANLIB=${XRANLIB} STRINGS=${XSTRINGS} \ SIZE="${XSIZE}" .if ${XCC:N${CCACHE_BIN}:M/*} .if defined(CROSS_BINUTILS_PREFIX) # In the case of xdev-build tools, CROSS_BINUTILS_PREFIX won't be a # directory, but the compiler will look in the right place for it's # tools so we don't need to tell it where to look. .if exists(${CROSS_BINUTILS_PREFIX}) BFLAGS+= -B${CROSS_BINUTILS_PREFIX} .endif .else BFLAGS+= -B${WORLDTMP}/usr/bin .endif .if ${TARGET} == "arm" .if ${TARGET_ARCH:M*hf*} != "" TARGET_ABI= gnueabihf .else TARGET_ABI= gnueabi .endif .endif .if defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc XCFLAGS+= -isystem ${WORLDTMP}/usr/include -L${WORLDTMP}/usr/lib XCXXFLAGS+= -I${WORLDTMP}/usr/include/c++/v1 -std=gnu++11 -L${WORLDTMP}/../lib/libc++ # XXX: DEPFLAGS is a workaround for not properly passing CXXFLAGS to sub-makes # due to CXX="${XCXX} ${XCXXFLAGS}". bsd.dep.mk does use CXXFLAGS when # building C++ files so this can come out if passing CXXFLAGS down is fixed. DEPFLAGS+= -I${WORLDTMP}/usr/include/c++/v1 .else TARGET_ABI?= unknown TARGET_TRIPLE?= ${TARGET_ARCH:C/amd64/x86_64/}-${TARGET_ABI}-freebsd11.0 XCFLAGS+= -target ${TARGET_TRIPLE} .endif XCFLAGS+= --sysroot=${WORLDTMP} ${BFLAGS} XCXXFLAGS+= --sysroot=${WORLDTMP} ${BFLAGS} .else .if defined(CROSS_BINUTILS_PREFIX) && exists(${CROSS_BINUTILS_PREFIX}) BFLAGS+= -B${CROSS_BINUTILS_PREFIX} XCFLAGS+= ${BFLAGS} XCXXFLAGS+= ${BFLAGS} .endif .endif # ${XCC:M/*} WMAKE= ${WMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 DESTDIR=${WORLDTMP} .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64" # 32 bit world LIB32_OBJTREE= ${OBJTREE}${.CURDIR}/world32 LIB32TMP= ${OBJTREE}${.CURDIR}/lib32 .if ${TARGET_ARCH} == "amd64" .if empty(TARGET_CPUTYPE) LIB32CPUFLAGS= -march=i686 -mmmx -msse -msse2 .else LIB32CPUFLAGS= -march=${TARGET_CPUTYPE} .endif LIB32WMAKEENV= MACHINE=i386 MACHINE_ARCH=i386 \ MACHINE_CPU="i686 mmx sse sse2" LIB32WMAKEFLAGS= \ AS="${XAS} --32" \ LD="${XLD} -m elf_i386_fbsd -Y P,${LIB32TMP}/usr/lib32" \ OBJCOPY="${XOBJCOPY}" .elif ${TARGET_ARCH} == "powerpc64" .if empty(TARGET_CPUTYPE) LIB32CPUFLAGS= -mcpu=powerpc .else LIB32CPUFLAGS= -mcpu=${TARGET_CPUTYPE} .endif LIB32WMAKEENV= MACHINE=powerpc MACHINE_ARCH=powerpc LIB32WMAKEFLAGS= \ LD="${XLD} -m elf32ppc_fbsd" \ OBJCOPY="${XOBJCOPY}" .endif LIB32FLAGS= -m32 ${LIB32CPUFLAGS} -DCOMPAT_32BIT \ -isystem ${LIB32TMP}/usr/include/ \ -L${LIB32TMP}/usr/lib32 \ -B${LIB32TMP}/usr/lib32 .if ${XCC:N${CCACHE_BIN}:M/*} LIB32FLAGS+= --sysroot=${WORLDTMP} .endif # Yes, the flags are redundant. LIB32WMAKEENV+= MAKEOBJDIRPREFIX=${LIB32_OBJTREE} \ _LDSCRIPTROOT=${LIB32TMP} \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${TMPPATH} \ LIBDIR=/usr/lib32 \ SHLIBDIR=/usr/lib32 \ DTRACE="${DTRACE} -32" LIB32WMAKEFLAGS+= CC="${XCC} ${LIB32FLAGS}" \ CXX="${XCXX} ${LIB32FLAGS}" \ DESTDIR=${LIB32TMP} \ -DCOMPAT_32BIT \ -DLIBRARIES_ONLY \ -DNO_CPU_CFLAGS \ MK_CTF=no \ -DNO_LINT \ MK_TESTS=no LIB32WMAKE= ${LIB32WMAKEENV} ${MAKE} ${LIB32WMAKEFLAGS} \ MK_MAN=no MK_HTML=no LIB32IMAKE= ${LIB32WMAKE:NINSTALL=*:NDESTDIR=*:N_LDSCRIPTROOT=*} \ MK_TOOLCHAIN=no ${IMAKE_INSTALL} .endif IMAKEENV= ${CROSSENV:N_LDSCRIPTROOT=*} IMAKE= ${IMAKEENV} ${MAKE} -f Makefile.inc1 \ ${IMAKE_INSTALL} ${IMAKE_MTREE} .if empty(.MAKEFLAGS:M-n) IMAKEENV+= PATH=${STRICTTMPPATH}:${INSTALLTMP} \ LD_LIBRARY_PATH=${INSTALLTMP} \ PATH_LOCALE=${INSTALLTMP}/locale IMAKE+= __MAKE_SHELL=${INSTALLTMP}/sh .else IMAKEENV+= PATH=${TMPPATH}:${INSTALLTMP} .endif .if defined(DB_FROM_SRC) INSTALLFLAGS+= -N ${.CURDIR}/etc MTREEFLAGS+= -N ${.CURDIR}/etc .endif _INSTALL_DDIR= ${DESTDIR}/${DISTDIR} INSTALL_DDIR= ${_INSTALL_DDIR:S://:/:g:C:/$::} .if defined(NO_ROOT) METALOG?= ${DESTDIR}/${DISTDIR}/METALOG IMAKE+= -DNO_ROOT METALOG=${METALOG} INSTALLFLAGS+= -U -M ${METALOG} -D ${INSTALL_DDIR} MTREEFLAGS+= -W .endif .if defined(DB_FROM_SRC) || defined(NO_ROOT) IMAKE_INSTALL= INSTALL="install ${INSTALLFLAGS}" IMAKE_MTREE= MTREE_CMD="mtree ${MTREEFLAGS}" .endif # kernel stage KMAKEENV= ${WMAKEENV} KMAKE= ${KMAKEENV} ${MAKE} ${.MAKEFLAGS} ${KERNEL_FLAGS} KERNEL=${INSTKERNNAME} # # buildworld # # Attempt to rebuild the entire system, with reasonable chance of # success, regardless of how old your existing system is. # _worldtmp: .PHONY .if ${.CURDIR:C/[^,]//g} != "" # The m4 build of sendmail files doesn't like it if ',' is used # anywhere in the path of it's files. @echo @echo "*** Error: path to source tree contains a comma ','" @echo false .endif @echo @echo "--------------------------------------------------------------" @echo ">>> Rebuilding the temporary build tree" @echo "--------------------------------------------------------------" .if !defined(NO_CLEAN) rm -rf ${WORLDTMP} .if defined(LIB32TMP) rm -rf ${LIB32TMP} .endif .else rm -rf ${WORLDTMP}/legacy/usr/include # XXX - These three can depend on any header file. - rm -f ${OBJTREE}${.CURDIR}/usr.bin/kdump/ioctl.c + rm -f ${OBJTREE}${.CURDIR}/lib/libsysdecode/ioctl.c rm -f ${OBJTREE}${.CURDIR}/usr.bin/kdump/kdump_subr.c - rm -f ${OBJTREE}${.CURDIR}/usr.bin/truss/ioctl.c .endif .for _dir in \ lib usr legacy/bin legacy/usr mkdir -p ${WORLDTMP}/${_dir} .endfor mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/legacy/usr >/dev/null .if ${MK_GROFF} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.groff.dist \ -p ${WORLDTMP}/legacy/usr >/dev/null .endif mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${WORLDTMP}/usr/include >/dev/null ln -sf ${.CURDIR}/sys ${WORLDTMP} .if ${MK_DEBUG_FILES} != "no" # We could instead disable debug files for these build stages mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${WORLDTMP}/legacy/usr/lib >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${WORLDTMP}/usr/lib >/dev/null .endif .if ${MK_LIB32} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \ -p ${WORLDTMP}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \ -p ${WORLDTMP}/legacy/usr/lib/debug/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \ -p ${WORLDTMP}/usr/lib/debug/usr >/dev/null .endif .endif .if ${MK_TESTS} != "no" mkdir -p ${WORLDTMP}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${WORLDTMP}${TESTSBASE} >/dev/null .if ${MK_DEBUG_FILES} != "no" mkdir -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE} >/dev/null .endif .endif .for _mtree in ${LOCAL_MTREE} mtree -deU -f ${.CURDIR}/${_mtree} -p ${WORLDTMP} > /dev/null .endfor _legacy: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.1: legacy release compatibility shims" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} legacy _bootstrap-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.2: bootstrap tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} bootstrap-tools _cleanobj: .if !defined(NO_CLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} ${CLEANDIR} .if defined(LIB32TMP) ${_+_}cd ${.CURDIR}; ${LIB32WMAKE} -f Makefile.inc1 ${CLEANDIR} .endif .endif _obj: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} obj _build-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${TMAKE} build-tools _cross-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3: cross tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${XMAKE} cross-tools ${_+_}cd ${.CURDIR}; ${XMAKE} kernel-tools _includes: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.1: building includes" @echo "--------------------------------------------------------------" # Special handling for SUBDIR_OVERRIDE in buildworld as they most likely need # headers from default SUBDIR. Do SUBDIR_OVERRIDE includes last. ${_+_}cd ${.CURDIR}; ${WMAKE} SUBDIR_OVERRIDE= SHARED=symlinks \ includes .if !empty(SUBDIR_OVERRIDE) && make(buildworld) ${_+_}cd ${.CURDIR}; ${WMAKE} SHARED=symlinks includes .endif _libraries: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.2: building libraries" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; \ ${WMAKE} -DNO_FSCHG MK_HTML=no -DNO_LINT MK_MAN=no \ MK_PROFILE=no MK_TESTS=no MK_TESTS_SUPPORT=${MK_TESTS} libraries _depend: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.3: make dependencies" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} depend everything: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.4: building everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; _PARALLEL_SUBDIR_OK=1 ${WMAKE} all .if defined(LIB32TMP) build32: .PHONY @echo @echo "--------------------------------------------------------------" @echo ">>> stage 5.1: building 32 bit shim libraries" @echo "--------------------------------------------------------------" mkdir -p ${LIB32TMP}/usr/include mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${LIB32TMP}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${LIB32TMP}/usr/include >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \ -p ${LIB32TMP}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${LIB32TMP}/usr/lib >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \ -p ${LIB32TMP}/usr/lib/debug/usr >/dev/null .endif mkdir -p ${WORLDTMP} ln -sf ${.CURDIR}/sys ${WORLDTMP} .for _t in obj includes ${_+_}cd ${.CURDIR}/include; ${LIB32WMAKE} DIRPRFX=include/ ${_t} ${_+_}cd ${.CURDIR}/lib; ${LIB32WMAKE} DIRPRFX=lib/ ${_t} .if ${MK_CDDL} != "no" ${_+_}cd ${.CURDIR}/cddl/lib; ${LIB32WMAKE} DIRPRFX=cddl/lib/ ${_t} .endif ${_+_}cd ${.CURDIR}/gnu/lib; ${LIB32WMAKE} DIRPRFX=gnu/lib/ ${_t} .if ${MK_CRYPT} != "no" ${_+_}cd ${.CURDIR}/secure/lib; ${LIB32WMAKE} DIRPRFX=secure/lib/ ${_t} .endif .if ${MK_KERBEROS} != "no" ${_+_}cd ${.CURDIR}/kerberos5/lib; ${LIB32WMAKE} DIRPRFX=kerberos5/lib ${_t} .endif .endfor .for _dir in usr.bin/lex/lib ${_+_}cd ${.CURDIR}/${_dir}; ${LIB32WMAKE} DIRPRFX=${_dir}/ obj .endfor .for _dir in lib/ncurses/ncurses lib/ncurses/ncursesw lib/libmagic ${_+_}cd ${.CURDIR}/${_dir}; \ WORLDTMP=${WORLDTMP} \ MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}" \ MAKEOBJDIRPREFIX=${LIB32_OBJTREE} ${MAKE} SSP_CFLAGS= DESTDIR= \ DIRPRFX=${_dir}/ -DNO_LINT -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ build-tools .endfor ${_+_}cd ${.CURDIR}; \ ${LIB32WMAKE} -f Makefile.inc1 -DNO_FSCHG libraries .for _t in obj depend all ${_+_}cd ${.CURDIR}/libexec/rtld-elf; PROG=ld-elf32.so.1 ${LIB32WMAKE} \ -DNO_FSCHG DIRPRFX=libexec/rtld-elf/ ${_t} ${_+_}cd ${.CURDIR}/usr.bin/ldd; PROG=ldd32 ${LIB32WMAKE} \ DIRPRFX=usr.bin/ldd ${_t} .endfor distribute32 install32: .MAKE .PHONY ${_+_}cd ${.CURDIR}/lib; ${LIB32IMAKE} ${.TARGET:S/32$//} .if ${MK_CDDL} != "no" ${_+_}cd ${.CURDIR}/cddl/lib; ${LIB32IMAKE} ${.TARGET:S/32$//} .endif ${_+_}cd ${.CURDIR}/gnu/lib; ${LIB32IMAKE} ${.TARGET:S/32$//} .if ${MK_CRYPT} != "no" ${_+_}cd ${.CURDIR}/secure/lib; ${LIB32IMAKE} ${.TARGET:S/32$//} .endif .if ${MK_KERBEROS} != "no" ${_+_}cd ${.CURDIR}/kerberos5/lib; ${LIB32IMAKE} ${.TARGET:S/32$//} .endif ${_+_}cd ${.CURDIR}/libexec/rtld-elf; \ PROG=ld-elf32.so.1 ${LIB32IMAKE} ${.TARGET:S/32$//} ${_+_}cd ${.CURDIR}/usr.bin/ldd; PROG=ldd32 ${LIB32IMAKE} \ ${.TARGET:S/32$//} .endif WMAKE_TGTS= WMAKE_TGTS+= _worldtmp _legacy .if empty(SUBDIR_OVERRIDE) WMAKE_TGTS+= _bootstrap-tools .endif WMAKE_TGTS+= _cleanobj _obj _build-tools _cross-tools WMAKE_TGTS+= _includes _libraries _depend everything .if defined(LIB32TMP) && ${MK_LIB32} != "no" && empty(SUBDIR_OVERRIDE) WMAKE_TGTS+= build32 .endif buildworld: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue .ORDER: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue buildworld_prologue: @echo "--------------------------------------------------------------" @echo ">>> World build started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" buildworld_epilogue: @echo @echo "--------------------------------------------------------------" @echo ">>> World build completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" # # We need to have this as a target because the indirection between Makefile # and Makefile.inc1 causes the correct PATH to be used, rather than a # modification of the current environment's PATH. In addition, we need # to quote multiword values. # buildenvvars: .PHONY @echo ${WMAKEENV:Q} ${.MAKE.EXPORTED:@v@$v=\"${$v}\"@} .if ${.TARGETS:Mbuildenv} .if ${.MAKEFLAGS:M-j} .error The buildenv target is incompatible with -j .endif .endif BUILDENV_DIR?= ${.CURDIR} buildenv: .PHONY @echo Entering world for ${TARGET_ARCH}:${TARGET} .if ${BUILDENV_SHELL:M*zsh*} @echo For ZSH you must run: export CPUTYPE=${TARGET_CPUTYPE} .endif @cd ${BUILDENV_DIR} && env ${WMAKEENV} BUILDENV=1 ${BUILDENV_SHELL} \ || true TOOLCHAIN_TGTS= ${WMAKE_TGTS:N_depend:Neverything:Nbuild32} toolchain: ${TOOLCHAIN_TGTS} kernel-toolchain: ${TOOLCHAIN_TGTS:N_includes:N_libraries} # # installcheck # # Checks to be sure system is ready for installworld/installkernel. # installcheck: _installcheck_world _installcheck_kernel _installcheck_world: _installcheck_kernel: # # Require DESTDIR to be set if installing for a different architecture or # using the user/group database in the source tree. # .if ${TARGET_ARCH} != ${MACHINE_ARCH} || ${TARGET} != ${MACHINE} || \ defined(DB_FROM_SRC) .if !make(distributeworld) _installcheck_world: __installcheck_DESTDIR _installcheck_kernel: __installcheck_DESTDIR __installcheck_DESTDIR: .if !defined(DESTDIR) || empty(DESTDIR) @echo "ERROR: Please set DESTDIR!"; \ false .endif .endif .endif .if !defined(DB_FROM_SRC) # # Check for missing UIDs/GIDs. # CHECK_UIDS= auditdistd CHECK_GIDS= audit .if ${MK_SENDMAIL} != "no" CHECK_UIDS+= smmsp CHECK_GIDS+= smmsp .endif .if ${MK_PF} != "no" CHECK_UIDS+= proxy CHECK_GIDS+= proxy authpf .endif .if ${MK_UNBOUND} != "no" CHECK_UIDS+= unbound CHECK_GIDS+= unbound .endif _installcheck_world: __installcheck_UGID __installcheck_UGID: .for uid in ${CHECK_UIDS} @if ! `id -u ${uid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${uid} user is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .for gid in ${CHECK_GIDS} @if ! `find / -prune -group ${gid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${gid} group is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .endif # # Required install tools to be saved in a scratch dir for safety. # .if ${MK_ZONEINFO} != "no" _zoneinfo= zic tzsetup .endif ITOOLS= [ awk cap_mkdb cat chflags chmod chown cmp cp \ date echo egrep find grep id install ${_install-info} \ ln make mkdir mtree mv pwd_mkdb \ rm sed services_mkdb sh strip sysctl test true uname wc ${_zoneinfo} \ ${LOCAL_ITOOLS} # Needed for share/man .if ${MK_MAN} != "no" ITOOLS+=makewhatis .endif # # distributeworld # # Distributes everything compiled by a `buildworld'. # # installworld # # Installs everything compiled by a 'buildworld'. # # Non-base distributions produced by the base system EXTRA_DISTRIBUTIONS= doc .if defined(LIB32TMP) && ${MK_LIB32} != "no" EXTRA_DISTRIBUTIONS+= lib32 .endif .if ${MK_TESTS} != "no" EXTRA_DISTRIBUTIONS+= tests .endif DEBUG_DISTRIBUTIONS= .if ${MK_DEBUG_FILES} != "no" DEBUG_DISTRIBUTIONS+= base ${EXTRA_DISTRIBUTIONS:S,doc,,:S,tests,,} .endif MTREE_MAGIC?= mtree 2.0 distributeworld installworld: _installcheck_world mkdir -p ${INSTALLTMP} progs=$$(for prog in ${ITOOLS}; do \ if progpath=`which $$prog`; then \ echo $$progpath; \ else \ echo "Required tool $$prog not found in PATH." >&2; \ exit 1; \ fi; \ done); \ libs=$$(ldd -f "%o %p\n" -f "%o %p\n" $$progs 2>/dev/null | sort -u | \ while read line; do \ set -- $$line; \ if [ "$$2 $$3" != "not found" ]; then \ echo $$2; \ else \ echo "Required library $$1 not found." >&2; \ exit 1; \ fi; \ done); \ cp $$libs $$progs ${INSTALLTMP} cp -R $${PATH_LOCALE:-"/usr/share/locale"} ${INSTALLTMP}/locale .if defined(NO_ROOT) echo "#${MTREE_MAGIC}" > ${METALOG} .endif .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} -mkdir ${DESTDIR}/${DISTDIR}/${dist} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${DESTDIR}/${DISTDIR}/${dist} >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/include >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib >/dev/null .endif .if ${MK_LIB32} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/usr >/dev/null .endif .endif .if ${MK_TESTS} != "no" && ${dist} == "tests" -mkdir -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/${TESTSBASE} >/dev/null .endif .endif .if defined(NO_ROOT) ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.root.dist | \ sed -e 's#^\./#./${dist}/#' >> ${METALOG} ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.usr.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.include.dist | \ sed -e 's#^\./#./${dist}/usr/include/#' >> ${METALOG} .if ${MK_LIB32} != "no" ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.lib32.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} .endif .endif .endfor -mkdir ${DESTDIR}/${DISTDIR}/base ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ METALOG=${METALOG} ${IMAKE_INSTALL} ${IMAKE_MTREE} \ DISTBASE=/base DESTDIR=${DESTDIR}/${DISTDIR}/base \ LOCAL_MTREE=${LOCAL_MTREE:Q} distrib-dirs .endif ${_+_}cd ${.CURDIR}; ${IMAKE} re${.TARGET:S/world$//}; \ ${IMAKEENV} rm -rf ${INSTALLTMP} .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} find ${DESTDIR}/${DISTDIR}/${dist} -mindepth 1 -empty -delete .endfor .if defined(NO_ROOT) .for dist in base ${EXTRA_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediatly before @# the relevant mtree line. cd ${DESTDIR}/${DISTDIR}; \ find ./${dist} | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.meta .endfor .for dist in ${DEBUG_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediatly before @# the relevant mtree line. cd ${DESTDIR}/${DISTDIR}; \ find ./${dist}/usr/lib/debug | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.debug.meta .endfor .endif .endif packageworld: .for dist in base ${EXTRA_DISTRIBUTIONS} .if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug \ @${DESTDIR}/${DISTDIR}/${dist}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug . | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .endif .endfor .for dist in ${DEBUG_DISTRIBUTIONS} . if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - @${DESTDIR}/${DISTDIR}/${dist}.debug.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvLf - usr/lib/debug | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . endif .endfor # # reinstall # # If you have a build server, you can NFS mount the source and obj directories # and do a 'make reinstall' on the *client* to install new binaries from the # most recent server build. # reinstall: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Making hierarchy" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} hierarchy @echo @echo "--------------------------------------------------------------" @echo ">>> Installing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install .if defined(LIB32TMP) && ${MK_LIB32} != "no" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install32 .endif redistribute: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Distributing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute .if defined(LIB32TMP) && ${MK_LIB32} != "no" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute32 \ DISTRIBUTION=lib32 .endif distrib-dirs: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ ${IMAKE_INSTALL} ${IMAKE_MTREE} METALOG=${METALOG} ${.TARGET} distribution: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ ${IMAKE_INSTALL} ${IMAKE_MTREE} METALOG=${METALOG} ${.TARGET} ${_+_}cd ${.CURDIR}; ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} -f Makefile.inc1 ${IMAKE_INSTALL} \ METALOG=${METALOG} installconfig # # buildkernel and installkernel # # Which kernels to build and/or install is specified by setting # KERNCONF. If not defined a GENERIC kernel is built/installed. # Only the existing (depending TARGET) config files are used # for building kernels and only the first of these is designated # as the one being installed. # # Note that we have to use TARGET instead of TARGET_ARCH when # we're in kernel-land. Since only TARGET_ARCH is (expected) to # be set to cross-build, we have to make sure TARGET is set # properly. .if defined(KERNFAST) NO_KERNELCLEAN= t NO_KERNELCONFIG= t NO_KERNELDEPEND= t NO_KERNELOBJ= t # Shortcut for KERNCONF=Blah -DKERNFAST is now KERNFAST=Blah .if !defined(KERNCONF) && ${KERNFAST} != "1" KERNCONF=${KERNFAST} .endif .endif .if ${TARGET_ARCH} == "powerpc64" KERNCONF?= GENERIC64 .else KERNCONF?= GENERIC .endif INSTKERNNAME?= kernel KERNSRCDIR?= ${.CURDIR}/sys KRNLCONFDIR= ${KERNSRCDIR}/${TARGET}/conf KRNLOBJDIR= ${OBJTREE}${KERNSRCDIR} KERNCONFDIR?= ${KRNLCONFDIR} BUILDKERNELS= INSTALLKERNEL= .if defined(NO_INSTALLKERNEL) # All of the BUILDKERNELS loops start at index 1. BUILDKERNELS+= dummy .endif .for _kernel in ${KERNCONF} .if exists(${KERNCONFDIR}/${_kernel}) BUILDKERNELS+= ${_kernel} .if empty(INSTALLKERNEL) && !defined(NO_INSTALLKERNEL) INSTALLKERNEL= ${_kernel} .endif .endif .endfor ${WMAKE_TGTS:N_worldtmp:Nbuild32} ${.ALLTARGETS:M_*:N_worldtmp}: .MAKE .PHONY # # buildkernel # # Builds all kernels defined by BUILDKERNELS. # buildkernel: .MAKE .PHONY .if empty(BUILDKERNELS:Ndummy) @echo "ERROR: Missing kernel configuration file(s) (${KERNCONF})."; \ false .endif @echo .for _kernel in ${BUILDKERNELS:Ndummy} @echo "--------------------------------------------------------------" @echo ">>> Kernel build for ${_kernel} started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" @echo "===> ${_kernel}" mkdir -p ${KRNLOBJDIR} .if !defined(NO_KERNELCONFIG) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1: configuring the kernel" @echo "--------------------------------------------------------------" cd ${KRNLCONFDIR}; \ PATH=${TMPPATH} \ config ${CONFIGARGS} -d ${KRNLOBJDIR}/${_kernel} \ -I '${KERNCONFDIR}' '${KERNCONFDIR}/${_kernel}' .endif .if !defined(NO_CLEAN) && !defined(NO_KERNELCLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} ${CLEANDIR} .endif .if !defined(NO_KERNELOBJ) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} obj .endif @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${KTMAKE} kernel-tools .if !defined(NO_KERNELDEPEND) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3.1: making dependencies" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} depend -DNO_MODULES_OBJ .endif @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3.2: building everything" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} all -DNO_MODULES_OBJ @echo "--------------------------------------------------------------" @echo ">>> Kernel build for ${_kernel} completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" .endfor # # installkernel, etc. # # Install the kernel defined by INSTALLKERNEL # installkernel installkernel.debug \ reinstallkernel reinstallkernel.debug: _installcheck_kernel .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${INSTALLKERNEL}" @echo "--------------------------------------------------------------" cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME} ${.TARGET:S/kernel//} .endif .if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS) .for _kernel in ${BUILDKERNELS:[2..-1]} @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${_kernel}" @echo "--------------------------------------------------------------" cd ${KRNLOBJDIR}/${_kernel}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME}.${_kernel} ${.TARGET:S/kernel//} .endfor .endif distributekernel distributekernel.debug: .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif mkdir -p ${DESTDIR}/${DISTDIR} .if defined(NO_ROOT) echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.premeta .endif cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} KERNEL=${INSTKERNNAME} \ DESTDIR=${INSTALL_DDIR}/kernel \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) sed -e 's|^./kernel|.|' ${DESTDIR}/${DISTDIR}/kernel.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.meta .endif .endif .if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS) .for _kernel in ${BUILDKERNELS:[2..-1]} .if defined(NO_ROOT) echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta .endif cd ${KRNLOBJDIR}/${_kernel}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.${_kernel}.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} \ KERNEL=${INSTKERNNAME}.${_kernel} \ DESTDIR=${INSTALL_DDIR}/kernel.${_kernel} \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) sed -e 's|^./kernel|.|' \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta .endif .endfor .endif packagekernel: .if defined(NO_ROOT) .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --exclude '*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS) .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endfor .endif .else .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' $$(eval find .) | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS) .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --include '*/*/*.debug' $$(eval find .) | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endfor .endif .endif # # doxygen # # Build the API documentation with doxygen # doxygen: .PHONY @if [ ! -x ${LOCALBASE}/bin/doxygen ]; then \ echo "You need doxygen (devel/doxygen) to generate the API documentation of the kernel." | /usr/bin/fmt; \ exit 1; \ fi ${_+_}cd ${.CURDIR}/tools/kerneldoc/subsys; ${MAKE} obj all # # update # # Update the source tree(s), by running svn/svnup to update to the # latest copy. # update: .if (defined(CVS_UPDATE) || defined(SUP_UPDATE)) && !defined(SVN_UPDATE) @echo "--------------------------------------------------------------" @echo "CVS_UPDATE and SUP_UPDATE are no longer supported." @echo "Please see: https://wiki.freebsd.org/CvsIsDeprecated" @echo "--------------------------------------------------------------" @exit 1 .endif .if defined(SVN_UPDATE) @echo "--------------------------------------------------------------" @echo ">>> Updating ${.CURDIR} using Subversion" @echo "--------------------------------------------------------------" @(cd ${.CURDIR}; ${SVN} update ${SVNFLAGS}) .endif # # ------------------------------------------------------------------------ # # From here onwards are utility targets used by the 'make world' and # related targets. If your 'world' breaks, you may like to try to fix # the problem and manually run the following targets to attempt to # complete the build. Beware, this is *not* guaranteed to work, you # need to have a pretty good grip on the current state of the system # to attempt to manually finish it. If in doubt, 'make world' again. # # # legacy: Build compatibility shims for the next three targets. This is a # minimal set of tools and shims necessary to compensate for older systems # which don't have the APIs required by the targets built in bootstrap-tools, # build-tools or cross-tools. # # ELF Tool Chain libraries are needed for ELF tools and dtrace tools. .if ${BOOTSTRAPPING} < 1100006 _elftoolchain_libs= lib/libelf lib/libdwarf .endif legacy: .if ${BOOTSTRAPPING} < 800107 && ${BOOTSTRAPPING} != 0 @echo "ERROR: Source upgrades from versions prior to 8.0 are not supported."; \ false .endif .for _tool in tools/build ${_elftoolchain_libs} ${_+_}@${ECHODIR} "===> ${_tool} (obj,includes,depend,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy includes; \ ${MAKE} DIRPRFX=${_tool}/ depend; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install .endfor # # bootstrap-tools: Build tools needed for compatibility. These are binaries that # are built to build other binaries in the system. However, the focus of these # binaries is usually quite narrow. Bootstrap tools use the host's compiler and # libraries, augmented by -legacy. # _bt= _bootstrap-tools .if ${MK_GAMES} != "no" _strfile= usr.bin/fortune/strfile .endif .if ${MK_GCC} != "no" && ${MK_CXX} != "no" _gperf= gnu/usr.bin/gperf .endif .if ${MK_GROFF} != "no" _groff= gnu/usr.bin/groff \ usr.bin/soelim .endif .if ${MK_VT} != "no" _vtfontcvt= usr.bin/vtfontcvt .endif .if ${BOOTSTRAPPING} < 900002 _sed= usr.bin/sed .endif -.if ${BOOTSTRAPPING} < 1000002 +.if ${BOOTSTRAPPING} < 1000033 _libopenbsd= lib/libopenbsd _m4= usr.bin/m4 +_lex= usr.bin/lex ${_bt}-usr.bin/m4: ${_bt}-lib/libopenbsd +${_bt}-usr.bin/lex: ${_bt}-usr.bin/m4 .endif .if ${BOOTSTRAPPING} < 1000026 _nmtree= lib/libnetbsd \ usr.sbin/nmtree ${_bt}-usr.sbin/nmtree: ${_bt}-lib/libnetbsd .endif .if ${BOOTSTRAPPING} < 1000027 _cat= bin/cat -.endif - -.if ${BOOTSTRAPPING} < 1000033 -_lex= usr.bin/lex - -${_bt}-usr.bin/lex: ${_bt}-usr.bin/m4 .endif # r277259 crunchide: Correct 64-bit section header offset # r281674 crunchide: always include both 32- and 64-bit ELF support # r285986 crunchen: use STRIPBIN rather than STRIP .if ${BOOTSTRAPPING} < 1100078 _crunch= usr.sbin/crunch .endif .if ${BOOTSTRAPPING} >= 900040 && ${BOOTSTRAPPING} < 900041 _awk= usr.bin/awk .endif _yacc= lib/liby \ usr.bin/yacc ${_bt}-usr.bin/yacc: ${_bt}-lib/liby .if ${MK_BSNMP} != "no" _gensnmptree= usr.sbin/bsnmpd/gensnmptree .endif # We need to build tblgen when we're building clang either as # the bootstrap compiler, or as the part of the normal build. .if ${MK_CLANG_BOOTSTRAP} != "no" || ${MK_CLANG} != "no" _clang_tblgen= \ lib/clang/libllvmsupport \ lib/clang/libllvmtablegen \ usr.bin/clang/llvm-tblgen \ usr.bin/clang/clang-tblgen ${_bt}-usr.bin/clang/clang-tblgen: ${_bt}-lib/clang/libllvmtablegen ${_bt}-lib/clang/libllvmsupport ${_bt}-usr.bin/clang/llvm-tblgen: ${_bt}-lib/clang/libllvmtablegen ${_bt}-lib/clang/libllvmsupport .endif # Default to building the GPL DTC, but build the BSDL one if users explicitly # request it. _dtc= usr.bin/dtc .if ${MK_GPL_DTC} != "no" _dtc= gnu/usr.bin/dtc .endif .if ${MK_KERBEROS} != "no" _kerberos5_bootstrap_tools= \ kerberos5/tools/make-roken \ kerberos5/lib/libroken \ kerberos5/lib/libvers \ kerberos5/tools/asn1_compile \ kerberos5/tools/slc \ usr.bin/compile_et .ORDER: ${_kerberos5_bootstrap_tools:C/^/${_bt}-/g} .endif .if ${MK_MANDOCDB} != "no" _libopenbsd?= lib/libopenbsd _makewhatis= lib/libsqlite3 \ usr.bin/mandoc ${_bt}-usr.bin/mandoc: ${_bt}-lib/libopenbsd ${_bt}-lib/libsqlite3 .else _makewhatis=usr.bin/makewhatis .endif bootstrap-tools: .PHONY # Please document (add comment) why something is in 'bootstrap-tools'. # Try to bound the building of the bootstrap-tool to just the # FreeBSD versions that need the tool built at this stage of the build. .for _tool in \ ${_clang_tblgen} \ ${_kerberos5_bootstrap_tools} \ ${_strfile} \ ${_gperf} \ ${_groff} \ ${_dtc} \ ${_awk} \ ${_cat} \ usr.bin/lorder \ ${_libopenbsd} \ ${_makewhatis} \ usr.bin/rpcgen \ ${_sed} \ ${_yacc} \ ${_m4} \ ${_lex} \ usr.bin/xinstall \ ${_gensnmptree} \ usr.sbin/config \ ${_crunch} \ ${_nmtree} \ ${_vtfontcvt} \ usr.bin/localedef ${_bt}-${_tool}: .PHONY .MAKE ${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ depend; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install bootstrap-tools: ${_bt}-${_tool} .endfor # # build-tools: Build special purpose build tools # .if !defined(NO_SHARE) _share= share/syscons/scrnmaps .endif .if ${MK_GCC} != "no" _gcc_tools= gnu/usr.bin/cc/cc_tools .endif .if ${MK_RESCUE} != "no" # rescue includes programs that have build-tools targets _rescue=rescue/rescue .endif .for _tool in \ bin/csh \ bin/sh \ ${LOCAL_TOOL_DIRS} \ lib/ncurses/ncurses \ lib/ncurses/ncursesw \ ${_rescue} \ ${_share} \ usr.bin/awk \ lib/libmagic \ usr.bin/mkesdb_static \ usr.bin/mkcsmapper_static \ usr.bin/vi/catalog build-tools_${_tool}: .PHONY ${_+_}@${ECHODIR} "===> ${_tool} (obj,build-tools)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ build-tools build-tools: build-tools_${_tool} .endfor .for _tool in \ ${_gcc_tools} build-tools_${_tool}: .PHONY ${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ depend; \ ${MAKE} DIRPRFX=${_tool}/ all build-tools: build-tools_${_tool} .endfor # # kernel-tools: Build kernel-building tools # kernel-tools: mkdir -p ${MAKEOBJDIRPREFIX}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${MAKEOBJDIRPREFIX}/usr >/dev/null # # cross-tools: All the tools needed to build the rest of the system after # we get done with the earlier stages. It is the last set of tools needed # to begin building the target binaries. # .if ${TARGET_ARCH} != ${MACHINE_ARCH} .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386" _btxld= usr.sbin/btxld .endif .endif # Rebuild ctfconvert and ctfmerge to avoid difficult-to-diagnose failures # resulting from missing bug fixes or ELF Toolchain updates. .if ${MK_CDDL} != "no" _dtrace_tools= cddl/lib/libctf cddl/usr.bin/ctfconvert \ cddl/usr.bin/ctfmerge .endif # If we're given an XAS, don't build binutils. .if ${XAS:M/*} == "" .if ${MK_BINUTILS_BOOTSTRAP} != "no" _binutils= gnu/usr.bin/binutils .endif .if ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" _elftctools= lib/libelftc \ usr.bin/elfcopy \ usr.bin/nm \ usr.bin/size \ usr.bin/strings # These are not required by the build, but can be useful for developers who # cross-build on a FreeBSD 10 host: _elftctools+= usr.bin/addr2line .endif .elif ${TARGET_ARCH} != ${MACHINE_ARCH} && ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" # If cross-building with an external binutils we still need to build strip for # the target (for at least crunchide). _elftctools= lib/libelftc \ usr.bin/elfcopy .endif # If an full path to an external cross compiler is given, don't build # a cross compiler. .if ${XCC:N${CCACHE_BIN}:M/*} == "" && ${MK_CROSS_COMPILER} != "no" .if ${MK_CLANG_BOOTSTRAP} != "no" _clang= usr.bin/clang _clang_libs= lib/clang .endif .if ${MK_GCC_BOOTSTRAP} != "no" _cc= gnu/usr.bin/cc .endif .endif .if ${MK_USB} != "no" _usb_tools= sys/boot/usb/tools .endif cross-tools: .MAKE .PHONY .for _tool in \ ${_clang_libs} \ ${_clang} \ ${_binutils} \ ${_elftctools} \ ${_dtrace_tools} \ ${_cc} \ ${_btxld} \ ${_crunchide} \ ${_usb_tools} ${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ depend; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX} install .endfor NXBDESTDIR= ${OBJTREE}/nxb-bin NXBENV= MAKEOBJDIRPREFIX=${OBJTREE}/nxb \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${PATH}:${OBJTREE}/gperf_for_gcc/usr/bin NXBMAKE= ${NXBENV} ${MAKE} \ LLVM_TBLGEN=${NXBDESTDIR}/usr/bin/llvm-tblgen \ CLANG_TBLGEN=${NXBDESTDIR}/usr/bin/clang-tblgen \ MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} \ MK_GDB=no MK_TESTS=no \ SSP_CFLAGS= \ MK_HTML=no NO_LINT=yes MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_DEBUG_FILES=no # native-xtools is the current target for qemu-user cross builds of ports # via poudriere and the imgact_binmisc kernel module. # For non-clang enabled targets that are still using the in tree gcc # we must build a gperf binary for one instance of its Makefiles. On # clang-enabled systems, the gperf binary is obsolete. native-xtools: .PHONY .if ${MK_GCC_BOOTSTRAP} != "no" mkdir -p ${OBJTREE}/gperf_for_gcc/usr/bin ${_+_}@${ECHODIR} "===> ${_gperf} (obj,depend,all,install)"; \ cd ${.CURDIR}/${_gperf}; \ ${NXBMAKE} DIRPRFX=${_gperf}/ obj; \ ${NXBMAKE} DIRPRFX=${_gperf}/ depend; \ ${NXBMAKE} DIRPRFX=${_gperf}/ all; \ ${NXBMAKE} DIRPRFX=${_gperf}/ DESTDIR=${OBJTREE}/gperf_for_gcc install .endif mkdir -p ${NXBDESTDIR}/bin ${NXBDESTDIR}/sbin ${NXBDESTDIR}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${NXBDESTDIR}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${NXBDESTDIR}/usr/include >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${NXBDESTDIR}/usr/lib >/dev/null .endif .for _tool in \ bin/cat \ bin/chmod \ bin/cp \ bin/csh \ bin/echo \ bin/expr \ bin/hostname \ bin/ln \ bin/ls \ bin/mkdir \ bin/mv \ bin/ps \ bin/realpath \ bin/rm \ bin/rmdir \ bin/sh \ bin/sleep \ ${_clang_tblgen} \ usr.bin/ar \ ${_binutils} \ ${_elftctools} \ ${_cc} \ ${_gcc_tools} \ ${_clang_libs} \ ${_clang} \ sbin/md5 \ sbin/sysctl \ gnu/usr.bin/diff \ usr.bin/awk \ usr.bin/basename \ usr.bin/bmake \ usr.bin/bzip2 \ usr.bin/cmp \ usr.bin/dirname \ usr.bin/env \ usr.bin/fetch \ usr.bin/find \ usr.bin/grep \ usr.bin/gzip \ usr.bin/id \ usr.bin/lex \ usr.bin/lorder \ usr.bin/mktemp \ usr.bin/mt \ usr.bin/patch \ usr.bin/sed \ usr.bin/sort \ usr.bin/tar \ usr.bin/touch \ usr.bin/tr \ usr.bin/true \ usr.bin/uniq \ usr.bin/unzip \ usr.bin/xargs \ usr.bin/xinstall \ usr.bin/xz \ usr.bin/yacc \ usr.sbin/chown ${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${NXBMAKE} DIRPRFX=${_tool}/ obj; \ ${NXBMAKE} DIRPRFX=${_tool}/ depend; \ ${NXBMAKE} DIRPRFX=${_tool}/ all; \ ${NXBMAKE} DIRPRFX=${_tool}/ DESTDIR=${NXBDESTDIR} install .endfor # # hierarchy - ensure that all the needed directories are present # hierarchy hier: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${HMAKE} distrib-dirs # # libraries - build all libraries, and install them under ${DESTDIR}. # # The list of libraries with dependents (${_prebuild_libs}) and their # interdependencies (__L) are built automatically by the # ${.CURDIR}/tools/make_libdeps.sh script. # libraries: .MAKE .PHONY ${_+_}cd ${.CURDIR}; \ ${MAKE} -f Makefile.inc1 _prereq_libs; \ ${MAKE} -f Makefile.inc1 _startup_libs; \ ${MAKE} -f Makefile.inc1 _prebuild_libs; \ ${MAKE} -f Makefile.inc1 _generic_libs # # static libgcc.a prerequisite for shared libc # _prereq_libs= gnu/lib/libssp/libssp_nonshared gnu/lib/libgcc lib/libcompiler_rt # These dependencies are not automatically generated: # # gnu/lib/csu, gnu/lib/libgcc, lib/csu and lib/libc must be built before # all shared libraries for ELF. # _startup_libs= gnu/lib/csu _startup_libs+= lib/csu _startup_libs+= gnu/lib/libgcc _startup_libs+= lib/libcompiler_rt _startup_libs+= lib/libc _startup_libs+= lib/libc_nonshared .if ${MK_LIBCPLUSPLUS} != "no" _startup_libs+= lib/libcxxrt .endif gnu/lib/libgcc__L: lib/libc__L gnu/lib/libgcc__L: lib/libc_nonshared__L .if ${MK_LIBCPLUSPLUS} != "no" lib/libcxxrt__L: gnu/lib/libgcc__L .endif _prebuild_libs= ${_kerberos5_lib_libasn1} \ ${_kerberos5_lib_libhdb} \ ${_kerberos5_lib_libheimbase} \ ${_kerberos5_lib_libheimntlm} \ ${_libsqlite3} \ ${_kerberos5_lib_libheimipcc} \ ${_kerberos5_lib_libhx509} ${_kerberos5_lib_libkrb5} \ ${_kerberos5_lib_libroken} \ ${_kerberos5_lib_libwind} \ lib/libbz2 ${_libcom_err} lib/libcrypt \ lib/libelf lib/libexpat \ lib/libfigpar \ ${_lib_libgssapi} \ lib/libkiconv lib/libkvm lib/liblzma lib/libmd lib/libnv \ ${_lib_libcapsicum} \ lib/ncurses/ncurses lib/ncurses/ncursesw \ lib/libopie lib/libpam ${_lib_libthr} \ ${_lib_libradius} lib/libsbuf lib/libtacplus \ lib/libgeom \ ${_cddl_lib_libumem} ${_cddl_lib_libnvpair} \ ${_cddl_lib_libuutil} \ ${_cddl_lib_libavl} \ ${_cddl_lib_libzfs_core} \ ${_cddl_lib_libctf} \ lib/libutil lib/libpjdlog ${_lib_libypclnt} lib/libz lib/msun \ ${_secure_lib_libcrypto} ${_lib_libldns} \ ${_secure_lib_libssh} ${_secure_lib_libssl} \ gnu/lib/libdialog .if ${MK_GNUCXX} != "no" _prebuild_libs+= gnu/lib/libstdc++ gnu/lib/libsupc++ gnu/lib/libstdc++__L: lib/msun__L gnu/lib/libsupc++__L: gnu/lib/libstdc++__L .endif .if ${MK_LIBCPLUSPLUS} != "no" _prebuild_libs+= lib/libc++ .endif lib/libgeom__L: lib/libexpat__L lib/libkvm__L: lib/libelf__L .if ${MK_LIBTHR} != "no" _lib_libthr= lib/libthr .endif .if ${MK_RADIUS_SUPPORT} != "no" _lib_libradius= lib/libradius .endif .if ${MK_OFED} != "no" _ofed_lib= contrib/ofed/usr.lib/ .endif .if ${MK_CASPER} != "no" _lib_libcapsicum=lib/libcapsicum .endif lib/libcapsicum__L: lib/libnv__L lib/libpjdlog__L: lib/libutil__L lib/liblzma__L: lib/libthr__L _generic_libs= ${_cddl_lib} gnu/lib ${_kerberos5_lib} lib ${_secure_lib} usr.bin/lex/lib ${_ofed_lib} .for _DIR in ${LOCAL_LIB_DIRS} .if exists(${.CURDIR}/${_DIR}/Makefile) _generic_libs+= ${_DIR} .endif .endfor lib/libopie__L lib/libtacplus__L: lib/libmd__L .if ${MK_CDDL} != "no" _cddl_lib_libumem= cddl/lib/libumem _cddl_lib_libnvpair= cddl/lib/libnvpair _cddl_lib_libavl= cddl/lib/libavl _cddl_lib_libuutil= cddl/lib/libuutil _cddl_lib_libzfs_core= cddl/lib/libzfs_core _cddl_lib_libctf= cddl/lib/libctf _cddl_lib= cddl/lib cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L cddl/lib/libzfs__L: lib/libgeom__L cddl/lib/libctf__L: lib/libz__L .endif # cddl/lib/libdtrace requires lib/libproc and lib/librtld_db; it's only built # on select architectures though (see cddl/lib/Makefile) .if ${MACHINE_CPUARCH} != "sparc64" _prebuild_libs+= lib/libproc lib/librtld_db .endif .if ${MK_CRYPT} != "no" .if ${MK_OPENSSL} != "no" _secure_lib_libcrypto= secure/lib/libcrypto _secure_lib_libssl= secure/lib/libssl lib/libradius__L secure/lib/libssl__L: secure/lib/libcrypto__L .if ${MK_LDNS} != "no" _lib_libldns= lib/libldns lib/libldns__L: secure/lib/libcrypto__L .endif .if ${MK_OPENSSH} != "no" _secure_lib_libssh= secure/lib/libssh secure/lib/libssh__L: lib/libz__L secure/lib/libcrypto__L lib/libcrypt__L .if ${MK_LDNS} != "no" secure/lib/libssh__L: lib/libldns__L .endif .if ${MK_KERBEROS_SUPPORT} != "no" secure/lib/libssh__L: lib/libgssapi__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libhx509__L kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libmd__L kerberos5/lib/libroken__L .endif .endif .endif _secure_lib= secure/lib .endif .if ${MK_KERBEROS} != "no" kerberos5/lib/libasn1__L: lib/libcom_err__L kerberos5/lib/libroken__L kerberos5/lib/libhdb__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ kerberos5/lib/libkrb5__L kerberos5/lib/libroken__L \ kerberos5/lib/libwind__L lib/libsqlite3__L kerberos5/lib/libheimntlm__L: secure/lib/libcrypto__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libhx509__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ secure/lib/libcrypto__L kerberos5/lib/libroken__L kerberos5/lib/libwind__L kerberos5/lib/libkrb5__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libcrypt__L secure/lib/libcrypto__L kerberos5/lib/libhx509__L \ kerberos5/lib/libroken__L kerberos5/lib/libwind__L \ kerberos5/lib/libheimbase__L kerberos5/lib/libheimipcc__L kerberos5/lib/libroken__L: lib/libcrypt__L kerberos5/lib/libwind__L: kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libheimbase__L: lib/libthr__L kerberos5/lib/libheimipcc__L: kerberos5/lib/libroken__L kerberos5/lib/libheimbase__L lib/libthr__L .endif lib/libsqlite3__L: lib/libthr__L .if ${MK_GSSAPI} != "no" _lib_libgssapi= lib/libgssapi .endif .if ${MK_KERBEROS} != "no" _kerberos5_lib= kerberos5/lib _kerberos5_lib_libasn1= kerberos5/lib/libasn1 _kerberos5_lib_libhdb= kerberos5/lib/libhdb _kerberos5_lib_libheimbase= kerberos5/lib/libheimbase _kerberos5_lib_libkrb5= kerberos5/lib/libkrb5 _kerberos5_lib_libhx509= kerberos5/lib/libhx509 _kerberos5_lib_libroken= kerberos5/lib/libroken _kerberos5_lib_libheimntlm= kerberos5/lib/libheimntlm _libsqlite3= lib/libsqlite3 _kerberos5_lib_libheimipcc= kerberos5/lib/libheimipcc _kerberos5_lib_libwind= kerberos5/lib/libwind _libcom_err= lib/libcom_err .endif .if ${MK_NIS} != "no" _lib_libypclnt= lib/libypclnt .endif .if ${MK_OPENSSL} == "no" lib/libradius__L: lib/libmd__L .endif lib/libproc__L: \ ${_cddl_lib_libctf:D${_cddl_lib_libctf}__L} lib/libelf__L lib/librtld_db__L lib/libutil__L .if ${MK_CXX} != "no" .if ${MK_LIBCPLUSPLUS} != "no" lib/libproc__L: lib/libcxxrt__L .else # This implies MK_GNUCXX != "no"; see lib/libproc lib/libproc__L: gnu/lib/libsupc++__L .endif .endif gnu/lib/libdialog__L: lib/msun__L lib/ncurses/ncursesw__L .for _lib in ${_prereq_libs} ${_lib}__PL: .PHONY .MAKE .if exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,depend,all,install)"; \ cd ${.CURDIR}/${_lib}; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ depend; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ install .endif .endfor .for _lib in ${_startup_libs} ${_prebuild_libs:Nlib/libpam} ${_generic_libs} ${_lib}__L: .PHONY .MAKE .if exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,depend,all,install)"; \ cd ${.CURDIR}/${_lib}; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ depend; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ install .endif .endfor # libpam is special: we need to build static PAM modules before # static PAM library, and dynamic PAM library before dynamic PAM # modules. lib/libpam__L: .PHONY .MAKE ${_+_}@${ECHODIR} "===> lib/libpam (obj,depend,all,install)"; \ cd ${.CURDIR}/lib/libpam; \ ${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ obj; \ ${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ depend; \ ${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ \ -D_NO_LIBPAM_SO_YET all; \ ${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ \ -D_NO_LIBPAM_SO_YET install _prereq_libs: ${_prereq_libs:S/$/__PL/} _startup_libs: ${_startup_libs:S/$/__L/} _prebuild_libs: ${_prebuild_libs:S/$/__L/} _generic_libs: ${_generic_libs:S/$/__L/} # Enable SUBDIR_PARALLEL when not calling 'make all', unless called from # 'everything' with _PARALLEL_SUBDIR_OK set. This is because it is unlikely # that running 'make all' from the top-level, especially with a SUBDIR_OVERRIDE # or LOCAL_DIRS set, will have a reliable build if SUBDIRs are built in # parallel. This is safe for the world stage of buildworld though since it has # already built libraries in a proper order and installed includes into # WORLDTMP. Special handling is done for SUBDIR ordering for 'install*' to # avoid trashing a system if it crashes mid-install. .if !make(all) || defined(_PARALLEL_SUBDIR_OK) SUBDIR_PARALLEL= .endif .include .if make(check-old) || make(check-old-dirs) || \ make(check-old-files) || make(check-old-libs) || \ make(delete-old) || make(delete-old-dirs) || \ make(delete-old-files) || make(delete-old-libs) # # check for / delete old files section # .include "ObsoleteFiles.inc" OLD_LIBS_MESSAGE="Please be sure no application still uses those libraries, \ else you can not start such an application. Consult UPDATING for more \ information regarding how to cope with the removal/revision bump of a \ specific library." .if !defined(BATCH_DELETE_OLD_FILES) RM_I=-i .else RM_I=-v .endif delete-old-files: @echo ">>> Removing old files (only deletes safe to delete libs)" # Ask for every old file if the user really wants to remove it. # It's annoying, but better safe than sorry. # NB: We cannot pass the list of OLD_FILES as a parameter because the # argument list will get too long. Using .for/.endfor make "loops" will make # the Makefile parser segfault. @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done # Remove catpages without corresponding manpages. @exec 3<&0; \ find ${DESTDIR}/usr/share/man/cat* ! -type d | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! -e "$${manpage}" ]; then \ rm ${RM_I} $${catpage} <&3; \ fi; \ done @echo ">>> Old files removed" check-old-files: @echo ">>> Checking for old files" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done # Check for catpages without corresponding manpages. @find ${DESTDIR}/usr/share/man/cat* ! -type d | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! -e "$${manpage}" ]; then \ echo $${catpage}; \ fi; \ done delete-old-libs: @echo ">>> Removing old libraries" @echo "${OLD_LIBS_MESSAGE}" | fmt @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done @echo ">>> Old libraries removed" check-old-libs: @echo ">>> Checking for old libraries" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done delete-old-dirs: @echo ">>> Removing old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | sort -r | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ rmdir -v "${DESTDIR}/$${dir}" || true; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done @echo ">>> Old directories removed" check-old-dirs: @echo ">>> Checking for old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir}"; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done delete-old: delete-old-files delete-old-dirs @echo "To remove old libraries run '${MAKE} delete-old-libs'." check-old: check-old-files check-old-libs check-old-dirs @echo "To remove old files and directories run '${MAKE} delete-old'." @echo "To remove old libraries run '${MAKE} delete-old-libs'." .endif # # showconfig - show build configuration. # showconfig: @(${MAKE} -n -f ${.CURDIR}/sys/conf/kern.opts.mk -V dummy -dg1; \ ${MAKE} -n -f ${.CURDIR}/share/mk/src.opts.mk -V dummy -dg1) 2>&1 | grep ^MK_ | sort -u .if !empty(KRNLOBJDIR) && !empty(KERNCONF) DTBOUTPUTPATH= ${KRNLOBJDIR}/${KERNCONF}/ .if !defined(FDT_DTS_FILE) || empty(FDT_DTS_FILE) .if exists(${KERNCONFDIR}/${KERNCONF}) FDT_DTS_FILE!= awk 'BEGIN {FS="="} /^makeoptions[[:space:]]+FDT_DTS_FILE/ {print $$2}' \ '${KERNCONFDIR}/${KERNCONF}' ; echo .endif .endif .endif .if !defined(DTBOUTPUTPATH) || !exists(${DTBOUTPUTPATH}) DTBOUTPUTPATH= ${.CURDIR} .endif # # Build 'standalone' Device Tree Blob # builddtb: @PATH=${TMPPATH} MACHINE=${TARGET} \ ${.CURDIR}/sys/tools/fdt/make_dtb.sh ${.CURDIR}/sys \ "${FDT_DTS_FILE}" ${DTBOUTPUTPATH} ############### # cleanworld # In the following, the first 'rm' in a series will usually remove all # files and directories. If it does not, then there are probably some # files with file flags set, so this unsets them and tries the 'rm' a # second time. There are situations where this target will be cleaning # some directories via more than one method, but that duplication is # needed to correctly handle all the possible situations. Removing all # files without file flags set in the first 'rm' instance saves time, # because 'chflags' will need to operate on fewer files afterwards. # # It is expected that BW_CANONICALOBJDIR == the CANONICALOBJDIR as would be # created by bsd.obj.mk, except that we don't want to .include that file # in this makefile. # BW_CANONICALOBJDIR:=${OBJTREE}${.CURDIR} cleanworld: .PHONY .if exists(${BW_CANONICALOBJDIR}/) -rm -rf ${BW_CANONICALOBJDIR}/* -chflags -R 0 ${BW_CANONICALOBJDIR} rm -rf ${BW_CANONICALOBJDIR}/* .endif .if ${.CURDIR} == ${.OBJDIR} || ${.CURDIR}/obj == ${.OBJDIR} # To be safe in this case, fall back to a 'make cleandir' ${_+_}@cd ${.CURDIR}; ${MAKE} cleandir .endif .if defined(TARGET) && defined(TARGET_ARCH) .if ${TARGET} == ${MACHINE} && ${TARGET_ARCH} == ${MACHINE_ARCH} XDEV_CPUTYPE?=${CPUTYPE} .else XDEV_CPUTYPE?=${TARGET_CPUTYPE} .endif NOFUN=-DNO_FSCHG MK_HTML=no -DNO_LINT \ MK_MAN=no MK_NLS=no MK_PROFILE=no \ MK_KERBEROS=no MK_RESCUE=no MK_TESTS=no MK_WARNS=no \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ CPUTYPE=${XDEV_CPUTYPE} XDDIR=${TARGET_ARCH}-freebsd XDTP?=/usr/${XDDIR} .if ${XDTP:N/*} .error XDTP variable should be an absolute path .endif CDBENV=MAKEOBJDIRPREFIX=${MAKEOBJDIRPREFIX}/${XDDIR} \ INSTALL="sh ${.CURDIR}/tools/install.sh" CDENV= ${CDBENV} \ TOOLS_PREFIX=${XDTP} CD2CFLAGS=-isystem ${XDDESTDIR}/usr/include -L${XDDESTDIR}/usr/lib \ --sysroot=${XDDESTDIR}/ -B${XDDESTDIR}/usr/libexec \ -B${XDDESTDIR}/usr/bin -B${XDDESTDIR}/usr/lib CD2ENV=${CDENV} CC="${CC} ${CD2CFLAGS}" CXX="${CXX} ${CD2CFLAGS}" \ CPP="${CPP} ${CD2CFLAGS}" \ MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} CDTMP= ${MAKEOBJDIRPREFIX}/${XDDIR}/${.CURDIR}/tmp CDMAKE=${CDENV} PATH=${CDTMP}/usr/bin:${PATH} ${MAKE} ${NOFUN} CD2MAKE=${CD2ENV} PATH=${CDTMP}/usr/bin:${XDDESTDIR}/usr/bin:${PATH} ${MAKE} ${NOFUN} XDDESTDIR=${DESTDIR}/${XDTP} .if !defined(OSREL) OSREL!= uname -r | sed -e 's/[-(].*//' .endif .ORDER: xdev-build xdev-install xdev-links xdev: xdev-build xdev-install .ORDER: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools xdev-build: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools _xb-worldtmp: .PHONY mkdir -p ${CDTMP}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${CDTMP}/usr >/dev/null _xb-bootstrap-tools: .PHONY .for _tool in \ ${_clang_tblgen} \ ${_gperf} ${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ obj; \ ${CDMAKE} DIRPRFX=${_tool}/ depend; \ ${CDMAKE} DIRPRFX=${_tool}/ all; \ ${CDMAKE} DIRPRFX=${_tool}/ DESTDIR=${CDTMP} install .endfor _xb-build-tools: .PHONY ${_+_}@cd ${.CURDIR}; \ ${CDBENV} ${MAKE} -f Makefile.inc1 ${NOFUN} build-tools _xb-cross-tools: .PHONY .for _tool in \ ${_binutils} \ ${_elftctools} \ usr.bin/ar \ ${_clang_libs} \ ${_clang} \ ${_cc} ${_+_}@${ECHODIR} "===> xdev ${_tool} (obj,depend,all)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ obj; \ ${CDMAKE} DIRPRFX=${_tool}/ depend; \ ${CDMAKE} DIRPRFX=${_tool}/ all .endfor _xi-mtree: .PHONY ${_+_}@${ECHODIR} "mtree populating ${XDDESTDIR}" mkdir -p ${XDDESTDIR} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${XDDESTDIR} >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${XDDESTDIR}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${XDDESTDIR}/usr/include >/dev/null .if ${MK_LIB32} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \ -p ${XDDESTDIR}/usr >/dev/null .endif .if ${MK_TESTS} != "no" mkdir -p ${XDDESTDIR}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${XDDESTDIR}${TESTSBASE} >/dev/null .endif .ORDER: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries xdev-install: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries _xi-cross-tools: .PHONY @echo "_xi-cross-tools" .for _tool in \ ${_binutils} \ ${_elftctools} \ usr.bin/ar \ ${_clang_libs} \ ${_clang} \ ${_cc} ${_+_}@${ECHODIR} "===> xdev ${_tool} (install)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ install DESTDIR=${XDDESTDIR} .endfor _xi-includes: .PHONY ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 includes \ DESTDIR=${XDDESTDIR} _xi-libraries: .PHONY ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 libraries \ DESTDIR=${XDDESTDIR} xdev-links: .PHONY ${_+_}cd ${XDDESTDIR}/usr/bin; \ mkdir -p ../../../../usr/bin; \ for i in *; do \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}-$$i; \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}${OSREL}-$$i; \ done .else xdev xdev-build xdev-install xdev-links: @echo "*** Error: Both TARGET and TARGET_ARCH must be defined for \"${.TARGET}\" target" .endif Index: projects/clang380-import/bin/sh/eval.c =================================================================== --- projects/clang380-import/bin/sh/eval.c (revision 293686) +++ projects/clang380-import/bin/sh/eval.c (revision 293687) @@ -1,1382 +1,1382 @@ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)eval.c 8.9 (Berkeley) 6/8/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include /* For WIFSIGNALED(status) */ #include /* * Evaluate a command. */ #include "shell.h" #include "nodes.h" #include "syntax.h" #include "expand.h" #include "parser.h" #include "jobs.h" #include "eval.h" #include "builtins.h" #include "options.h" #include "exec.h" #include "redir.h" #include "input.h" #include "output.h" #include "trap.h" #include "var.h" #include "memalloc.h" #include "error.h" #include "show.h" #include "mystring.h" #ifndef NO_HISTORY #include "myhistedit.h" #endif int evalskip; /* set if we are skipping commands */ int skipcount; /* number of levels to skip */ static int loopnest; /* current loop nesting level */ int funcnest; /* depth of function calls */ static int builtin_flags; /* evalcommand flags for builtins */ char *commandname; struct arglist *cmdenviron; int exitstatus; /* exit status of last command */ int oexitstatus; /* saved exit status */ static void evalloop(union node *, int); static void evalfor(union node *, int); static union node *evalcase(union node *); static void evalsubshell(union node *, int); static void evalredir(union node *, int); static void exphere(union node *, struct arglist *); static void expredir(union node *); static void evalpipe(union node *); static int is_valid_fast_cmdsubst(union node *n); static void evalcommand(union node *, int, struct backcmd *); static void prehash(union node *); /* * Called to reset things after an exception. */ void reseteval(void) { evalskip = 0; loopnest = 0; } /* * The eval command. */ int evalcmd(int argc, char **argv) { char *p; char *concat; char **ap; if (argc > 1) { p = argv[1]; if (argc > 2) { STARTSTACKSTR(concat); ap = argv + 2; for (;;) { STPUTS(p, concat); if ((p = *ap++) == NULL) break; STPUTC(' ', concat); } STPUTC('\0', concat); p = grabstackstr(concat); } evalstring(p, builtin_flags); } else exitstatus = 0; return exitstatus; } /* * Execute a command or commands contained in a string. */ void evalstring(const char *s, int flags) { union node *n; struct stackmark smark; int flags_exit; int any; flags_exit = flags & EV_EXIT; flags &= ~EV_EXIT; any = 0; setstackmark(&smark); setinputstring(s, 1); while ((n = parsecmd(0)) != NEOF) { if (n != NULL && !nflag) { if (flags_exit && preadateof()) evaltree(n, flags | EV_EXIT); else evaltree(n, flags); any = 1; if (evalskip) break; } popstackmark(&smark); setstackmark(&smark); } popfile(); popstackmark(&smark); if (!any) exitstatus = 0; if (flags_exit) exraise(EXEXIT); } /* * Evaluate a parse tree. The value is left in the global variable * exitstatus. */ void evaltree(union node *n, int flags) { int do_etest; union node *next; struct stackmark smark; setstackmark(&smark); do_etest = 0; if (n == NULL) { TRACE(("evaltree(NULL) called\n")); exitstatus = 0; goto out; } do { next = NULL; #ifndef NO_HISTORY displayhist = 1; /* show history substitutions done with fc */ #endif TRACE(("evaltree(%p: %d) called\n", (void *)n, n->type)); switch (n->type) { case NSEMI: evaltree(n->nbinary.ch1, flags & ~EV_EXIT); if (evalskip) goto out; next = n->nbinary.ch2; break; case NAND: evaltree(n->nbinary.ch1, EV_TESTED); if (evalskip || exitstatus != 0) { goto out; } next = n->nbinary.ch2; break; case NOR: evaltree(n->nbinary.ch1, EV_TESTED); if (evalskip || exitstatus == 0) goto out; next = n->nbinary.ch2; break; case NREDIR: evalredir(n, flags); break; case NSUBSHELL: evalsubshell(n, flags); do_etest = !(flags & EV_TESTED); break; case NBACKGND: evalsubshell(n, flags); break; case NIF: { evaltree(n->nif.test, EV_TESTED); if (evalskip) goto out; if (exitstatus == 0) next = n->nif.ifpart; else if (n->nif.elsepart) next = n->nif.elsepart; else exitstatus = 0; break; } case NWHILE: case NUNTIL: evalloop(n, flags & ~EV_EXIT); break; case NFOR: evalfor(n, flags & ~EV_EXIT); break; case NCASE: next = evalcase(n); break; case NCLIST: next = n->nclist.body; break; case NCLISTFALLTHRU: if (n->nclist.body) { evaltree(n->nclist.body, flags & ~EV_EXIT); if (evalskip) goto out; } next = n->nclist.next; break; case NDEFUN: defun(n->narg.text, n->narg.next); exitstatus = 0; break; case NNOT: evaltree(n->nnot.com, EV_TESTED); if (evalskip) goto out; exitstatus = !exitstatus; break; case NPIPE: evalpipe(n); do_etest = !(flags & EV_TESTED); break; case NCMD: evalcommand(n, flags, (struct backcmd *)NULL); do_etest = !(flags & EV_TESTED); break; default: out1fmt("Node type = %d\n", n->type); flushout(&output); break; } n = next; popstackmark(&smark); setstackmark(&smark); } while (n != NULL); out: popstackmark(&smark); if (pendingsig) dotrap(); if (eflag && exitstatus != 0 && do_etest) exitshell(exitstatus); if (flags & EV_EXIT) exraise(EXEXIT); } static void evalloop(union node *n, int flags) { int status; loopnest++; status = 0; for (;;) { if (!evalskip) evaltree(n->nbinary.ch1, EV_TESTED); if (evalskip) { if (evalskip == SKIPCONT && --skipcount <= 0) { evalskip = 0; continue; } if (evalskip == SKIPBREAK && --skipcount <= 0) evalskip = 0; if (evalskip == SKIPRETURN) status = exitstatus; break; } if (n->type == NWHILE) { if (exitstatus != 0) break; } else { if (exitstatus == 0) break; } evaltree(n->nbinary.ch2, flags); status = exitstatus; } loopnest--; exitstatus = status; } static void evalfor(union node *n, int flags) { struct arglist arglist; union node *argp; int i; int status; emptyarglist(&arglist); for (argp = n->nfor.args ; argp ; argp = argp->narg.next) { oexitstatus = exitstatus; expandarg(argp, &arglist, EXP_FULL | EXP_TILDE); } loopnest++; status = 0; for (i = 0; i < arglist.count; i++) { setvar(n->nfor.var, arglist.args[i], 0); evaltree(n->nfor.body, flags); status = exitstatus; if (evalskip) { if (evalskip == SKIPCONT && --skipcount <= 0) { evalskip = 0; continue; } if (evalskip == SKIPBREAK && --skipcount <= 0) evalskip = 0; break; } } loopnest--; exitstatus = status; } /* * Evaluate a case statement, returning the selected tree. * * The exit status needs care to get right. */ static union node * evalcase(union node *n) { union node *cp; union node *patp; struct arglist arglist; emptyarglist(&arglist); oexitstatus = exitstatus; expandarg(n->ncase.expr, &arglist, EXP_TILDE); for (cp = n->ncase.cases ; cp ; cp = cp->nclist.next) { for (patp = cp->nclist.pattern ; patp ; patp = patp->narg.next) { if (casematch(patp, arglist.args[0])) { while (cp->nclist.next && cp->type == NCLISTFALLTHRU && cp->nclist.body == NULL) cp = cp->nclist.next; if (cp->nclist.next && cp->type == NCLISTFALLTHRU) return (cp); if (cp->nclist.body == NULL) exitstatus = 0; return (cp->nclist.body); } } } exitstatus = 0; return (NULL); } /* * Kick off a subshell to evaluate a tree. */ static void evalsubshell(union node *n, int flags) { struct job *jp; int backgnd = (n->type == NBACKGND); oexitstatus = exitstatus; expredir(n->nredir.redirect); if ((!backgnd && flags & EV_EXIT && !have_traps()) || forkshell(jp = makejob(n, 1), n, backgnd) == 0) { if (backgnd) flags &=~ EV_TESTED; redirect(n->nredir.redirect, 0); evaltree(n->nredir.n, flags | EV_EXIT); /* never returns */ } else if (! backgnd) { INTOFF; exitstatus = waitforjob(jp, (int *)NULL); INTON; } else exitstatus = 0; } /* * Evaluate a redirected compound command. */ static void evalredir(union node *n, int flags) { struct jmploc jmploc; struct jmploc *savehandler; volatile int in_redirect = 1; oexitstatus = exitstatus; expredir(n->nredir.redirect); savehandler = handler; if (setjmp(jmploc.loc)) { int e; handler = savehandler; e = exception; popredir(); if (e == EXERROR || e == EXEXEC) { if (in_redirect) { exitstatus = 2; return; } } longjmp(handler->loc, 1); } else { INTOFF; handler = &jmploc; redirect(n->nredir.redirect, REDIR_PUSH); in_redirect = 0; INTON; evaltree(n->nredir.n, flags); } INTOFF; handler = savehandler; popredir(); INTON; } static void exphere(union node *redir, struct arglist *fn) { struct jmploc jmploc; struct jmploc *savehandler; struct localvar *savelocalvars; int need_longjmp = 0; unsigned char saveoptreset; redir->nhere.expdoc = ""; savelocalvars = localvars; localvars = NULL; saveoptreset = shellparam.reset; forcelocal++; savehandler = handler; if (setjmp(jmploc.loc)) need_longjmp = exception != EXERROR && exception != EXEXEC; else { handler = &jmploc; expandarg(redir->nhere.doc, fn, 0); redir->nhere.expdoc = fn->args[0]; INTOFF; } handler = savehandler; forcelocal--; poplocalvars(); localvars = savelocalvars; shellparam.reset = saveoptreset; if (need_longjmp) longjmp(handler->loc, 1); INTON; } /* * Compute the names of the files in a redirection list. */ static void expredir(union node *n) { union node *redir; for (redir = n ; redir ; redir = redir->nfile.next) { struct arglist fn; emptyarglist(&fn); switch (redir->type) { case NFROM: case NTO: case NFROMTO: case NAPPEND: case NCLOBBER: expandarg(redir->nfile.fname, &fn, EXP_TILDE); redir->nfile.expfname = fn.args[0]; break; case NFROMFD: case NTOFD: if (redir->ndup.vname) { expandarg(redir->ndup.vname, &fn, EXP_TILDE); fixredir(redir, fn.args[0], 1); } break; case NXHERE: exphere(redir, &fn); break; } } } /* * Evaluate a pipeline. All the processes in the pipeline are children * of the process creating the pipeline. (This differs from some versions * of the shell, which make the last process in a pipeline the parent * of all the rest.) */ static void evalpipe(union node *n) { struct job *jp; struct nodelist *lp; int pipelen; int prevfd; int pip[2]; TRACE(("evalpipe(%p) called\n", (void *)n)); pipelen = 0; for (lp = n->npipe.cmdlist ; lp ; lp = lp->next) pipelen++; INTOFF; jp = makejob(n, pipelen); prevfd = -1; for (lp = n->npipe.cmdlist ; lp ; lp = lp->next) { prehash(lp->n); pip[1] = -1; if (lp->next) { if (pipe(pip) < 0) { if (prevfd >= 0) close(prevfd); error("Pipe call failed: %s", strerror(errno)); } } if (forkshell(jp, lp->n, n->npipe.backgnd) == 0) { INTON; if (prevfd > 0) { dup2(prevfd, 0); close(prevfd); } if (pip[1] >= 0) { if (!(prevfd >= 0 && pip[0] == 0)) close(pip[0]); if (pip[1] != 1) { dup2(pip[1], 1); close(pip[1]); } } evaltree(lp->n, EV_EXIT); } if (prevfd >= 0) close(prevfd); prevfd = pip[0]; if (pip[1] != -1) close(pip[1]); } INTON; if (n->npipe.backgnd == 0) { INTOFF; exitstatus = waitforjob(jp, (int *)NULL); TRACE(("evalpipe: job done exit status %d\n", exitstatus)); INTON; } else exitstatus = 0; } static int is_valid_fast_cmdsubst(union node *n) { return (n->type == NCMD); } /* * Execute a command inside back quotes. If it's a builtin command, we * want to save its output in a block obtained from malloc. Otherwise * we fork off a subprocess and get the output of the command via a pipe. * Should be called with interrupts off. */ void evalbackcmd(union node *n, struct backcmd *result) { int pip[2]; struct job *jp; struct stackmark smark; struct jmploc jmploc; struct jmploc *savehandler; struct localvar *savelocalvars; unsigned char saveoptreset; result->fd = -1; result->buf = NULL; result->nleft = 0; result->jp = NULL; if (n == NULL) { exitstatus = 0; return; } setstackmark(&smark); exitstatus = oexitstatus; if (is_valid_fast_cmdsubst(n)) { savelocalvars = localvars; localvars = NULL; saveoptreset = shellparam.reset; forcelocal++; savehandler = handler; if (setjmp(jmploc.loc)) { if (exception == EXERROR || exception == EXEXEC) exitstatus = 2; else if (exception != 0) { handler = savehandler; forcelocal--; poplocalvars(); localvars = savelocalvars; shellparam.reset = saveoptreset; longjmp(handler->loc, 1); } } else { handler = &jmploc; evalcommand(n, EV_BACKCMD, result); } handler = savehandler; forcelocal--; poplocalvars(); localvars = savelocalvars; shellparam.reset = saveoptreset; } else { if (pipe(pip) < 0) error("Pipe call failed: %s", strerror(errno)); jp = makejob(n, 1); if (forkshell(jp, n, FORK_NOJOB) == 0) { FORCEINTON; close(pip[0]); if (pip[1] != 1) { dup2(pip[1], 1); close(pip[1]); } evaltree(n, EV_EXIT); } close(pip[1]); result->fd = pip[0]; result->jp = jp; } popstackmark(&smark); TRACE(("evalbackcmd done: fd=%d buf=%p nleft=%d jp=%p\n", result->fd, result->buf, result->nleft, result->jp)); } static int mustexpandto(const char *argtext, const char *mask) { for (;;) { if (*argtext == CTLQUOTEMARK || *argtext == CTLQUOTEEND) { argtext++; continue; } if (*argtext == CTLESC) argtext++; else if (BASESYNTAX[(int)*argtext] == CCTL) return (0); if (*argtext != *mask) return (0); if (*argtext == '\0') return (1); argtext++; mask++; } } static int isdeclarationcmd(struct narg *arg) { int have_command = 0; if (arg == NULL) return (0); while (mustexpandto(arg->text, "command")) { have_command = 1; arg = &arg->next->narg; if (arg == NULL) return (0); /* * To also allow "command -p" and "command --" as part of * a declaration command, add code here. * We do not do this, as ksh does not do it either and it * is not required by POSIX. */ } return (mustexpandto(arg->text, "export") || mustexpandto(arg->text, "readonly") || (mustexpandto(arg->text, "local") && (have_command || !isfunc("local")))); } static void xtracecommand(struct arglist *varlist, int argc, char **argv) { char sep = 0; const char *text, *p, *ps4; int i; ps4 = expandstr(ps4val()); out2str(ps4 != NULL ? ps4 : ps4val()); for (i = 0; i < varlist->count; i++) { text = varlist->args[i]; if (sep != 0) out2c(' '); p = strchr(text, '='); if (p != NULL) { p++; outbin(text, p - text, out2); out2qstr(p); } else out2qstr(text); sep = ' '; } for (i = 0; i < argc; i++) { text = argv[i]; if (sep != 0) out2c(' '); out2qstr(text); sep = ' '; } out2c('\n'); flushout(&errout); } /* * Check if a builtin can safely be executed in the same process, * even though it should be in a subshell (command substitution). * Note that jobid, jobs, times and trap can show information not * available in a child process; this is deliberate. * The arguments should already have been expanded. */ static int safe_builtin(int idx, int argc, char **argv) { if (idx == BLTINCMD || idx == COMMANDCMD || idx == ECHOCMD || idx == FALSECMD || idx == JOBIDCMD || idx == JOBSCMD || idx == KILLCMD || idx == PRINTFCMD || idx == PWDCMD || idx == TESTCMD || idx == TIMESCMD || idx == TRUECMD || idx == TYPECMD) return (1); if (idx == EXPORTCMD || idx == TRAPCMD || idx == ULIMITCMD || idx == UMASKCMD) return (argc <= 1 || (argc == 2 && argv[1][0] == '-')); if (idx == SETCMD) return (argc <= 1 || (argc == 2 && (argv[1][0] == '-' || argv[1][0] == '+') && argv[1][1] == 'o' && argv[1][2] == '\0')); return (0); } /* * Execute a simple command. * Note: This may or may not return if (flags & EV_EXIT). */ static void evalcommand(union node *cmd, int flags, struct backcmd *backcmd) { union node *argp; struct arglist arglist; struct arglist varlist; char **argv; int argc; char **envp; int varflag; int mode; int pip[2]; struct cmdentry cmdentry; struct job *jp; struct jmploc jmploc; struct jmploc *savehandler; char *savecmdname; struct shparam saveparam; struct localvar *savelocalvars; struct parsefile *savetopfile; volatile int e; char *lastarg; int realstatus; int do_clearcmdentry; const char *path = pathval(); int i; /* First expand the arguments. */ TRACE(("evalcommand(%p, %d) called\n", (void *)cmd, flags)); emptyarglist(&arglist); emptyarglist(&varlist); varflag = 1; jp = NULL; do_clearcmdentry = 0; oexitstatus = exitstatus; exitstatus = 0; /* Add one slot at the beginning for tryexec(). */ appendarglist(&arglist, nullstr); for (argp = cmd->ncmd.args ; argp ; argp = argp->narg.next) { if (varflag && isassignment(argp->narg.text)) { expandarg(argp, varflag == 1 ? &varlist : &arglist, EXP_VARTILDE); continue; } else if (varflag == 1) varflag = isdeclarationcmd(&argp->narg) ? 2 : 0; expandarg(argp, &arglist, EXP_FULL | EXP_TILDE); } appendarglist(&arglist, nullstr); expredir(cmd->ncmd.redirect); argc = arglist.count - 2; argv = &arglist.args[1]; argv[argc] = NULL; lastarg = NULL; if (iflag && funcnest == 0 && argc > 0) lastarg = argv[argc - 1]; /* Print the command if xflag is set. */ if (xflag) xtracecommand(&varlist, argc, argv); /* Now locate the command. */ if (argc == 0) { /* Variable assignment(s) without command */ cmdentry.cmdtype = CMDBUILTIN; cmdentry.u.index = BLTINCMD; cmdentry.special = 0; } else { static const char PATH[] = "PATH="; int cmd_flags = 0, bltinonly = 0; /* * Modify the command lookup path, if a PATH= assignment * is present */ for (i = 0; i < varlist.count; i++) if (strncmp(varlist.args[i], PATH, sizeof(PATH) - 1) == 0) { path = varlist.args[i] + sizeof(PATH) - 1; /* * On `PATH=... command`, we need to make * sure that the command isn't using the * non-updated hash table of the outer PATH * setting and we need to make sure that * the hash table isn't filled with items * from the temporary setting. * * It would be better to forbit using and * updating the table while this command * runs, by the command finding mechanism * is heavily integrated with hash handling, * so we just delete the hash before and after * the command runs. Partly deleting like * changepatch() does doesn't seem worth the * bookinging effort, since most such runs add * directories in front of the new PATH. */ clearcmdentry(); do_clearcmdentry = 1; } for (;;) { if (bltinonly) { cmdentry.u.index = find_builtin(*argv, &cmdentry.special); if (cmdentry.u.index < 0) { cmdentry.u.index = BLTINCMD; argv--; argc++; break; } } else find_command(argv[0], &cmdentry, cmd_flags, path); /* implement the bltin and command builtins here */ if (cmdentry.cmdtype != CMDBUILTIN) break; if (cmdentry.u.index == BLTINCMD) { if (argc == 1) break; argv++; argc--; bltinonly = 1; } else if (cmdentry.u.index == COMMANDCMD) { if (argc == 1) break; if (!strcmp(argv[1], "-p")) { if (argc == 2) break; if (argv[2][0] == '-') { if (strcmp(argv[2], "--")) break; if (argc == 3) break; argv += 3; argc -= 3; } else { argv += 2; argc -= 2; } path = _PATH_STDPATH; clearcmdentry(); do_clearcmdentry = 1; } else if (!strcmp(argv[1], "--")) { if (argc == 2) break; argv += 2; argc -= 2; } else if (argv[1][0] == '-') break; else { argv++; argc--; } cmd_flags |= DO_NOFUNC; bltinonly = 0; } else break; } /* * Special builtins lose their special properties when * called via 'command'. */ if (cmd_flags & DO_NOFUNC) cmdentry.special = 0; } /* Fork off a child process if necessary. */ if (((cmdentry.cmdtype == CMDNORMAL || cmdentry.cmdtype == CMDUNKNOWN) && ((flags & EV_EXIT) == 0 || have_traps())) || ((flags & EV_BACKCMD) != 0 && (cmdentry.cmdtype != CMDBUILTIN || !safe_builtin(cmdentry.u.index, argc, argv)))) { jp = makejob(cmd, 1); mode = FORK_FG; if (flags & EV_BACKCMD) { mode = FORK_NOJOB; if (pipe(pip) < 0) error("Pipe call failed: %s", strerror(errno)); } if (cmdentry.cmdtype == CMDNORMAL && cmd->ncmd.redirect == NULL && varlist.count == 0 && (mode == FORK_FG || mode == FORK_NOJOB) && !disvforkset() && !iflag && !mflag) { vforkexecshell(jp, argv, environment(), path, cmdentry.u.index, flags & EV_BACKCMD ? pip : NULL); goto parent; } if (forkshell(jp, cmd, mode) != 0) goto parent; /* at end of routine */ if (flags & EV_BACKCMD) { FORCEINTON; close(pip[0]); if (pip[1] != 1) { dup2(pip[1], 1); close(pip[1]); } flags &= ~EV_BACKCMD; } flags |= EV_EXIT; } /* This is the child process if a fork occurred. */ /* Execute the command. */ if (cmdentry.cmdtype == CMDFUNCTION) { #ifdef DEBUG trputs("Shell function: "); trargs(argv); #endif saveparam = shellparam; shellparam.malloc = 0; shellparam.reset = 1; shellparam.nparam = argc - 1; shellparam.p = argv + 1; shellparam.optp = NULL; shellparam.optnext = NULL; INTOFF; savelocalvars = localvars; localvars = NULL; reffunc(cmdentry.u.func); savehandler = handler; if (setjmp(jmploc.loc)) { - freeparam(&shellparam); - shellparam = saveparam; popredir(); unreffunc(cmdentry.u.func); poplocalvars(); localvars = savelocalvars; + freeparam(&shellparam); + shellparam = saveparam; funcnest--; handler = savehandler; longjmp(handler->loc, 1); } handler = &jmploc; funcnest++; redirect(cmd->ncmd.redirect, REDIR_PUSH); INTON; for (i = 0; i < varlist.count; i++) mklocal(varlist.args[i]); exitstatus = oexitstatus; evaltree(getfuncnode(cmdentry.u.func), flags & (EV_TESTED | EV_EXIT)); INTOFF; unreffunc(cmdentry.u.func); poplocalvars(); localvars = savelocalvars; freeparam(&shellparam); shellparam = saveparam; handler = savehandler; funcnest--; popredir(); INTON; if (evalskip == SKIPRETURN) { evalskip = 0; skipcount = 0; } if (jp) exitshell(exitstatus); } else if (cmdentry.cmdtype == CMDBUILTIN) { #ifdef DEBUG trputs("builtin command: "); trargs(argv); #endif mode = (cmdentry.u.index == EXECCMD)? 0 : REDIR_PUSH; if (flags == EV_BACKCMD) { memout.nleft = 0; memout.nextc = memout.buf; memout.bufsize = 64; mode |= REDIR_BACKQ; } savecmdname = commandname; savetopfile = getcurrentfile(); cmdenviron = &varlist; e = -1; savehandler = handler; if (setjmp(jmploc.loc)) { e = exception; if (e == EXINT) exitstatus = SIGINT+128; else if (e != EXEXIT) exitstatus = 2; goto cmddone; } handler = &jmploc; redirect(cmd->ncmd.redirect, mode); outclearerror(out1); /* * If there is no command word, redirection errors should * not be fatal but assignment errors should. */ if (argc == 0) cmdentry.special = 1; listsetvar(cmdenviron, cmdentry.special ? 0 : VNOSET); if (argc > 0) bltinsetlocale(); commandname = argv[0]; argptr = argv + 1; nextopt_optptr = NULL; /* initialize nextopt */ builtin_flags = flags; exitstatus = (*builtinfunc[cmdentry.u.index])(argc, argv); flushall(); if (outiserror(out1)) { warning("write error on stdout"); if (exitstatus == 0 || exitstatus == 1) exitstatus = 2; } cmddone: if (argc > 0) bltinunsetlocale(); cmdenviron = NULL; out1 = &output; out2 = &errout; freestdout(); handler = savehandler; commandname = savecmdname; if (jp) exitshell(exitstatus); if (flags == EV_BACKCMD) { backcmd->buf = memout.buf; backcmd->nleft = memout.nextc - memout.buf; memout.buf = NULL; } if (cmdentry.u.index != EXECCMD) popredir(); if (e != -1) { if ((e != EXERROR && e != EXEXEC) || cmdentry.special) exraise(e); popfilesupto(savetopfile); if (flags != EV_BACKCMD) FORCEINTON; } } else { #ifdef DEBUG trputs("normal command: "); trargs(argv); #endif redirect(cmd->ncmd.redirect, 0); for (i = 0; i < varlist.count; i++) setvareq(varlist.args[i], VEXPORT|VSTACK); envp = environment(); shellexec(argv, envp, path, cmdentry.u.index); /*NOTREACHED*/ } goto out; parent: /* parent process gets here (if we forked) */ if (mode == FORK_FG) { /* argument to fork */ INTOFF; exitstatus = waitforjob(jp, &realstatus); INTON; if (iflag && loopnest > 0 && WIFSIGNALED(realstatus)) { evalskip = SKIPBREAK; skipcount = loopnest; } } else if (mode == FORK_NOJOB) { backcmd->fd = pip[0]; close(pip[1]); backcmd->jp = jp; } out: if (lastarg) setvar("_", lastarg, 0); if (do_clearcmdentry) clearcmdentry(); } /* * Search for a command. This is called before we fork so that the * location of the command will be available in the parent as well as * the child. The check for "goodname" is an overly conservative * check that the name will not be subject to expansion. */ static void prehash(union node *n) { struct cmdentry entry; if (n && n->type == NCMD && n->ncmd.args) if (goodname(n->ncmd.args->narg.text)) find_command(n->ncmd.args->narg.text, &entry, 0, pathval()); } /* * Builtin commands. Builtin commands whose functions are closely * tied to evaluation are implemented here. */ /* * No command given, a bltin command with no arguments, or a bltin command * with an invalid name. */ int bltincmd(int argc, char **argv) { if (argc > 1) { out2fmt_flush("%s: not found\n", argv[1]); return 127; } /* * Preserve exitstatus of a previous possible redirection * as POSIX mandates */ return exitstatus; } /* * Handle break and continue commands. Break, continue, and return are * all handled by setting the evalskip flag. The evaluation routines * above all check this flag, and if it is set they start skipping * commands rather than executing them. The variable skipcount is * the number of loops to break/continue, or the number of function * levels to return. (The latter is always 1.) It should probably * be an error to break out of more loops than exist, but it isn't * in the standard shell so we don't make it one here. */ int breakcmd(int argc, char **argv) { long n; char *end; if (argc > 1) { /* Allow arbitrarily large numbers. */ n = strtol(argv[1], &end, 10); if (!is_digit(argv[1][0]) || *end != '\0') error("Illegal number: %s", argv[1]); } else n = 1; if (n > loopnest) n = loopnest; if (n > 0) { evalskip = (**argv == 'c')? SKIPCONT : SKIPBREAK; skipcount = n; } return 0; } /* * The `command' command. */ int commandcmd(int argc __unused, char **argv __unused) { const char *path; int ch; int cmd = -1; path = bltinlookup("PATH", 1); while ((ch = nextopt("pvV")) != '\0') { switch (ch) { case 'p': path = _PATH_STDPATH; break; case 'v': cmd = TYPECMD_SMALLV; break; case 'V': cmd = TYPECMD_BIGV; break; } } if (cmd != -1) { if (*argptr == NULL || argptr[1] != NULL) error("wrong number of arguments"); return typecmd_impl(2, argptr - 1, cmd, path); } if (*argptr != NULL) error("commandcmd bad call"); /* * Do nothing successfully if no command was specified; * ksh also does this. */ return 0; } /* * The return command. */ int returncmd(int argc, char **argv) { int ret = argc > 1 ? number(argv[1]) : oexitstatus; evalskip = SKIPRETURN; skipcount = 1; return ret; } int falsecmd(int argc __unused, char **argv __unused) { return 1; } int truecmd(int argc __unused, char **argv __unused) { return 0; } int execcmd(int argc, char **argv) { int i; /* * Because we have historically not supported any options, * only treat "--" specially. */ if (argc > 1 && strcmp(argv[1], "--") == 0) argc--, argv++; if (argc > 1) { iflag = 0; /* exit on error */ mflag = 0; optschanged(); for (i = 0; i < cmdenviron->count; i++) setvareq(cmdenviron->args[i], VEXPORT|VSTACK); shellexec(argv + 1, environment(), pathval(), 0); } return 0; } int timescmd(int argc __unused, char **argv __unused) { struct rusage ru; long shumins, shsmins, chumins, chsmins; double shusecs, shssecs, chusecs, chssecs; if (getrusage(RUSAGE_SELF, &ru) < 0) return 1; shumins = ru.ru_utime.tv_sec / 60; shusecs = ru.ru_utime.tv_sec % 60 + ru.ru_utime.tv_usec / 1000000.; shsmins = ru.ru_stime.tv_sec / 60; shssecs = ru.ru_stime.tv_sec % 60 + ru.ru_stime.tv_usec / 1000000.; if (getrusage(RUSAGE_CHILDREN, &ru) < 0) return 1; chumins = ru.ru_utime.tv_sec / 60; chusecs = ru.ru_utime.tv_sec % 60 + ru.ru_utime.tv_usec / 1000000.; chsmins = ru.ru_stime.tv_sec / 60; chssecs = ru.ru_stime.tv_sec % 60 + ru.ru_stime.tv_usec / 1000000.; out1fmt("%ldm%.3fs %ldm%.3fs\n%ldm%.3fs %ldm%.3fs\n", shumins, shusecs, shsmins, shssecs, chumins, chusecs, chsmins, chssecs); return 0; } Index: projects/clang380-import/bin/sh/tests/builtins/Makefile =================================================================== --- projects/clang380-import/bin/sh/tests/builtins/Makefile (revision 293686) +++ projects/clang380-import/bin/sh/tests/builtins/Makefile (revision 293687) @@ -1,173 +1,174 @@ # $FreeBSD$ .include TESTSDIR= ${TESTSBASE}/bin/sh/${.CURDIR:T} .PATH: ${.CURDIR:H} ATF_TESTS_SH= functional_test FILESDIR= ${TESTSDIR} FILES= alias.0 alias.0.stdout FILES+= alias.1 alias.1.stderr FILES+= alias3.0 alias3.0.stdout FILES+= alias4.0 FILES+= break1.0 FILES+= break2.0 break2.0.stdout FILES+= break3.0 FILES+= break4.4 FILES+= break5.4 FILES+= break6.0 FILES+= builtin1.0 FILES+= case1.0 FILES+= case2.0 FILES+= case3.0 FILES+= case4.0 FILES+= case5.0 FILES+= case6.0 FILES+= case7.0 FILES+= case8.0 FILES+= case9.0 FILES+= case10.0 FILES+= case11.0 FILES+= case12.0 FILES+= case13.0 FILES+= case14.0 FILES+= case15.0 FILES+= case16.0 FILES+= case17.0 FILES+= case18.0 FILES+= case19.0 FILES+= case20.0 FILES+= cd1.0 FILES+= cd2.0 FILES+= cd3.0 FILES+= cd4.0 FILES+= cd5.0 FILES+= cd6.0 FILES+= cd7.0 FILES+= cd8.0 FILES+= cd9.0 cd9.0.stdout FILES+= command1.0 FILES+= command2.0 FILES+= command3.0 FILES+= command3.0.stdout FILES+= command4.0 FILES+= command5.0 FILES+= command5.0.stdout FILES+= command6.0 FILES+= command6.0.stdout FILES+= command7.0 FILES+= command8.0 FILES+= command9.0 FILES+= command10.0 FILES+= command11.0 FILES+= command12.0 FILES+= dot1.0 FILES+= dot2.0 FILES+= dot3.0 FILES+= dot4.0 FILES+= eval1.0 FILES+= eval2.0 FILES+= eval3.0 FILES+= eval4.0 FILES+= eval5.0 FILES+= eval6.0 FILES+= eval7.0 FILES+= eval8.7 FILES+= exec1.0 FILES+= exec2.0 FILES+= exit1.0 FILES+= exit2.8 FILES+= exit3.0 FILES+= export1.0 FILES+= fc1.0 FILES+= fc2.0 FILES+= for1.0 FILES+= for2.0 FILES+= for3.0 FILES+= getopts1.0 getopts1.0.stdout FILES+= getopts2.0 getopts2.0.stdout FILES+= getopts3.0 FILES+= getopts4.0 FILES+= getopts5.0 FILES+= getopts6.0 FILES+= getopts7.0 FILES+= getopts8.0 getopts8.0.stdout FILES+= getopts9.0 getopts9.0.stdout FILES+= getopts10.0 FILES+= hash1.0 hash1.0.stdout FILES+= hash2.0 hash2.0.stdout FILES+= hash3.0 hash3.0.stdout FILES+= hash4.0 FILES+= jobid1.0 FILES+= jobid2.0 FILES+= kill1.0 kill2.0 FILES+= lineno.0 lineno.0.stdout FILES+= lineno2.0 FILES+= lineno3.0 lineno3.0.stdout FILES+= local1.0 FILES+= local2.0 FILES+= local3.0 FILES+= local4.0 +FILES+= local5.0 .if ${MK_NLS} != "no" FILES+= locale1.0 .endif FILES+= printf1.0 FILES+= printf2.0 FILES+= printf3.0 FILES+= printf4.0 FILES+= read1.0 read1.0.stdout FILES+= read2.0 FILES+= read3.0 read3.0.stdout FILES+= read4.0 read4.0.stdout FILES+= read5.0 FILES+= read6.0 FILES+= read7.0 FILES+= read8.0 FILES+= read9.0 FILES+= return1.0 FILES+= return2.1 FILES+= return3.1 FILES+= return4.0 FILES+= return5.0 FILES+= return6.4 FILES+= return7.4 FILES+= return8.0 FILES+= set1.0 FILES+= set2.0 FILES+= trap1.0 FILES+= trap10.0 FILES+= trap11.0 FILES+= trap12.0 FILES+= trap13.0 FILES+= trap14.0 FILES+= trap15.0 FILES+= trap16.0 FILES+= trap2.0 FILES+= trap3.0 FILES+= trap4.0 FILES+= trap5.0 FILES+= trap6.0 FILES+= trap7.0 FILES+= trap8.0 FILES+= trap9.0 FILES+= type1.0 type1.0.stderr FILES+= type2.0 FILES+= type3.0 FILES+= unalias.0 FILES+= var-assign.0 FILES+= var-assign2.0 FILES+= wait1.0 FILES+= wait2.0 FILES+= wait3.0 FILES+= wait4.0 FILES+= wait5.0 FILES+= wait6.0 FILES+= wait7.0 FILES+= wait8.0 FILES+= wait9.127 FILES+= wait10.0 .include Index: projects/clang380-import/bin/sh/tests/builtins/local5.0 =================================================================== --- projects/clang380-import/bin/sh/tests/builtins/local5.0 (nonexistent) +++ projects/clang380-import/bin/sh/tests/builtins/local5.0 (revision 293687) @@ -0,0 +1,15 @@ +# $FreeBSD$ + +f() { + local PATH IFS elem + IFS=: + for elem in ''$PATH''; do + PATH=/var/empty/$elem:$PATH + done + ls -d / >/dev/null +} + +p1=$(command -v ls) +f +p2=$(command -v ls) +[ "$p1" = "$p2" ] Property changes on: projects/clang380-import/bin/sh/tests/builtins/local5.0 ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: projects/clang380-import/bin/sh/var.c =================================================================== --- projects/clang380-import/bin/sh/var.c (revision 293686) +++ projects/clang380-import/bin/sh/var.c (revision 293687) @@ -1,954 +1,965 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)var.c 8.3 (Berkeley) 5/4/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include /* * Shell variables. */ #include #include #include "shell.h" #include "output.h" #include "expand.h" #include "nodes.h" /* for other headers */ #include "eval.h" /* defines cmdenviron */ #include "exec.h" #include "syntax.h" #include "options.h" #include "mail.h" #include "var.h" #include "memalloc.h" #include "error.h" #include "mystring.h" #include "parser.h" #include "builtins.h" #ifndef NO_HISTORY #include "myhistedit.h" #endif #define VTABSIZE 39 struct varinit { struct var *var; int flags; const char *text; void (*func)(const char *); }; #ifndef NO_HISTORY struct var vhistsize; struct var vterm; #endif struct var vifs; struct var vmail; struct var vmpath; struct var vpath; struct var vps1; struct var vps2; struct var vps4; static struct var voptind; struct var vdisvfork; struct localvar *localvars; int forcelocal; static const struct varinit varinit[] = { #ifndef NO_HISTORY { &vhistsize, VUNSET, "HISTSIZE=", sethistsize }, #endif { &vifs, 0, "IFS= \t\n", NULL }, { &vmail, VUNSET, "MAIL=", NULL }, { &vmpath, VUNSET, "MAILPATH=", NULL }, { &vpath, 0, "PATH=" _PATH_DEFPATH, changepath }, /* * vps1 depends on uid */ { &vps2, 0, "PS2=> ", NULL }, { &vps4, 0, "PS4=+ ", NULL }, #ifndef NO_HISTORY { &vterm, VUNSET, "TERM=", setterm }, #endif { &voptind, 0, "OPTIND=1", getoptsreset }, { &vdisvfork, VUNSET, "SH_DISABLE_VFORK=", NULL }, { NULL, 0, NULL, NULL } }; static struct var *vartab[VTABSIZE]; static const char *const locale_names[7] = { "LC_COLLATE", "LC_CTYPE", "LC_MONETARY", "LC_NUMERIC", "LC_TIME", "LC_MESSAGES", NULL }; static const int locale_categories[7] = { LC_COLLATE, LC_CTYPE, LC_MONETARY, LC_NUMERIC, LC_TIME, LC_MESSAGES, 0 }; static int varequal(const char *, const char *); static struct var *find_var(const char *, struct var ***, int *); static int localevar(const char *); static void setvareq_const(const char *s, int flags); extern char **environ; /* * This routine initializes the builtin variables and imports the environment. * It is called when the shell is initialized. */ void initvar(void) { char ppid[20]; const struct varinit *ip; struct var *vp; struct var **vpp; char **envp; for (ip = varinit ; (vp = ip->var) != NULL ; ip++) { if (find_var(ip->text, &vpp, &vp->name_len) != NULL) continue; vp->next = *vpp; *vpp = vp; vp->text = __DECONST(char *, ip->text); vp->flags = ip->flags | VSTRFIXED | VTEXTFIXED; vp->func = ip->func; } /* * PS1 depends on uid */ if (find_var("PS1", &vpp, &vps1.name_len) == NULL) { vps1.next = *vpp; *vpp = &vps1; vps1.text = __DECONST(char *, geteuid() ? "PS1=$ " : "PS1=# "); vps1.flags = VSTRFIXED|VTEXTFIXED; } fmtstr(ppid, sizeof(ppid), "%d", (int)getppid()); setvarsafe("PPID", ppid, 0); for (envp = environ ; *envp ; envp++) { if (strchr(*envp, '=')) { setvareq(*envp, VEXPORT|VTEXTFIXED); } } setvareq_const("OPTIND=1", 0); } /* * Safe version of setvar, returns 1 on success 0 on failure. */ int setvarsafe(const char *name, const char *val, int flags) { struct jmploc jmploc; struct jmploc *const savehandler = handler; int err = 0; int inton; inton = is_int_on(); if (setjmp(jmploc.loc)) err = 1; else { handler = &jmploc; setvar(name, val, flags); } handler = savehandler; SETINTON(inton); return err; } /* * Set the value of a variable. The flags argument is stored with the * flags of the variable. If val is NULL, the variable is unset. */ void setvar(const char *name, const char *val, int flags) { const char *p; size_t len; size_t namelen; size_t vallen; char *nameeq; int isbad; isbad = 0; p = name; if (! is_name(*p)) isbad = 1; p++; for (;;) { if (! is_in_name(*p)) { if (*p == '\0' || *p == '=') break; isbad = 1; } p++; } namelen = p - name; if (isbad) error("%.*s: bad variable name", (int)namelen, name); len = namelen + 2; /* 2 is space for '=' and '\0' */ if (val == NULL) { flags |= VUNSET; vallen = 0; } else { vallen = strlen(val); len += vallen; } INTOFF; nameeq = ckmalloc(len); memcpy(nameeq, name, namelen); nameeq[namelen] = '='; if (val) memcpy(nameeq + namelen + 1, val, vallen + 1); else nameeq[namelen + 1] = '\0'; setvareq(nameeq, flags); INTON; } static int localevar(const char *s) { const char *const *ss; if (*s != 'L') return 0; if (varequal(s + 1, "ANG")) return 1; if (strncmp(s + 1, "C_", 2) != 0) return 0; if (varequal(s + 3, "ALL")) return 1; for (ss = locale_names; *ss ; ss++) if (varequal(s + 3, *ss + 3)) return 1; return 0; } /* * Sets/unsets an environment variable from a pointer that may actually be a * pointer into environ where the string should not be manipulated. */ static void change_env(const char *s, int set) { char *eqp; char *ss; INTOFF; ss = savestr(s); if ((eqp = strchr(ss, '=')) != NULL) *eqp = '\0'; if (set && eqp != NULL) (void) setenv(ss, eqp + 1, 1); else (void) unsetenv(ss); ckfree(ss); INTON; return; } /* * Same as setvar except that the variable and value are passed in * the first argument as name=value. Since the first argument will * be actually stored in the table, it should not be a string that * will go away. */ void setvareq(char *s, int flags) { struct var *vp, **vpp; int nlen; if (aflag) flags |= VEXPORT; if (forcelocal && !(flags & (VNOSET | VNOLOCAL))) mklocal(s); vp = find_var(s, &vpp, &nlen); if (vp != NULL) { if (vp->flags & VREADONLY) { if ((flags & (VTEXTFIXED|VSTACK)) == 0) ckfree(s); error("%.*s: is read only", vp->name_len, vp->text); } if (flags & VNOSET) { if ((flags & (VTEXTFIXED|VSTACK)) == 0) ckfree(s); return; } INTOFF; if (vp->func && (flags & VNOFUNC) == 0) (*vp->func)(s + vp->name_len + 1); if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0) ckfree(vp->text); vp->flags &= ~(VTEXTFIXED|VSTACK|VUNSET); vp->flags |= flags; vp->text = s; /* * We could roll this to a function, to handle it as * a regular variable function callback, but why bother? * * Note: this assumes iflag is not set to 1 initially. * As part of initvar(), this is called before arguments * are looked at. */ if ((vp == &vmpath || (vp == &vmail && ! mpathset())) && iflag == 1) chkmail(1); if ((vp->flags & VEXPORT) && localevar(s)) { change_env(s, 1); (void) setlocale(LC_ALL, ""); updatecharset(); } INTON; return; } /* not found */ if (flags & VNOSET) { if ((flags & (VTEXTFIXED|VSTACK)) == 0) ckfree(s); return; } INTOFF; vp = ckmalloc(sizeof (*vp)); vp->flags = flags; vp->text = s; vp->name_len = nlen; vp->next = *vpp; vp->func = NULL; *vpp = vp; if ((vp->flags & VEXPORT) && localevar(s)) { change_env(s, 1); (void) setlocale(LC_ALL, ""); updatecharset(); } INTON; } static void setvareq_const(const char *s, int flags) { setvareq(__DECONST(char *, s), flags | VTEXTFIXED); } /* * Process a linked list of variable assignments. */ void listsetvar(struct arglist *list, int flags) { int i; INTOFF; for (i = 0; i < list->count; i++) setvareq(savestr(list->args[i]), flags); INTON; } /* * Find the value of a variable. Returns NULL if not set. */ char * lookupvar(const char *name) { struct var *v; v = find_var(name, NULL, NULL); if (v == NULL || v->flags & VUNSET) return NULL; return v->text + v->name_len + 1; } /* * Search the environment of a builtin command. If the second argument * is nonzero, return the value of a variable even if it hasn't been * exported. */ char * bltinlookup(const char *name, int doall) { struct var *v; char *result; int i; result = NULL; if (cmdenviron) for (i = 0; i < cmdenviron->count; i++) { if (varequal(cmdenviron->args[i], name)) result = strchr(cmdenviron->args[i], '=') + 1; } if (result != NULL) return result; v = find_var(name, NULL, NULL); if (v == NULL || v->flags & VUNSET || (!doall && (v->flags & VEXPORT) == 0)) return NULL; return v->text + v->name_len + 1; } /* * Set up locale for a builtin (LANG/LC_* assignments). */ void bltinsetlocale(void) { int act = 0; char *loc, *locdef; int i; if (cmdenviron) for (i = 0; i < cmdenviron->count; i++) { if (localevar(cmdenviron->args[i])) { act = 1; break; } } if (!act) return; loc = bltinlookup("LC_ALL", 0); INTOFF; if (loc != NULL) { setlocale(LC_ALL, loc); INTON; updatecharset(); return; } locdef = bltinlookup("LANG", 0); for (i = 0; locale_names[i] != NULL; i++) { loc = bltinlookup(locale_names[i], 0); if (loc == NULL) loc = locdef; if (loc != NULL) setlocale(locale_categories[i], loc); } INTON; updatecharset(); } /* * Undo the effect of bltinlocaleset(). */ void bltinunsetlocale(void) { int i; INTOFF; if (cmdenviron) for (i = 0; i < cmdenviron->count; i++) { if (localevar(cmdenviron->args[i])) { setlocale(LC_ALL, ""); updatecharset(); return; } } INTON; } /* * Update the localeisutf8 flag. */ void updatecharset(void) { char *charset; charset = nl_langinfo(CODESET); localeisutf8 = !strcmp(charset, "UTF-8"); } void initcharset(void) { updatecharset(); initial_localeisutf8 = localeisutf8; } /* * Generate a list of exported variables. This routine is used to construct * the third argument to execve when executing a program. */ char ** environment(void) { int nenv; struct var **vpp; struct var *vp; char **env, **ep; nenv = 0; for (vpp = vartab ; vpp < vartab + VTABSIZE ; vpp++) { for (vp = *vpp ; vp ; vp = vp->next) if (vp->flags & VEXPORT) nenv++; } ep = env = stalloc((nenv + 1) * sizeof *env); for (vpp = vartab ; vpp < vartab + VTABSIZE ; vpp++) { for (vp = *vpp ; vp ; vp = vp->next) if (vp->flags & VEXPORT) *ep++ = vp->text; } *ep = NULL; return env; } static int var_compare(const void *a, const void *b) { const char *const *sa, *const *sb; sa = a; sb = b; /* * This compares two var=value strings which creates a different * order from what you would probably expect. POSIX is somewhat * ambiguous on what should be sorted exactly. */ return strcoll(*sa, *sb); } /* * Command to list all variables which are set. This is invoked from the * set command when it is called without any options or operands. */ int showvarscmd(int argc __unused, char **argv __unused) { struct var **vpp; struct var *vp; const char *s; const char **vars; int i, n; /* * POSIX requires us to sort the variables. */ n = 0; for (vpp = vartab; vpp < vartab + VTABSIZE; vpp++) { for (vp = *vpp; vp; vp = vp->next) { if (!(vp->flags & VUNSET)) n++; } } INTOFF; vars = ckmalloc(n * sizeof(*vars)); i = 0; for (vpp = vartab; vpp < vartab + VTABSIZE; vpp++) { for (vp = *vpp; vp; vp = vp->next) { if (!(vp->flags & VUNSET)) vars[i++] = vp->text; } } qsort(vars, n, sizeof(*vars), var_compare); for (i = 0; i < n; i++) { /* * Skip improper variable names so the output remains usable as * shell input. */ if (!isassignment(vars[i])) continue; s = strchr(vars[i], '='); s++; outbin(vars[i], s - vars[i], out1); out1qstr(s); out1c('\n'); } ckfree(vars); INTON; return 0; } /* * The export and readonly commands. */ int exportcmd(int argc __unused, char **argv) { struct var **vpp; struct var *vp; char **ap; char *name; char *p; char *cmdname; int ch, values; int flag = argv[0][0] == 'r'? VREADONLY : VEXPORT; cmdname = argv[0]; values = 0; while ((ch = nextopt("p")) != '\0') { switch (ch) { case 'p': values = 1; break; } } if (values && *argptr != NULL) error("-p requires no arguments"); if (*argptr != NULL) { for (ap = argptr; (name = *ap) != NULL; ap++) { if ((p = strchr(name, '=')) != NULL) { p++; } else { vp = find_var(name, NULL, NULL); if (vp != NULL) { vp->flags |= flag; if ((vp->flags & VEXPORT) && localevar(vp->text)) { change_env(vp->text, 1); (void) setlocale(LC_ALL, ""); updatecharset(); } continue; } } setvar(name, p, flag); } } else { for (vpp = vartab ; vpp < vartab + VTABSIZE ; vpp++) { for (vp = *vpp ; vp ; vp = vp->next) { if (vp->flags & flag) { if (values) { /* * Skip improper variable names * so the output remains usable * as shell input. */ if (!isassignment(vp->text)) continue; out1str(cmdname); out1c(' '); } if (values && !(vp->flags & VUNSET)) { outbin(vp->text, vp->name_len + 1, out1); out1qstr(vp->text + vp->name_len + 1); } else outbin(vp->text, vp->name_len, out1); out1c('\n'); } } } } return 0; } /* * The "local" command. */ int localcmd(int argc __unused, char **argv __unused) { char *name; nextopt(""); if (! in_function()) error("Not in a function"); while ((name = *argptr++) != NULL) { mklocal(name); } return 0; } /* * Make a variable a local variable. When a variable is made local, it's * value and flags are saved in a localvar structure. The saved values * will be restored when the shell function returns. We handle the name * "-" as a special case. */ void mklocal(char *name) { struct localvar *lvp; struct var **vpp; struct var *vp; INTOFF; lvp = ckmalloc(sizeof (struct localvar)); if (name[0] == '-' && name[1] == '\0') { lvp->text = ckmalloc(sizeof optval); memcpy(lvp->text, optval, sizeof optval); vp = NULL; } else { vp = find_var(name, &vpp, NULL); if (vp == NULL) { if (strchr(name, '=')) setvareq(savestr(name), VSTRFIXED | VNOLOCAL); else setvar(name, NULL, VSTRFIXED | VNOLOCAL); vp = *vpp; /* the new variable */ lvp->text = NULL; lvp->flags = VUNSET; } else { lvp->text = vp->text; lvp->flags = vp->flags; vp->flags |= VSTRFIXED|VTEXTFIXED; if (name[vp->name_len] == '=') setvareq(savestr(name), VNOLOCAL); } } lvp->vp = vp; lvp->next = localvars; localvars = lvp; INTON; } /* * Called after a function returns. */ void poplocalvars(void) { struct localvar *lvp; struct var *vp; + int islocalevar; INTOFF; while ((lvp = localvars) != NULL) { localvars = lvp->next; vp = lvp->vp; if (vp == NULL) { /* $- saved */ memcpy(optval, lvp->text, sizeof optval); ckfree(lvp->text); optschanged(); } else if ((lvp->flags & (VUNSET|VSTRFIXED)) == VUNSET) { (void)unsetvar(vp->text); } else { + islocalevar = (vp->flags | lvp->flags) & VEXPORT && + localevar(lvp->text); if ((vp->flags & VTEXTFIXED) == 0) ckfree(vp->text); vp->flags = lvp->flags; vp->text = lvp->text; + if (vp->func) + (*vp->func)(vp->text + vp->name_len + 1); + if (islocalevar) { + change_env(vp->text, vp->flags & VEXPORT && + (vp->flags & VUNSET) == 0); + setlocale(LC_ALL, ""); + updatecharset(); + } } ckfree(lvp); } INTON; } int setvarcmd(int argc, char **argv) { if (argc <= 2) return unsetcmd(argc, argv); else if (argc == 3) setvar(argv[1], argv[2], 0); else error("too many arguments"); return 0; } /* * The unset builtin command. */ int unsetcmd(int argc __unused, char **argv __unused) { char **ap; int i; int flg_func = 0; int flg_var = 0; int ret = 0; while ((i = nextopt("vf")) != '\0') { if (i == 'f') flg_func = 1; else flg_var = 1; } if (flg_func == 0 && flg_var == 0) flg_var = 1; INTOFF; for (ap = argptr; *ap ; ap++) { if (flg_func) ret |= unsetfunc(*ap); if (flg_var) ret |= unsetvar(*ap); } INTON; return ret; } /* * Unset the specified variable. * Called with interrupts off. */ int unsetvar(const char *s) { struct var **vpp; struct var *vp; vp = find_var(s, &vpp, NULL); if (vp == NULL) return (0); if (vp->flags & VREADONLY) return (1); if (vp->text[vp->name_len + 1] != '\0') setvar(s, "", 0); if ((vp->flags & VEXPORT) && localevar(vp->text)) { change_env(s, 0); setlocale(LC_ALL, ""); updatecharset(); } vp->flags &= ~VEXPORT; vp->flags |= VUNSET; if ((vp->flags & VSTRFIXED) == 0) { if ((vp->flags & VTEXTFIXED) == 0) ckfree(vp->text); *vpp = vp->next; ckfree(vp); } return (0); } /* * Returns true if the two strings specify the same variable. The first * variable name is terminated by '='; the second may be terminated by * either '=' or '\0'. */ static int varequal(const char *p, const char *q) { while (*p == *q++) { if (*p++ == '=') return 1; } if (*p == '=' && *(q - 1) == '\0') return 1; return 0; } /* * Search for a variable. * 'name' may be terminated by '=' or a NUL. * vppp is set to the pointer to vp, or the list head if vp isn't found * lenp is set to the number of characters in 'name' */ static struct var * find_var(const char *name, struct var ***vppp, int *lenp) { unsigned int hashval; int len; struct var *vp, **vpp; const char *p = name; hashval = 0; while (*p && *p != '=') hashval = 2 * hashval + (unsigned char)*p++; len = p - name; if (lenp) *lenp = len; vpp = &vartab[hashval % VTABSIZE]; if (vppp) *vppp = vpp; for (vp = *vpp ; vp ; vpp = &vp->next, vp = *vpp) { if (vp->name_len != len) continue; if (memcmp(vp->text, name, len) != 0) continue; if (vppp) *vppp = vpp; return vp; } return NULL; } Index: projects/clang380-import/contrib/hyperv/tools/hv_kvp_daemon.c =================================================================== --- projects/clang380-import/contrib/hyperv/tools/hv_kvp_daemon.c (revision 293686) +++ projects/clang380-import/contrib/hyperv/tools/hv_kvp_daemon.c (revision 293687) @@ -1,1517 +1,1517 @@ /*- * Copyright (c) 2014 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_kvp.h" typedef uint8_t __u8; typedef uint16_t __u16; typedef uint32_t __u32; typedef uint64_t __u64; /* * ENUM Data */ enum key_index { FullyQualifiedDomainName = 0, IntegrationServicesVersion, /*This key is serviced in the kernel*/ NetworkAddressIPv4, NetworkAddressIPv6, OSBuildNumber, OSName, OSMajorVersion, OSMinorVersion, OSVersion, ProcessorArchitecture }; enum { IPADDR = 0, NETMASK, GATEWAY, DNS }; /* Global Variables */ /* * The structure for operation handlers. */ struct kvp_op_hdlr { int kvp_op_key; void (*kvp_op_init)(void); int (*kvp_op_exec)(struct hv_kvp_msg *kvp_op_msg, void *data); }; static struct kvp_op_hdlr kvp_op_hdlrs[HV_KVP_OP_COUNT]; /* OS information */ static const char *os_name = ""; static const char *os_major = ""; static const char *os_minor = ""; static const char *processor_arch; static const char *os_build; static const char *lic_version = "BSD Pre-Release version"; static struct utsname uts_buf; /* Global flags */ static int is_daemon = 1; static int is_debugging = 0; #define KVP_LOG(priority, format, args...) do { \ if (is_debugging == 1) { \ if (is_daemon == 1) \ syslog(priority, format, ## args); \ else \ printf(format, ## args); \ } else { \ if (priority < LOG_DEBUG) { \ if (is_daemon == 1) \ syslog(priority, format, ## args); \ else \ printf(format, ## args); \ } \ } \ } while(0) /* * For KVP pool file */ #define MAX_FILE_NAME 100 #define ENTRIES_PER_BLOCK 50 struct kvp_record { char key[HV_KVP_EXCHANGE_MAX_KEY_SIZE]; char value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE]; }; struct kvp_pool { int pool_fd; int num_blocks; struct kvp_record *records; int num_records; char fname[MAX_FILE_NAME]; }; static struct kvp_pool kvp_pools[HV_KVP_POOL_COUNT]; static void kvp_acquire_lock(int pool) { struct flock fl = { 0, 0, 0, F_WRLCK, SEEK_SET, 0 }; fl.l_pid = getpid(); if (fcntl(kvp_pools[pool].pool_fd, F_SETLKW, &fl) == -1) { KVP_LOG(LOG_ERR, "Failed to acquire the lock pool: %d", pool); exit(EXIT_FAILURE); } } static void kvp_release_lock(int pool) { struct flock fl = { 0, 0, 0, F_UNLCK, SEEK_SET, 0 }; fl.l_pid = getpid(); if (fcntl(kvp_pools[pool].pool_fd, F_SETLK, &fl) == -1) { perror("fcntl"); KVP_LOG(LOG_ERR, "Failed to release the lock pool: %d\n", pool); exit(EXIT_FAILURE); } } /* * Write in-memory copy of KVP to pool files */ static void kvp_update_file(int pool) { FILE *filep; size_t bytes_written; kvp_acquire_lock(pool); filep = fopen(kvp_pools[pool].fname, "w"); if (!filep) { kvp_release_lock(pool); KVP_LOG(LOG_ERR, "Failed to open file, pool: %d\n", pool); exit(EXIT_FAILURE); } bytes_written = fwrite(kvp_pools[pool].records, sizeof(struct kvp_record), kvp_pools[pool].num_records, filep); if (ferror(filep) || fclose(filep)) { kvp_release_lock(pool); KVP_LOG(LOG_ERR, "Failed to write file, pool: %d\n", pool); exit(EXIT_FAILURE); } kvp_release_lock(pool); } /* * Read KVPs from pool files and store in memory */ static void kvp_update_mem_state(int pool) { FILE *filep; size_t records_read = 0; struct kvp_record *record = kvp_pools[pool].records; struct kvp_record *readp; int num_blocks = kvp_pools[pool].num_blocks; int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK; kvp_acquire_lock(pool); filep = fopen(kvp_pools[pool].fname, "r"); if (!filep) { kvp_release_lock(pool); KVP_LOG(LOG_ERR, "Failed to open file, pool: %d\n", pool); exit(EXIT_FAILURE); } for ( ; ; ) { readp = &record[records_read]; records_read += fread(readp, sizeof(struct kvp_record), ENTRIES_PER_BLOCK * num_blocks, filep); if (ferror(filep)) { KVP_LOG(LOG_ERR, "Failed to read file, pool: %d\n", pool); exit(EXIT_FAILURE); } if (!feof(filep)) { /* * Have more data to read. Expand the memory. */ num_blocks++; record = realloc(record, alloc_unit * num_blocks); if (record == NULL) { KVP_LOG(LOG_ERR, "malloc failed\n"); exit(EXIT_FAILURE); } continue; } break; } kvp_pools[pool].num_blocks = num_blocks; kvp_pools[pool].records = record; kvp_pools[pool].num_records = records_read; fclose(filep); kvp_release_lock(pool); } static int kvp_file_init(void) { int fd; FILE *filep; size_t records_read; char *fname; struct kvp_record *record; struct kvp_record *readp; int num_blocks; int i; int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK; if (mkdir("/var/db/hyperv/pool", S_IRUSR | S_IWUSR | S_IROTH) < 0 && (errno != EEXIST && errno != EISDIR)) { KVP_LOG(LOG_ERR, " Failed to create /var/db/hyperv/pool\n"); exit(EXIT_FAILURE); } for (i = 0; i < HV_KVP_POOL_COUNT; i++) { fname = kvp_pools[i].fname; records_read = 0; num_blocks = 1; snprintf(fname, MAX_FILE_NAME, "/var/db/hyperv/pool/.kvp_pool_%d", i); fd = open(fname, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IROTH); if (fd == -1) { return (1); } filep = fopen(fname, "r"); if (!filep) { close(fd); return (1); } record = malloc(alloc_unit * num_blocks); if (record == NULL) { close(fd); fclose(filep); return (1); } for ( ; ; ) { readp = &record[records_read]; records_read += fread(readp, sizeof(struct kvp_record), ENTRIES_PER_BLOCK, filep); if (ferror(filep)) { KVP_LOG(LOG_ERR, "Failed to read file, pool: %d\n", i); exit(EXIT_FAILURE); } if (!feof(filep)) { /* * More data to read. */ num_blocks++; record = realloc(record, alloc_unit * num_blocks); if (record == NULL) { close(fd); fclose(filep); return (1); } continue; } break; } kvp_pools[i].pool_fd = fd; kvp_pools[i].num_blocks = num_blocks; kvp_pools[i].records = record; kvp_pools[i].num_records = records_read; fclose(filep); } return (0); } static int kvp_key_delete(int pool, __u8 *key, int key_size) { int i; int j, k; int num_records; struct kvp_record *record; KVP_LOG(LOG_DEBUG, "kvp_key_delete: pool = %d, " "key = %s\n", pool, key); /* Update in-memory state */ kvp_update_mem_state(pool); num_records = kvp_pools[pool].num_records; record = kvp_pools[pool].records; for (i = 0; i < num_records; i++) { if (memcmp(key, record[i].key, key_size)) { continue; } KVP_LOG(LOG_DEBUG, "Found delete key in pool %d.\n", pool); /* * We found a match at the end; Just update the number of * entries and we are done. */ if (i == num_records) { kvp_pools[pool].num_records--; kvp_update_file(pool); return (0); } /* * We found a match in the middle; Move the remaining * entries up. */ j = i; k = j + 1; for ( ; k < num_records; k++) { strcpy(record[j].key, record[k].key); strcpy(record[j].value, record[k].value); j++; } kvp_pools[pool].num_records--; kvp_update_file(pool); return (0); } KVP_LOG(LOG_DEBUG, "Not found delete key in pool %d.\n", pool); return (1); } static int kvp_key_add_or_modify(int pool, __u8 *key, __u32 key_size, __u8 *value, __u32 value_size) { int i; int num_records; struct kvp_record *record; int num_blocks; KVP_LOG(LOG_DEBUG, "kvp_key_add_or_modify: pool = %d, " "key = %s, value = %s\n,", pool, key, value); if ((key_size > HV_KVP_EXCHANGE_MAX_KEY_SIZE) || (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE)) { KVP_LOG(LOG_ERR, "kvp_key_add_or_modify: returning 1\n"); return (1); } /* Update the in-memory state. */ kvp_update_mem_state(pool); num_records = kvp_pools[pool].num_records; record = kvp_pools[pool].records; num_blocks = kvp_pools[pool].num_blocks; for (i = 0; i < num_records; i++) { if (memcmp(key, record[i].key, key_size)) { continue; } /* * Key exists. Just update the value and we are done. */ memcpy(record[i].value, value, value_size); kvp_update_file(pool); return (0); } /* * Key doesn't exist; Add a new KVP. */ if (num_records == (ENTRIES_PER_BLOCK * num_blocks)) { /* Increase the size of the recodrd array. */ record = realloc(record, sizeof(struct kvp_record) * ENTRIES_PER_BLOCK * (num_blocks + 1)); if (record == NULL) { return (1); } kvp_pools[pool].num_blocks++; } memcpy(record[i].value, value, value_size); memcpy(record[i].key, key, key_size); kvp_pools[pool].records = record; kvp_pools[pool].num_records++; kvp_update_file(pool); return (0); } static int kvp_get_value(int pool, __u8 *key, int key_size, __u8 *value, int value_size) { int i; int num_records; struct kvp_record *record; KVP_LOG(LOG_DEBUG, "kvp_get_value: pool = %d, key = %s\n,", pool, key); if ((key_size > HV_KVP_EXCHANGE_MAX_KEY_SIZE) || (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE)) { return (1); } /* Update the in-memory state first. */ kvp_update_mem_state(pool); num_records = kvp_pools[pool].num_records; record = kvp_pools[pool].records; for (i = 0; i < num_records; i++) { if (memcmp(key, record[i].key, key_size)) { continue; } /* Found the key */ memcpy(value, record[i].value, value_size); return (0); } return (1); } static int kvp_pool_enumerate(int pool, int idx, __u8 *key, int key_size, __u8 *value, int value_size) { struct kvp_record *record; KVP_LOG(LOG_DEBUG, "kvp_pool_enumerate: pool = %d, index = %d\n,", pool, idx); /* First update our in-memory state first. */ kvp_update_mem_state(pool); record = kvp_pools[pool].records; /* Index starts with 0 */ if (idx >= kvp_pools[pool].num_records) { return (1); } memcpy(key, record[idx].key, key_size); memcpy(value, record[idx].value, value_size); return (0); } static void kvp_get_os_info(void) { char *p; uname(&uts_buf); os_build = uts_buf.release; os_name = uts_buf.sysname; processor_arch = uts_buf.machine; /* * Win7 host expects the build string to be of the form: x.y.z * Strip additional information we may have. */ p = strchr(os_build, '-'); if (p) { *p = '\0'; } /* * We don't have any other information about the FreeBSD os. */ return; } /* * Given the interface name, return the MAC address. */ static char * kvp_if_name_to_mac(char *if_name) { char *mac_addr = NULL; struct ifaddrs *ifaddrs_ptr; struct ifaddrs *head_ifaddrs_ptr; struct sockaddr_dl *sdl; int status; status = getifaddrs(&ifaddrs_ptr); if (status >= 0) { head_ifaddrs_ptr = ifaddrs_ptr; do { sdl = (struct sockaddr_dl *)(uintptr_t)ifaddrs_ptr->ifa_addr; if ((sdl->sdl_type == IFT_ETHER) && (strcmp(ifaddrs_ptr->ifa_name, if_name) == 0)) { mac_addr = strdup(ether_ntoa((struct ether_addr *)(LLADDR(sdl)))); break; } } while ((ifaddrs_ptr = ifaddrs_ptr->ifa_next) != NULL); freeifaddrs(head_ifaddrs_ptr); } return (mac_addr); } /* * Given the MAC address, return the interface name. */ static char * kvp_mac_to_if_name(char *mac) { char *if_name = NULL; struct ifaddrs *ifaddrs_ptr; struct ifaddrs *head_ifaddrs_ptr; struct sockaddr_dl *sdl; int status; char *buf_ptr, *p; status = getifaddrs(&ifaddrs_ptr); if (status >= 0) { head_ifaddrs_ptr = ifaddrs_ptr; do { sdl = (struct sockaddr_dl *)(uintptr_t)ifaddrs_ptr->ifa_addr; if (sdl->sdl_type == IFT_ETHER) { buf_ptr = strdup(ether_ntoa((struct ether_addr *)(LLADDR(sdl)))); if (buf_ptr != NULL) { for (p = buf_ptr; *p != '\0'; p++) *p = toupper(*p); if (strncmp(buf_ptr, mac, strlen(mac)) == 0) { /* Caller will free the memory */ if_name = strdup(ifaddrs_ptr->ifa_name); free(buf_ptr); break; } else free(buf_ptr); } } } while ((ifaddrs_ptr = ifaddrs_ptr->ifa_next) != NULL); freeifaddrs(head_ifaddrs_ptr); } return (if_name); } static void kvp_process_ipconfig_file(char *cmd, char *config_buf, size_t len, size_t element_size, int offset) { char buf[256]; char *p; char *x; FILE *file; /* * First execute the command. */ file = popen(cmd, "r"); if (file == NULL) { return; } if (offset == 0) { memset(config_buf, 0, len); } while ((p = fgets(buf, sizeof(buf), file)) != NULL) { if ((len - strlen(config_buf)) < (element_size + 1)) { break; } x = strchr(p, '\n'); *x = '\0'; strlcat(config_buf, p, len); strlcat(config_buf, ";", len); } pclose(file); } static void kvp_get_ipconfig_info(char *if_name, struct hv_kvp_ipaddr_value *buffer) { char cmd[512]; char dhcp_info[128]; char *p; FILE *file; /* * Retrieve the IPV4 address of default gateway. */ snprintf(cmd, sizeof(cmd), "netstat -rn | grep %s | awk '/default/ {print $2 }'", if_name); /* * Execute the command to gather gateway IPV4 info. */ kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, (MAX_GATEWAY_SIZE * 2), INET_ADDRSTRLEN, 0); /* * Retrieve the IPV6 address of default gateway. */ snprintf(cmd, sizeof(cmd), "netstat -rn inet6 | grep %s | awk '/default/ {print $2 }", if_name); /* * Execute the command to gather gateway IPV6 info. */ kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, (MAX_GATEWAY_SIZE * 2), INET6_ADDRSTRLEN, 1); /* * we just invoke an external script to get the DNS info. * * Following is the expected format of the information from the script: * * ipaddr1 (nameserver1) * ipaddr2 (nameserver2) * . * . */ /* Scripts are stored in /usr/libexec/hyperv/ directory */ snprintf(cmd, sizeof(cmd), "%s", "sh /usr/libexec/hyperv/hv_get_dns_info"); /* * Execute the command to get DNS info. */ kvp_process_ipconfig_file(cmd, (char *)buffer->dns_addr, (MAX_IP_ADDR_SIZE * 2), INET_ADDRSTRLEN, 0); /* * Invoke an external script to get the DHCP state info. * The parameter to the script is the interface name. * Here is the expected output: * * Enabled: DHCP enabled. */ snprintf(cmd, sizeof(cmd), "%s %s", "sh /usr/libexec/hyperv/hv_get_dhcp_info", if_name); file = popen(cmd, "r"); if (file == NULL) { return; } p = fgets(dhcp_info, sizeof(dhcp_info), file); if (p == NULL) { pclose(file); return; } if (!strncmp(p, "Enabled", 7)) { buffer->dhcp_enabled = 1; } else{ buffer->dhcp_enabled = 0; } pclose(file); } static unsigned int hweight32(unsigned int *w) { unsigned int res = *w - ((*w >> 1) & 0x55555555); res = (res & 0x33333333) + ((res >> 2) & 0x33333333); res = (res + (res >> 4)) & 0x0F0F0F0F; res = res + (res >> 8); return ((res + (res >> 16)) & 0x000000FF); } static int kvp_process_ip_address(void *addrp, int family, char *buffer, int length, int *offset) { struct sockaddr_in *addr; struct sockaddr_in6 *addr6; int addr_length; char tmp[50]; const char *str; if (family == AF_INET) { addr = (struct sockaddr_in *)addrp; str = inet_ntop(family, &addr->sin_addr, tmp, 50); addr_length = INET_ADDRSTRLEN; } else { addr6 = (struct sockaddr_in6 *)addrp; str = inet_ntop(family, &addr6->sin6_addr.s6_addr, tmp, 50); addr_length = INET6_ADDRSTRLEN; } if ((length - *offset) < addr_length + 1) { return (HV_KVP_E_FAIL); } if (str == NULL) { strlcpy(buffer, "inet_ntop failed\n", length); return (HV_KVP_E_FAIL); } if (*offset == 0) { strlcpy(buffer, tmp, length); } else{ strlcat(buffer, tmp, length); } strlcat(buffer, ";", length); *offset += strlen(str) + 1; return (0); } static int kvp_get_ip_info(int family, char *if_name, int op, void *out_buffer, size_t length) { struct ifaddrs *ifap; struct ifaddrs *curp; int offset = 0; int sn_offset = 0; int error = 0; char *buffer; size_t buffer_length; struct hv_kvp_ipaddr_value *ip_buffer = NULL; char cidr_mask[5]; int weight; int i; unsigned int *w = NULL; char *sn_str; size_t sn_str_length; struct sockaddr_in6 *addr6; if (op == HV_KVP_OP_ENUMERATE) { buffer = out_buffer; buffer_length = length; } else { ip_buffer = out_buffer; buffer = (char *)ip_buffer->ip_addr; buffer_length = sizeof(ip_buffer->ip_addr); ip_buffer->addr_family = 0; } if (getifaddrs(&ifap)) { strlcpy(buffer, "getifaddrs failed\n", buffer_length); return (HV_KVP_E_FAIL); } curp = ifap; while (curp != NULL) { if (curp->ifa_addr == NULL) { curp = curp->ifa_next; continue; } if ((if_name != NULL) && (strncmp(curp->ifa_name, if_name, strlen(if_name)))) { /* * We want info about a specific interface; * just continue. */ curp = curp->ifa_next; continue; } /* * We support two address families: AF_INET and AF_INET6. * If family value is 0, we gather both supported * address families; if not we gather info on * the specified address family. */ if ((family != 0) && (curp->ifa_addr->sa_family != family)) { curp = curp->ifa_next; continue; } if ((curp->ifa_addr->sa_family != AF_INET) && (curp->ifa_addr->sa_family != AF_INET6)) { curp = curp->ifa_next; continue; } if (op == HV_KVP_OP_GET_IP_INFO) { /* * Get the info other than the IP address. */ if (curp->ifa_addr->sa_family == AF_INET) { ip_buffer->addr_family |= ADDR_FAMILY_IPV4; /* * Get subnet info. */ error = kvp_process_ip_address( curp->ifa_netmask, AF_INET, (char *) ip_buffer->sub_net, length, &sn_offset); if (error) { goto kvp_get_ip_info_ipaddr; } } else { ip_buffer->addr_family |= ADDR_FAMILY_IPV6; /* * Get subnet info in CIDR format. */ weight = 0; sn_str = (char *)ip_buffer->sub_net; sn_str_length = sizeof(ip_buffer->sub_net); addr6 = (struct sockaddr_in6 *)(uintptr_t) curp->ifa_netmask; w = (unsigned int *)(uintptr_t)addr6->sin6_addr.s6_addr; for (i = 0; i < 4; i++) { weight += hweight32(&w[i]); } snprintf(cidr_mask, sizeof(cidr_mask), "/%d", weight); if ((length - sn_offset) < (strlen(cidr_mask) + 1)) { goto kvp_get_ip_info_ipaddr; } if (sn_offset == 0) { strlcpy(sn_str, cidr_mask, sn_str_length); } else{ strlcat(sn_str, cidr_mask, sn_str_length); } strlcat((char *)ip_buffer->sub_net, ";", sn_str_length); sn_offset += strlen(sn_str) + 1; } /* * Collect other ip configuration info. */ kvp_get_ipconfig_info(if_name, ip_buffer); } kvp_get_ip_info_ipaddr: error = kvp_process_ip_address(curp->ifa_addr, curp->ifa_addr->sa_family, buffer, length, &offset); if (error) { goto kvp_get_ip_info_done; } curp = curp->ifa_next; } kvp_get_ip_info_done: freeifaddrs(ifap); return (error); } static int kvp_write_file(FILE *f, const char *s1, const char *s2, const char *s3) { int ret; ret = fprintf(f, "%s%s%s%s\n", s1, s2, "=", s3); if (ret < 0) { return (HV_KVP_E_FAIL); } return (0); } static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) { int error = 0; char if_file[128]; FILE *file; char cmd[512]; char *mac_addr; /* * FreeBSD - Configuration File */ snprintf(if_file, sizeof(if_file), "%s%s", "/var/db/hyperv", "hv_set_ip_data"); file = fopen(if_file, "w"); if (file == NULL) { KVP_LOG(LOG_ERR, "FreeBSD Failed to open config file\n"); return (HV_KVP_E_FAIL); } /* * Write out the MAC address. */ mac_addr = kvp_if_name_to_mac(if_name); if (mac_addr == NULL) { error = HV_KVP_E_FAIL; goto kvp_set_ip_info_error; } /* MAC Address */ error = kvp_write_file(file, "HWADDR", "", mac_addr); if (error) { goto kvp_set_ip_info_error; } /* Interface Name */ error = kvp_write_file(file, "IF_NAME", "", if_name); if (error) { goto kvp_set_ip_info_error; } /* IP Address */ error = kvp_write_file(file, "IP_ADDR", "", (char *)new_val->ip_addr); if (error) { goto kvp_set_ip_info_error; } /* Subnet Mask */ error = kvp_write_file(file, "SUBNET", "", (char *)new_val->sub_net); if (error) { goto kvp_set_ip_info_error; } /* Gateway */ error = kvp_write_file(file, "GATEWAY", "", (char *)new_val->gate_way); if (error) { goto kvp_set_ip_info_error; } /* DNS */ error = kvp_write_file(file, "DNS", "", (char *)new_val->dns_addr); if (error) { goto kvp_set_ip_info_error; } /* DHCP */ if (new_val->dhcp_enabled) { error = kvp_write_file(file, "DHCP", "", "1"); } else{ error = kvp_write_file(file, "DHCP", "", "0"); } if (error) { goto kvp_set_ip_info_error; } free(mac_addr); fclose(file); /* * Invoke the external script with the populated * configuration file. */ snprintf(cmd, sizeof(cmd), "%s %s", "sh /usr/libexec/hyperv/hv_set_ifconfig", if_file); system(cmd); return (0); kvp_set_ip_info_error: KVP_LOG(LOG_ERR, "Failed to write config file\n"); free(mac_addr); fclose(file); return (error); } static int kvp_get_domain_name(char *buffer, int length) { struct addrinfo hints, *info; int error = 0; gethostname(buffer, length); memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_INET; /* Get only ipv4 addrinfo. */ hints.ai_socktype = SOCK_STREAM; hints.ai_flags = AI_CANONNAME; error = getaddrinfo(buffer, NULL, &hints, &info); if (error != 0) { strlcpy(buffer, "getaddrinfo failed\n", length); return (error); } strlcpy(buffer, info->ai_canonname, length); freeaddrinfo(info); return (error); } static int kvp_op_getipinfo(struct hv_kvp_msg *op_msg, void *data __unused) { struct hv_kvp_ipaddr_value *ip_val; char *if_name; assert(op_msg != NULL); KVP_LOG(LOG_DEBUG, "In kvp_op_getipinfo.\n"); ip_val = &op_msg->body.kvp_ip_val; op_msg->hdr.error = HV_KVP_S_OK; if_name = kvp_mac_to_if_name((char *)ip_val->adapter_id); if (if_name == NULL) { /* No interface found with the mac address. */ op_msg->hdr.error = HV_KVP_E_FAIL; goto kvp_op_getipinfo_done; } op_msg->hdr.error = kvp_get_ip_info(0, if_name, HV_KVP_OP_GET_IP_INFO, ip_val, (MAX_IP_ADDR_SIZE * 2)); free(if_name); kvp_op_getipinfo_done: return(op_msg->hdr.error); } static int kvp_op_setipinfo(struct hv_kvp_msg *op_msg, void *data __unused) { struct hv_kvp_ipaddr_value *ip_val; char *if_name; assert(op_msg != NULL); KVP_LOG(LOG_DEBUG, "In kvp_op_setipinfo.\n"); ip_val = &op_msg->body.kvp_ip_val; op_msg->hdr.error = HV_KVP_S_OK; if_name = (char *)ip_val->adapter_id; if (if_name == NULL) { /* No adapter provided. */ op_msg->hdr.error = HV_KVP_GUID_NOTFOUND; goto kvp_op_setipinfo_done; } op_msg->hdr.error = kvp_set_ip_info(if_name, ip_val); kvp_op_setipinfo_done: return(op_msg->hdr.error); } static int kvp_op_setgetdel(struct hv_kvp_msg *op_msg, void *data) { struct kvp_op_hdlr *op_hdlr = (struct kvp_op_hdlr *)data; int error = 0; int op_pool; assert(op_msg != NULL); assert(op_hdlr != NULL); op_pool = op_msg->hdr.kvp_hdr.pool; op_msg->hdr.error = HV_KVP_S_OK; switch(op_hdlr->kvp_op_key) { case HV_KVP_OP_SET: if (op_pool == HV_KVP_POOL_AUTO) { /* Auto Pool is not writeable from host side. */ error = 1; KVP_LOG(LOG_ERR, "Ilegal to write to pool %d from host\n", op_pool); } else { error = kvp_key_add_or_modify(op_pool, op_msg->body.kvp_set.data.key, op_msg->body.kvp_set.data.key_size, op_msg->body.kvp_set.data.msg_value.value, op_msg->body.kvp_set.data.value_size); } break; case HV_KVP_OP_GET: error = kvp_get_value(op_pool, op_msg->body.kvp_get.data.key, op_msg->body.kvp_get.data.key_size, op_msg->body.kvp_get.data.msg_value.value, op_msg->body.kvp_get.data.value_size); break; case HV_KVP_OP_DELETE: if (op_pool == HV_KVP_POOL_AUTO) { /* Auto Pool is not writeable from host side. */ error = 1; KVP_LOG(LOG_ERR, "Ilegal to change pool %d from host\n", op_pool); } else { error = kvp_key_delete(op_pool, op_msg->body.kvp_delete.key, op_msg->body.kvp_delete.key_size); } break; default: break; } if (error != 0) op_msg->hdr.error = HV_KVP_S_CONT; return(error); } static int kvp_op_enumerate(struct hv_kvp_msg *op_msg, void *data __unused) { char *key_name, *key_value; int error = 0; int op_pool; int op; assert(op_msg != NULL); op = op_msg->hdr.kvp_hdr.operation; op_pool = op_msg->hdr.kvp_hdr.pool; op_msg->hdr.error = HV_KVP_S_OK; /* * If the pool is not HV_KVP_POOL_AUTO, read from the appropriate * pool and return the KVP according to the index requested. */ if (op_pool != HV_KVP_POOL_AUTO) { if (kvp_pool_enumerate(op_pool, op_msg->body.kvp_enum_data.index, op_msg->body.kvp_enum_data.data.key, HV_KVP_EXCHANGE_MAX_KEY_SIZE, op_msg->body.kvp_enum_data.data.msg_value.value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE)) { op_msg->hdr.error = HV_KVP_S_CONT; error = -1; } goto kvp_op_enumerate_done; } key_name = (char *)op_msg->body.kvp_enum_data.data.key; key_value = (char *)op_msg->body.kvp_enum_data.data.msg_value.value; switch (op_msg->body.kvp_enum_data.index) { case FullyQualifiedDomainName: kvp_get_domain_name(key_value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE); strcpy(key_name, "FullyQualifiedDomainName"); break; case IntegrationServicesVersion: strcpy(key_name, "IntegrationServicesVersion"); strlcpy(key_value, lic_version, HV_KVP_EXCHANGE_MAX_VALUE_SIZE); break; case NetworkAddressIPv4: kvp_get_ip_info(AF_INET, NULL, HV_KVP_OP_ENUMERATE, key_value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE); strcpy(key_name, "NetworkAddressIPv4"); break; case NetworkAddressIPv6: kvp_get_ip_info(AF_INET6, NULL, HV_KVP_OP_ENUMERATE, key_value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE); strcpy(key_name, "NetworkAddressIPv6"); break; case OSBuildNumber: strlcpy(key_value, os_build, HV_KVP_EXCHANGE_MAX_VALUE_SIZE); strcpy(key_name, "OSBuildNumber"); break; case OSName: strlcpy(key_value, os_name, HV_KVP_EXCHANGE_MAX_VALUE_SIZE); strcpy(key_name, "OSName"); break; case OSMajorVersion: strlcpy(key_value, os_major, HV_KVP_EXCHANGE_MAX_VALUE_SIZE); strcpy(key_name, "OSMajorVersion"); break; case OSMinorVersion: strlcpy(key_value, os_minor, HV_KVP_EXCHANGE_MAX_VALUE_SIZE); strcpy(key_name, "OSMinorVersion"); break; case OSVersion: strlcpy(key_value, os_build, HV_KVP_EXCHANGE_MAX_VALUE_SIZE); strcpy(key_name, "OSVersion"); break; case ProcessorArchitecture: strlcpy(key_value, processor_arch, HV_KVP_EXCHANGE_MAX_VALUE_SIZE); strcpy(key_name, "ProcessorArchitecture"); break; default: #ifdef DEBUG KVP_LOG(LOG_ERR, "Auto pool Index %d not found.\n", op_msg->body.kvp_enum_data.index); #endif op_msg->hdr.error = HV_KVP_S_CONT; error = -1; break; } kvp_op_enumerate_done: return(error); } /* * Load handler, and call init routine if provided. */ static int kvp_op_load(int key, void (*init)(void), int (*exec)(struct hv_kvp_msg *, void *)) { int error = 0; if (key < 0 || key >= HV_KVP_OP_COUNT) { KVP_LOG(LOG_ERR, "Operation key out of supported range\n"); error = -1; goto kvp_op_load_done; } kvp_op_hdlrs[key].kvp_op_key = key; kvp_op_hdlrs[key].kvp_op_init = init; kvp_op_hdlrs[key].kvp_op_exec = exec; if (kvp_op_hdlrs[key].kvp_op_init != NULL) kvp_op_hdlrs[key].kvp_op_init(); kvp_op_load_done: return(error); } /* * Initialize the operation hanlders. */ static int kvp_ops_init(void) { int i; /* Set the initial values. */ for (i = 0; i < HV_KVP_OP_COUNT; i++) { kvp_op_hdlrs[i].kvp_op_key = -1; kvp_op_hdlrs[i].kvp_op_init = NULL; kvp_op_hdlrs[i].kvp_op_exec = NULL; } return(kvp_op_load(HV_KVP_OP_GET, NULL, kvp_op_setgetdel) | kvp_op_load(HV_KVP_OP_SET, NULL, kvp_op_setgetdel) | kvp_op_load(HV_KVP_OP_DELETE, NULL, kvp_op_setgetdel) | kvp_op_load(HV_KVP_OP_ENUMERATE, kvp_get_os_info, kvp_op_enumerate) | kvp_op_load(HV_KVP_OP_GET_IP_INFO, NULL, kvp_op_getipinfo) | kvp_op_load(HV_KVP_OP_SET_IP_INFO, NULL, kvp_op_setipinfo)); } int main(int argc, char *argv[]) { struct hv_kvp_msg *hv_kvp_dev_buf; struct hv_kvp_msg *hv_msg; struct pollfd hv_kvp_poll_fd[1]; int op, pool; int hv_kvp_dev_fd, error, len, r; int ch; while ((ch = getopt(argc, argv, "dn")) != -1) { switch (ch) { case 'n': /* Run as regular process for debugging purpose. */ is_daemon = 0; break; case 'd': /* Generate debugging output */ is_debugging = 1; break; default: break; } } openlog("HV_KVP", 0, LOG_USER); /* Become daemon first. */ if (is_daemon == 1) daemon(1, 0); else KVP_LOG(LOG_DEBUG, "Run as regular process.\n"); KVP_LOG(LOG_INFO, "HV_KVP starting; pid is: %d\n", getpid()); /* Communication buffer hv_kvp_dev_buf */ hv_kvp_dev_buf = malloc(sizeof(*hv_kvp_dev_buf)); /* Buffer for daemon internal use */ hv_msg = malloc(sizeof(*hv_msg)); /* Memory allocation failed */ if (hv_kvp_dev_buf == NULL || hv_msg == NULL) { KVP_LOG(LOG_ERR, "Failed to allocate memory for hv buffer\n"); exit(EXIT_FAILURE); } /* Initialize op handlers */ if (kvp_ops_init() != 0) { KVP_LOG(LOG_ERR, "Failed to initizlize operation handlers\n"); exit(EXIT_FAILURE); } if (kvp_file_init()) { KVP_LOG(LOG_ERR, "Failed to initialize the pools\n"); exit(EXIT_FAILURE); } /* Open the Character Device */ hv_kvp_dev_fd = open("/dev/hv_kvp_dev", O_RDWR); if (hv_kvp_dev_fd < 0) { KVP_LOG(LOG_ERR, "open /dev/hv_kvp_dev failed; error: %d %s\n", errno, strerror(errno)); exit(EXIT_FAILURE); } /* Initialize the struct for polling the char device */ hv_kvp_poll_fd[0].fd = hv_kvp_dev_fd; hv_kvp_poll_fd[0].events = (POLLIN | POLLRDNORM); /* Register the daemon to the KVP driver */ memset(hv_kvp_dev_buf, 0, sizeof(*hv_kvp_dev_buf)); hv_kvp_dev_buf->hdr.kvp_hdr.operation = HV_KVP_OP_REGISTER; len = write(hv_kvp_dev_fd, hv_kvp_dev_buf, sizeof(*hv_kvp_dev_buf)); for (;;) { - r = poll (hv_kvp_poll_fd, 1, 100); + r = poll (hv_kvp_poll_fd, 1, INFTIM); KVP_LOG(LOG_DEBUG, "poll returned r = %d, revent = 0x%x\n", r, hv_kvp_poll_fd[0].revents); if (r == 0 || (r < 0 && errno == EAGAIN) || (r < 0 && errno == EINTR)) { /* Nothing to read */ continue; } if (r < 0) { /* * For pread return failure other than EAGAIN, * we want to exit. */ KVP_LOG(LOG_ERR, "Poll failed.\n"); perror("poll"); exit(EIO); } /* Read from character device */ len = pread(hv_kvp_dev_fd, hv_kvp_dev_buf, sizeof(*hv_kvp_dev_buf), 0); if (len < 0) { KVP_LOG(LOG_ERR, "Read failed.\n"); perror("pread"); exit(EIO); } if (len != sizeof(struct hv_kvp_msg)) { KVP_LOG(LOG_ERR, "read len is: %d\n", len); continue; } /* Copy hv_kvp_dev_buf to hv_msg */ memcpy(hv_msg, hv_kvp_dev_buf, sizeof(*hv_msg)); /* * We will use the KVP header information to pass back * the error from this daemon. So, first save the op * and pool info to local variables. */ op = hv_msg->hdr.kvp_hdr.operation; pool = hv_msg->hdr.kvp_hdr.pool; if (op < 0 || op >= HV_KVP_OP_COUNT || kvp_op_hdlrs[op].kvp_op_exec == NULL) { KVP_LOG(LOG_WARNING, "Unsupported operation OP = %d\n", op); hv_msg->hdr.error = HV_ERROR_NOT_SUPPORTED; } else { /* * Call the operateion handler's execution routine. */ error = kvp_op_hdlrs[op].kvp_op_exec(hv_msg, (void *)&kvp_op_hdlrs[op]); if (error != 0 && hv_msg->hdr.error != HV_KVP_S_CONT) KVP_LOG(LOG_WARNING, "Operation failed OP = %d, error = 0x%x\n", op, error); } /* * Send the value back to the kernel. The response is * already in the receive buffer. */ hv_kvp_done: len = pwrite(hv_kvp_dev_fd, hv_msg, sizeof(*hv_kvp_dev_buf), 0); if (len != sizeof(struct hv_kvp_msg)) { KVP_LOG(LOG_ERR, "write len is: %d\n", len); goto hv_kvp_done; } } } Index: projects/clang380-import/gnu/lib/libgcc/Makefile =================================================================== --- projects/clang380-import/gnu/lib/libgcc/Makefile (revision 293686) +++ projects/clang380-import/gnu/lib/libgcc/Makefile (revision 293687) @@ -1,369 +1,416 @@ # $FreeBSD$ GCCDIR= ${.CURDIR}/../../../contrib/gcc GCCLIB= ${.CURDIR}/../../../contrib/gcclibs +COMPILERRTDIR= ${.CURDIR}/../../../contrib/compiler-rt +UNWINDINCDIR= ${.CURDIR}/../../../contrib/llvm/projects/libunwind/include +UNWINDSRCDIR= ${.CURDIR}/../../../contrib/llvm/projects/libunwind/src SHLIB_NAME= libgcc_s.so.1 SHLIBDIR?= /lib .include # # libgcc is linked in last and thus cannot depend on ssp symbols coming # from earlier libraries. Disable stack protection for this library. # MK_SSP= no .include "${.CURDIR}/../../usr.bin/cc/Makefile.tgt" .if ${TARGET_CPUARCH} == "arm" CFLAGS+= -DTARGET_ARM_EABI .endif .PATH: ${GCCDIR}/config/${GCC_CPU} ${GCCDIR}/config ${GCCDIR} CFLAGS+= -DIN_GCC -DIN_LIBGCC2 -D__GCC_FLOAT_NOT_NEEDED \ -DHAVE_GTHR_DEFAULT \ -I${GCCLIB}/include \ -I${GCCDIR}/config -I${GCCDIR} -I. \ -I${.CURDIR}/../../usr.bin/cc/cc_tools LDFLAGS+= -nodefaultlibs LIBADD+= c OBJS= # added to below in various ways depending on TARGET_CPUARCH #--------------------------------------------------------------------------- # # Library members defined in libgcc2.c. # When upgrading GCC, obtain the following list from mklibgcc.in # LIB2FUNCS= _muldi3 _negdi2 _lshrdi3 _ashldi3 _ashrdi3 \ _cmpdi2 _ucmpdi2 \ _enable_execute_stack _trampoline __main _absvsi2 _absvdi2 _addvsi3 \ _addvdi3 _subvsi3 _subvdi3 _mulvsi3 _mulvdi3 _negvsi2 _negvdi2 _ctors \ _ffssi2 _ffsdi2 _clz _clzsi2 _clzdi2 _ctzsi2 _ctzdi2 _popcount_tab \ _popcountsi2 _popcountdi2 _paritysi2 _paritydi2 _powisf2 _powidf2 \ _powixf2 _powitf2 _mulsc3 _muldc3 _mulxc3 _multc3 _divsc3 _divdc3 \ _divxc3 _divtc3 _bswapsi2 _bswapdi2 .if ${COMPILER_TYPE} != "clang" || ${TARGET_CPUARCH} != "arm" LIB2FUNCS+= _clear_cache .endif # The floating-point conversion routines that involve a single-word integer. .for mode in sf df xf LIB2FUNCS+= _fixuns${mode}si .endfor # Likewise double-word routines. .if ${TARGET_CPUARCH} != "aarch64" && ${TARGET_CPUARCH} != "arm" # These are implemented in an ARM specific file but will not be filtered out .for mode in sf df xf tf LIB2FUNCS+= _fix${mode}di _fixuns${mode}di LIB2FUNCS+= _floatdi${mode} _floatundi${mode} .endfor .endif LIB2ADD = $(LIB2FUNCS_EXTRA) LIB2ADD_ST = $(LIB2FUNCS_STATIC_EXTRA) # Additional sources to handle exceptions; overridden by targets as needed. +.if ${MK_LLVM_LIBUNWIND} != "no" + +.PATH: ${COMPILERRTDIR}/lib/builtins +.PATH: ${UNWINDSRCDIR} +LIB2ADDEH = gcc_personality_v0.c \ + int_util.c \ + Unwind-EHABI.cpp \ + Unwind-sjlj.c \ + UnwindLevel1-gcc-ext.c \ + UnwindLevel1.c \ + UnwindRegistersRestore.S \ + UnwindRegistersSave.S \ + libunwind.cpp + +CFLAGS+= -I${UNWINDINCDIR} -I${.CURDIR} +.if empty(CXXFLAGS:M-std=*) +CXXFLAGS+= -std=c++11 +.endif +CXXFLAGS+= -fno-rtti + +.else # MK_LLVM_LIBUNWIND + +.if ${TARGET_CPUARCH} == "arm" +LIB2ADDEH = unwind-arm.c libunwind.S pr-support.c unwind-c.c +.else LIB2ADDEH = unwind-dw2.c unwind-dw2-fde-glibc.c unwind-sjlj.c gthr-gnat.c \ unwind-c.c +.endif + +.endif # MK_LLVM_LIBUNWIND + LIB2ADDEHSTATIC = $(LIB2ADDEH) LIB2ADDEHSHARED = $(LIB2ADDEH) # List of extra C and assembler files to add to static and shared libgcc2. # Assembler files should have names ending in `.asm'. LIB2FUNCS_EXTRA = # List of extra C and assembler files to add to static libgcc2. # Assembler files should have names ending in `.asm'. LIB2FUNCS_STATIC_EXTRA = # Defined in libgcc2.c, included only in the static library. # KAN: Excluded _sf_to_tf and _df_to_tf as TPBIT_FUNCS are not # built on any of our platforms. LIB2FUNCS_ST = _eprintf __gcc_bcmp FPBIT_FUNCS = _pack_sf _unpack_sf _addsub_sf _mul_sf _div_sf \ _fpcmp_parts_sf _compare_sf _eq_sf _ne_sf _gt_sf _ge_sf \ _lt_sf _le_sf _unord_sf _si_to_sf _sf_to_si _negate_sf _make_sf \ _sf_to_df _thenan_sf _sf_to_usi _usi_to_sf DPBIT_FUNCS = _pack_df _unpack_df _addsub_df _mul_df _div_df \ _fpcmp_parts_df _compare_df _eq_df _ne_df _gt_df _ge_df \ _lt_df _le_df _unord_df _si_to_df _df_to_si _negate_df _make_df \ _df_to_sf _thenan_df _df_to_usi _usi_to_df TPBIT_FUNCS = _pack_tf _unpack_tf _addsub_tf _mul_tf _div_tf \ _fpcmp_parts_tf _compare_tf _eq_tf _ne_tf _gt_tf _ge_tf \ _lt_tf _le_tf _unord_tf _si_to_tf _tf_to_si _negate_tf _make_tf \ _tf_to_df _tf_to_sf _thenan_tf _tf_to_usi _usi_to_tf # These might cause a divide overflow trap and so are compiled with # unwinder info. LIB2_DIVMOD_FUNCS = _divdi3 _moddi3 _udivdi3 _umoddi3 _udiv_w_sdiv _udivmoddi4 #----------------------------------------------------------------------- # # Platform specific bits. # When upgrading GCC, get the following definitions from config//t-* # .if ${TARGET_CPUARCH} == "arm" # from config/arm/t-strongarm-elf CFLAGS+= -Dinhibit_libc -fno-inline CFLAGS.clang+= -fheinous-gnu-extensions LIB1ASMSRC = lib1funcs.asm LIB1ASMFUNCS = _dvmd_tls _bb_init_func -LIB2ADDEH = unwind-arm.c libunwind.S pr-support.c unwind-c.c # Some compilers generate __aeabi_ functions libgcc_s is missing LIBADD+= compiler_rt .endif .if ${TARGET_CPUARCH} == mips LIB2FUNCS_EXTRA = floatunsidf.c floatunsisf.c # ABIs other than o32 need this .if ${TARGET_ARCH} != "mips" && ${TARGET_ARCH} != "mipsel" LIB2FUNCS_EXTRA+= floatdidf.c fixunsdfsi.c LIB2FUNCS_EXTRA+= floatdisf.c floatundidf.c LIB2FUNCS_EXTRA+= fixsfdi.c floatundisf.c LIB2FUNCS_EXTRA+= fixdfdi.c fixunssfsi.c .endif .endif .if ${TARGET_ARCH} == "powerpc" # from config/rs6000/t-ppccomm LIB2FUNCS_EXTRA = tramp.asm LIB2FUNCS_STATIC_EXTRA = eabi.asm .endif .if ${TARGET_ARCH} == "powerpc64" # from config/rs6000/t-ppccomm LIB2FUNCS_EXTRA = tramp.asm .endif .if ${TARGET_CPUARCH} == "sparc64" # from config/sparc/t-elf LIB1ASMSRC = lb1spc.asm LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 .endif #----------------------------------------------------------------------- # Remove any objects from LIB2FUNCS and LIB2_DIVMOD_FUNCS that are # defined as optimized assembly code in LIB1ASMFUNCS. .if defined(LIB1ASMFUNCS) .for sym in ${LIB1ASMFUNCS} LIB2FUNCS:= ${LIB2FUNCS:S/${sym}//g} LIB2_DIVMOD_FUNCS:= ${LIB2_DIVMOD_FUNCS:S/${sym}//g} .endfor .endif -COMMONHDRS= tm.h tconfig.h options.h unwind.h gthr-default.h +COMMONHDRS= tm.h tconfig.h options.h gthr-default.h +.if ${MK_LLVM_LIBUNWIND} == no +COMMONHDRS+= unwind.h +.endif #----------------------------------------------------------------------- # # Helpful shortcuts for compiler invocations. # HIDE = -fvisibility=hidden -DHIDE_EXPORTS CC_T = ${CC} -c ${CFLAGS} ${HIDE} -fPIC CC_P = ${CC} -c ${CFLAGS} ${HIDE} -p -fPIC CC_S = ${CC} -c ${CFLAGS} ${PICFLAG} -DSHARED +CXX_T = ${CXX} -c ${CXXFLAGS} ${HIDE} -fPIC +CXX_P = ${CXX} -c ${CXXFLAGS} ${HIDE} -p -fPIC +CXX_S = ${CXX} -c ${CXXFLAGS} ${PICFLAG} -DSHARED #----------------------------------------------------------------------- # # Functions from libgcc2.c # STD_CFLAGS = DIV_CFLAGS = -fexceptions -fnon-call-exceptions STD_FUNCS = ${LIB2FUNCS} DIV_FUNCS = ${LIB2_DIVMOD_FUNCS} STD_CFILE = libgcc2.c DIV_CFILE = libgcc2.c OBJ_GRPS = STD DIV #----------------------------------------------------------------------- # # Floating point emulation functions # .if ${TARGET_CPUARCH} == "armNOT_YET" || \ ${TARGET_CPUARCH} == "powerpc" || ${TARGET_CPUARCH} == "sparc64" FPBIT_CFLAGS = -DFINE_GRAINED_LIBRARIES -DFLOAT DPBIT_CFLAGS = -DFINE_GRAINED_LIBRARIES FPBIT_CFILE = config/fp-bit.c DPBIT_CFILE = config/fp-bit.c OBJ_GRPS += FPBIT DPBIT .endif #----------------------------------------------------------------------- # # Generic build rules for object groups defined above # .for T in ${OBJ_GRPS} ${T}_OBJS_T = ${${T}_FUNCS:S/$/.o/} ${T}_OBJS_P = ${${T}_FUNCS:S/$/.po/} ${T}_OBJS_S = ${${T}_FUNCS:S/$/.So/} OBJS += ${${T}_FUNCS:S/$/.o/} ${${T}_OBJS_T}: ${${T}_CFILE} ${COMMONHDRS} ${CC_T} ${${T}_CFLAGS} -DL${.PREFIX} -o ${.TARGET} ${.ALLSRC:M*.c} ${${T}_OBJS_P}: ${${T}_CFILE} ${COMMONHDRS} ${CC_P} ${${T}_CFLAGS} -DL${.PREFIX} -o ${.TARGET} ${.ALLSRC:M*.c} ${${T}_OBJS_S}: ${${T}_CFILE} ${COMMONHDRS} ${CC_S} ${${T}_CFLAGS} -DL${.PREFIX} -o ${.TARGET} ${.ALLSRC:M*.c} .endfor #----------------------------------------------------------------------- # # Extra objects coming from separate files # .if !empty(LIB2ADD) OBJS += ${LIB2ADD:R:S/$/.o/} SOBJS += ${LIB2ADD:R:S/$/.So/} POBJS += ${LIB2ADD:R:S/$/.po/} .endif #----------------------------------------------------------------------- # # Objects that should be in static library only. # SYMS_ST = ${LIB2FUNCS_ST} ${LIB2ADD_ST} STAT_OBJS_T = ${SYMS_ST:S/$/.o/} STAT_OBJS_P = ${SYMS_ST:S/$/.po/} STATICOBJS = ${SYMS_ST:S/$/.o/} ${STAT_OBJS_T}: ${STD_CFILE} ${COMMONHDRS} ${CC_T} -DL${.PREFIX} -o ${.TARGET} ${.ALLSRC:M*.c} ${STAT_OBJS_P}: ${STD_CFILE} ${COMMONHDRS} ${CC_P} -DL${.PREFIX} -o ${.TARGET} ${.ALLSRC:M*.c} #----------------------------------------------------------------------- # # Assembler files. # .if defined(LIB1ASMSRC) ASM_T = ${LIB1ASMFUNCS:S/$/.o/} ASM_P = ${LIB1ASMFUNCS:S/$/.po/} ASM_S = ${LIB1ASMFUNCS:S/$/.So/} ASM_V = ${LIB1ASMFUNCS:S/$/.vis/} OBJS += ${LIB1ASMFUNCS:S/$/.o/} ${ASM_T}: ${LIB1ASMSRC} ${.PREFIX}.vis ${CC} -x assembler-with-cpp -c ${CFLAGS} -DL${.PREFIX} \ -o ${.TARGET} -include ${.PREFIX}.vis ${.ALLSRC:N*.h:N*.vis} ${ASM_P}: ${LIB1ASMSRC} ${.PREFIX}.vis ${CC} -x assembler-with-cpp -p -c ${CFLAGS} -DL${.PREFIX} \ -o ${.TARGET} -include ${.PREFIX}.vis ${.ALLSRC:N*.h:N*.vis} ${ASM_S}: ${LIB1ASMSRC} ${CC} -x assembler-with-cpp -c ${PICFLAG} ${CFLAGS} -DL${.PREFIX} \ -o ${.TARGET} ${.ALLSRC:N*.h} ${ASM_V}: ${LIB1ASMSRC} ${CC} -x assembler-with-cpp -c ${CFLAGS} -DL${.PREFIX} \ -o ${.PREFIX}.vo ${.ALLSRC:N*.h} ( ${NM} -pg ${.PREFIX}.vo | \ awk 'NF == 3 && $$2 !~ /^[UN]$$/ { print "\t.hidden ", $$3 }'\ ) > ${.TARGET} CLEANFILES += ${ASM_V} ${ASM_V:R:S/$/.vo/} .endif #----------------------------------------------------------------------- # # Exception handling / unwinding support. # EH_OBJS_T = ${LIB2ADDEHSTATIC:R:S/$/.o/} EH_OBJS_P = ${LIB2ADDEHSTATIC:R:S/$/.po/} EH_OBJS_S = ${LIB2ADDEHSHARED:R:S/$/.So/} EH_CFLAGS = -fexceptions -D__GLIBC__=3 -DElfW=__ElfN SOBJS += ${EH_OBJS_S} -.for _src in ${LIB2ADDEHSTATIC} +.for _src in ${LIB2ADDEHSTATIC:M*.c} ${_src:R:S/$/.o/}: ${_src} ${COMMONHDRS} ${CC_T} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC} ${_src:R:S/$/.po/}: ${_src} ${COMMONHDRS} ${CC_P} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC} .endfor -.for _src in ${LIB2ADDEHSHARED} +.for _src in ${LIB2ADDEHSTATIC:M*.cpp} +${_src:R:S/$/.o/}: ${_src} ${COMMONHDRS} + ${CXX_T} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC} +${_src:R:S/$/.po/}: ${_src} ${COMMONHDRS} + ${CXX_P} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC} +.endfor +.for _src in ${LIB2ADDEHSHARED:M*.c} ${_src:R:S/$/.So/}: ${_src} ${COMMONHDRS} ${CC_S} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC} +.endfor +.for _src in ${LIB2ADDEHSHARED:M*.cpp} +${_src:R:S/$/.So/}: ${_src} ${COMMONHDRS} + ${CXX_S} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC} .endfor #----------------------------------------------------------------------- # # Generated headers # ${COMMONHDRS}: ${.CURDIR}/../../usr.bin/cc/cc_tools/Makefile (cd ${.CURDIR}; ${MAKE} -f ${.ALLSRC} MFILE=${.ALLSRC} GCCDIR=${GCCDIR} ${.TARGET}) CLEANFILES += ${COMMONHDRS} CLEANFILES += cs-*.h option* #----------------------------------------------------------------------- # # Build symbol version map # SHLIB_MKMAP = ${GCCDIR}/mkmap-symver.awk SHLIB_MKMAP_OPTS = SHLIB_MAPFILES = ${GCCDIR}/libgcc-std.ver .if ${TARGET_CPUARCH} == "arm" SHLIB_MAPFILES += ${GCCDIR}/config/arm/libgcc-bpabi.ver .endif VERSION_MAP = libgcc.map libgcc.map: ${SHLIB_MKMAP} ${SHLIB_MAPFILES} ${SOBJS} ${OBJS:R:S/$/.So/} ( ${NM} -pg ${SOBJS};echo %% ; \ cat ${SHLIB_MAPFILES} \ | sed -e '/^[ ]*#/d' \ -e 's/^%\(if\|else\|elif\|endif\|define\)/#\1/' \ | ${CC} ${CFLAGS} -E -xassembler-with-cpp -; \ ) | awk -f ${SHLIB_MKMAP} ${SHLIB_MKMAP_OPTS} > ${.TARGET} CLEANFILES += libgcc.map #----------------------------------------------------------------------- # # Build additional static libgcc_eh[_p].a libraries. # libgcc_eh.a: ${EH_OBJS_T} @${ECHO} building static gcc_eh library @rm -f ${.TARGET} @${AR} ${ARFLAGS} ${.TARGET} `lorder ${EH_OBJS_T} | tsort -q` ${RANLIB} ${RANLIBFLAGS} ${.TARGET} _LIBS+= libgcc_eh.a .if ${MK_PROFILE} != "no" libgcc_eh_p.a: ${EH_OBJS_P} @${ECHO} building profiled gcc_eh library @rm -f ${.TARGET} @${AR} ${ARFLAGS} ${.TARGET} `lorder ${EH_OBJS_P} | tsort -q` ${RANLIB} ${RANLIBFLAGS} ${.TARGET} _LIBS+= libgcc_eh_p.a .endif _libinstall: _lib-eh-install _lib-eh-install: .if ${MK_INSTALLLIB} != "no" ${INSTALL} -C -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \ ${_INSTALLFLAGS} libgcc_eh.a ${DESTDIR}${LIBDIR} .endif .if ${MK_PROFILE} != "no" ${INSTALL} -C -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \ ${_INSTALLFLAGS} libgcc_eh_p.a ${DESTDIR}${LIBDIR} .endif CLEANFILES+= libgcc_eh.a libgcc_eh_p.a ${EH_OBJS_T} ${EH_OBJS_P} .include .SUFFIXES: .vis .vo Index: projects/clang380-import/gnu/lib =================================================================== --- projects/clang380-import/gnu/lib (revision 293686) +++ projects/clang380-import/gnu/lib (revision 293687) Property changes on: projects/clang380-import/gnu/lib ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/gnu/lib:r292913-293685 Index: projects/clang380-import/include/limits.h =================================================================== --- projects/clang380-import/include/limits.h (revision 293686) +++ projects/clang380-import/include/limits.h (revision 293687) @@ -1,144 +1,147 @@ /*- * Copyright (c) 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)limits.h 8.2 (Berkeley) 1/4/94 * $FreeBSD$ */ #ifndef _LIMITS_H_ #define _LIMITS_H_ #include #if __POSIX_VISIBLE #define _POSIX_ARG_MAX 4096 #define _POSIX_LINK_MAX 8 #define _POSIX_MAX_CANON 255 #define _POSIX_MAX_INPUT 255 #define _POSIX_NAME_MAX 14 #define _POSIX_PIPE_BUF 512 #define _POSIX_SSIZE_MAX 32767 #define _POSIX_STREAM_MAX 8 #if __POSIX_VISIBLE >= 200112 #define _POSIX_CHILD_MAX 25 #define _POSIX_NGROUPS_MAX 8 #define _POSIX_OPEN_MAX 20 #define _POSIX_PATH_MAX 256 #define _POSIX_TZNAME_MAX 6 #else #define _POSIX_CHILD_MAX 6 #define _POSIX_NGROUPS_MAX 0 #define _POSIX_OPEN_MAX 16 #define _POSIX_PATH_MAX 255 #define _POSIX_TZNAME_MAX 3 #endif +#if __POSIX_VISIBLE >= 200112 #define BC_BASE_MAX 99 /* max ibase/obase values in bc(1) */ #define BC_DIM_MAX 2048 /* max array elements in bc(1) */ #define BC_SCALE_MAX 99 /* max scale value in bc(1) */ #define BC_STRING_MAX 1000 /* max const string length in bc(1) */ +#define CHARCLASS_NAME_MAX 14 /* max character class name size */ #define COLL_WEIGHTS_MAX 10 /* max weights for order keyword */ #define EXPR_NEST_MAX 32 /* max expressions nested in expr(1) */ #define LINE_MAX 2048 /* max bytes in an input line */ #define RE_DUP_MAX 255 /* max RE's in interval notation */ #define _POSIX2_BC_BASE_MAX 99 #define _POSIX2_BC_DIM_MAX 2048 #define _POSIX2_BC_SCALE_MAX 99 #define _POSIX2_BC_STRING_MAX 1000 +#define _POSIX2_CHARCLASS_NAME_MAX 14 +#define _POSIX2_COLL_WEIGHTS_MAX 2 #define _POSIX2_EQUIV_CLASS_MAX 2 #define _POSIX2_EXPR_NEST_MAX 32 #define _POSIX2_LINE_MAX 2048 #define _POSIX2_RE_DUP_MAX 255 #endif +#endif #if __POSIX_VISIBLE >= 199309 #define _POSIX_AIO_LISTIO_MAX 2 #define _POSIX_AIO_MAX 1 #define _POSIX_DELAYTIMER_MAX 32 #define _POSIX_MQ_OPEN_MAX 8 #define _POSIX_MQ_PRIO_MAX 32 #define _POSIX_RTSIG_MAX 8 #define _POSIX_SEM_NSEMS_MAX 256 #define _POSIX_SEM_VALUE_MAX 32767 #define _POSIX_SIGQUEUE_MAX 32 #define _POSIX_TIMER_MAX 32 #define _POSIX_CLOCKRES_MIN 20000000 #endif #if __POSIX_VISIBLE >= 199506 #define _POSIX_THREAD_DESTRUCTOR_ITERATIONS 4 #define _POSIX_THREAD_KEYS_MAX 128 #define _POSIX_THREAD_THREADS_MAX 64 #endif #if __POSIX_VISIBLE >= 200112 #define _POSIX_HOST_NAME_MAX 255 #define _POSIX_LOGIN_NAME_MAX 9 #define _POSIX_SS_REPL_MAX 4 #define _POSIX_SYMLINK_MAX 255 #define _POSIX_SYMLOOP_MAX 8 #define _POSIX_TRACE_EVENT_NAME_MAX 30 #define _POSIX_TRACE_NAME_MAX 8 #define _POSIX_TRACE_SYS_MAX 8 #define _POSIX_TRACE_USER_EVENT_MAX 32 #define _POSIX_TTY_NAME_MAX 9 -#define _POSIX2_CHARCLASS_NAME_MAX 14 -#define _POSIX2_COLL_WEIGHTS_MAX 2 #define _POSIX_RE_DUP_MAX _POSIX2_RE_DUP_MAX #endif #if __XSI_VISIBLE || __POSIX_VISIBLE >= 200809 #define NL_ARGMAX 99 /* max # of position args for printf */ #define NL_MSGMAX 32767 #define NL_SETMAX 255 #define NL_TEXTMAX 2048 #endif #if __XSI_VISIBLE #define _XOPEN_IOV_MAX 16 #define _XOPEN_NAME_MAX 255 #define _XOPEN_PATH_MAX 1024 #define PASS_MAX 128 /* _PASSWORD_LEN from */ #define NL_LANGMAX 31 /* max LANG name length */ #define NL_NMAX 1 #endif #define MB_LEN_MAX 6 /* 31-bit UTF-8 */ #include #if __POSIX_VISIBLE #include #endif #endif /* !_LIMITS_H_ */ Index: projects/clang380-import/include =================================================================== --- projects/clang380-import/include (revision 293686) +++ projects/clang380-import/include (revision 293687) Property changes on: projects/clang380-import/include ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/include:r293280-293685 Index: projects/clang380-import/lib/csu/arm/Makefile =================================================================== --- projects/clang380-import/lib/csu/arm/Makefile (revision 293686) +++ projects/clang380-import/lib/csu/arm/Makefile (revision 293687) @@ -1,46 +1,47 @@ # $FreeBSD$ .PATH: ${.CURDIR}/../common SRCS= crt1.c crti.S crtn.S OBJS= ${SRCS:N*.h:R:S/$/.o/g} OBJS+= Scrt1.o gcrt1.o CFLAGS+= -I${.CURDIR}/../common \ -I${.CURDIR}/../../libc/include +STATIC_CFLAGS+= -mlong-calls FILES= ${OBJS} FILESMODE= ${LIBMODE} FILESOWN= ${LIBOWN} FILESGRP= ${LIBGRP} FILESDIR= ${LIBDIR} # These FILES qualify as libraries for the purpose of LIBRARIES_ONLY. .undef LIBRARIES_ONLY CLEANFILES= ${OBJS} CLEANFILES+= crt1.s gcrt1.s Scrt1.s # See the comment in lib/csu/common/crtbrand.c for the reason crt1.c is not # directly compiled to .o files. crt1.s: crt1.c - ${CC} ${CFLAGS} -S -o ${.TARGET} ${.CURDIR}/crt1.c + ${CC} ${CFLAGS} ${STATIC_CFLAGS} -S -o ${.TARGET} ${.CURDIR}/crt1.c sed ${SED_FIX_NOTE} ${.TARGET} crt1.o: crt1.s ${CC} ${ACFLAGS} -c -o ${.TARGET} crt1.s gcrt1.s: crt1.c - ${CC} ${CFLAGS} -DGCRT -S -o ${.TARGET} ${.CURDIR}/crt1.c + ${CC} ${CFLAGS} ${STATIC_CFLAGS} -DGCRT -S -o ${.TARGET} ${.CURDIR}/crt1.c sed ${SED_FIX_NOTE} ${.TARGET} gcrt1.o: gcrt1.s ${CC} ${ACFLAGS} -c -o ${.TARGET} gcrt1.s Scrt1.s: crt1.c ${CC} ${CFLAGS} -fPIC -DPIC -S -o ${.TARGET} ${.CURDIR}/crt1.c sed ${SED_FIX_NOTE} ${.TARGET} Scrt1.o: Scrt1.s ${CC} ${ACFLAGS} -c -o ${.TARGET} Scrt1.s .include Index: projects/clang380-import/lib/libc/sys/sendfile.2 =================================================================== --- projects/clang380-import/lib/libc/sys/sendfile.2 (revision 293686) +++ projects/clang380-import/lib/libc/sys/sendfile.2 (revision 293687) @@ -1,318 +1,377 @@ .\" Copyright (c) 2003, David G. Lawrence .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice unmodified, this list of conditions, and the following .\" disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd January 7, 2010 +.Dd January 7, 2016 .Dt SENDFILE 2 .Os .Sh NAME .Nm sendfile .Nd send a file to a socket .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/types.h .In sys/socket.h .In sys/uio.h .Ft int .Fo sendfile .Fa "int fd" "int s" "off_t offset" "size_t nbytes" .Fa "struct sf_hdtr *hdtr" "off_t *sbytes" "int flags" .Fc .Sh DESCRIPTION The .Fn sendfile system call -sends a regular file specified by descriptor +sends a regular file or shared memory object specified by descriptor .Fa fd out a stream socket specified by descriptor .Fa s . .Pp The .Fa offset argument specifies where to begin in the file. Should .Fa offset fall beyond the end of file, the system will return success and report 0 bytes sent as described below. The .Fa nbytes argument specifies how many bytes of the file should be sent, with 0 having the special meaning of send until the end of file has been reached. .Pp An optional header and/or trailer can be sent before and after the file data by specifying a pointer to a .Vt "struct sf_hdtr" , which has the following structure: .Pp .Bd -literal -offset indent -compact struct sf_hdtr { struct iovec *headers; /* pointer to header iovecs */ int hdr_cnt; /* number of header iovecs */ struct iovec *trailers; /* pointer to trailer iovecs */ int trl_cnt; /* number of trailer iovecs */ }; .Ed .Pp The .Fa headers and .Fa trailers pointers, if .Pf non- Dv NULL , point to arrays of .Vt "struct iovec" structures. See the .Fn writev system call for information on the iovec structure. The number of iovecs in these arrays is specified by .Fa hdr_cnt and .Fa trl_cnt . .Pp If .Pf non- Dv NULL , the system will write the total number of bytes sent on the socket to the variable pointed to by .Fa sbytes . .Pp -The +The least significant 16 bits of .Fa flags argument is a bitmap of these values: -.Bl -item -offset indent -.It -.Dv SF_NODISKIO . -This flag causes any -.Fn sendfile -call which would block on disk I/O to instead -return -.Er EBUSY . -Busy servers may benefit by transferring requests that would -block to a separate I/O worker thread. -.It -.Dv SF_MNOWAIT . -Do not wait for some kernel resource to become available, -in particular, -.Vt mbuf -and -.Vt sf_buf . -The flag does not make the -.Fn sendfile -syscall truly non-blocking, since other resources are still allocated -in a blocking fashion. -.It -.Dv SF_SYNC . +.Bl -tag -offset indent +.It Dv SF_NODISKIO +This flag causes .Nm +to return +.Er EBUSY +instead of blocking when a busy page is encountered. +This rare situation can happen if some other process is now working +with the same region of the file. +It is advised to retry the operation after a short period. +.Pp +Note that in older +.Fx +versions the +.Dv SF_NODISKIO +had slightly different notion. +The flag prevented +.Nm +to run I/O operations in case if an invalid (not cached) page is encountered, +thus avoiding blocking on I/O. +Starting with +.Fx 11 +.Nm +sending files off the +.Xr ffs 7 +filesystem doesn't block on I/O +(see +.Sx IMPLEMENTATION NOTES +), so the condition no longer applies. +However, it is safe if an application utilizes +.Dv SF_NODISKIO +and on +.Er EBUSY +performs the same action as it did in +older +.Fx +versions, e.g. +.Xr aio_read 2, +.Xr read 2 +or +.Nm +in a different context. +.It Dv SF_NOCACHE +The data sent to socket will not be cached by the virtual memory system, +and will be freed directly to the pool of free pages. +.It Dv SF_SYNC +.Nm sleeps until the network stack no longer references the VM pages of the file, making subsequent modifications to it safe. Please note that this is not a guarantee that the data has actually been sent. .El .Pp +The most significant 16 bits of +.Fa flags +specify amount of pages that +.Nm +may read ahead when reading the file. +A macro +.Fn SF_FLAGS +is provided to combine readahead amount and flags. +Example shows specifing readahead of 16 pages and +.Dv SF_NOCACHE +flag: +.Pp +.Bd -literal -offset indent -compact + SF_FLAGS(16, SF_NOCACHE) +.Ed +.Pp When using a socket marked for non-blocking I/O, .Fn sendfile may send fewer bytes than requested. In this case, the number of bytes successfully written is returned in .Fa *sbytes (if specified), and the error .Er EAGAIN is returned. .Sh IMPLEMENTATION NOTES The .Fx implementation of .Fn sendfile +doesn't block on disk I/O when it sends a file off the +.Xr ffs 7 +filesystem. +The syscall returns success before the actual I/O completes, and data +is put into the socket later unattended. +However, the order of data in the socket is preserved, so it is safe +to do further writes to the socket. +.Pp +The +.Fx +implementation of +.Fn sendfile is "zero-copy", meaning that it has been optimized so that copying of the file data is avoided. .Sh TUNING On some architectures, this system call internally uses a special .Fn sendfile buffer .Pq Vt "struct sf_buf" to handle sending file data to the client. If the sending socket is blocking, and there are not enough .Fn sendfile buffers available, .Fn sendfile will block and report a state of .Dq Li sfbufa . If the sending socket is non-blocking and there are not enough .Fn sendfile buffers available, the call will block and wait for the necessary buffers to become available before finishing the call. .Pp The number of .Vt sf_buf Ns 's allocated should be proportional to the number of nmbclusters used to send data to a client via .Fn sendfile . Tune accordingly to avoid blocking! Busy installations that make extensive use of .Fn sendfile may want to increase these values to be inline with their .Va kern.ipc.nmbclusters (see .Xr tuning 7 for details). .Pp The number of .Fn sendfile buffers available is determined at boot time by either the .Va kern.ipc.nsfbufs .Xr loader.conf 5 variable or the .Dv NSFBUFS kernel configuration tunable. The number of .Fn sendfile buffers scales with .Va kern.maxusers . The .Va kern.ipc.nsfbufsused and .Va kern.ipc.nsfbufspeak read-only .Xr sysctl 8 variables show current and peak .Fn sendfile buffers usage respectively. These values may also be viewed through .Nm netstat Fl m . .Pp If a value of zero is reported for .Va kern.ipc.nsfbufs , your architecture does not need to use .Fn sendfile buffers because their task can be efficiently performed by the generic virtual memory structures. .Sh RETURN VALUES .Rv -std sendfile .Sh ERRORS .Bl -tag -width Er .It Bq Er EAGAIN The socket is marked for non-blocking I/O and not all data was sent due to the socket buffer being filled. If specified, the number of bytes successfully sent will be returned in .Fa *sbytes . .It Bq Er EBADF The .Fa fd argument is not a valid file descriptor. .It Bq Er EBADF The .Fa s argument is not a valid socket descriptor. .It Bq Er EBUSY -Completing the entire transfer would have required disk I/O, so -it was aborted. -Partial data may have been sent. -(This error can only occur when +A busy page was encountered and .Dv SF_NODISKIO -is specified.) +had been specified. +Partial data may have been sent. .It Bq Er EFAULT An invalid address was specified for an argument. .It Bq Er EINTR A signal interrupted .Fn sendfile before it could be completed. If specified, the number of bytes successfully sent will be returned in .Fa *sbytes . .It Bq Er EINVAL The .Fa fd argument is not a regular file. .It Bq Er EINVAL The .Fa s argument is not a SOCK_STREAM type socket. .It Bq Er EINVAL The .Fa offset argument is negative. .It Bq Er EIO An error occurred while reading from .Fa fd . .It Bq Er ENOBUFS The system was unable to allocate an internal buffer. .It Bq Er ENOTCONN The .Fa s argument points to an unconnected socket. .It Bq Er ENOTSOCK The .Fa s argument is not a socket. .It Bq Er EOPNOTSUPP The file system for descriptor .Fa fd does not support .Fn sendfile . .It Bq Er EPIPE The socket peer has closed the connection. .El .Sh SEE ALSO .Xr netstat 1 , .Xr open 2 , .Xr send 2 , .Xr socket 2 , .Xr writev 2 , .Xr tuning 7 .Rs .%A K. Elmeleegy .%A A. Chanda .%A A. L. Cox .%A W. Zwaenepoel .%T A Portable Kernel Abstraction for Low-Overhead Ephemeral Mapping Management .%J The Proceedings of the 2005 USENIX Annual Technical Conference .%P pp 223-236 .%D 2005 .Re .Sh HISTORY The .Fn sendfile system call first appeared in .Fx 3.0 . This manual page first appeared in .Fx 3.1 . +In +.Fx 10 +support for sending shared memory descriptors had been introduced. +In +.Fx 11 +a non-blocking implementation had been introduced. .Sh AUTHORS -The +The initial implementation of .Fn sendfile system call and this manual page were written by .An David G. Lawrence Aq Mt dg@dglawrence.com . +The +.Fx 11 +implementation was written by +.An Gleb Smirnoff Aq Mt glebius@FreeBSD.org . Index: projects/clang380-import/lib/libc =================================================================== --- projects/clang380-import/lib/libc (revision 293686) +++ projects/clang380-import/lib/libc (revision 293687) Property changes on: projects/clang380-import/lib/libc ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/lib/libc:r293280-293685 Index: projects/clang380-import/lib/libstand/uuid_to_string.c =================================================================== --- projects/clang380-import/lib/libstand/uuid_to_string.c (revision 293686) +++ projects/clang380-import/lib/libstand/uuid_to_string.c (revision 293687) @@ -1,111 +1,111 @@ /*- * Copyright (c) 2015 M. Warner Losh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Note: some comments taken from lib/libc/uuid/uuid_to_string.c * Copyright (c) 2002,2005 Marcel Moolenaar * Copyright (c) 2002 Hiten Mahesh Pandya */ #include #include /* * Dump len characters into *buf from val as hex and update *buf */ static void tohex(char **buf, int len, uint32_t val) { static const char *hexstr = "0123456789abcdef"; char *walker = *buf; int i; - for (i = len - 1; i >= 0; i++) { + for (i = len - 1; i >= 0; i--) { walker[i] = hexstr[val & 0xf]; val >>= 4; } *buf = walker + len; } /* * uuid_to_string() - Convert a binary UUID into a string representation. * See also: * http://www.opengroup.org/onlinepubs/009629399/uuid_to_string.htm * * NOTE: The references given above do not have a status code for when * the string could not be allocated. The status code has been * taken from the Hewlett-Packard implementation. * * NOTE: we don't support u == NULL for a nil UUID, sorry. * * NOTE: The sequence field is in big-endian, while the time fields are in * native byte order. * * hhhhhhhh-hhhh-hhhh-bbbb-bbbbbbbbbbbb * 01234567-89ab-cdef-0123-456789abcdef */ void uuid_to_string(const uuid_t *u, char **s, uint32_t *status) { uuid_t nil; char *w; if (status != NULL) *status = uuid_s_ok; if (s == NULL) /* Regular version does this odd-ball behavior too */ return; w = *s = malloc(37); if (*s == NULL) { if (status != NULL) *status = uuid_s_no_memory; return; } if (u == NULL) { u = &nil; uuid_create_nil(&nil, NULL); } /* native */ tohex(&w, 8, u->time_low); *w++ = '-'; tohex(&w, 4, u->time_mid); *w++ = '-'; tohex(&w, 4, u->time_hi_and_version); *w++ = '-'; /* Big endian, so do a byte at a time */ tohex(&w, 2, u->clock_seq_hi_and_reserved); tohex(&w, 2, u->clock_seq_low); *w++ = '-'; tohex(&w, 2, u->node[0]); tohex(&w, 2, u->node[1]); tohex(&w, 2, u->node[2]); tohex(&w, 2, u->node[3]); tohex(&w, 2, u->node[4]); tohex(&w, 2, u->node[5]); - *w++ - '\0'; + *w++ = '\0'; } Index: projects/clang380-import/release/release.sh =================================================================== --- projects/clang380-import/release/release.sh (revision 293686) +++ projects/clang380-import/release/release.sh (revision 293687) @@ -1,406 +1,407 @@ #!/bin/sh #- # Copyright (c) 2013-2015 The FreeBSD Foundation # Copyright (c) 2013 Glen Barber # Copyright (c) 2011 Nathan Whitehorn # All rights reserved. # # Portions of this software were developed by Glen Barber # under sponsorship from the FreeBSD Foundation. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # release.sh: check out source trees, and build release components with # totally clean, fresh trees. # Based on release/generate-release.sh written by Nathan Whitehorn # # $FreeBSD$ # export PATH="/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin" VERSION=2 # Prototypes that can be redefined per-chroot or per-target. load_chroot_env() { } load_target_env() { } buildenv_setup() { } usage() { echo "Usage: $0 [-c release.conf]" exit 1 } # env_setup(): Set up the default build environment variables, such as the # CHROOTDIR, VCSCMD, SVNROOT, etc. This is called before the release.conf # file is sourced, if '-c ' is specified. env_setup() { # The directory within which the release will be built. CHROOTDIR="/scratch" RELENGDIR="$(dirname $(realpath ${0}))" # The default version control system command to obtain the sources. for _dir in /usr/bin /usr/local/bin; do for _svn in svn svnlite; do [ -x "${_dir}/${_svn}" ] && VCSCMD="${_dir}/${_svn}" [ ! -z "${VCSCMD}" ] && break 2 done done VCSCMD="${VCSCMD} checkout" # The default svn checkout server, and svn branches for src/, doc/, # and ports/. SVNROOT="svn://svn.FreeBSD.org/" SRCBRANCH="base/head@rHEAD" DOCBRANCH="doc/head@rHEAD" PORTBRANCH="ports/head@rHEAD" # Set for embedded device builds. EMBEDDEDBUILD= # Sometimes one needs to checkout src with --force svn option. # If custom kernel configs copied to src tree before checkout, e.g. SRC_FORCE_CHECKOUT= # The default make.conf and src.conf to use. Set to /dev/null # by default to avoid polluting the chroot(8) environment with # non-default settings. MAKE_CONF="/dev/null" SRC_CONF="/dev/null" # The number of make(1) jobs, defaults to the number of CPUs available # for buildworld, and half of number of CPUs available for buildkernel. WORLD_FLAGS="-j$(sysctl -n hw.ncpu)" KERNEL_FLAGS="-j$(( $(( $(sysctl -n hw.ncpu) + 1 )) / 2))" MAKE_FLAGS="-s" # The name of the kernel to build, defaults to GENERIC. KERNEL="GENERIC" # Set to non-empty value to disable checkout of doc/ and/or ports/. # Disabling ports/ checkout also forces NODOC to be set. NODOC= NOPORTS= # Set to non-empty value to build dvd1.iso as part of the release. WITH_DVD= WITH_COMPRESSED_IMAGES= # Set to non-empty value to build virtual machine images as part of # the release. WITH_VMIMAGES= WITH_COMPRESSED_VMIMAGES= XZ_THREADS=0 # Set to non-empty value to build virtual machine images for various # cloud providers as part of the release. WITH_CLOUDWARE= return 0 } # env_setup() # env_check(): Perform sanity tests on the build environment, such as ensuring # files/directories exist, as well as adding backwards-compatibility hacks if # necessary. This is called unconditionally, and overrides the defaults set # in env_setup() if '-c ' is specified. env_check() { chroot_build_release_cmd="chroot_build_release" # Fix for backwards-compatibility with release.conf that does not have # the trailing '/'. case ${SVNROOT} in *svn*) SVNROOT="${SVNROOT}/" ;; *) ;; esac # Prefix the branches with the SVNROOT for the full checkout URL. SRCBRANCH="${SVNROOT}${SRCBRANCH}" DOCBRANCH="${SVNROOT}${DOCBRANCH}" PORTBRANCH="${SVNROOT}${PORTBRANCH}" if [ -n "${EMBEDDEDBUILD}" ]; then WITH_DVD= WITH_COMPRESSED_IMAGES= NODOC=yes case ${EMBEDDED_TARGET}:${EMBEDDED_TARGET_ARCH} in arm:armv6) chroot_build_release_cmd="chroot_arm_armv6_build_release" ;; *) esac fi # If PORTS is set and NODOC is unset, force NODOC=yes because the ports # tree is required to build the documentation set. if [ -n "${NOPORTS}" ] && [ -z "${NODOC}" ]; then echo "*** NOTICE: Setting NODOC=1 since ports tree is required" echo " and NOPORTS is set." NODOC=yes fi # If NOPORTS and/or NODOC are unset, they must not pass to make as # variables. The release makefile verifies definedness of the # NOPORTS/NODOC variables instead of their values. DOCPORTS= if [ -n "${NOPORTS}" ]; then DOCPORTS="NOPORTS=yes " fi if [ -n "${NODOC}" ]; then DOCPORTS="${DOCPORTS}NODOC=yes" fi # The aggregated build-time flags based upon variables defined within # this file, unless overridden by release.conf. In most cases, these # will not need to be changed. CONF_FILES="__MAKE_CONF=${MAKE_CONF} SRCCONF=${SRC_CONF}" if [ -n "${TARGET}" ] && [ -n "${TARGET_ARCH}" ]; then ARCH_FLAGS="TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH}" else ARCH_FLAGS= fi # Force src checkout if configured FORCE_SRC_KEY= if [ -n "${SRC_FORCE_CHECKOUT}" ]; then FORCE_SRC_KEY="--force" fi if [ -z "${CHROOTDIR}" ]; then echo "Please set CHROOTDIR." exit 1 fi if [ $(id -u) -ne 0 ]; then echo "Needs to be run as root." exit 1 fi CHROOT_MAKEENV="${CHROOT_MAKEENV} \ MAKEOBJDIRPREFIX=${CHROOTDIR}/tmp/obj" CHROOT_WMAKEFLAGS="${MAKE_FLAGS} ${WORLD_FLAGS} ${CONF_FILES}" CHROOT_IMAKEFLAGS="${CONF_FILES}" CHROOT_DMAKEFLAGS="${CONF_FILES}" RELEASE_WMAKEFLAGS="${MAKE_FLAGS} ${WORLD_FLAGS} ${ARCH_FLAGS} \ ${CONF_FILES}" RELEASE_KMAKEFLAGS="${MAKE_FLAGS} ${KERNEL_FLAGS} \ KERNCONF=\"${KERNEL}\" ${ARCH_FLAGS} ${CONF_FILES}" RELEASE_RMAKEFLAGS="${ARCH_FLAGS} \ KERNCONF=\"${KERNEL}\" ${CONF_FILES} ${DOCPORTS} \ WITH_DVD=${WITH_DVD} WITH_VMIMAGES=${WITH_VMIMAGES} \ WITH_CLOUDWARE=${WITH_CLOUDWARE} XZ_THREADS=${XZ_THREADS}" return 0 } # env_check() # chroot_setup(): Prepare the build chroot environment for the release build. chroot_setup() { load_chroot_env mkdir -p ${CHROOTDIR}/usr if [ -z "${SRC_UPDATE_SKIP}" ]; then ${VCSCMD} ${FORCE_SRC_KEY} ${SRCBRANCH} ${CHROOTDIR}/usr/src fi if [ -z "${NODOC}" ] && [ -z "${DOC_UPDATE_SKIP}" ]; then ${VCSCMD} ${DOCBRANCH} ${CHROOTDIR}/usr/doc fi if [ -z "${NOPORTS}" ] && [ -z "${PORTS_UPDATE_SKIP}" ]; then ${VCSCMD} ${PORTBRANCH} ${CHROOTDIR}/usr/ports fi if [ -z "${CHROOTBUILD_SKIP}" ]; then cd ${CHROOTDIR}/usr/src env ${CHROOT_MAKEENV} make ${CHROOT_WMAKEFLAGS} buildworld env ${CHROOT_MAKEENV} make ${CHROOT_IMAKEFLAGS} installworld \ DESTDIR=${CHROOTDIR} env ${CHROOT_MAKEENV} make ${CHROOT_DMAKEFLAGS} distribution \ DESTDIR=${CHROOTDIR} fi return 0 } # chroot_setup() # extra_chroot_setup(): Prepare anything additional within the build # necessary for the release build. extra_chroot_setup() { mkdir -p ${CHROOTDIR}/dev mount -t devfs devfs ${CHROOTDIR}/dev [ -e /etc/resolv.conf ] && cp /etc/resolv.conf \ ${CHROOTDIR}/etc/resolv.conf # Run ldconfig(8) in the chroot directory so /var/run/ld-elf*.so.hints # is created. This is needed by ports-mgmt/pkg. eval chroot ${CHROOTDIR} /etc/rc.d/ldconfig forcerestart # If MAKE_CONF and/or SRC_CONF are set and not character devices # (/dev/null), copy them to the chroot. if [ -e ${MAKE_CONF} ] && [ ! -c ${MAKE_CONF} ]; then mkdir -p ${CHROOTDIR}/$(dirname ${MAKE_CONF}) cp ${MAKE_CONF} ${CHROOTDIR}/${MAKE_CONF} fi if [ -e ${SRC_CONF} ] && [ ! -c ${SRC_CONF} ]; then mkdir -p ${CHROOTDIR}/$(dirname ${SRC_CONF}) cp ${SRC_CONF} ${CHROOTDIR}/${SRC_CONF} fi if [ -d ${CHROOTDIR}/usr/ports ]; then # Trick the ports 'run-autotools-fixup' target to do the right # thing. _OSVERSION=$(chroot ${CHROOTDIR} /usr/bin/uname -U) REVISION=$(chroot ${CHROOTDIR} make -C /usr/src/release -V REVISION) BRANCH=$(chroot ${CHROOTDIR} make -C /usr/src/release -V BRANCH) UNAME_r=${REVISION}-${BRANCH} if [ -d ${CHROOTDIR}/usr/doc ] && [ -z "${NODOC}" ]; then PBUILD_FLAGS="OSVERSION=${_OSVERSION} BATCH=yes" PBUILD_FLAGS="${PBUILD_FLAGS} UNAME_r=${UNAME_r}" PBUILD_FLAGS="${PBUILD_FLAGS} OSREL=${REVISION}" chroot ${CHROOTDIR} make -C /usr/ports/textproc/docproj \ ${PBUILD_FLAGS} OPTIONS_UNSET="FOP IGOR" \ + FORCE_PKG_REGISTER=1 \ install clean distclean fi fi if [ ! -z "${EMBEDDEDPORTS}" ]; then for _PORT in ${EMBEDDEDPORTS}; do eval chroot ${CHROOTDIR} make -C /usr/ports/${_PORT} \ BATCH=1 FORCE_PKG_REGISTER=1 install clean distclean done fi buildenv_setup return 0 } # extra_chroot_setup() # chroot_build_target(): Build the userland and kernel for the build target. chroot_build_target() { load_target_env if [ ! -z "${EMBEDDEDBUILD}" ]; then RELEASE_WMAKEFLAGS="${RELEASE_WMAKEFLAGS} \ TARGET=${EMBEDDED_TARGET} \ TARGET_ARCH=${EMBEDDED_TARGET_ARCH}" RELEASE_KMAKEFLAGS="${RELEASE_KMAKEFLAGS} \ TARGET=${EMBEDDED_TARGET} \ TARGET_ARCH=${EMBEDDED_TARGET_ARCH}" fi eval chroot ${CHROOTDIR} make -C /usr/src ${RELEASE_WMAKEFLAGS} buildworld eval chroot ${CHROOTDIR} make -C /usr/src ${RELEASE_KMAKEFLAGS} buildkernel return 0 } # chroot_build_target # chroot_build_release(): Invoke the 'make release' target. chroot_build_release() { load_target_env if [ ! -z "${WITH_VMIMAGES}" ]; then if [ -z "${VMFORMATS}" ]; then VMFORMATS="$(eval chroot ${CHROOTDIR} \ make -C /usr/src/release -V VMFORMATS)" fi if [ -z "${VMSIZE}" ]; then VMSIZE="$(eval chroot ${CHROOTDIR} \ make -C /usr/src/release -V VMSIZE)" fi RELEASE_RMAKEFLAGS="${RELEASE_RMAKEFLAGS} \ VMFORMATS=\"${VMFORMATS}\" VMSIZE=${VMSIZE}" fi eval chroot ${CHROOTDIR} make -C /usr/src/release \ ${RELEASE_RMAKEFLAGS} release eval chroot ${CHROOTDIR} make -C /usr/src/release \ ${RELEASE_RMAKEFLAGS} install DESTDIR=/R \ WITH_COMPRESSED_IMAGES=${WITH_COMPRESSED_IMAGES} \ WITH_COMPRESSED_VMIMAGES=${WITH_COMPRESSED_VMIMAGES} return 0 } # chroot_build_release() # chroot_arm_armv6_build_release(): Create arm/armv6 SD card image. chroot_arm_armv6_build_release() { load_target_env eval chroot ${CHROOTDIR} make -C /usr/src/release obj if [ -e "${RELENGDIR}/tools/${EMBEDDED_TARGET}.subr" ]; then . "${RELENGDIR}/tools/${EMBEDDED_TARGET}.subr" fi [ ! -z "${RELEASECONF}" ] && . "${RELEASECONF}" WORLDDIR="$(eval chroot ${CHROOTDIR} make -C /usr/src/release -V WORLDDIR)" OBJDIR="$(eval chroot ${CHROOTDIR} make -C /usr/src/release -V .OBJDIR)" DESTDIR="${OBJDIR}/${KERNEL}" IMGBASE="${CHROOTDIR}/${OBJDIR}/${KERNEL}.img" OSRELEASE="$(eval chroot ${CHROOTDIR} make -C /usr/src/release \ TARGET=${EMBEDDED_TARGET} TARGET_ARCH=${EMBEDDED_TARGET_ARCH} \ -V OSRELEASE)" chroot ${CHROOTDIR} mkdir -p ${DESTDIR} chroot ${CHROOTDIR} truncate -s ${IMAGE_SIZE} ${IMGBASE##${CHROOTDIR}} export mddev=$(chroot ${CHROOTDIR} \ mdconfig -f ${IMGBASE##${CHROOTDIR}} ${MD_ARGS}) arm_create_disk arm_install_base arm_install_uboot mdconfig -d -u ${mddev} chroot ${CHROOTDIR} rmdir ${DESTDIR} mv ${IMGBASE} ${CHROOTDIR}/${OBJDIR}/${OSRELEASE}-${KERNEL}.img chroot ${CHROOTDIR} mkdir -p /R chroot ${CHROOTDIR} cp -p ${OBJDIR}/${OSRELEASE}-${KERNEL}.img \ /R/${OSRELEASE}-${KERNEL}.img chroot ${CHROOTDIR} xz -T ${XZ_THREADS} /R/${OSRELEASE}-${KERNEL}.img cd ${CHROOTDIR}/R && sha512 ${OSRELEASE}* \ > CHECKSUM.SHA512 cd ${CHROOTDIR}/R && sha256 ${OSRELEASE}* \ > CHECKSUM.SHA256 return 0 } # chroot_arm_armv6_build_release() # main(): Start here. main() { set -e # Everything must succeed env_setup while getopts c: opt; do case ${opt} in c) RELEASECONF="${OPTARG}" ;; \?) usage ;; esac done shift $(($OPTIND - 1)) if [ ! -z "${RELEASECONF}" ]; then if [ -e "${RELEASECONF}" ]; then . ${RELEASECONF} else echo "Nonexistent configuration file: ${RELEASECONF}" echo "Using default build environment." fi fi env_check trap "umount ${CHROOTDIR}/dev" EXIT # Clean up devfs mount on exit chroot_setup extra_chroot_setup chroot_build_target ${chroot_build_release_cmd} return 0 } # main() main "${@}" Index: projects/clang380-import/share/man/man4/ismt.4 =================================================================== --- projects/clang380-import/share/man/man4/ismt.4 (revision 293686) +++ projects/clang380-import/share/man/man4/ismt.4 (revision 293687) @@ -1,59 +1,59 @@ .\" .\" Copyright (c) 2014 Intel Corporation .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions, and the following disclaimer, .\" without modification. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of Intel Corporation nor the names of its .\" contributors may be used to endorse or promote products derived from .\" this software without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS .\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT .\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR .\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT .\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, .\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING .\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGES. .\" .\" ismt driver man page. .\" .\" Author: Jim Harris .\" .\" $FreeBSD$ .\" -.Dd May 9, 2014 +.Dd January 11, 2016 .Dt ISMT 4 .Os .Sh NAME .Nm ismt .Nd Intel SMBus Message Transport (SMBus 2.0) driver .Sh SYNOPSIS .Cd device pci .Cd device smbus .Cd device smb .Cd device ismt .Sh DESCRIPTION This driver provides access to the SMBus 2.0 controller device contained in the Intel Atom S1200 and C2000 CPUs. .Sh SEE ALSO .Xr smb 4 , .Xr smbus 4 .Sh HISTORY The .Nm driver first appeared in -.Fx 11.0 . +.Fx 10.3 . .Sh AUTHORS .An Jim Harris Aq Mt jimharris@FreeBSD.org Index: projects/clang380-import/share/man/man4 =================================================================== --- projects/clang380-import/share/man/man4 (revision 293686) +++ projects/clang380-import/share/man/man4 (revision 293687) Property changes on: projects/clang380-import/share/man/man4 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/share/man/man4:r293430-293685 Index: projects/clang380-import/share/man/man5/src.conf.5 =================================================================== --- projects/clang380-import/share/man/man5/src.conf.5 (revision 293686) +++ projects/clang380-import/share/man/man5/src.conf.5 (revision 293687) @@ -1,1506 +1,1527 @@ .\" DO NOT EDIT-- this file is automatically generated. .\" from FreeBSD: head/tools/build/options/makeman 292283 2015-12-15 18:42:30Z bdrewery .\" $FreeBSD$ -.Dd December 15, 2015 +.Dd January 9, 2016 .Dt SRC.CONF 5 .Os .Sh NAME .Nm src.conf .Nd "source build options" .Sh DESCRIPTION The .Nm file contains settings that will apply to every build involving the .Fx source tree; see .Xr build 7 . .Pp The .Nm file uses the standard makefile syntax. However, .Nm should not specify any dependencies to .Xr make 1 . Instead, .Nm is to set .Xr make 1 variables that control the aspects of how the system builds. .Pp The default location of .Nm is .Pa /etc/src.conf , though an alternative location can be specified in the .Xr make 1 variable .Va SRCCONF . Overriding the location of .Nm may be necessary if the system-wide settings are not suitable for a particular build. For instance, setting .Va SRCCONF to .Pa /dev/null effectively resets all build controls to their defaults. .Pp The only purpose of .Nm is to control the compilation of the .Fx source code, which is usually located in .Pa /usr/src . As a rule, the system administrator creates .Nm when the values of certain control variables need to be changed from their defaults. .Pp In addition, control variables can be specified for a particular build via the .Fl D option of .Xr make 1 or in its environment; see .Xr environ 7 . .Pp The environment of .Xr make 1 for the build can be controlled via the .Va SRC_ENV_CONF variable, which defaults to .Pa /etc/src-env.conf . Some examples that may only be set in this file are .Va WITH_DIRDEPS_BUILD , and .Va WITH_META_MODE as they are environment-only variables. Note that .Va MAKEOBJDIRPREFIX may be set here only when using .Va WITH_DIRDEPS_BUILD . .Pp The values of variables are ignored regardless of their setting; even if they would be set to .Dq Li FALSE or .Dq Li NO . Just the existence of an option will cause it to be honoured by .Xr make 1 . .Pp The following list provides a name and short description for variables that can be used for source builds. .Bl -tag -width indent .It Va WITHOUT_ACCT .\" from FreeBSD: head/tools/build/options/WITHOUT_ACCT 223201 2011-06-17 20:47:44Z ed Set to not build process accounting tools such as .Xr accton 8 and .Xr sa 8 . .It Va WITHOUT_ACPI .\" from FreeBSD: head/tools/build/options/WITHOUT_ACPI 156932 2006-03-21 07:50:50Z ru Set to not build .Xr acpiconf 8 , .Xr acpidump 8 and related programs. .It Va WITHOUT_AMD .\" from FreeBSD: head/tools/build/options/WITHOUT_AMD 183242 2008-09-21 22:02:26Z sam Set to not build .Xr amd 8 , and related programs. .It Va WITHOUT_APM .\" from FreeBSD: head/tools/build/options/WITHOUT_APM 183242 2008-09-21 22:02:26Z sam Set to not build .Xr apm 8 , .Xr apmd 8 and related programs. .It Va WITHOUT_ASSERT_DEBUG .\" from FreeBSD: head/tools/build/options/WITHOUT_ASSERT_DEBUG 162215 2006-09-11 13:55:27Z ru Set to compile programs and libraries without the .Xr assert 3 checks. .It Va WITHOUT_AT .\" from FreeBSD: head/tools/build/options/WITHOUT_AT 183242 2008-09-21 22:02:26Z sam Set to not build .Xr at 1 and related utilities. .It Va WITHOUT_ATM .\" from FreeBSD: head/tools/build/options/WITHOUT_ATM 156932 2006-03-21 07:50:50Z ru Set to not build programs and libraries related to ATM networking. .It Va WITHOUT_AUDIT .\" from FreeBSD: head/tools/build/options/WITHOUT_AUDIT 156932 2006-03-21 07:50:50Z ru Set to not build audit support into system programs. .It Va WITHOUT_AUTHPF .\" from FreeBSD: head/tools/build/options/WITHOUT_AUTHPF 156932 2006-03-21 07:50:50Z ru Set to not build .Xr authpf 8 . .It Va WITHOUT_AUTOFS .\" from FreeBSD: head/tools/build/options/WITHOUT_AUTOFS 277728 2015-01-26 07:15:49Z ngie Set to not build .Xr autofs 4 related programs, libraries, and kernel modules. .It Va WITH_AUTO_OBJ .\" from FreeBSD: head/tools/build/options/WITH_AUTO_OBJ 284708 2015-06-22 20:21:57Z sjg Enable automatic creation of objdirs. .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_BHYVE .\" from FreeBSD: head/tools/build/options/WITHOUT_BHYVE 277727 2015-01-26 06:44:48Z ngie Set to not build or install .Xr bhyve 8 , associated utilities, and examples. .Pp This option only affects amd64/amd64. .It Va WITHOUT_BINUTILS .\" from FreeBSD: head/tools/build/options/WITHOUT_BINUTILS 286332 2015-08-05 18:30:00Z emaste Set to not build or install binutils (as, ld, objcopy, and objdump ) as part of the normal system build. The resulting system cannot build programs from source. .Pp It is a default setting on arm64/aarch64. .It Va WITHOUT_BINUTILS_BOOTSTRAP .\" from FreeBSD: head/tools/build/options/WITHOUT_BINUTILS_BOOTSTRAP 264660 2014-04-18 17:03:58Z imp Set to not build binutils (as, c++-filt, gconv, ld, nm, objcopy, objdump, readelf, size and strip) as part of the bootstrap process. .Bf -symbolic The option does not work for build targets unless some alternative toolchain is provided. .Ef .Pp It is a default setting on arm64/aarch64. .It Va WITHOUT_BLUETOOTH .\" from FreeBSD: head/tools/build/options/WITHOUT_BLUETOOTH 156932 2006-03-21 07:50:50Z ru Set to not build Bluetooth related kernel modules, programs and libraries. .It Va WITHOUT_BOOT .\" from FreeBSD: head/tools/build/options/WITHOUT_BOOT 156932 2006-03-21 07:50:50Z ru Set to not build the boot blocks and loader. .It Va WITHOUT_BOOTPARAMD .\" from FreeBSD: head/tools/build/options/WITHOUT_BOOTPARAMD 278192 2015-02-04 10:19:32Z ngie Set to not build or install .Xr bootparamd 8 . .It Va WITHOUT_BOOTPD .\" from FreeBSD: head/tools/build/options/WITHOUT_BOOTPD 278192 2015-02-04 10:19:32Z ngie Set to not build or install .Xr bootpd 8 . .It Va WITHOUT_BSDINSTALL .\" from FreeBSD: head/tools/build/options/WITHOUT_BSDINSTALL 277677 2015-01-25 04:43:13Z ngie Set to not build .Xr bsdinstall 8 , .Xr sade 8 , and related programs. .It Va WITHOUT_BSD_CPIO .\" from FreeBSD: head/tools/build/options/WITHOUT_BSD_CPIO 179813 2008-06-16 05:48:15Z dougb Set to not build the BSD licensed version of cpio based on .Xr libarchive 3 . .It Va WITH_BSD_GREP .\" from FreeBSD: head/tools/build/options/WITH_BSD_GREP 222273 2011-05-25 01:04:12Z obrien Install BSD-licensed grep as '[ef]grep' instead of GNU grep. .It Va WITHOUT_BSNMP .\" from FreeBSD: head/tools/build/options/WITHOUT_BSNMP 183306 2008-09-23 16:15:42Z sam Set to not build or install .Xr bsnmpd 1 and related libraries and data files. .It Va WITHOUT_BZIP2 .\" from FreeBSD: head/tools/build/options/WITHOUT_BZIP2 174550 2007-12-12 16:43:17Z ru Set to not build contributed bzip2 software as a part of the base system. .Bf -symbolic The option has no effect yet. .Ef When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_BZIP2_SUPPORT .El .It Va WITHOUT_BZIP2_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_BZIP2_SUPPORT 166255 2007-01-26 10:19:08Z delphij Set to build some programs without optional bzip2 support. .It Va WITHOUT_CALENDAR .\" from FreeBSD: head/tools/build/options/WITHOUT_CALENDAR 156932 2006-03-21 07:50:50Z ru Set to not build .Xr calendar 1 . .It Va WITHOUT_CAPSICUM .\" from FreeBSD: head/tools/build/options/WITHOUT_CAPSICUM 229319 2012-01-02 21:57:58Z rwatson Set to not build Capsicum support into system programs. .It Va WITHOUT_CASPER .\" from FreeBSD: head/tools/build/options/WITHOUT_CASPER 258838 2013-12-02 08:21:28Z pjd Set to not build Casper program and related libraries. .It Va WITH_CCACHE_BUILD .\" from FreeBSD: head/tools/build/options/WITH_CCACHE_BUILD 290526 2015-11-08 00:50:18Z bdrewery Set to use .Xr ccache 1 for the build. No configuration is required except to install the .Sy devel/ccache package. Using with .Xr distcc 1 should set .Sy CCACHE_PREFIX=/usr/local/bin/distcc . The default cache directory of .Pa $HOME/.ccache will be used, which can be overridden by setting .Sy CCACHE_DIR . The .Sy CCACHE_COMPILERCHECK option defaults to .Sy content when using the in-tree bootstrap compiler, and .Sy mtime when using an external compiler. The .Sy CCACHE_CPP2 option is used for Clang but not GCC. ccache works best when combined with the .Sy WITH_FAST_DEPEND option. .Pp Sharing a cache between multiple work directories requires using a layout similar to .Pa /some/prefix/src .Pa /some/prefix/obj and an environment such as: .Bd -literal -offset indent CCACHE_BASEDIR='${SRCTOP:H}' MAKEOBJDIRPREFIX='${SRCTOP:H}/obj' .Ed .Pp See .Xr ccache 1 for more configuration options. .It Va WITHOUT_CCD .\" from FreeBSD: head/tools/build/options/WITHOUT_CCD 277678 2015-01-25 04:52:48Z ngie Set to not build .Xr geom_ccd 4 and related utilities. .It Va WITHOUT_CDDL .\" from FreeBSD: head/tools/build/options/WITHOUT_CDDL 163861 2006-11-01 09:02:11Z jb Set to not build code licensed under Sun's CDDL. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_CTF .It .Va WITHOUT_ZFS .El .It Va WITHOUT_CLANG .\" from FreeBSD: head/tools/build/options/WITHOUT_CLANG 264660 2014-04-18 17:03:58Z imp Set to not build the Clang C/C++ compiler during the regular phase of the build. .Pp It is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32 and sparc64/sparc64. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_CLANG_EXTRAS .It .Va WITHOUT_CLANG_FULL .El .It Va WITH_CLANG .\" from FreeBSD: head/tools/build/options/WITH_CLANG 264660 2014-04-18 17:03:58Z imp Set to build the Clang C/C++ compiler during the normal phase of the build. .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, arm64/aarch64, i386/i386, pc98/i386, powerpc/powerpc and powerpc/powerpc64. .It Va WITHOUT_CLANG_BOOTSTRAP .\" from FreeBSD: head/tools/build/options/WITHOUT_CLANG_BOOTSTRAP 273177 2014-10-16 18:28:11Z skreuzer Set to not build the Clang C/C++ compiler during the bootstrap phase of the build. You must enable either gcc or clang bootstrap to be able to build the system, unless an alternative compiler is provided via XCC. .Pp It is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITH_CLANG_BOOTSTRAP .\" from FreeBSD: head/tools/build/options/WITH_CLANG_BOOTSTRAP 264660 2014-04-18 17:03:58Z imp Set to build the Clang C/C++ compiler during the bootstrap phase of the build. .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, arm64/aarch64, i386/i386 and pc98/i386. .It Va WITH_CLANG_EXTRAS .\" from FreeBSD: head/tools/build/options/WITH_CLANG_EXTRAS 231057 2012-02-05 23:56:22Z dim Set to build additional clang and llvm tools, such as bugpoint. .It Va WITHOUT_CLANG_FULL .\" from FreeBSD: head/tools/build/options/WITHOUT_CLANG_FULL 246259 2013-02-02 22:28:29Z dim Set to avoid building the ARCMigrate, Rewriter and StaticAnalyzer components of the Clang C/C++ compiler. .Pp It is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32 and sparc64/sparc64. .It Va WITH_CLANG_FULL .\" from FreeBSD: head/tools/build/options/WITH_CLANG_FULL 246259 2013-02-02 22:28:29Z dim Set to build the ARCMigrate, Rewriter and StaticAnalyzer components of the Clang C/C++ compiler. .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, arm64/aarch64, i386/i386, pc98/i386, powerpc/powerpc and powerpc/powerpc64. .It Va WITHOUT_CLANG_IS_CC .\" from FreeBSD: head/tools/build/options/WITHOUT_CLANG_IS_CC 242629 2012-11-05 21:53:23Z brooks Set to install the GCC compiler as .Pa /usr/bin/cc , .Pa /usr/bin/c++ and .Pa /usr/bin/cpp . .Pp It is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITH_CLANG_IS_CC .\" from FreeBSD: head/tools/build/options/WITH_CLANG_IS_CC 235342 2012-05-12 16:12:36Z gjb Set to install the Clang C/C++ compiler as .Pa /usr/bin/cc , .Pa /usr/bin/c++ and .Pa /usr/bin/cpp . .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, arm64/aarch64, i386/i386 and pc98/i386. .It Va WITHOUT_CPP .\" from FreeBSD: head/tools/build/options/WITHOUT_CPP 156932 2006-03-21 07:50:50Z ru Set to not build .Xr cpp 1 . .It Va WITHOUT_CROSS_COMPILER .\" from FreeBSD: head/tools/build/options/WITHOUT_CROSS_COMPILER 275138 2014-11-26 20:43:09Z gjb Set to not build any cross compiler in the cross-tools stage of buildworld. If you are compiling a different version of .Fx than what is installed on the system, you will need to provide an alternate compiler with XCC to ensure success. If you are compiling with an identical version of .Fx to the host, this option may be safely used. This option may also be safe when the host version of .Fx is close to the sources being built, but all bets are off if there have been any changes to the toolchain between the versions. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_BINUTILS_BOOTSTRAP .It .Va WITHOUT_CLANG_BOOTSTRAP .It .Va WITHOUT_ELFTOOLCHAIN_BOOTSTRAP .It .Va WITHOUT_GCC_BOOTSTRAP .El .It Va WITHOUT_CRYPT .\" from FreeBSD: head/tools/build/options/WITHOUT_CRYPT 156932 2006-03-21 07:50:50Z ru Set to not build any crypto code. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_KERBEROS .It .Va WITHOUT_KERBEROS_SUPPORT .It .Va WITHOUT_OPENSSH .It .Va WITHOUT_OPENSSL .El .Pp When set, the following options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GSSAPI (unless .Va WITH_GSSAPI is set explicitly) .El .It Va WITH_CTF .\" from FreeBSD: head/tools/build/options/WITH_CTF 228159 2011-11-30 18:22:44Z fjoe Set to compile with CTF (Compact C Type Format) data. CTF data encapsulates a reduced form of debugging information similar to DWARF and the venerable stabs and is required for DTrace. .It Va WITHOUT_CTM .\" from FreeBSD: head/tools/build/options/WITHOUT_CTM 183242 2008-09-21 22:02:26Z sam Set to not build .Xr ctm 1 and related utilities. .It Va WITHOUT_CUSE .\" from FreeBSD: head/tools/build/options/WITHOUT_CUSE 270171 2014-08-19 15:40:26Z hselasky Set to not build CUSE-related programs and libraries. .It Va WITHOUT_CXX .\" from FreeBSD: head/tools/build/options/WITHOUT_CXX 281053 2015-04-03 23:55:04Z bdrewery Set to not build .Xr c++ 1 and related libraries. It will also prevent building of .Xr gperf 1 and .Xr devd 8 . When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_CLANG .It .Va WITHOUT_CLANG_EXTRAS .It .Va WITHOUT_CLANG_FULL .It .Va WITHOUT_GNUCXX .It .Va WITHOUT_GROFF .El .It Va WITHOUT_DEBUG_FILES .\" from FreeBSD: head/tools/build/options/WITHOUT_DEBUG_FILES 290059 2015-10-27 20:49:56Z emaste Set to avoid building or installing standalone debug files for each executable binary and shared library. .It Va WITHOUT_DICT .\" from FreeBSD: head/tools/build/options/WITHOUT_DICT 156932 2006-03-21 07:50:50Z ru Set to not build the Webster dictionary files. .It Va WITH_DIRDEPS_BUILD .\" from FreeBSD: head/tools/build/options/WITH_DIRDEPS_BUILD 290816 2015-11-14 03:24:48Z sjg Enable building in meta mode. This is an experimental build feature. For details see http://www.crufty.net/sjg/docs/freebsd-meta-mode.htm. .Pp The build is driven by dirdeps.mk using .Va DIRDEPS stored in Makefile.depend files found in each directory. .Pp The build can be started from anywhere, and behaves the same. The initial instance of .Xr make 1 recursively reads .Va DIRDEPS from Makefile.depend computing a graph of tree dependencies from the current origin. Setting .Va NO_DIRDEPS will skip checking dirdep dependencies and will only build in the current directory. .Pp As each target is made .Xr make 1 produces a meta file which is used to capture (and compare) the command line, as well as any command output. If .Xr filemon 4 is available the meta file will also capture a record of files used to produce the target by tracking syscalls. .Pp The build will hide commands ran unless .Va NO_SILENT is defined. .Pp When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITH_INSTALL_AS_USER .El .Pp When set, the following options are also in effect: .Pp .Bl -inset -compact .It Va WITH_AUTO_OBJ (unless .Va WITHOUT_AUTO_OBJ is set explicitly) .It Va WITH_META_MODE (unless .Va WITHOUT_META_MODE is set explicitly) .It Va WITH_STAGING (unless .Va WITHOUT_STAGING is set explicitly) .It Va WITH_STAGING_MAN (unless .Va WITHOUT_STAGING_MAN is set explicitly) .It Va WITH_STAGING_PROG (unless .Va WITHOUT_STAGING_PROG is set explicitly) .It Va WITH_SYSROOT (unless .Va WITHOUT_SYSROOT is set explicitly) .El .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITH_DIRDEPS_CACHE .\" from FreeBSD: head/tools/build/options/WITH_DIRDEPS_CACHE 290816 2015-11-14 03:24:48Z sjg Cache result of dirdeps.mk which can save significant time for subsequent builds. Depends on .Va WITH_DIRDEPS_BUILD . .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_DMAGENT .\" from FreeBSD: head/tools/build/options/WITHOUT_DMAGENT 262335 2014-02-22 13:05:23Z bapt Set to not build dma Mail Transport Agent .It Va WITHOUT_DOCCOMPRESS .\" from FreeBSD: head/tools/build/options/WITHOUT_DOCCOMPRESS 266752 2014-05-27 15:52:27Z gjb Set to not to install compressed system documentation. Only the uncompressed version will be installed. .It Va WITH_DTRACE_TESTS .\" from FreeBSD: head/tools/build/options/WITH_DTRACE_TESTS 286174 2015-08-02 00:37:33Z markj Set to build and install the DTrace test suite in .Pa /usr/tests/cddl/usr.sbin/dtrace . This test suite is considered experimental on architectures other than amd64/amd64 and running it may cause system instability. .It Va WITHOUT_DYNAMICROOT .\" from FreeBSD: head/tools/build/options/WITHOUT_DYNAMICROOT 156932 2006-03-21 07:50:50Z ru Set this if you do not want to link .Pa /bin and .Pa /sbin dynamically. .It Va WITHOUT_ED_CRYPTO .\" from FreeBSD: head/tools/build/options/WITHOUT_ED_CRYPTO 235660 2012-05-19 20:05:27Z marcel Set to build .Xr ed 1 without support for encryption/decryption. .It Va WITHOUT_EE .\" from FreeBSD: head/tools/build/options/WITHOUT_EE 277663 2015-01-25 00:03:44Z ngie Set to not build and install .Xr edit 1 , .Xr ee 1 , and related programs. .It Va WITH_EISA .\" from FreeBSD: head/tools/build/options/WITH_EISA 264654 2014-04-18 16:53:06Z imp Set to build EISA kernel modules. .It Va WITHOUT_ELFCOPY_AS_OBJCOPY .\" from FreeBSD: head/tools/build/options/WITHOUT_ELFCOPY_AS_OBJCOPY 286030 2015-07-29 18:45:38Z emaste Set to build and install .Xr objcopy 1 from GNU Binutils, instead of the one from ELF Tool Chain. .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, pc98/i386, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITH_ELFCOPY_AS_OBJCOPY .\" from FreeBSD: head/tools/build/options/WITH_ELFCOPY_AS_OBJCOPY 286030 2015-07-29 18:45:38Z emaste Set to build and install ELF Tool Chain's elfcopy as .Xr objcopy 1 , instead of the one from GNU Binutils. .Pp It is a default setting on arm64/aarch64. .It Va WITHOUT_EXAMPLES .\" from FreeBSD: head/tools/build/options/WITHOUT_EXAMPLES 156938 2006-03-21 09:06:24Z ru Set to avoid installing examples to .Pa /usr/share/examples/ . .It Va WITH_FAST_DEPEND .\" from FreeBSD: head/tools/build/options/WITH_FAST_DEPEND 290433 2015-11-06 04:45:29Z bdrewery Set to generate .Sy .depend files in the build during compilation instead of the historial .Xr mkdep 1 call during the "make depend" phase. .It Va WITHOUT_FDT .\" from FreeBSD: head/tools/build/options/WITHOUT_FDT 221539 2011-05-06 19:10:27Z ru Set to not build Flattened Device Tree support as part of the base system. This includes the device tree compiler (dtc) and libfdt support library. .It Va WITHOUT_FILE .\" from FreeBSD: head/tools/build/options/WITHOUT_FILE 278193 2015-02-04 10:24:40Z ngie Set to not build .Xr file 1 and related programs. .It Va WITHOUT_FINGER .\" from FreeBSD: head/tools/build/options/WITHOUT_FINGER 278192 2015-02-04 10:19:32Z ngie Set to not build or install .Xr finger 1 and .Xr fingerd 8 . .It Va WITHOUT_FLOPPY .\" from FreeBSD: head/tools/build/options/WITHOUT_FLOPPY 221540 2011-05-06 19:13:03Z ru Set to not build or install programs for operating floppy disk driver. .It Va WITHOUT_FMTREE .\" from FreeBSD: head/tools/build/options/WITHOUT_FMTREE 261299 2014-01-30 21:37:43Z brooks Set to not build and install .Pa /usr/sbin/fmtree . .It Va WITHOUT_FORMAT_EXTENSIONS .\" from FreeBSD: head/tools/build/options/WITHOUT_FORMAT_EXTENSIONS 250658 2013-05-15 13:04:10Z brooks Set to not enable .Fl fformat-extensions when compiling the kernel. Also disables all format checking. .It Va WITHOUT_FORTH .\" from FreeBSD: head/tools/build/options/WITHOUT_FORTH 156932 2006-03-21 07:50:50Z ru Set to build bootloaders without Forth support. .It Va WITHOUT_FP_LIBC .\" from FreeBSD: head/tools/build/options/WITHOUT_FP_LIBC 156932 2006-03-21 07:50:50Z ru Set to build .Nm libc without floating-point support. .It Va WITHOUT_FREEBSD_UPDATE .\" from FreeBSD: head/tools/build/options/WITHOUT_FREEBSD_UPDATE 183242 2008-09-21 22:02:26Z sam Set to not build .Xr freebsd-update 8 . .It Va WITHOUT_FTP .\" from FreeBSD: head/tools/build/options/WITHOUT_FTP 278192 2015-02-04 10:19:32Z ngie Set to not build or install .Xr ftp 1 and .Xr ftpd 8 . .It Va WITHOUT_GAMES .\" from FreeBSD: head/tools/build/options/WITHOUT_GAMES 156932 2006-03-21 07:50:50Z ru Set to not build games. .It Va WITHOUT_GCC .\" from FreeBSD: head/tools/build/options/WITHOUT_GCC 264660 2014-04-18 17:03:58Z imp Set to not build and install gcc and g++ as part of the normal build process. .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, arm64/aarch64, i386/i386 and pc98/i386. .It Va WITH_GCC .\" from FreeBSD: head/tools/build/options/WITH_GCC 255326 2013-09-06 20:49:48Z zeising Set to build and install gcc and g++. .Pp It is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_GCC_BOOTSTRAP .\" from FreeBSD: head/tools/build/options/WITHOUT_GCC_BOOTSTRAP 273177 2014-10-16 18:28:11Z skreuzer Set to not build gcc and g++ as part of the bootstrap process. You must enable either gcc or clang bootstrap to be able to build the system, unless an alternative compiler is provided via XCC. .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, arm64/aarch64, i386/i386 and pc98/i386. .It Va WITH_GCC_BOOTSTRAP .\" from FreeBSD: head/tools/build/options/WITH_GCC_BOOTSTRAP 264660 2014-04-18 17:03:58Z imp Set to build gcc and g++ as part of the bootstrap process. .Pp It is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_GCOV .\" from FreeBSD: head/tools/build/options/WITHOUT_GCOV 156932 2006-03-21 07:50:50Z ru Set to not build the .Xr gcov 1 tool. .It Va WITHOUT_GDB .\" from FreeBSD: head/tools/build/options/WITHOUT_GDB 156932 2006-03-21 07:50:50Z ru Set to not build .Xr gdb 1 . .Pp It is a default setting on arm64/aarch64. .It Va WITHOUT_GNU .\" from FreeBSD: head/tools/build/options/WITHOUT_GNU 174550 2007-12-12 16:43:17Z ru Set to not build contributed GNU software as a part of the base system. This option can be useful if the system built must not contain any code covered by the GNU Public License due to legal reasons. .Bf -symbolic The option has no effect yet. .Ef When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_GNU_SUPPORT .El .It Va WITHOUT_GNUCXX .\" from FreeBSD: head/tools/build/options/WITHOUT_GNUCXX 255321 2013-09-06 20:08:03Z theraven Do not build the GNU C++ stack (g++, libstdc++). This is the default on platforms where clang is the system compiler. .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, arm64/aarch64, i386/i386 and pc98/i386. .It Va WITH_GNUCXX .\" from FreeBSD: head/tools/build/options/WITH_GNUCXX 255321 2013-09-06 20:08:03Z theraven Build the GNU C++ stack (g++, libstdc++). This is the default on platforms where gcc is the system compiler. .Pp It is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_GNU_GREP_COMPAT .\" from FreeBSD: head/tools/build/options/WITHOUT_GNU_GREP_COMPAT 273421 2014-10-21 20:44:33Z emaste Set this option to omit the gnu extensions to grep from being included in BSD grep. .It Va WITHOUT_GNU_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_GNU_SUPPORT 156932 2006-03-21 07:50:50Z ru Set to build some programs without optional GNU support. .It Va WITHOUT_GPIO .\" from FreeBSD: head/tools/build/options/WITHOUT_GPIO 228081 2011-11-28 17:54:34Z dim Set to not build .Xr gpioctl 8 as part of the base system. .It Va WITHOUT_GPL_DTC .\" from FreeBSD: head/tools/build/options/WITHOUT_GPL_DTC 264515 2014-04-15 20:41:55Z imp Set to build the BSD licensed version of the device tree compiler, instead of the GPL'd one from elinux.org. .It Va WITHOUT_GROFF .\" from FreeBSD: head/tools/build/options/WITHOUT_GROFF 218941 2011-02-22 08:13:49Z uqs Set to not build .Xr groff 1 and .Xr vgrind 1 . You should consider installing the textproc/groff port to not break .Xr man 1 . .It Va WITHOUT_GSSAPI .\" from FreeBSD: head/tools/build/options/WITHOUT_GSSAPI 174548 2007-12-12 16:39:32Z ru Set to not build libgssapi. .It Va WITHOUT_HAST .\" from FreeBSD: head/tools/build/options/WITHOUT_HAST 277725 2015-01-26 06:27:07Z ngie Set to not build .Xr hastd 8 and related utilities. .It Va WITH_HESIOD .\" from FreeBSD: head/tools/build/options/WITH_HESIOD 156932 2006-03-21 07:50:50Z ru Set to build Hesiod support. .It Va WITHOUT_HTML .\" from FreeBSD: head/tools/build/options/WITHOUT_HTML 156932 2006-03-21 07:50:50Z ru Set to not build HTML docs. .It Va WITHOUT_HYPERV .\" from FreeBSD: head/tools/build/options/WITHOUT_HYPERV 271493 2014-09-13 02:15:31Z delphij Set to not build or install HyperV utilities. .It Va WITHOUT_ICONV .\" from FreeBSD: head/tools/build/options/WITHOUT_ICONV 254919 2013-08-26 17:15:56Z antoine Set to not build iconv as part of libc. .It Va WITHOUT_INCLUDES .\" from FreeBSD: head/tools/build/options/WITHOUT_INCLUDES 275138 2014-11-26 20:43:09Z gjb Set to not install header files. This option used to be spelled .Va NO_INCS . .Bf -symbolic The option does not work for build targets. .Ef .It Va WITHOUT_INET .\" from FreeBSD: head/tools/build/options/WITHOUT_INET 221266 2011-04-30 17:58:28Z bz Set to not build programs and libraries related to IPv4 networking. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_INET_SUPPORT .El .It Va WITHOUT_INET6 .\" from FreeBSD: head/tools/build/options/WITHOUT_INET6 156932 2006-03-21 07:50:50Z ru Set to not build programs and libraries related to IPv6 networking. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_INET6_SUPPORT .El .It Va WITHOUT_INET6_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_INET6_SUPPORT 156932 2006-03-21 07:50:50Z ru Set to build libraries, programs, and kernel modules without IPv6 support. .It Va WITHOUT_INETD .\" from FreeBSD: head/tools/build/options/WITHOUT_INETD 278192 2015-02-04 10:19:32Z ngie Set to not build .Xr inetd 8 . .It Va WITHOUT_INET_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_INET_SUPPORT 221266 2011-04-30 17:58:28Z bz Set to build libraries, programs, and kernel modules without IPv4 support. .It Va WITHOUT_INSTALLLIB .\" from FreeBSD: head/tools/build/options/WITHOUT_INSTALLLIB 174497 2007-12-09 21:56:21Z dougb Set this if you do not want to install optional libraries. For example when creating a .Xr nanobsd 8 image. .It Va WITH_INSTALL_AS_USER .\" from FreeBSD: head/tools/build/options/WITH_INSTALL_AS_USER 238021 2012-07-02 20:24:01Z marcel Set to make install targets succeed for non-root users by installing files with owner and group attributes set to that of the user running the .Xr make 1 command. The user still has to set the .Va DESTDIR variable to point to a directory where the user has write permissions. .It Va WITHOUT_IPFILTER .\" from FreeBSD: head/tools/build/options/WITHOUT_IPFILTER 156932 2006-03-21 07:50:50Z ru Set to not build IP Filter package. .It Va WITHOUT_IPFW .\" from FreeBSD: head/tools/build/options/WITHOUT_IPFW 183242 2008-09-21 22:02:26Z sam Set to not build IPFW tools. .It Va WITHOUT_ISCSI .\" from FreeBSD: head/tools/build/options/WITHOUT_ISCSI 277675 2015-01-25 04:20:11Z ngie Set to not build .Xr iscid 8 and related utilities. .It Va WITHOUT_JAIL .\" from FreeBSD: head/tools/build/options/WITHOUT_JAIL 249966 2013-04-27 04:09:09Z eadler Set to not build tools for the support of jails; e.g., .Xr jail 8 . .It Va WITHOUT_KDUMP .\" from FreeBSD: head/tools/build/options/WITHOUT_KDUMP 240690 2012-09-19 11:38:37Z zeising Set to not build .Xr kdump 1 and .Xr truss 1 . .It Va WITHOUT_KERBEROS .\" from FreeBSD: head/tools/build/options/WITHOUT_KERBEROS 174549 2007-12-12 16:42:03Z ru Set this if you do not want to build Kerberos 5 (KTH Heimdal). When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_KERBEROS_SUPPORT .El .Pp When set, the following options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GSSAPI (unless .Va WITH_GSSAPI is set explicitly) .El .It Va WITHOUT_KERBEROS_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_KERBEROS_SUPPORT 251794 2013-06-15 20:29:07Z eadler Set to build some programs without Kerberos support, like .Xr ssh 1 , .Xr telnet 1 , .Xr sshd 8 , and .Xr telnetd 8 . .It Va WITHOUT_KERNEL_SYMBOLS .\" from FreeBSD: head/tools/build/options/WITHOUT_KERNEL_SYMBOLS 222189 2011-05-22 18:23:17Z imp Set to not install kernel symbol files. .Bf -symbolic This option is recommended for those people who have small root partitions. .Ef .It Va WITHOUT_KVM .\" from FreeBSD: head/tools/build/options/WITHOUT_KVM 174550 2007-12-12 16:43:17Z ru Set to not build the .Nm libkvm library as a part of the base system. .Bf -symbolic The option has no effect yet. .Ef When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_KVM_SUPPORT .El .It Va WITHOUT_KVM_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_KVM_SUPPORT 170644 2007-06-13 02:08:04Z sepotvin Set to build some programs without optional .Nm libkvm support. .It Va WITHOUT_LDNS .\" from FreeBSD: head/tools/build/options/WITHOUT_LDNS 255591 2013-09-15 13:11:13Z des Setting this variable will prevent the LDNS library from being built. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_LDNS_UTILS .It .Va WITHOUT_UNBOUND .El .It Va WITHOUT_LDNS_UTILS .\" from FreeBSD: head/tools/build/options/WITHOUT_LDNS_UTILS 255850 2013-09-24 14:33:31Z des Setting this variable will prevent building the LDNS utilities .Xr drill 1 and .Xr host 1 . .It Va WITHOUT_LEGACY_CONSOLE .\" from FreeBSD: head/tools/build/options/WITHOUT_LEGACY_CONSOLE 249966 2013-04-27 04:09:09Z eadler Set to not build programs that support a legacy PC console; e.g., .Xr kbdcontrol 8 and .Xr vidcontrol 8 . .It Va WITHOUT_LIB32 .\" from FreeBSD: head/tools/build/options/WITHOUT_LIB32 274664 2014-11-18 17:06:48Z imp On 64-bit platforms, set to not build 32-bit library set and a .Nm ld-elf32.so.1 runtime linker. .It Va WITHOUT_LIBCPLUSPLUS .\" from FreeBSD: head/tools/build/options/WITHOUT_LIBCPLUSPLUS 246262 2013-02-02 22:42:46Z dim Set to avoid building libcxxrt and libc++. .It Va WITHOUT_LIBPTHREAD .\" from FreeBSD: head/tools/build/options/WITHOUT_LIBPTHREAD 188848 2009-02-20 11:09:55Z mtm Set to not build the .Nm libpthread providing library, .Nm libthr . When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_LIBTHR .El .It Va WITHOUT_LIBTHR .\" from FreeBSD: head/tools/build/options/WITHOUT_LIBTHR 156932 2006-03-21 07:50:50Z ru Set to not build the .Nm libthr (1:1 threading) library. +.It Va WITHOUT_LLDB +.\" from FreeBSD: head/tools/build/options/WITHOUT_LLDB 289275 2015-10-14 00:23:31Z emaste +Set to not build the LLDB debugger. +.Pp +It is a default setting on +arm/arm, arm/armeb, arm/armv6, arm/armv6hf, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, pc98/i386, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITH_LLDB .\" from FreeBSD: head/tools/build/options/WITH_LLDB 255722 2013-09-20 01:52:02Z emaste Set to build the LLDB debugger. +.Pp +It is a default setting on +amd64/amd64 and arm64/aarch64. +.It Va WITHOUT_LLVM_LIBUNWIND +.\" from FreeBSD: head/tools/build/options/WITHOUT_LLVM_LIBUNWIND 293450 2016-01-09 00:42:07Z emaste +Set to use GCC's stack unwinder (instead of LLVM's libunwind). +.Pp +It is a default setting on +amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, pc98/i386, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. +.It Va WITH_LLVM_LIBUNWIND +.\" from FreeBSD: head/tools/build/options/WITH_LLVM_LIBUNWIND 293450 2016-01-09 00:42:07Z emaste +Set to use LLVM's libunwind stack unwinder (instead of GCC's unwinder). +.Pp +It is a default setting on +arm64/aarch64. .It Va WITHOUT_LOCALES .\" from FreeBSD: head/tools/build/options/WITHOUT_LOCALES 156932 2006-03-21 07:50:50Z ru Set to not build localization files; see .Xr locale 1 . .It Va WITHOUT_LOCATE .\" from FreeBSD: head/tools/build/options/WITHOUT_LOCATE 183242 2008-09-21 22:02:26Z sam Set to not build .Xr locate 1 and related programs. .It Va WITHOUT_LPR .\" from FreeBSD: head/tools/build/options/WITHOUT_LPR 156932 2006-03-21 07:50:50Z ru Set to not build .Xr lpr 1 and related programs. .It Va WITHOUT_LS_COLORS .\" from FreeBSD: head/tools/build/options/WITHOUT_LS_COLORS 235660 2012-05-19 20:05:27Z marcel Set to build .Xr ls 1 without support for colors to distinguish file types. .It Va WITHOUT_LZMA_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_LZMA_SUPPORT 245171 2013-01-08 18:37:12Z obrien Set to build some programs without optional lzma compression support. .It Va WITHOUT_MAIL .\" from FreeBSD: head/tools/build/options/WITHOUT_MAIL 183242 2008-09-21 22:02:26Z sam Set to not build any mail support (MUA or MTA). When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_DMAGENT .It .Va WITHOUT_MAILWRAPPER .It .Va WITHOUT_SENDMAIL .El .It Va WITHOUT_MAILWRAPPER .\" from FreeBSD: head/tools/build/options/WITHOUT_MAILWRAPPER 156932 2006-03-21 07:50:50Z ru Set to not build the .Xr mailwrapper 8 MTA selector. .It Va WITHOUT_MAKE .\" from FreeBSD: head/tools/build/options/WITHOUT_MAKE 183242 2008-09-21 22:02:26Z sam Set to not install .Xr make 1 and related support files. .It Va WITHOUT_MAN .\" from FreeBSD: head/tools/build/options/WITHOUT_MAN 156932 2006-03-21 07:50:50Z ru Set to not build manual pages. When set, the following options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_MAN_UTILS (unless .Va WITH_MAN_UTILS is set explicitly) .El .It Va WITHOUT_MANCOMPRESS .\" from FreeBSD: head/tools/build/options/WITHOUT_MANCOMPRESS 266752 2014-05-27 15:52:27Z gjb Set to not to install compressed man pages. Only the uncompressed versions will be installed. .It Va WITHOUT_MANDOCDB .\" from FreeBSD: head/tools/build/options/WITHOUT_MANDOCDB 283777 2015-05-30 17:41:37Z bapt Use the .Xr mandoc 1 version of .Xr makewhatis 8 database and utilities. .It Va WITHOUT_MAN_UTILS .\" from FreeBSD: head/tools/build/options/WITHOUT_MAN_UTILS 208322 2010-05-20 00:07:21Z jkim Set to not build utilities for manual pages, .Xr apropos 1 , .Xr catman 1 , .Xr makewhatis 1 , .Xr man 1 , .Xr whatis 1 , .Xr manctl 8 , and related support files. .It Va WITH_META_MODE .\" from FreeBSD: head/tools/build/options/WITH_META_MODE 290816 2015-11-14 03:24:48Z sjg Create meta files when not doing DIRDEPS_BUILD. The meta files can be useful for debugging. .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITH_NAND .\" from FreeBSD: head/tools/build/options/WITH_NAND 235537 2012-05-17 10:11:18Z gber Set to build the NAND Flash components. .It Va WITHOUT_NDIS .\" from FreeBSD: head/tools/build/options/WITHOUT_NDIS 183242 2008-09-21 22:02:26Z sam Set to not build programs and libraries related to NDIS emulation support. .It Va WITHOUT_NETCAT .\" from FreeBSD: head/tools/build/options/WITHOUT_NETCAT 156932 2006-03-21 07:50:50Z ru Set to not build .Xr nc 1 utility. .It Va WITHOUT_NETGRAPH .\" from FreeBSD: head/tools/build/options/WITHOUT_NETGRAPH 183242 2008-09-21 22:02:26Z sam Set to not build applications to support .Xr netgraph 4 . When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_ATM .It .Va WITHOUT_BLUETOOTH .It .Va WITHOUT_NETGRAPH_SUPPORT .El .It Va WITHOUT_NETGRAPH_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_NETGRAPH_SUPPORT 183305 2008-09-23 16:11:15Z sam Set to build libraries, programs, and kernel modules without netgraph support. .It Va WITHOUT_NIS .\" from FreeBSD: head/tools/build/options/WITHOUT_NIS 156932 2006-03-21 07:50:50Z ru Set to not build .Xr NIS 8 support and related programs. If set, you might need to adopt your .Xr nsswitch.conf 5 and remove .Sq nis entries. .It Va WITHOUT_NLS .\" from FreeBSD: head/tools/build/options/WITHOUT_NLS 156932 2006-03-21 07:50:50Z ru Set to not build NLS catalogs. .It Va WITHOUT_NLS_CATALOGS .\" from FreeBSD: head/tools/build/options/WITHOUT_NLS_CATALOGS 156932 2006-03-21 07:50:50Z ru Set to not build NLS catalog support for .Xr csh 1 . .It Va WITHOUT_NS_CACHING .\" from FreeBSD: head/tools/build/options/WITHOUT_NS_CACHING 172803 2007-10-19 14:01:25Z ru Set to disable name caching in the .Pa nsswitch subsystem. The generic caching daemon, .Xr nscd 8 , will not be built either if this option is set. .It Va WITHOUT_NTP .\" from FreeBSD: head/tools/build/options/WITHOUT_NTP 183242 2008-09-21 22:02:26Z sam Set to not build .Xr ntpd 8 and related programs. .It Va WITH_OFED .\" from FreeBSD: head/tools/build/options/WITH_OFED 228081 2011-11-28 17:54:34Z dim Set to build the .Dq "OpenFabrics Enterprise Distribution" Infiniband software stack. .It Va WITH_OPENLDAP .\" from FreeBSD: head/tools/build/options/WITH_OPENLDAP 264902 2014-04-24 23:17:31Z imp Enable building openldap support for kerberos. .It Va WITHOUT_OPENSSH .\" from FreeBSD: head/tools/build/options/WITHOUT_OPENSSH 156932 2006-03-21 07:50:50Z ru Set to not build OpenSSH. .It Va WITHOUT_OPENSSL .\" from FreeBSD: head/tools/build/options/WITHOUT_OPENSSL 156932 2006-03-21 07:50:50Z ru Set to not build OpenSSL. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_KERBEROS .It .Va WITHOUT_KERBEROS_SUPPORT .It .Va WITHOUT_OPENSSH .El .Pp When set, the following options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GSSAPI (unless .Va WITH_GSSAPI is set explicitly) .El .It Va WITHOUT_PAM .\" from FreeBSD: head/tools/build/options/WITHOUT_PAM 174550 2007-12-12 16:43:17Z ru Set to not build PAM library and modules. .Bf -symbolic This option is deprecated and does nothing. .Ef When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_PAM_SUPPORT .El .It Va WITHOUT_PAM_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_PAM_SUPPORT 156932 2006-03-21 07:50:50Z ru Set to build some programs without PAM support, particularly .Xr ftpd 8 and .Xr ppp 8 . .It Va WITHOUT_PC_SYSINSTALL .\" from FreeBSD: head/tools/build/options/WITHOUT_PC_SYSINSTALL 245606 2013-01-18 15:57:09Z eadler Set to not build .Xr pc-sysinstall 8 and related programs. .It Va WITHOUT_PF .\" from FreeBSD: head/tools/build/options/WITHOUT_PF 156932 2006-03-21 07:50:50Z ru Set to not build PF firewall package. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_AUTHPF .El .It Va WITHOUT_PKGBOOTSTRAP .\" from FreeBSD: head/tools/build/options/WITHOUT_PKGBOOTSTRAP 258924 2013-12-04 15:58:42Z bdrewery Set to not build .Xr pkg 7 bootstrap tool. .It Va WITHOUT_PMC .\" from FreeBSD: head/tools/build/options/WITHOUT_PMC 183242 2008-09-21 22:02:26Z sam Set to not build .Xr pmccontrol 8 and related programs. .It Va WITHOUT_PORTSNAP .\" from FreeBSD: head/tools/build/options/WITHOUT_PORTSNAP 183242 2008-09-21 22:02:26Z sam Set to not build or install .Xr portsnap 8 and related files. .It Va WITHOUT_PPP .\" from FreeBSD: head/tools/build/options/WITHOUT_PPP 183242 2008-09-21 22:02:26Z sam Set to not build .Xr ppp 8 and related programs. .It Va WITHOUT_PROFILE .\" from FreeBSD: head/tools/build/options/WITHOUT_PROFILE 228196 2011-12-02 09:09:54Z fjoe Set to avoid compiling profiled libraries. .It Va WITHOUT_QUOTAS .\" from FreeBSD: head/tools/build/options/WITHOUT_QUOTAS 183242 2008-09-21 22:02:26Z sam Set to not build .Xr quota 8 and related programs. .It Va WITHOUT_RADIUS_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_RADIUS_SUPPORT 278182 2015-02-04 06:53:45Z ngie Set to not build radius support into various applications, like .Xr pam_radius 8 and .Xr ppp 8 . .It Va WITHOUT_RBOOTD .\" from FreeBSD: head/tools/build/options/WITHOUT_RBOOTD 278192 2015-02-04 10:19:32Z ngie Set to not build or install .Xr rbootd 8 . .It Va WITHOUT_RCMDS .\" from FreeBSD: head/tools/build/options/WITHOUT_RCMDS 156932 2006-03-21 07:50:50Z ru Disable building of the .Bx r-commands. This includes .Xr rlogin 1 , .Xr rsh 1 , etc. .It Va WITHOUT_RCS .\" from FreeBSD: head/tools/build/options/WITHOUT_RCS 275138 2014-11-26 20:43:09Z gjb Set to not build .Xr rcs 1 , .Xr etcupdate 8 , and related utilities. .It Va WITHOUT_RESCUE .\" from FreeBSD: head/tools/build/options/WITHOUT_RESCUE 156932 2006-03-21 07:50:50Z ru Set to not build .Xr rescue 8 . .It Va WITHOUT_ROUTED .\" from FreeBSD: head/tools/build/options/WITHOUT_ROUTED 183242 2008-09-21 22:02:26Z sam Set to not build .Xr routed 8 utility. .It Va WITHOUT_SENDMAIL .\" from FreeBSD: head/tools/build/options/WITHOUT_SENDMAIL 156932 2006-03-21 07:50:50Z ru Set to not build .Xr sendmail 8 and related programs. .It Va WITHOUT_SETUID_LOGIN .\" from FreeBSD: head/tools/build/options/WITHOUT_SETUID_LOGIN 156932 2006-03-21 07:50:50Z ru Set this to disable the installation of .Xr login 1 as a set-user-ID root program. .It Va WITHOUT_SHAREDOCS .\" from FreeBSD: head/tools/build/options/WITHOUT_SHAREDOCS 156932 2006-03-21 07:50:50Z ru Set to not build the .Bx 4.4 legacy docs. .It Va WITH_SHARED_TOOLCHAIN .\" from FreeBSD: head/tools/build/options/WITH_SHARED_TOOLCHAIN 235342 2012-05-12 16:12:36Z gjb Set to build the toolchain binaries shared. The set includes .Xr cc 1 , .Xr make 1 and necessary utilities like assembler, linker and library archive manager. .It Va WITH_SORT_THREADS .\" from FreeBSD: head/tools/build/options/WITH_SORT_THREADS 264158 2014-04-05 18:00:45Z imp Set to enable threads in .Xr sort 1 . .It Va WITHOUT_SOURCELESS .\" from FreeBSD: head/tools/build/options/WITHOUT_SOURCELESS 230972 2012-02-04 00:54:43Z rmh Set to not build kernel modules that include sourceless code (either microcode or native code for host CPU). When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_SOURCELESS_HOST .It .Va WITHOUT_SOURCELESS_UCODE .El .It Va WITHOUT_SOURCELESS_HOST .\" from FreeBSD: head/tools/build/options/WITHOUT_SOURCELESS_HOST 230972 2012-02-04 00:54:43Z rmh Set to not build kernel modules that include sourceless native code for host CPU. .It Va WITHOUT_SOURCELESS_UCODE .\" from FreeBSD: head/tools/build/options/WITHOUT_SOURCELESS_UCODE 230972 2012-02-04 00:54:43Z rmh Set to not build kernel modules that include sourceless microcode. .It Va WITHOUT_SSP .\" from FreeBSD: head/tools/build/options/WITHOUT_SSP 180012 2008-06-25 21:33:28Z ru Set to not build world with propolice stack smashing protection. .It Va WITH_STAGING .\" from FreeBSD: head/tools/build/options/WITH_STAGING 290816 2015-11-14 03:24:48Z sjg Enable staging of files to a stage tree. This can be best thought of as auto-install to .Va DESTDIR with some extra meta data to ensure dependencies can be tracked. Depends on .Va WITH_DIRDEPS_BUILD . When set, the following options are also in effect: .Pp .Bl -inset -compact .It Va WITH_STAGING_MAN (unless .Va WITHOUT_STAGING_MAN is set explicitly) .It Va WITH_STAGING_PROG (unless .Va WITHOUT_STAGING_PROG is set explicitly) .El .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITH_STAGING_MAN .\" from FreeBSD: head/tools/build/options/WITH_STAGING_MAN 284708 2015-06-22 20:21:57Z sjg Enable staging of MAN pages to stage tree. .It Va WITH_STAGING_PROG .\" from FreeBSD: head/tools/build/options/WITH_STAGING_PROG 284708 2015-06-22 20:21:57Z sjg Enable staging of PROGs to stage tree. .It Va WITH_STALE_STAGED .\" from FreeBSD: head/tools/build/options/WITH_STALE_STAGED 284708 2015-06-22 20:21:57Z sjg Check staged files are not stale. .It Va WITH_SVN .\" from FreeBSD: head/tools/build/options/WITH_SVN 252561 2013-07-03 12:36:47Z zeising Set to install .Xr svnlite 1 as .Xr svn 1 . .It Va WITHOUT_SVNLITE .\" from FreeBSD: head/tools/build/options/WITHOUT_SVNLITE 252561 2013-07-03 12:36:47Z zeising Set to not build .Xr svnlite 1 and related programs. .It Va WITHOUT_SYMVER .\" from FreeBSD: head/tools/build/options/WITHOUT_SYMVER 169649 2007-05-17 05:03:24Z deischen Set to disable symbol versioning when building shared libraries. .It Va WITHOUT_SYSCONS .\" from FreeBSD: head/tools/build/options/WITHOUT_SYSCONS 156932 2006-03-21 07:50:50Z ru Set to not build .Xr syscons 4 support files such as keyboard maps, fonts, and screen output maps. .It Va WITH_SYSROOT .\" from FreeBSD: head/tools/build/options/WITH_SYSROOT 290816 2015-11-14 03:24:48Z sjg Enable use of sysroot during build. Depends on .Va WITH_DIRDEPS_BUILD . .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_TALK .\" from FreeBSD: head/tools/build/options/WITHOUT_TALK 277676 2015-01-25 04:37:44Z ngie Set to not build or install .Xr talk 1 and .Xr talkd 8 . .It Va WITHOUT_TCP_WRAPPERS .\" from FreeBSD: head/tools/build/options/WITHOUT_TCP_WRAPPERS 278192 2015-02-04 10:19:32Z ngie Set to not build or install .Xr tcpd 8 , and related utilities. .It Va WITHOUT_TCSH .\" from FreeBSD: head/tools/build/options/WITHOUT_TCSH 156932 2006-03-21 07:50:50Z ru Set to not build and install .Pa /bin/csh (which is .Xr tcsh 1 ) . .It Va WITHOUT_TELNET .\" from FreeBSD: head/tools/build/options/WITHOUT_TELNET 183242 2008-09-21 22:02:26Z sam Set to not build .Xr telnet 8 and related programs. .It Va WITHOUT_TESTS .\" from FreeBSD: head/tools/build/options/WITHOUT_TESTS 268778 2014-07-16 21:40:11Z jmmv Set to not build nor install the .Fx Test Suite in .Pa /usr/tests/ . See .Xr tests 7 for more details. This also disables the build of all test-related dependencies, including ATF. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_DTRACE_TESTS .It .Va WITHOUT_TESTS_SUPPORT .El .It Va WITHOUT_TESTS_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_TESTS_SUPPORT 274665 2014-11-18 17:06:50Z imp Set to disables the build of all test-related dependencies, including ATF. .It Va WITHOUT_TEXTPROC .\" from FreeBSD: head/tools/build/options/WITHOUT_TEXTPROC 183242 2008-09-21 22:02:26Z sam Set to not build programs used for text processing. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_GROFF .El .It Va WITHOUT_TFTP .\" from FreeBSD: head/tools/build/options/WITHOUT_TFTP 278192 2015-02-04 10:19:32Z ngie Set to not build or install .Xr tftp 1 and .Xr tftpd 8 . .It Va WITHOUT_TIMED .\" from FreeBSD: head/tools/build/options/WITHOUT_TIMED 278192 2015-02-04 10:19:32Z ngie Set to not build or install .Xr timed 8 . .It Va WITHOUT_TOOLCHAIN .\" from FreeBSD: head/tools/build/options/WITHOUT_TOOLCHAIN 273172 2014-10-16 15:55:13Z brooks Set to not install header or programs used for program development, compilers, debuggers etc. .Bf -symbolic The option does not work for build targets. .Ef When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_BINUTILS .It .Va WITHOUT_CLANG .It .Va WITHOUT_CLANG_EXTRAS .It .Va WITHOUT_CLANG_FULL .It .Va WITHOUT_GCC .It .Va WITHOUT_GDB .It .Va WITHOUT_INCLUDES .El .It Va WITHOUT_UNBOUND .\" from FreeBSD: head/tools/build/options/WITHOUT_UNBOUND 255597 2013-09-15 14:51:23Z des Set to not build .Xr unbound 8 and related programs. .It Va WITHOUT_USB .\" from FreeBSD: head/tools/build/options/WITHOUT_USB 156932 2006-03-21 07:50:50Z ru Set to not build USB-related programs and libraries. .It Va WITHOUT_USB_GADGET_EXAMPLES .\" from FreeBSD: head/tools/build/options/WITHOUT_USB_GADGET_EXAMPLES 274665 2014-11-18 17:06:50Z imp Set to build USB gadget kernel modules. .It Va WITHOUT_UTMPX .\" from FreeBSD: head/tools/build/options/WITHOUT_UTMPX 231530 2012-02-11 20:28:42Z ed Set to not build user accounting tools such as .Xr last 1 , .Xr users 1 , .Xr who 1 , .Xr ac 8 , .Xr lastlogin 8 and .Xr utx 8 . .It Va WITHOUT_VI .\" from FreeBSD: head/tools/build/options/WITHOUT_VI 264903 2014-04-24 23:17:40Z imp Set to not build and install vi, view, ex and related programs. .It Va WITHOUT_VT .\" from FreeBSD: head/tools/build/options/WITHOUT_VT 268022 2014-06-30 00:20:12Z emaste Set to not build .Xr vt 4 support files (fonts and keymaps). .It Va WITHOUT_WARNS .\" from FreeBSD: head/tools/build/options/WITHOUT_WARNS 276559 2015-01-02 18:57:58Z bapt Set this to not add warning flags to the compiler invocations. Useful as a temporary workaround when code enters the tree which triggers warnings in environments that differ from the original developer. .It Va WITHOUT_WIRELESS .\" from FreeBSD: head/tools/build/options/WITHOUT_WIRELESS 183242 2008-09-21 22:02:26Z sam Set to not build programs used for 802.11 wireless networks; especially .Xr wpa_supplicant 8 and .Xr hostapd 8 . When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_WIRELESS_SUPPORT .El .It Va WITHOUT_WIRELESS_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_WIRELESS_SUPPORT 183305 2008-09-23 16:11:15Z sam Set to build libraries, programs, and kernel modules without 802.11 wireless support. .It Va WITHOUT_WPA_SUPPLICANT_EAPOL .\" from FreeBSD: head/tools/build/options/WITHOUT_WPA_SUPPLICANT_EAPOL 156932 2006-03-21 07:50:50Z ru Build .Xr wpa_supplicant 8 without support for the IEEE 802.1X protocol and without support for EAP-PEAP, EAP-TLS, EAP-LEAP, and EAP-TTLS protocols (usable only via 802.1X). .It Va WITHOUT_ZFS .\" from FreeBSD: head/tools/build/options/WITHOUT_ZFS 168409 2007-04-06 02:13:30Z pjd Set to not build ZFS file system. .It Va WITHOUT_ZONEINFO .\" from FreeBSD: head/tools/build/options/WITHOUT_ZONEINFO 235342 2012-05-12 16:12:36Z gjb Set to not build the timezone database. .El .Sh FILES .Bl -tag -compact -width Pa .It Pa /etc/src.conf .It Pa /etc/src-env.conf .It Pa /usr/share/mk/bsd.own.mk .El .Sh SEE ALSO .Xr make 1 , .Xr make.conf 5 , .Xr build 7 , .Xr ports 7 .Sh HISTORY The .Nm file appeared in .Fx 7.0 . .Sh AUTHORS This manual page was autogenerated. Index: projects/clang380-import/share/misc/committers-src.dot =================================================================== --- projects/clang380-import/share/misc/committers-src.dot (revision 293686) +++ projects/clang380-import/share/misc/committers-src.dot (revision 293687) @@ -1,755 +1,757 @@ # $FreeBSD$ # This file is meant to list all FreeBSD src committers and describe the # mentor-mentee relationships between them. # The graphical output can be generated from this file with the following # command: # $ dot -T png -o file.png committers-src.dot # # The dot binary is part of the graphics/graphviz port. digraph src { # Node definitions follow this example: # # foo [label="Foo Bar\nfoo@FreeBSD.org\n????/??/??"] # # ????/??/?? is the date when the commit bit was obtained, usually the one you # can find looking at svn logs for the svnadmin/access file. # Use YYYY/MM/DD format. # # For returned commit bits, the node definition will follow this example: # # foo [label="Foo Bar\nfoo@FreeBSD.org\n????/??/??\n????/??/??"] # # The first date is the same as for an active committer, the second date is # the date when the commit bit has been returned. Again, check svn logs. node [color=grey62, style=filled, bgcolor=black]; # Alumni go here.. Try to keep things sorted. alm [label="Andrew Moore\nalm@FreeBSD.org\n1993/06/12\n????/??/??"] anholt [label="Eric Anholt\nanholt@FreeBSD.org\n2002/04/22\n2008/08/07"] archie [label="Archie Cobbs\narchie@FreeBSD.org\n1998/11/06\n2006/06/09"] arr [label="Andrew R. Reiter\narr@FreeBSD.org\n2001/11/02\n2005/05/25"] arun [label="Arun Sharma\narun@FreeBSD.org\n2003/03/06\n2006/12/16"] asmodai [label="Jeroen Ruigrok\nasmodai@FreeBSD.org\n1999/12/16\n2001/11/16"] benjsc [label="Benjamin Close\nbenjsc@FreeBSD.org\n2007/02/09\n2010/09/15"] billf [label="Bill Fumerola\nbillf@FreeBSD.org\n1998/11/11\n2008/11/10"] bmah [label="Bruce A. Mah\nbmah@FreeBSD.org\n2002/01/29\n2009/09/13"] bmilekic [label="Bosko Milekic\nbmilekic@FreeBSD.org\n2000/09/21\n2008/11/10"] bushman [label="Michael Bushkov\nbushman@FreeBSD.org\n2007/03/10\n2010/04/29"] carl [label="Carl Delsey\ncarl@FreeBSD.org\n2013/01/14\n2014/03/06"] ceri [label="Ceri Davies\nceri@FreeBSD.org\n2006/11/07\n2012/03/07"] cjc [label="Crist J. Clark\ncjc@FreeBSD.org\n2001/06/01\n2006/12/29"] davidxu [label="David Xu\ndavidxu@FreeBSD.org\n2002/09/02\n2014/04/14"] dds [label="Diomidis Spinellis\ndds@FreeBSD.org\n2003/06/20\n2010/09/22"] dhartmei [label="Daniel Hartmeier\ndhartmei@FreeBSD.org\n2004/04/06\n2008/12/08"] dmlb [label="Duncan Barclay\ndmlb@FreeBSD.org\n2001/12/14\n2008/11/10"] dougb [label="Doug Barton\ndougb@FreeBSD.org\n2000/10/26\n2012/10/08"] eik [label="Oliver Eikemeier\neik@FreeBSD.org\n2004/05/20\n2008/11/10"] furuta [label="Atsushi Furuta\nfuruta@FreeBSD.org\n2000/06/21\n2003/03/08"] gj [label="Gary L. Jennejohn\ngj@FreeBSD.org\n1994/??/??\n2006/04/28"] groudier [label="Gerard Roudier\ngroudier@FreeBSD.org\n1999/12/30\n2006/04/06"] jake [label="Jake Burkholder\njake@FreeBSD.org\n2000/05/16\n2008/11/10"] jayanth [label="Jayanth Vijayaraghavan\njayanth@FreeBSD.org\n2000/05/08\n2008/11/10"] jb [label="John Birrell\njb@FreeBSD.org\n1997/03/27\n2009/12/15"] jdp [label="John Polstra\njdp@FreeBSD.org\n1995/12/07\n2008/02/26"] jedgar [label="Chris D. Faulhaber\njedgar@FreeBSD.org\n1999/12/15\n2006/04/07"] jkh [label="Jordan K. Hubbard\njkh@FreeBSD.org\n1993/06/12\n2008/06/13"] jlemon [label="Jonathan Lemon\njlemon@FreeBSD.org\n1997/08/14\n2008/11/10"] joe [label="Josef Karthauser\njoe@FreeBSD.org\n1999/10/22\n2008/08/10"] jtc [label="J.T. Conklin\njtc@FreeBSD.org\n1993/06/12\n????/??/??"] kargl [label="Steven G. Kargl\nkargl@FreeBSD.org\n2011/01/17\n2015/06/28"] kbyanc [label="Kelly Yancey\nkbyanc@FreeBSD.org\n2000/07/11\n2006/07/25"] keichii [label="Michael Wu\nkeichii@FreeBSD.org\n2001/03/07\n2006/04/28"] linimon [label="Mark Linimon\nlinimon@FreeBSD.org\n2006/09/30\n2008/05/04"] lulf [label="Ulf Lilleengen\nlulf@FreeBSD.org\n2007/10/24\n2012/01/19"] mb [label="Maxim Bolotin\nmb@FreeBSD.org\n2000/04/06\n2003/03/08"] marks [label="Mark Santcroos\nmarks@FreeBSD.org\n2004/03/18\n2008/09/29"] mike [label="Mike Barcroft\nmike@FreeBSD.org\n2001/07/17\n2006/04/28"] msmith [label="Mike Smith\nmsmith@FreeBSD.org\n1996/10/22\n2003/12/15"] murray [label="Murray Stokely\nmurray@FreeBSD.org\n2000/04/05\n2010/07/25"] mux [label="Maxime Henrion\nmux@FreeBSD.org\n2002/03/03\n2011/06/22"] nate [label="Nate Willams\nnate@FreeBSD.org\n1993/06/12\n2003/12/15"] njl [label="Nate Lawson\nnjl@FreeBSD.org\n2002/08/07\n2008/02/16"] non [label="Noriaki Mitsnaga\nnon@FreeBSD.org\n2000/06/19\n2007/03/06"] onoe [label="Atsushi Onoe\nonoe@FreeBSD.org\n2000/07/21\n2008/11/10"] rafan [label="Rong-En Fan\nrafan@FreeBSD.org\n2007/01/31\n2012/07/23"] randi [label="Randi Harper\nrandi@FreeBSD.org\n2010/04/20\n2012/05/10"] rgrimes [label="Rod Grimes\nrgrimes@FreeBSD.org\n1993/06/12\n2003/03/08"] rink [label="Rink Springer\nrink@FreeBSD.org\n2006/01/16\n2010/11/04"] robert [label="Robert Drehmel\nrobert@FreeBSD.org\n2001/08/23\n2006/05/13"] sah [label="Sam Hopkins\nsah@FreeBSD.org\n2004/12/15\n2008/11/10"] shafeeq [label="Shafeeq Sinnamohideen\nshafeeq@FreeBSD.org\n2000/06/19\n2006/04/06"] sheldonh [label="Sheldon Hearn\nsheldonh@FreeBSD.org\n1999/06/14\n2006/05/13"] shiba [label="Takeshi Shibagaki\nshiba@FreeBSD.org\n2000/06/19\n2008/11/10"] shin [label="Yoshinobu Inoue\nshin@FreeBSD.org\n1999/07/29\n2003/03/08"] snb [label="Nick Barkas\nsnb@FreeBSD.org\n2009/05/05\n2010/11/04"] tmm [label="Thomas Moestl\ntmm@FreeBSD.org\n2001/03/07\n2006/07/12"] toshi [label="Toshihiko Arai\ntoshi@FreeBSD.org\n2000/07/06\n2003/03/08"] tshiozak [label="Takuya SHIOZAKI\ntshiozak@FreeBSD.org\n2001/04/25\n2003/03/08"] uch [label="UCHIYAMA Yasushi\nuch@FreeBSD.org\n2000/06/21\n2002/04/24"] wilko [label="Wilko Bulte\nwilko@FreeBSD.org\n2000/01/13\n2013/01/17"] yar [label="Yar Tikhiy\nyar@FreeBSD.org\n2001/03/25\n2012/05/23"] zack [label="Zack Kirsch\nzack@FreeBSD.org\n2010/11/05\n2012/09/08"] node [color=lightblue2, style=filled, bgcolor=black]; # Current src committers go here. Try to keep things sorted. ache [label="Andrey Chernov\nache@FreeBSD.org\n1993/10/31"] achim [label="Achim Leubner\nachim@FreeBSD.org\n2013/01/23"] adrian [label="Adrian Chadd\nadrian@FreeBSD.org\n2000/07/03"] ae [label="Andrey V. Elsukov\nae@FreeBSD.org\n2010/06/03"] akiyama [label="Shunsuke Akiyama\nakiyama@FreeBSD.org\n2000/06/19"] alc [label="Alan Cox\nalc@FreeBSD.org\n1999/02/23"] allanjude [label="Allan Jude\nallanjude@FreeBSD.org\n2015/07/30"] ambrisko [label="Doug Ambrisko\nambrisko@FreeBSD.org\n2001/12/19"] anchie [label="Ana Kukec\nanchie@FreeBSD.org\n2010/04/14"] andre [label="Andre Oppermann\nandre@FreeBSD.org\n2003/11/12"] andreast [label="Andreas Tobler\nandreast@FreeBSD.org\n2010/09/05"] andrew [label="Andrew Turner\nandrew@FreeBSD.org\n2010/07/19"] antoine [label="Antoine Brodin\nantoine@FreeBSD.org\n2008/02/03"] araujo [label="Marcelo Araujo\naraujo@FreeBSD.org\n2015/08/04"] ariff [label="Ariff Abdullah\nariff@FreeBSD.org\n2005/11/14"] art [label="Artem Belevich\nart@FreeBSD.org\n2011/03/29"] arybchik [label="Andrew Rybchenko\narybchik@FreeBSD.org\n2014/10/12"] asomers [label="Alan Somers\nasomers@FreeBSD.org\n2013/04/24"] avg [label="Andriy Gapon\navg@FreeBSD.org\n2009/02/18"] avos [label="Andriy Voskoboinyk\navos@FreeBSD.org\n2015/09/24"] bapt [label="Baptiste Daroussin\nbapt@FreeBSD.org\n2011/12/23"] bdrewery [label="Bryan Drewery\nbdrewery@FreeBSD.org\n2013/12/14"] benl [label="Ben Laurie\nbenl@FreeBSD.org\n2011/05/18"] benno [label="Benno Rice\nbenno@FreeBSD.org\n2000/11/02"] bms [label="Bruce M Simpson\nbms@FreeBSD.org\n2003/08/06"] br [label="Ruslan Bukin\nbr@FreeBSD.org\n2013/09/02"] brian [label="Brian Somers\nbrian@FreeBSD.org\n1996/12/16"] brooks [label="Brooks Davis\nbrooks@FreeBSD.org\n2001/06/21"] brucec [label="Bruce Cran\nbrucec@FreeBSD.org\n2010/01/29"] brueffer [label="Christian Brueffer\nbrueffer@FreeBSD.org\n2006/02/28"] bruno [label="Bruno Ducrot\nbruno@FreeBSD.org\n2005/07/18"] bryanv [label="Bryan Venteicher\nbryanv@FreeBSD.org\n2012/11/03"] bschmidt [label="Bernhard Schmidt\nbschmidt@FreeBSD.org\n2010/02/06"] bz [label="Bjoern A. Zeeb\nbz@FreeBSD.org\n2004/07/27"] cem [label="Conrad Meyer\ncem@FreeBSD.org\n2015/07/05"] cognet [label="Olivier Houchard\ncognet@FreeBSD.org\n2002/10/09"] cokane [label="Coleman Kane\ncokane@FreeBSD.org\n2000/06/19"] cperciva [label="Colin Percival\ncperciva@FreeBSD.org\n2004/01/20"] csjp [label="Christian S.J. Peron\ncsjp@FreeBSD.org\n2004/05/04"] das [label="David Schultz\ndas@FreeBSD.org\n2003/02/21"] davide [label="Davide Italiano\ndavide@FreeBSD.org\n2012/01/27"] dchagin [label="Dmitry Chagin\ndchagin@FreeBSD.org\n2009/02/28"] delphij [label="Xin Li\ndelphij@FreeBSD.org\n2004/09/14"] des [label="Dag-Erling Smorgrav\ndes@FreeBSD.org\n1998/04/03"] dfr [label="Doug Rabson\ndfr@FreeBSD.org\n????/??/??"] dg [label="David Greenman\ndg@FreeBSD.org\n1993/06/14"] dim [label="Dimitry Andric\ndim@FreeBSD.org\n2010/08/30"] dteske [label="Devin Teske\ndteske@FreeBSD.org\n2012/04/10"] dumbbell [label="Jean-Sebastien Pedron\ndumbbell@FreeBSD.org\n2004/11/29"] dwmalone [label="David Malone\ndwmalone@FreeBSD.org\n2000/07/11"] eadler [label="Eitan Adler\neadler@FreeBSD.org\n2012/01/18"] ed [label="Ed Schouten\ned@FreeBSD.org\n2008/05/22"] edavis [label="Eric Davis\nedavis@FreeBSD.org\n2013/10/09"] edwin [label="Edwin Groothuis\nedwin@FreeBSD.org\n2007/06/25"] eivind [label="Eivind Eklund\neivind@FreeBSD.org\n1997/02/02"] emaste [label="Ed Maste\nemaste@FreeBSD.org\n2005/10/04"] emax [label="Maksim Yevmenkin\nemax@FreeBSD.org\n2003/10/12"] eri [label="Ermal Luci\neri@FreeBSD.org\n2008/06/11"] erj [label="Eric Joyner\nerj@FreeBSD.org\n2014/12/14"] fabient [label="Fabien Thomas\nfabient@FreeBSD.org\n2009/03/16"] fanf [label="Tony Finch\nfanf@FreeBSD.org\n2002/05/05"] fjoe [label="Max Khon\nfjoe@FreeBSD.org\n2001/08/06"] flz [label="Florent Thoumie\nflz@FreeBSD.org\n2006/03/30"] gabor [label="Gabor Kovesdan\ngabor@FreeBSD.org\n2010/02/02"] gad [label="Garance A. Drosehn\ngad@FreeBSD.org\n2000/10/27"] gallatin [label="Andrew Gallatin\ngallatin@FreeBSD.org\n1999/01/15"] gavin [label="Gavin Atkinson\ngavin@FreeBSD.org\n2009/12/07"] gibbs [label="Justin T. Gibbs\ngibbs@FreeBSD.org\n????/??/??"] gjb [label="Glen Barber\ngjb@FreeBSD.org\n2013/06/04"] gleb [label="Gleb Kurtsou\ngleb@FreeBSD.org\n2011/09/19"] glebius [label="Gleb Smirnoff\nglebius@FreeBSD.org\n2004/07/14"] gnn [label="George V. Neville-Neil\ngnn@FreeBSD.org\n2004/10/11"] gordon [label="Gordon Tetlow\ngordon@FreeBSD.org\n2002/05/17"] grehan [label="Peter Grehan\ngrehan@FreeBSD.org\n2002/08/08"] grog [label="Greg Lehey\ngrog@FreeBSD.org\n1998/08/30"] gshapiro [label="Gregory Shapiro\ngshapiro@FreeBSD.org\n2000/07/12"] harti [label="Hartmut Brandt\nharti@FreeBSD.org\n2003/01/29"] hiren [label="Hiren Panchasara\nhiren@FreeBSD.org\n2013/04/12"] hmp [label="Hiten Pandya\nhmp@FreeBSD.org\n2004/03/23"] ian [label="Ian Lepore\nian@FreeBSD.org\n2013/01/07"] iedowse [label="Ian Dowse\niedowse@FreeBSD.org\n2000/12/01"] imp [label="Warner Losh\nimp@FreeBSD.org\n1996/09/20"] ivoras [label="Ivan Voras\nivoras@FreeBSD.org\n2008/06/10"] jah [label="Jason A. Harmening\njah@FreeBSD.org\n2015/03/08"] jamie [label="Jamie Gritton\njamie@FreeBSD.org\n2009/01/28"] jasone [label="Jason Evans\njasone@FreeBSD.org\n1999/03/03"] jceel [label="Jakub Klama\njceel@FreeBSD.org\n2011/09/25"] jch [label="Julien Charbon\njch@FreeBSD.org\n2014/09/24"] jchandra [label="Jayachandran C.\njchandra@FreeBSD.org\n2010/05/19"] jeff [label="Jeff Roberson\njeff@FreeBSD.org\n2002/02/21"] jh [label="Jaakko Heinonen\njh@FreeBSD.org\n2009/10/02"] jhb [label="John Baldwin\njhb@FreeBSD.org\n1999/08/23"] jhibbits [label="Justin Hibbits\njhibbits@FreeBSD.org\n2011/11/30"] jilles [label="Jilles Tjoelker\njilles@FreeBSD.org\n2009/05/22"] jimharris [label="Jim Harris\njimharris@FreeBSD.org\n2011/12/09"] jinmei [label="JINMEI Tatuya\njinmei@FreeBSD.org\n2007/03/17"] jkim [label="Jung-uk Kim\njkim@FreeBSD.org\n2005/07/06"] jkoshy [label="A. Joseph Koshy\njkoshy@FreeBSD.org\n1998/05/13"] jlh [label="Jeremie Le Hen\njlh@FreeBSD.org\n2012/04/22"] jls [label="Jordan Sissel\njls@FreeBSD.org\n2006/12/06"] jmg [label="John-Mark Gurney\njmg@FreeBSD.org\n1997/02/13"] jmmv [label="Julio Merino\njmmv@FreeBSD.org\n2013/11/02"] joerg [label="Joerg Wunsch\njoerg@FreeBSD.org\n1993/11/14"] jon [label="Jonathan Chen\njon@FreeBSD.org\n2000/10/17"] jonathan [label="Jonathan Anderson\njonathan@FreeBSD.org\n2010/10/07"] jpaetzel [label="Josh Paetzel\njpaetzel@FreeBSD.org\n2011/01/21"] jtl [label="Jonathan T. Looney\njtl@FreeBSD.org\n2015/10/26"] julian [label="Julian Elischer\njulian@FreeBSD.org\n1993/04/19"] jwd [label="John De Boskey\njwd@FreeBSD.org\n2000/05/19"] kaiw [label="Kai Wang\nkaiw@FreeBSD.org\n2007/09/26"] kan [label="Alexander Kabaev\nkan@FreeBSD.org\n2002/07/21"] ken [label="Ken Merry\nken@FreeBSD.org\n1998/09/08"] kensmith [label="Ken Smith\nkensmith@FreeBSD.org\n2004/01/23"] kevlo [label="Kevin Lo\nkevlo@FreeBSD.org\n2006/07/23"] kib [label="Konstantin Belousov\nkib@FreeBSD.org\n2006/06/03"] kmacy [label="Kip Macy\nkmacy@FreeBSD.org\n2005/06/01"] kp [label="Kristof Provost\nkp@FreeBSD.org\n2015/03/22"] le [label="Lukas Ertl\nle@FreeBSD.org\n2004/02/02"] lidl [label="Kurt Lidl\nlidl@FreeBSD.org\n2015/10/21"] loos [label="Luiz Otavio O Souza\nloos@FreeBSD.org\n2013/07/03"] lstewart [label="Lawrence Stewart\nlstewart@FreeBSD.org\n2008/10/06"] marcel [label="Marcel Moolenaar\nmarcel@FreeBSD.org\n1999/07/03"] marius [label="Marius Strobl\nmarius@FreeBSD.org\n2004/04/17"] markj [label="Mark Johnston\nmarkj@FreeBSD.org\n2012/12/18"] markm [label="Mark Murray\nmarkm@FreeBSD.org\n1995/04/24"] markus [label="Markus Brueffer\nmarkus@FreeBSD.org\n2006/06/01"] matteo [label="Matteo Riondato\nmatteo@FreeBSD.org\n2006/01/18"] mav [label="Alexander Motin\nmav@FreeBSD.org\n2007/04/12"] maxim [label="Maxim Konovalov\nmaxim@FreeBSD.org\n2002/02/07"] mdf [label="Matthew Fleming\nmdf@FreeBSD.org\n2010/06/04"] mdodd [label="Matthew N. Dodd\nmdodd@FreeBSD.org\n1999/07/27"] melifaro [label="Alexander V. Chernikov\nmelifaro@FreeBSD.org\n2011/10/04"] mjacob [label="Matt Jacob\nmjacob@FreeBSD.org\n1997/08/13"] mjg [label="Mateusz Guzik\nmjg@FreeBSD.org\n2012/06/04"] mlaier [label="Max Laier\nmlaier@FreeBSD.org\n2004/02/10"] mmel [label="Michal Meloun\nmmel@FreeBSD.org\n2015/11/01"] monthadar [label="Monthadar Al Jaberi\nmonthadar@FreeBSD.org\n2012/04/02"] mp [label="Mark Peek\nmp@FreeBSD.org\n2001/07/27"] mr [label="Michael Reifenberger\nmr@FreeBSD.org\n2001/09/30"] neel [label="Neel Natu\nneel@FreeBSD.org\n2009/09/20"] netchild [label="Alexander Leidinger\nnetchild@FreeBSD.org\n2005/03/31"] ngie [label="Garrett Cooper\nngie@FreeBSD.org\n2014/07/27"] nork [label="Norikatsu Shigemura\nnork@FreeBSD.org\n2009/06/09"] np [label="Navdeep Parhar\nnp@FreeBSD.org\n2009/06/05"] nwhitehorn [label="Nathan Whitehorn\nnwhitehorn@FreeBSD.org\n2008/07/03"] obrien [label="David E. O'Brien\nobrien@FreeBSD.org\n1996/10/29"] olli [label="Oliver Fromme\nolli@FreeBSD.org\n2008/02/14"] oshogbo [label="Mariusz Zaborski\noshogbo@FreeBSD.org\n2015/04/15"] peadar [label="Peter Edwards\npeadar@FreeBSD.org\n2004/03/08"] peter [label="Peter Wemm\npeter@FreeBSD.org\n1995/07/04"] peterj [label="Peter Jeremy\npeterj@FreeBSD.org\n2012/09/14"] pfg [label="Pedro Giffuni\npfg@FreeBSD.org\n2011/12/01"] philip [label="Philip Paeps\nphilip@FreeBSD.org\n2004/01/21"] phk [label="Poul-Henning Kamp\nphk@FreeBSD.org\n1994/02/21"] pho [label="Peter Holm\npho@FreeBSD.org\n2008/11/16"] pjd [label="Pawel Jakub Dawidek\npjd@FreeBSD.org\n2004/02/02"] pkelsey [label="Patrick Kelsey\pkelsey@FreeBSD.org\n2014/05/29"] pluknet [label="Sergey Kandaurov\npluknet@FreeBSD.org\n2010/10/05"] ps [label="Paul Saab\nps@FreeBSD.org\n2000/02/23"] qingli [label="Qing Li\nqingli@FreeBSD.org\n2005/04/13"] ray [label="Aleksandr Rybalko\nray@FreeBSD.org\n2011/05/25"] rdivacky [label="Roman Divacky\nrdivacky@FreeBSD.org\n2008/03/13"] remko [label="Remko Lodder\nremko@FreeBSD.org\n2007/02/23"] rik [label="Roman Kurakin\nrik@FreeBSD.org\n2003/12/18"] rmacklem [label="Rick Macklem\nrmacklem@FreeBSD.org\n2009/03/27"] rmh [label="Robert Millan\nrmh@FreeBSD.org\n2011/09/18"] rnoland [label="Robert Noland\nrnoland@FreeBSD.org\n2008/09/15"] roberto [label="Ollivier Robert\nroberto@FreeBSD.org\n1995/02/22"] rodrigc [label="Craig Rodrigues\nrodrigc@FreeBSD.org\n2005/05/14"] royger [label="Roger Pau Monne\nroyger@FreeBSD.org\n2013/11/26"] rpaulo [label="Rui Paulo\nrpaulo@FreeBSD.org\n2007/09/25"] rpokala [label="Ravi Pokala\nrpokala@FreeBSD.org\n2015/11/19"] rrs [label="Randall R Stewart\nrrs@FreeBSD.org\n2007/02/08"] rse [label="Ralf S. Engelschall\nrse@FreeBSD.org\n1997/07/31"] rstone [label="Ryan Stone\nrstone@FreeBSD.org\n2010/04/19"] ru [label="Ruslan Ermilov\nru@FreeBSD.org\n1999/05/27"] rwatson [label="Robert N. M. Watson\nrwatson@FreeBSD.org\n1999/12/16"] sam [label="Sam Leffler\nsam@FreeBSD.org\n2002/07/02"] sanpei [label="MIHIRA Sanpei Yoshiro\nsanpei@FreeBSD.org\n2000/06/19"] sbruno [label="Sean Bruno\nsbruno@FreeBSD.org\n2008/08/02"] scf [label="Sean C. Farley\nscf@FreeBSD.org\n2007/06/24"] schweikh [label="Jens Schweikhardt\nschweikh@FreeBSD.org\n2001/04/06"] scottl [label="Scott Long\nscottl@FreeBSD.org\n2000/09/28"] se [label="Stefan Esser\nse@FreeBSD.org\n1994/08/26"] sephe [label="Sepherosa Ziehau\nsephe@FreeBSD.org\n2007/03/28"] sepotvin [label="Stephane E. Potvin\nsepotvin@FreeBSD.org\n2007/02/15"] simon [label="Simon L. Nielsen\nsimon@FreeBSD.org\n2006/03/07"] sjg [label="Simon J. Gerraty\nsjg@FreeBSD.org\n2012/10/23"] skra [label="Svatopluk Kraus\nslm@FreeBSD.org\n2015/10/28"] slm [label="Stephen McConnell\nslm@FreeBSD.org\n2014/05/07"] smh [label="Steven Hartland\nsmh@FreeBSD.org\n2012/11/12"] sobomax [label="Maxim Sobolev\nsobomax@FreeBSD.org\n2001/07/25"] sos [label="Soren Schmidt\nsos@FreeBSD.org\n????/??/??"] sson [label="Stacey Son\nsson@FreeBSD.org\n2008/07/08"] stas [label="Stanislav Sedov\nstas@FreeBSD.org\n2008/08/22"] suz [label="SUZUKI Shinsuke\nsuz@FreeBSD.org\n2002/03/26"] syrinx [label="Shteryana Shopova\nsyrinx@FreeBSD.org\n2006/10/07"] takawata [label="Takanori Watanabe\ntakawata@FreeBSD.org\n2000/07/06"] theraven [label="David Chisnall\ntheraven@FreeBSD.org\n2011/11/11"] thompsa [label="Andrew Thompson\nthompsa@FreeBSD.org\n2005/05/25"] ticso [label="Bernd Walter\nticso@FreeBSD.org\n2002/01/31"] tijl [label="Tijl Coosemans\ntijl@FreeBSD.org\n2010/07/16"] trasz [label="Edward Tomasz Napierala\ntrasz@FreeBSD.org\n2008/08/22"] trhodes [label="Tom Rhodes\ntrhodes@FreeBSD.org\n2002/05/28"] trociny [label="Mikolaj Golub\ntrociny@FreeBSD.org\n2011/03/10"] tuexen [label="Michael Tuexen\ntuexen@FreeBSD.org\n2009/06/06"] tychon [label="Tycho Nightingale\ntychon@FreeBSD.org\n2014/01/21"] ume [label="Hajimu UMEMOTO\nume@FreeBSD.org\n2000/02/26"] uqs [label="Ulrich Spoerlein\nuqs@FreeBSD.org\n2010/01/28"] vangyzen [label="Eric van Gyzen\nvangyzen@FreeBSD.org\n2015/03/08"] vanhu [label="Yvan Vanhullebus\nvanhu@FreeBSD.org\n2008/07/21"] versus [label="Konrad Jankowski\nversus@FreeBSD.org\n2008/10/27"] weongyo [label="Weongyo Jeong\nweongyo@FreeBSD.org\n2007/12/21"] wes [label="Wes Peters\nwes@FreeBSD.org\n1998/11/25"] whu [label="Wei Hu\nwhu@FreeBSD.org\n2015/02/11"] wkoszek [label="Wojciech A. Koszek\nwkoszek@FreeBSD.org\n2006/02/21"] wollman [label="Garrett Wollman\nwollman@FreeBSD.org\n????/??/??"] wsalamon [label="Wayne Salamon\nwsalamon@FreeBSD.org\n2005/06/25"] yongari [label="Pyun YongHyeon\nyongari@FreeBSD.org\n2004/08/01"] zbb [label="Zbigniew Bodek\nzbb@FreeBSD.org\n2013/09/02"] zec [label="Marko Zec\nzec@FreeBSD.org\n2008/06/22"] zml [label="Zachary Loafman\nzml@FreeBSD.org\n2009/05/27"] zont [label="Andrey Zonov\nzont@FreeBSD.org\n2012/08/21"] # Pseudo target representing rev 1.1 of commit.allow day1 [label="Birth of FreeBSD"] # Here are the mentor/mentee relationships. # Group together all the mentees for a particular mentor. # Keep the list sorted by mentor login. day1 -> jtc day1 -> jkh day1 -> nate day1 -> rgrimes day1 -> alm day1 -> dg adrian -> avos adrian -> lidl adrian -> loos adrian -> monthadar adrian -> ray adrian -> rmh +adrian -> sephe ae -> melifaro alc -> davide andre -> qingli anholt -> jkim avg -> art avg -> pluknet avg -> smh bapt -> allanjude bapt -> araujo bapt -> bdrewery benno -> grehan billf -> dougb billf -> gad billf -> jedgar billf -> jhb billf -> shafeeq bmilekic -> csjp bms -> dhartmei bms -> mlaier bms -> thompsa brian -> joe brooks -> bushman brooks -> jamie brooks -> theraven bz -> anchie bz -> jamie bz -> syrinx cognet -> br cognet -> jceel cognet -> kevlo cognet -> ian cognet -> wkoszek cognet -> zbb cperciva -> eadler cperciva -> flz cperciva -> randi cperciva -> simon csjp -> bushman das -> kargl das -> rodrigc delphij -> gabor delphij -> rafan +delphij -> sephe des -> anholt des -> hmp des -> mike des -> olli des -> ru des -> bapt dds -> versus dfr -> gallatin dfr -> zml dg -> peter dim -> theraven dwmalone -> fanf dwmalone -> peadar dwmalone -> snb ed -> dim ed -> gavin ed -> jilles ed -> rdivacky ed -> uqs eivind -> des eivind -> rwatson emaste -> achim emaste -> rstone emaste -> dteske emaste -> markj emax -> markus fjoe -> versus gallatin -> ticso gavin -> versus gibbs -> mjacob gibbs -> njl gibbs -> royger gibbs -> whu glebius -> mav gnn -> jinmei gnn -> rrs gnn -> ivoras gnn -> vanhu gnn -> lstewart gnn -> np gnn -> davide gnn -> arybchik gnn -> erj gnn -> kp gnn -> jtl grehan -> bryanv grog -> edwin grog -> le grog -> peterj imp -> akiyama imp -> ambrisko imp -> andrew imp -> bmah imp -> bruno imp -> dmlb imp -> emax imp -> furuta imp -> joe imp -> jon imp -> keichii imp -> mb imp -> mr imp -> neel imp -> non imp -> nork imp -> onoe imp -> remko imp -> rik imp -> rink imp -> sanpei imp -> shiba imp -> takawata imp -> toshi imp -> uch jake -> bms jake -> gordon jake -> harti jake -> jeff jake -> kmacy jake -> robert jake -> yongari jb -> sson jdp -> fjoe jfv -> erj jhb -> arr jhb -> avg jhb -> jch jhb -> jeff jhb -> kbyanc jhb -> peterj jhb -> pfg jhb -> rnoland jhb -> rpokala jimharris -> carl jkh -> dfr jkh -> gj jkh -> grog jkh -> imp jkh -> jlemon jkh -> joerg jkh -> jwd jkh -> msmith jkh -> murray jkh -> phk jkh -> wes jkh -> yar jkoshy -> kaiw jkoshy -> fabient jkoshy -> rstone jlemon -> bmilekic jlemon -> brooks jmallett -> pkelsey jmmv -> ngie joerg -> brian joerg -> eik joerg -> jmg joerg -> le joerg -> netchild joerg -> schweikh julian -> glebius julian -> davidxu julian -> archie julian -> adrian julian -> zec julian -> mp kan -> kib ken -> asomers ken -> slm kib -> ae kib -> dchagin kib -> gjb kib -> jah kib -> jlh kib -> jpaetzel kib -> lulf kib -> melifaro kib -> mmel kib -> pho kib -> pluknet kib -> rdivacky kib -> rmacklem kib -> rmh kib -> skra kib -> stas kib -> tijl kib -> trociny kib -> vangyzen kib -> zont kmacy -> lstewart marcel -> allanjude marcel -> art marcel -> arun marcel -> marius marcel -> nwhitehorn marcel -> sjg markj -> cem markm -> jasone markm -> sheldonh mav -> ae mdf -> gleb mdodd -> jake mike -> das mlaier -> benjsc mlaier -> dhartmei mlaier -> thompsa mlaier -> eri msmith -> cokane msmith -> jasone msmith -> scottl murray -> delphij mux -> cognet mux -> dumbbell netchild -> ariff njl -> marks njl -> philip njl -> rpaulo njl -> sepotvin nwhitehorn -> andreast nwhitehorn -> jhibbits obrien -> benno obrien -> groudier obrien -> gshapiro obrien -> kan obrien -> sam peter -> asmodai peter -> jayanth peter -> ps philip -> benl philip -> ed philip -> jls philip -> matteo philip -> uqs philip -> kp phk -> jkoshy phk -> mux pjd -> kib pjd -> lulf pjd -> oshogbo pjd -> smh pjd -> trociny rgrimes -> markm rmacklem -> jwd royger -> whu rpaulo -> avg rpaulo -> bschmidt rpaulo -> dim rpaulo -> jmmv rpaulo -> lidl rpaulo -> ngie rrs -> brucec rrs -> jchandra rrs -> tuexen rstone -> markj ru -> ceri ru -> cjc ru -> eik ru -> maxim ru -> sobomax rwatson -> adrian rwatson -> antoine rwatson -> bmah rwatson -> brueffer rwatson -> bz rwatson -> cperciva rwatson -> emaste rwatson -> gnn rwatson -> jh rwatson -> jonathan rwatson -> kensmith rwatson -> kmacy rwatson -> linimon rwatson -> rmacklem rwatson -> shafeeq rwatson -> tmm rwatson -> trasz rwatson -> trhodes rwatson -> wsalamon rodrigc -> araujo sam -> andre sam -> benjsc sam -> sephe sbruno -> hiren sbruno -> jimharris schweikh -> dds scottl -> achim scottl -> jimharris scottl -> pjd scottl -> sah scottl -> sbruno scottl -> slm scottl -> yongari sheldonh -> dwmalone sheldonh -> iedowse shin -> ume simon -> benl sos -> marcel thompsa -> weongyo thompsa -> eri trasz -> jh trasz -> mjg ume -> jinmei ume -> suz ume -> tshiozak wes -> scf wkoszek -> jceel wollman -> gad zml -> mdf zml -> zack } Index: projects/clang380-import/share/mk/src.opts.mk =================================================================== --- projects/clang380-import/share/mk/src.opts.mk (revision 293686) +++ projects/clang380-import/share/mk/src.opts.mk (revision 293687) @@ -1,417 +1,417 @@ # $FreeBSD$ # # Option file for FreeBSD /usr/src builds. # # Users define WITH_FOO and WITHOUT_FOO on the command line or in /etc/src.conf # and /etc/make.conf files. These translate in the build system to MK_FOO={yes,no} # with sensible (usually) defaults. # # Makefiles must include bsd.opts.mk after defining specific MK_FOO options that # are applicable for that Makefile (typically there are none, but sometimes there # are exceptions). Recursive makes usually add MK_FOO=no for options that they wish # to omit from that make. # # Makefiles must include bsd.mkopt.mk before they test the value of any MK_FOO # variable. # # Makefiles may also assume that this file is included by src.opts.mk should it # need variables defined there prior to the end of the Makefile where # bsd.{subdir,lib.bin}.mk is traditionally included. # # The old-style YES_FOO and NO_FOO are being phased out. No new instances of them # should be added. Old instances should be removed since they were just to # bridge the gap between FreeBSD 4 and FreeBSD 5. # # Makefiles should never test WITH_FOO or WITHOUT_FOO directly (although an # exception is made for _WITHOUT_SRCONF which turns off this mechanism # completely inside bsd.*.mk files). # .if !target(____) ____: .include # # Define MK_* variables (which are either "yes" or "no") for users # to set via WITH_*/WITHOUT_* in /etc/src.conf and override in the # make(1) environment. # These should be tested with `== "no"' or `!= "no"' in makefiles. # The NO_* variables should only be set by makefiles for variables # that haven't been converted over. # # These options are used by src the builds __DEFAULT_YES_OPTIONS = \ ACCT \ ACPI \ AMD \ APM \ AT \ ATM \ AUDIT \ AUTHPF \ AUTOFS \ BHYVE \ BINUTILS \ BINUTILS_BOOTSTRAP \ BLUETOOTH \ BOOT \ BOOTPARAMD \ BOOTPD \ BSD_CPIO \ BSDINSTALL \ BSNMP \ BZIP2 \ CALENDAR \ CAPSICUM \ CASPER \ CCD \ CDDL \ CPP \ CROSS_COMPILER \ CRYPT \ CTM \ CUSE \ CXX \ DICT \ DMAGENT \ DYNAMICROOT \ ED_CRYPTO \ EE \ ELFTOOLCHAIN_BOOTSTRAP \ EXAMPLES \ FDT \ FILE \ FINGER \ FLOPPY \ FMTREE \ FORTH \ FP_LIBC \ FREEBSD_UPDATE \ FTP \ GAMES \ GCOV \ GDB \ GNU \ GNU_GREP_COMPAT \ GPIO \ GPL_DTC \ GROFF \ HAST \ HTML \ HYPERV \ ICONV \ INET \ INET6 \ INETD \ IPFILTER \ IPFW \ ISCSI \ JAIL \ KDUMP \ KVM \ LDNS \ LDNS_UTILS \ LEGACY_CONSOLE \ LIB32 \ LIBPTHREAD \ LIBTHR \ LOCALES \ LOCATE \ LPR \ LS_COLORS \ LZMA_SUPPORT \ MAIL \ MAILWRAPPER \ MAKE \ MANDOCDB \ NDIS \ NETCAT \ NETGRAPH \ NLS_CATALOGS \ NS_CACHING \ NTP \ OPENSSL \ PAM \ PC_SYSINSTALL \ PF \ PKGBOOTSTRAP \ PMC \ PORTSNAP \ PPP \ QUOTAS \ RADIUS_SUPPORT \ RCMDS \ RBOOTD \ RCS \ RESCUE \ ROUTED \ SENDMAIL \ SETUID_LOGIN \ SHAREDOCS \ SOURCELESS \ SOURCELESS_HOST \ SOURCELESS_UCODE \ SVNLITE \ SYSCONS \ TALK \ TCP_WRAPPERS \ TCSH \ TELNET \ TESTS \ TEXTPROC \ TFTP \ TIMED \ UNBOUND \ USB \ UTMPX \ VI \ VT \ WIRELESS \ WPA_SUPPLICANT_EAPOL \ ZFS \ ZONEINFO __DEFAULT_NO_OPTIONS = \ BSD_GREP \ CLANG_EXTRAS \ DTRACE_TESTS \ EISA \ HESIOD \ LIBSOFT \ NAND \ OFED \ OPENLDAP \ SHARED_TOOLCHAIN \ SORT_THREADS \ SVN # # Default behaviour of some options depends on the architecture. Unfortunately # this means that we have to test TARGET_ARCH (the buildworld case) as well # as MACHINE_ARCH (the non-buildworld case). Normally TARGET_ARCH is not # used at all in bsd.*.mk, but we have to make an exception here if we want # to allow defaults for some things like clang to vary by target architecture. # Additional, per-target behavior should be rarely added only after much # gnashing of teeth and grinding of gears. # .if defined(TARGET_ARCH) __T=${TARGET_ARCH} .else __T=${MACHINE_ARCH} .endif .if defined(TARGET) __TT=${TARGET} .else __TT=${MACHINE} .endif .include # If the compiler is not C++11 capable, disable Clang and use GCC instead. # This means that architectures that have GCC 4.2 as default can not # build Clang without using an external compiler. .if ${COMPILER_FEATURES:Mc++11} && (${__T} == "aarch64" || \ ${__T} == "amd64" || ${__TT} == "arm" || ${__T} == "i386") # Clang is enabled, and will be installed as the default /usr/bin/cc. __DEFAULT_YES_OPTIONS+=CLANG CLANG_BOOTSTRAP CLANG_FULL CLANG_IS_CC __DEFAULT_NO_OPTIONS+=GCC GCC_BOOTSTRAP GNUCXX .elif ${COMPILER_FEATURES:Mc++11} && ${__T:Mpowerpc*} # On powerpc, if an external compiler that supports C++11 is used as ${CC}, # then Clang is enabled, but GCC is installed as the default /usr/bin/cc. __DEFAULT_YES_OPTIONS+=CLANG CLANG_FULL GCC GCC_BOOTSTRAP GNUCXX __DEFAULT_NO_OPTIONS+=CLANG_BOOTSTRAP CLANG_IS_CC .else # Everything else disables Clang, and uses GCC instead. __DEFAULT_YES_OPTIONS+=GCC GCC_BOOTSTRAP GNUCXX __DEFAULT_NO_OPTIONS+=CLANG CLANG_BOOTSTRAP CLANG_FULL CLANG_IS_CC .endif # In-tree binutils/gcc are older versions without modern architecture support. .if ${__T} == "aarch64" || ${__T} == "riscv64" BROKEN_OPTIONS+=BINUTILS BINUTILS_BOOTSTRAP GCC GCC_BOOTSTRAP GDB -__DEFAULT_YES_OPTIONS+=ELFCOPY_AS_OBJCOPY +__DEFAULT_YES_OPTIONS+=ELFCOPY_AS_OBJCOPY LLVM_LIBUNWIND .else -__DEFAULT_NO_OPTIONS+=ELFCOPY_AS_OBJCOPY +__DEFAULT_NO_OPTIONS+=ELFCOPY_AS_OBJCOPY LLVM_LIBUNWIND .endif .if ${__T} == "riscv64" BROKEN_OPTIONS+=PROFILE # "sorry, unimplemented: profiler support for RISC-V" BROKEN_OPTIONS+=TESTS # "undefined reference to `_Unwind_Resume'" BROKEN_OPTIONS+=CXX # "libcxxrt.so: undefined reference to `_Unwind_Resume_or_Rethrow'" .endif .if ${__T} == "aarch64" || ${__T} == "amd64" __DEFAULT_YES_OPTIONS+=LLDB .else __DEFAULT_NO_OPTIONS+=LLDB .endif # LLVM lacks support for FreeBSD 64-bit atomic operations for ARMv4/ARMv5 .if ${__T} == "arm" || ${__T} == "armeb" BROKEN_OPTIONS+=LLDB .endif # Only doing soft float API stuff on armv6 .if ${__T} != "armv6" BROKEN_OPTIONS+=LIBSOFT .endif .include # # MK_* options that default to "yes" if the compiler is a C++11 compiler. # .for var in \ LIBCPLUSPLUS .if !defined(MK_${var}) .if ${COMPILER_FEATURES:Mc++11} .if defined(WITHOUT_${var}) MK_${var}:= no .else MK_${var}:= yes .endif .else .if defined(WITH_${var}) MK_${var}:= yes .else MK_${var}:= no .endif .endif .endif .endfor # # Force some options off if their dependencies are off. # Order is somewhat important. # .if ${MK_LIBPTHREAD} == "no" MK_LIBTHR:= no .endif .if ${MK_LDNS} == "no" MK_LDNS_UTILS:= no MK_UNBOUND:= no .endif .if ${MK_SOURCELESS} == "no" MK_SOURCELESS_HOST:= no MK_SOURCELESS_UCODE:= no .endif .if ${MK_CDDL} == "no" MK_ZFS:= no MK_CTF:= no .endif .if ${MK_CRYPT} == "no" MK_OPENSSL:= no MK_OPENSSH:= no MK_KERBEROS:= no .endif .if ${MK_CXX} == "no" MK_CLANG:= no MK_GROFF:= no MK_GNUCXX:= no .endif .if ${MK_MAIL} == "no" MK_MAILWRAPPER:= no MK_SENDMAIL:= no MK_DMAGENT:= no .endif .if ${MK_NETGRAPH} == "no" MK_ATM:= no MK_BLUETOOTH:= no .endif .if ${MK_OPENSSL} == "no" MK_OPENSSH:= no MK_KERBEROS:= no .endif .if ${MK_PF} == "no" MK_AUTHPF:= no .endif .if ${MK_TESTS} == "no" MK_DTRACE_TESTS:= no .endif .if ${MK_TEXTPROC} == "no" MK_GROFF:= no .endif .if ${MK_CROSS_COMPILER} == "no" MK_BINUTILS_BOOTSTRAP:= no MK_CLANG_BOOTSTRAP:= no MK_ELFTOOLCHAIN_BOOTSTRAP:= no MK_GCC_BOOTSTRAP:= no .endif .if ${MK_TOOLCHAIN} == "no" MK_BINUTILS:= no MK_CLANG:= no MK_GCC:= no MK_GDB:= no MK_INCLUDES:= no .endif .if ${MK_CLANG} == "no" MK_CLANG_EXTRAS:= no MK_CLANG_FULL:= no .endif # # Set defaults for the MK_*_SUPPORT variables. # # # MK_*_SUPPORT options which default to "yes" unless their corresponding # MK_* variable is set to "no". # .for var in \ BZIP2 \ GNU \ INET \ INET6 \ KERBEROS \ KVM \ NETGRAPH \ PAM \ TESTS \ WIRELESS .if defined(WITHOUT_${var}_SUPPORT) || ${MK_${var}} == "no" MK_${var}_SUPPORT:= no .else MK_${var}_SUPPORT:= yes .endif .endfor # # MK_* options whose default value depends on another option. # .for vv in \ GSSAPI/KERBEROS \ MAN_UTILS/MAN .if defined(WITH_${vv:H}) MK_${vv:H}:= yes .elif defined(WITHOUT_${vv:H}) MK_${vv:H}:= no .else MK_${vv:H}:= ${MK_${vv:T}} .endif .endfor .if !${COMPILER_FEATURES:Mc++11} MK_LLDB:= no .endif # gcc 4.8 and newer supports libc++, so suppress gnuc++ in that case. # while in theory we could build it with that, we don't want to do # that since it creates too much confusion for too little gain. .if ${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} >= 40800 MK_GNUCXX:=no MK_GCC:=no .endif .endif # !target(____) Index: projects/clang380-import/share =================================================================== --- projects/clang380-import/share (revision 293686) +++ projects/clang380-import/share (revision 293687) Property changes on: projects/clang380-import/share ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/share:r293430-293685 Index: projects/clang380-import/sys/amd64/amd64/elf_machdep.c =================================================================== --- projects/clang380-import/sys/amd64/amd64/elf_machdep.c (revision 293686) +++ projects/clang380-import/sys/amd64/amd64/elf_machdep.c (revision 293687) @@ -1,293 +1,294 @@ /*- * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct sysentvec elf64_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); static Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info); static Elf64_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_oinfo); static Elf64_Brandinfo kfreebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/lib/ld-kfreebsd-x86-64.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &kfreebsd_brand_info); void elf64_dump_thread(struct thread *td, void *dst, size_t *off) { void *buf; size_t len; len = 0; if (use_xsave) { if (dst != NULL) { fpugetregs(td); len += elf64_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf64_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } *off = len; } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf64_Addr *where, val; Elf32_Addr *where32, val32; Elf_Addr addr; Elf_Addr addend; Elf_Size rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); /* Addend is 32 bit on 32 bit relocs */ switch (rtype) { case R_X86_64_PC32: case R_X86_64_32S: addend = *(Elf32_Addr *)where; break; default: addend = *where; break; } break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } switch (rtype) { case R_X86_64_NONE: /* none */ break; case R_X86_64_64: /* S + A */ error = lookup(lf, symidx, 1, &addr); val = addr + addend; if (error != 0) return -1; if (*where != val) *where = val; break; case R_X86_64_PC32: /* S + A - P */ error = lookup(lf, symidx, 1, &addr); where32 = (Elf32_Addr *)where; val32 = (Elf32_Addr)(addr + addend - (Elf_Addr)where); if (error != 0) return -1; if (*where32 != val32) *where32 = val32; break; case R_X86_64_32S: /* S + A sign extend */ error = lookup(lf, symidx, 1, &addr); val32 = (Elf32_Addr)(addr + addend); where32 = (Elf32_Addr *)where; if (error != 0) return -1; if (*where32 != val32) *where32 = val32; break; case R_X86_64_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation\n"); return -1; break; case R_X86_64_GLOB_DAT: /* S */ case R_X86_64_JMP_SLOT: /* XXX need addend + offset */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; if (*where != addr) *where = addr; break; case R_X86_64_RELATIVE: /* B + A */ addr = relocbase + addend; val = addr; if (*where != val) *where = val; break; default: printf("kldload: unexpected relocation type %ld\n", rtype); return -1; } return(0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: projects/clang380-import/sys/amd64/amd64/trap.c =================================================================== --- projects/clang380-import/sys/amd64/amd64/trap.c (revision 293686) +++ projects/clang380-import/sys/amd64/amd64/trap.c (revision 293687) @@ -1,968 +1,975 @@ /*- * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include __FBSDID("$FreeBSD$"); /* * AMD64 Trap and System call handling */ #include "opt_clock.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" #include "opt_kdb.h" #include "opt_stack.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , page_fault, all); PMC_SOFT_DEFINE( , , page_fault, read); PMC_SOFT_DEFINE( , , page_fault, write); #endif #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #ifdef KDTRACE_HOOKS #include #endif extern void __noinline trap(struct trapframe *frame); extern void trap_check(struct trapframe *frame); extern void syscall(struct trapframe *frame); void dblfault_handler(struct trapframe *frame); static int trap_pfault(struct trapframe *, int); static void trap_fatal(struct trapframe *, vm_offset_t); #define MAX_TRAP_MSG 32 static char *trap_msg[] = { "", /* 0 unused */ "privileged instruction fault", /* 1 T_PRIVINFLT */ "", /* 2 unused */ "breakpoint instruction fault", /* 3 T_BPTFLT */ "", /* 4 unused */ "", /* 5 unused */ "arithmetic trap", /* 6 T_ARITHTRAP */ "", /* 7 unused */ "", /* 8 unused */ "general protection fault", /* 9 T_PROTFLT */ "trace trap", /* 10 T_TRCTRAP */ "", /* 11 unused */ "page fault", /* 12 T_PAGEFLT */ "", /* 13 unused */ "alignment fault", /* 14 T_ALIGNFLT */ "", /* 15 unused */ "", /* 16 unused */ "", /* 17 unused */ "integer divide fault", /* 18 T_DIVIDE */ "non-maskable interrupt trap", /* 19 T_NMI */ "overflow trap", /* 20 T_OFLOW */ "FPU bounds check fault", /* 21 T_BOUND */ "FPU device not available", /* 22 T_DNA */ "double fault", /* 23 T_DOUBLEFLT */ "FPU operand fetch fault", /* 24 T_FPOPFLT */ "invalid TSS fault", /* 25 T_TSSFLT */ "segment not present fault", /* 26 T_SEGNPFLT */ "stack fault", /* 27 T_STKFLT */ "machine check trap", /* 28 T_MCHK */ "SIMD floating-point exception", /* 29 T_XMMFLT */ "reserved (unknown) fault", /* 30 T_RESERVED */ "", /* 31 unused (reserved) */ "DTrace pid return trap", /* 32 T_DTRACE_RET */ }; #ifdef KDB static int kdb_on_nmi = 1; SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RWTUN, &kdb_on_nmi, 0, "Go to KDB on NMI"); #endif static int panic_on_nmi = 1; SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RWTUN, &panic_on_nmi, 0, "Panic on NMI"); static int prot_fault_translation; SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RWTUN, &prot_fault_translation, 0, "Select signal to deliver on protection fault"); static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RWTUN, &uprintf_signal, 0, "Print debugging information on trap signal to ctty"); /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. */ void trap(struct trapframe *frame) { #ifdef KDTRACE_HOOKS struct reg regs; #endif struct thread *td = curthread; struct proc *p = td->td_proc; int i = 0, ucode = 0, code; u_int type; register_t addr = 0; ksiginfo_t ksi; PCPU_INC(cnt.v_trap); type = frame->tf_trapno; #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI) { if (ipi_nmi_handler() == 0) goto out; } #endif /* SMP */ #ifdef KDB if (kdb_active) { kdb_reenter(); goto out; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); goto out; } if (type == T_NMI) { #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI. If the PMC module is * active, pass the 'rip' value to the PMC module's interrupt * handler. A non-zero return value from the handler means that * the NMI was consumed by it and we can return immediately. */ if (pmc_intr != NULL && (*pmc_intr)(PCPU_GET(cpuid), frame) != 0) goto out; #endif #ifdef STACK if (stack_nmi_handler(frame) != 0) goto out; #endif } if (type == T_MCHK) { mca_intr(); goto out; } if ((frame->tf_rflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled * interrupts and then trapped. Enabling interrupts * now is wrong, but it is better than running with * interrupts disabled until they are accidentally * enabled later. */ if (ISPL(frame->tf_cs) == SEL_UPL) uprintf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); else if (type != T_NMI && type != T_BPTFLT && type != T_TRCTRAP) { /* * XXX not quite right, since this may be for a * multiple fault in user mode. */ printf("kernel trap %d with interrupts disabled\n", type); /* * We shouldn't enable interrupts while holding a * spin lock. */ if (td->td_md.md_spinlock_count == 0) enable_intr(); } } code = frame->tf_err; if (ISPL(frame->tf_cs) == SEL_UPL) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_rip; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ i = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ enable_intr(); #ifdef KDTRACE_HOOKS if (type == T_BPTFLT) { fill_frame_regs(frame, ®s); if (dtrace_pid_probe_ptr != NULL && dtrace_pid_probe_ptr(®s) == 0) goto out; } #endif frame->tf_rflags &= ~PSL_T; i = SIGTRAP; ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT); break; case T_ARITHTRAP: /* arithmetic trap */ ucode = fputrap_x87(); if (ucode == -1) goto userout; i = SIGFPE; break; case T_PROTFLT: /* general protection fault */ i = SIGBUS; ucode = BUS_OBJERR; break; case T_STKFLT: /* stack fault */ case T_SEGNPFLT: /* segment not present fault */ i = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ i = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: i = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: i = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ + /* + * Emulator can take care about this trap? + */ + if (*p->p_sysent->sv_trap != NULL && + (*p->p_sysent->sv_trap)(td) == 0) + goto userout; + addr = frame->tf_addr; i = trap_pfault(frame, TRUE); if (i == -1) goto userout; if (i == 0) goto user; if (i == SIGSEGV) ucode = SEGV_MAPERR; else { if (prot_fault_translation == 0) { /* * Autodetect. * This check also covers the images * without the ABI-tag ELF note. */ if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && p->p_osrel >= P_OSREL_SIGSEGV) { i = SIGSEGV; ucode = SEGV_ACCERR; } else { i = SIGBUS; ucode = BUS_PAGE_FAULT; } } else if (prot_fault_translation == 1) { /* * Always compat mode. */ i = SIGBUS; ucode = BUS_PAGE_FAULT; } else { /* * Always SIGSEGV mode. */ i = SIGSEGV; ucode = SEGV_ACCERR; } } break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; i = SIGFPE; break; #ifdef DEV_ISA case T_NMI: /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { #ifdef KDB /* * NMI can be hooked up to a pushbutton * for debugging. */ if (kdb_on_nmi) { printf ("NMI ... going to debugger\n"); kdb_trap(type, 0, frame); } #endif /* KDB */ goto userout; } else if (panic_on_nmi) panic("NMI indicates hardware failure"); break; #endif /* DEV_ISA */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; i = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; i = SIGFPE; break; case T_DNA: /* transparent fault (due to context switch "late") */ KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); fpudna(); goto userout; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; i = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = fputrap_sse(); if (ucode == -1) goto userout; i = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: enable_intr(); fill_frame_regs(frame, ®s); if (dtrace_return_probe_ptr != NULL && dtrace_return_probe_ptr(®s) == 0) goto out; break; #endif } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE); goto out; case T_DNA: KASSERT(!PCB_USER_FPU(td->td_pcb), ("Unregistered use of FPU in kernel")); fpudna(); goto out; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * For now, supporting kernel handler * registration for FPU traps is overkill. */ trap_fatal(frame, 0); goto out; case T_STKFLT: /* stack fault */ case T_PROTFLT: /* general protection fault */ case T_SEGNPFLT: /* segment not present fault */ if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %rip's and %rsp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. */ if (frame->tf_rip == (long)doreti_iret) { frame->tf_rip = (long)doreti_iret_fault; goto out; } if (frame->tf_rip == (long)ld_ds) { frame->tf_rip = (long)ds_load_fault; goto out; } if (frame->tf_rip == (long)ld_es) { frame->tf_rip = (long)es_load_fault; goto out; } if (frame->tf_rip == (long)ld_fs) { frame->tf_rip = (long)fs_load_fault; goto out; } if (frame->tf_rip == (long)ld_gs) { frame->tf_rip = (long)gs_load_fault; goto out; } if (frame->tf_rip == (long)ld_gsbase) { frame->tf_rip = (long)gsbase_load_fault; goto out; } if (frame->tf_rip == (long)ld_fsbase) { frame->tf_rip = (long)fsbase_load_fault; goto out; } if (curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; goto out; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_rflags & PSL_NT) { frame->tf_rflags &= ~PSL_NT; goto out; } break; case T_TRCTRAP: /* trace trap */ /* * Ignore debug register trace traps due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap()) { /* * Reset breakpoint bits because the * processor doesn't */ /* XXX check upper bits here */ load_dr6(rdr6() & 0xfffffff0); goto out; } /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB if (kdb_trap(type, 0, frame)) goto out; #endif break; #ifdef DEV_ISA case T_NMI: /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { #ifdef KDB /* * NMI can be hooked up to a pushbutton * for debugging. */ if (kdb_on_nmi) { printf ("NMI ... going to debugger\n"); kdb_trap(type, 0, frame); } #endif /* KDB */ goto out; } else if (panic_on_nmi == 0) goto out; /* FALLTHROUGH */ #endif /* DEV_ISA */ } trap_fatal(frame, 0); goto out; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap) i = (*p->p_sysent->sv_transtrap)(i, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = i; ksi.ksi_code = ucode; ksi.ksi_trapno = type; ksi.ksi_addr = (void *)addr; if (uprintf_signal) { uprintf("pid %d comm %s: signal %d err %lx code %d type %d " "addr 0x%lx rsp 0x%lx rip 0x%lx " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr, frame->tf_rsp, frame->tf_rip, fubyte((void *)(frame->tf_rip + 0)), fubyte((void *)(frame->tf_rip + 1)), fubyte((void *)(frame->tf_rip + 2)), fubyte((void *)(frame->tf_rip + 3)), fubyte((void *)(frame->tf_rip + 4)), fubyte((void *)(frame->tf_rip + 5)), fubyte((void *)(frame->tf_rip + 6)), fubyte((void *)(frame->tf_rip + 7))); } KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); user: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); userout: out: return; } /* * Ensure that we ignore any DTrace-induced faults. This function cannot * be instrumented, so it cannot generate such faults itself. */ void trap_check(struct trapframe *frame) { #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, frame->tf_trapno) != 0) return; #endif trap(frame); } static int trap_pfault(frame, usermode) struct trapframe *frame; int usermode; { vm_offset_t va; vm_map_t map; int rv = 0; vm_prot_t ftype; struct thread *td = curthread; struct proc *p = td->td_proc; vm_offset_t eva = frame->tf_addr; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. */ if (td->td_md.md_spurflt_addr != eva || (td->td_pflags & TDP_RESETSPUR) != 0) { /* * Do nothing to the TLB. A stale TLB entry is * flushed automatically by a page fault. */ td->td_md.md_spurflt_addr = eva; td->td_pflags &= ~TDP_RESETSPUR; return (0); } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { trap_fatal(frame, eva); return (-1); } } va = trunc_page(eva); if (va >= VM_MIN_KERNEL_ADDRESS) { /* * Don't allow user-mode faults in kernel address space. */ if (usermode) goto nogo; map = kernel_map; } else { map = &p->p_vmspace->vm_map; /* * When accessing a usermode address, kernel must be * ready to accept the page fault, and provide a * handling routine. Since accessing the address * without the handler is a bug, do not try to handle * it normally, and panic immediately. */ if (!usermode && (td->td_intr_nesting_level != 0 || curpcb->pcb_onfault == NULL)) { trap_fatal(frame, eva); return (-1); } } /* * If the trap was caused by errant bits in the PTE then panic. */ if (frame->tf_err & PGEX_RSV) { trap_fatal(frame, eva); return (-1); } /* * PGEX_I is defined only if the execute disable bit capability is * supported and enabled. */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; else ftype = VM_PROT_READ; /* Fault in the page. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { PMC_SOFT_CALL_TF( , , page_fault, all, frame); if (ftype == VM_PROT_READ) PMC_SOFT_CALL_TF( , , page_fault, read, frame); else PMC_SOFT_CALL_TF( , , page_fault, write, frame); } #endif return (0); } nogo: if (!usermode) { if (td->td_intr_nesting_level == 0 && curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; return (0); } trap_fatal(frame, eva); return (-1); } return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } static void trap_fatal(frame, eva) struct trapframe *frame; vm_offset_t eva; { int code, ss; u_int type; long esp; struct soft_segment_descriptor softseg; char *msg; code = frame->tf_err; type = frame->tf_trapno; sdtossd(&gdt[NGDT * PCPU_GET(cpuid) + IDXSEL(frame->tf_cs & 0xffff)], &softseg); if (type <= MAX_TRAP_MSG) msg = trap_msg[type]; else msg = "UNKNOWN"; printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg, ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%lx\n", eva); printf("fault code = %s %s %s%s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", code & PGEX_I ? "instruction" : "data", code & PGEX_RSV ? " rsv" : "", code & PGEX_P ? "protection violation" : "page not present"); } printf("instruction pointer = 0x%lx:0x%lx\n", frame->tf_cs & 0xffff, frame->tf_rip); if (ISPL(frame->tf_cs) == SEL_UPL) { ss = frame->tf_ss & 0xffff; esp = frame->tf_rsp; } else { ss = GSEL(GDATA_SEL, SEL_KPL); esp = (long)&frame->tf_rsp; } printf("stack pointer = 0x%x:0x%lx\n", ss, esp); printf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp); printf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_rflags & PSL_T) printf("trace trap, "); if (frame->tf_rflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_rflags & PSL_NT) printf("nested task, "); if (frame->tf_rflags & PSL_RF) printf("resume, "); printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12); printf("current process = %d (%s)\n", curproc->p_pid, curthread->td_name); #ifdef KDB if (debugger_on_panic || kdb_active) if (kdb_trap(type, 0, frame)) return; #endif printf("trap number = %d\n", type); if (type <= MAX_TRAP_MSG) panic("%s", trap_msg[type]); else panic("unknown/reserved trap"); } /* * Double fault handler. Called when a fault occurs while writing * a frame for a trap/exception onto the stack. This usually occurs * when the stack overflows (such is the case with infinite recursion, * for example). */ void dblfault_handler(struct trapframe *frame) { #ifdef KDTRACE_HOOKS if (dtrace_doubletrap_func != NULL) (*dtrace_doubletrap_func)(); #endif printf("\nFatal double fault\n"); printf("rip = 0x%lx\n", frame->tf_rip); printf("rsp = 0x%lx\n", frame->tf_rsp); printf("rbp = 0x%lx\n", frame->tf_rbp); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif panic("double fault"); } int cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) { struct proc *p; struct trapframe *frame; register_t *argp; caddr_t params; int reg, regcnt, error; p = td->td_proc; frame = td->td_frame; reg = 0; regcnt = 6; params = (caddr_t)frame->tf_rsp + sizeof(register_t); sa->code = frame->tf_rax; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = frame->tf_rdi; reg++; regcnt--; } if (p->p_sysent->sv_mask) sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; KASSERT(sa->narg <= sizeof(sa->args) / sizeof(sa->args[0]), ("Too many syscall arguments!")); error = 0; argp = &frame->tf_rdi; argp += reg; bcopy(argp, sa->args, sizeof(sa->args[0]) * regcnt); if (sa->narg > regcnt) { KASSERT(params != NULL, ("copyin args with no params!")); error = copyin(params, &sa->args[regcnt], (sa->narg - regcnt) * sizeof(sa->args[0])); } if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; } return (error); } #include "../../kern/subr_syscall.c" /* * System call handler for native binaries. The trap frame is already * set up by the assembler trampoline and a pointer to it is saved in * td_frame. */ void amd64_syscall(struct thread *td, int traced) { struct syscall_args sa; int error; ksiginfo_t ksi; #ifdef DIAGNOSTIC if (ISPL(td->td_frame->tf_cs) != SEL_UPL) { panic("syscall"); /* NOT REACHED */ } #endif error = syscallenter(td, &sa); /* * Traced syscall. */ if (__predict_false(traced)) { td->td_frame->tf_rflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)td->td_frame->tf_rip; trapsignal(td, &ksi); } KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returing with kernel FPU ctx leaked", syscallname(td->td_proc, sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, sa.code))); syscallret(td, error, &sa); /* * If the user-supplied value of %rip is not a canonical * address, then some CPUs will trigger a ring 0 #GP during * the sysret instruction. However, the fault handler would * execute in ring 0 with the user's %gs and %rsp which would * not be safe. Instead, use the full return path which * catches the problem safely. */ if (td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS) set_pcb_flags(td->td_pcb, PCB_FULL_IRET); } Index: projects/clang380-import/sys/amd64/linux/linux_sysvec.c =================================================================== --- projects/clang380-import/sys/amd64/linux/linux_sysvec.c (revision 293686) +++ projects/clang380-import/sys/amd64/linux/linux_sysvec.c (revision 293687) @@ -1,943 +1,992 @@ /*- * Copyright (c) 2013 Dmitry Chagin * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2003 Peter Wemm * Copyright (c) 2002 Doug Rabson * Copyright (c) 1998-1999 Andrew Gallatin * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #define __ELF_WORD_SIZE 64 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux64, 1); #if BYTE_ORDER == LITTLE_ENDIAN #define SHELLMAGIC 0x2123 /* #! */ #else #define SHELLMAGIC 0x2321 #endif #if defined(DEBUG) SYSCTL_PROC(_compat_linux, OID_AUTO, debug, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, linux_sysctl_debug, "A", "Linux 64 debugging control"); #endif /* * Allow the this functions to use the ldebug() facility * even though they are not syscalls themselves. Map them * to syscall 0. This is slightly less bogus than using * ldebug(sigreturn). */ #define LINUX_SYS_linux_rt_sendsig 0 const char *linux_kplatform; static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux_locore_o_start; extern char _binary_linux_locore_o_end; extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static register_t * linux_copyout_strings(struct image_params *imgp); static int elf_linux_fixup(register_t **stack_base, struct image_params *iparams); static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); static void linux_set_syscall_retval(struct thread *td, int error); static int linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack); +static int linux_vsyscall(struct thread *td); /* * Linux syscalls return negative errno's, we do positive and map them * Reference: * FreeBSD: src/sys/sys/errno.h * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h * linux-2.6.17.8/include/asm-generic/errno.h */ static int bsd_to_linux_errno[ELAST + 1] = { -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, -72, -67, -71 }; #define LINUX_T_UNKNOWN 255 static int _bsd_to_linux_trapcode[] = { LINUX_T_UNKNOWN, /* 0 */ 6, /* 1 T_PRIVINFLT */ LINUX_T_UNKNOWN, /* 2 */ 3, /* 3 T_BPTFLT */ LINUX_T_UNKNOWN, /* 4 */ LINUX_T_UNKNOWN, /* 5 */ 16, /* 6 T_ARITHTRAP */ 254, /* 7 T_ASTFLT */ LINUX_T_UNKNOWN, /* 8 */ 13, /* 9 T_PROTFLT */ 1, /* 10 T_TRCTRAP */ LINUX_T_UNKNOWN, /* 11 */ 14, /* 12 T_PAGEFLT */ LINUX_T_UNKNOWN, /* 13 */ 17, /* 14 T_ALIGNFLT */ LINUX_T_UNKNOWN, /* 15 */ LINUX_T_UNKNOWN, /* 16 */ LINUX_T_UNKNOWN, /* 17 */ 0, /* 18 T_DIVIDE */ 2, /* 19 T_NMI */ 4, /* 20 T_OFLOW */ 5, /* 21 T_BOUND */ 7, /* 22 T_DNA */ 8, /* 23 T_DOUBLEFLT */ 9, /* 24 T_FPOPFLT */ 10, /* 25 T_TSSFLT */ 11, /* 26 T_SEGNPFLT */ 12, /* 27 T_STKFLT */ 18, /* 28 T_MCHK */ 19, /* 29 T_XMMFLT */ 15 /* 30 T_RESERVED */ }; #define bsd_to_linux_trapcode(code) \ ((code)td_proc; frame = td->td_frame; sa->args[0] = frame->tf_rdi; sa->args[1] = frame->tf_rsi; sa->args[2] = frame->tf_rdx; sa->args[3] = frame->tf_rcx; sa->args[4] = frame->tf_r8; sa->args[5] = frame->tf_r9; sa->code = frame->tf_rax; if (sa->code >= p->p_sysent->sv_size) /* nosys */ sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; td->td_retval[0] = 0; return (0); } static void linux_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame = td->td_frame; /* * On Linux only %rcx and %r11 values are not preserved across * the syscall. * So, do not clobber %rdx and %r10 */ td->td_retval[1] = frame->tf_rdx; frame->tf_r10 = frame->tf_rcx; cpu_set_syscall_retval(td, error); /* Restore all registers. */ set_pcb_flags(td->td_pcb, PCB_FULL_IRET); } static int elf_linux_fixup(register_t **stack_base, struct image_params *imgp) { Elf_Auxargs *args; Elf_Addr *base; Elf_Addr *pos; struct ps_strings *arginfo; struct proc *p; p = imgp->proc; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; KASSERT(curthread->td_proc == imgp->proc, ("unsafe elf_linux_fixup(), should be curproc")); base = (Elf64_Addr *)*stack_base; args = (Elf64_Auxargs *)imgp->auxargs; pos = base + (imgp->args->argc + imgp->args->envc + 2); AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0); AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary); if (imgp->execpathp != 0) AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; base--; suword(base, (uint64_t)imgp->args->argc); *stack_base = (register_t *)base; return (0); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. */ static register_t * linux_copyout_strings(struct image_params *imgp) { int argc, envc; char **vectp; char *stringp, *destp; register_t *stack_base; struct ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; struct proc *p; /* * Calculate string base and vector table pointers. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; p = imgp->proc; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; destp = (caddr_t)arginfo - SPARE_USRSPACE - roundup(sizeof(canary), sizeof(char *)) - roundup(execpath_len, sizeof(char *)) - roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); if (execpath_len != 0) { imgp->execpathp = (uintptr_t)arginfo - execpath_len; copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); imgp->canary = (uintptr_t)arginfo - roundup(execpath_len, sizeof(char *)) - roundup(sizeof(canary), sizeof(char *)); copyout(canary, (void *)imgp->canary, sizeof(canary)); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for * lower compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (LINUX_AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets,and imgp->auxarg_size is room * for argument of Runtime loader. */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *)); } else { /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(char *)); } /* * vectp also becomes our initial stack base */ stack_base = (register_t *)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); suword(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword(vectp++, 0); suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); suword(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword(vectp, 0); return (stack_base); } /* * Reset registers to default values on exec. */ static void linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; mtx_lock(&dt_lock); if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); else mtx_unlock(&dt_lock); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; clear_pcb_flags(pcb, PCB_32BIT); pcb->pcb_initial_fpucw = __LINUX_NPXCW__; set_pcb_flags(pcb, PCB_FULL_IRET); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = stack; regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } clear_pcb_flags(pcb, PCB_DBREGS); } /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); } /* * Copied from amd64/amd64/machdep.c * * XXX fpu state need? don't think so */ int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { struct proc *p; struct l_ucontext uc; struct l_sigcontext *context; struct trapframe *regs; unsigned long rflags; int error; ksiginfo_t ksi; regs = td->td_frame; error = copyin((void *)regs->tf_rbx, &uc, sizeof(uc)); if (error != 0) return (error); p = td->td_proc; context = &uc.uc_mcontext; rflags = context->sc_rflags; /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_rflags for faults. Debuggers * should sometimes set it there too. tf_rflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ #define RFLAG_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) if (!RFLAG_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) { printf("linux_rt_sigreturn: rflags = 0x%lx\n", rflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(context->sc_cs)) { printf("linux_rt_sigreturn: cs = 0x%x\n", context->sc_cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return (EINVAL); } PROC_LOCK(p); linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); regs->tf_rdi = context->sc_rdi; regs->tf_rsi = context->sc_rsi; regs->tf_rdx = context->sc_rdx; regs->tf_rbp = context->sc_rbp; regs->tf_rbx = context->sc_rbx; regs->tf_rcx = context->sc_rcx; regs->tf_rax = context->sc_rax; regs->tf_rip = context->sc_rip; regs->tf_rsp = context->sc_rsp; regs->tf_r8 = context->sc_r8; regs->tf_r9 = context->sc_r9; regs->tf_r10 = context->sc_r10; regs->tf_r11 = context->sc_r11; regs->tf_r12 = context->sc_r12; regs->tf_r13 = context->sc_r13; regs->tf_r14 = context->sc_r14; regs->tf_r15 = context->sc_r15; regs->tf_cs = context->sc_cs; regs->tf_err = context->sc_err; regs->tf_rflags = rflags; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (EJUSTRETURN); } /* * copied from amd64/amd64/machdep.c * * Send an interrupt to process. */ static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct l_rt_sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; caddr_t sp; struct trapframe *regs; int sig, code; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; code = ksi->ksi_code; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); LINUX_CTR4(rt_sendsig, "%p, %d, %p, %u", catcher, sig, mask, code); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe); } else sp = (caddr_t)regs->tf_rsp - sizeof(struct l_rt_sigframe) - 128; /* Align to 16 bytes. */ sfp = (struct l_rt_sigframe *)((unsigned long)sp & ~0xFul); mtx_unlock(&psp->ps_mtx); /* Translate the signal. */ sig = bsd_to_linux_signal(sig); /* Save user context. */ bzero(&sf, sizeof(sf)); bsd_to_linux_sigset(mask, &sf.sf_sc.uc_sigmask); bsd_to_linux_sigset(mask, &sf.sf_sc.uc_mcontext.sc_mask); sf.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); sf.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; sf.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); sf.sf_sc.uc_mcontext.sc_rdi = regs->tf_rdi; sf.sf_sc.uc_mcontext.sc_rsi = regs->tf_rsi; sf.sf_sc.uc_mcontext.sc_rdx = regs->tf_rdx; sf.sf_sc.uc_mcontext.sc_rbp = regs->tf_rbp; sf.sf_sc.uc_mcontext.sc_rbx = regs->tf_rbx; sf.sf_sc.uc_mcontext.sc_rcx = regs->tf_rcx; sf.sf_sc.uc_mcontext.sc_rax = regs->tf_rax; sf.sf_sc.uc_mcontext.sc_rip = regs->tf_rip; sf.sf_sc.uc_mcontext.sc_rsp = regs->tf_rsp; sf.sf_sc.uc_mcontext.sc_r8 = regs->tf_r8; sf.sf_sc.uc_mcontext.sc_r9 = regs->tf_r9; sf.sf_sc.uc_mcontext.sc_r10 = regs->tf_r10; sf.sf_sc.uc_mcontext.sc_r11 = regs->tf_r11; sf.sf_sc.uc_mcontext.sc_r12 = regs->tf_r12; sf.sf_sc.uc_mcontext.sc_r13 = regs->tf_r13; sf.sf_sc.uc_mcontext.sc_r14 = regs->tf_r14; sf.sf_sc.uc_mcontext.sc_r15 = regs->tf_r15; sf.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; sf.sf_sc.uc_mcontext.sc_rflags = regs->tf_rflags; sf.sf_sc.uc_mcontext.sc_err = regs->tf_err; sf.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); sf.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; /* Build the argument list for the signal handler. */ regs->tf_rdi = sig; /* arg 1 in %rdi */ regs->tf_rax = 0; regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ regs->tf_rdx = (register_t)&sfp->sf_sc; /* arg 3 in %rdx */ sf.sf_handler = catcher; /* Fill in POSIX parts */ ksiginfo_to_lsiginfo(ksi, &sf.sf_si, sig); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (long)sfp; regs->tf_rip = linux_rt_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * If a linux binary is exec'ing something, try this image activator * first. We override standard shell script execution in order to * be able to modify the interpreter path. We only do this if a linux * binary is doing the exec, so we do not create an EXEC module for it. */ static int exec_linux_imgact_try(struct image_params *iparams); static int exec_linux_imgact_try(struct image_params *imgp) { const char *head = (const char *)imgp->image_header; char *rpath; int error = -1, len; /* * The interpreter for shell scripts run from a linux binary needs * to be located in /compat/linux if possible in order to recursively * maintain linux path emulation. */ if (((const short *)head)[0] == SHELLMAGIC) { /* * Run our normal shell image activator. If it succeeds * attempt to use the alternate path for the interpreter. * If an alternate path is found, use our stringspace * to store it. */ if ((error = exec_shell_imgact(imgp)) == 0) { linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD); if (rpath != NULL) { len = strlen(rpath) + 1; if (len <= MAXSHELLCMDLEN) memcpy(imgp->interpreter_name, rpath, len); free(rpath, M_TEMP); } } } return(error); } +#define LINUX_VSYSCALL_START (-10UL << 20) +#define LINUX_VSYSCALL_SZ 1024 + +const unsigned long linux_vsyscall_vector[] = { + LINUX_SYS_gettimeofday, + LINUX_SYS_linux_time, + /* getcpu not implemented */ +}; + +static int +linux_vsyscall(struct thread *td) +{ + struct trapframe *frame; + uint64_t retqaddr; + int code, traced; + int error; + + frame = td->td_frame; + + /* Check %rip for vsyscall area */ + if (__predict_true(frame->tf_rip < LINUX_VSYSCALL_START)) + return (EINVAL); + if ((frame->tf_rip & (LINUX_VSYSCALL_SZ - 1)) != 0) + return (EINVAL); + code = (frame->tf_rip - LINUX_VSYSCALL_START) / LINUX_VSYSCALL_SZ; + if (code >= nitems(linux_vsyscall_vector)) + return (EINVAL); + + /* + * vsyscall called as callq *(%rax), so we must + * use return address from %rsp and also fixup %rsp + */ + error = copyin((void *)frame->tf_rsp, &retqaddr, sizeof(retqaddr)); + if (error) + return (error); + + frame->tf_rip = retqaddr; + frame->tf_rax = linux_vsyscall_vector[code]; + frame->tf_rsp += 8; + + traced = (frame->tf_flags & PSL_T); + + amd64_syscall(td, traced); + + return (0); +} + struct sysentvec elf_linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, .sv_mask = 0, .sv_errsize = ELAST + 1, .sv_errtbl = bsd_to_linux_errno, .sv_transtrap = translate_traps, .sv_fixup = elf_linux_fixup, .sv_sendsig = linux_rt_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF64", .sv_coredump = elf64_coredump, .sv_imgact_try = exec_linux_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_LP64 | SV_SHP, .sv_set_syscall_retval = linux_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, - .sv_thread_detach = linux_thread_detach + .sv_thread_detach = linux_thread_detach, + .sv_trap = linux_vsyscall, }; static void linux_vdso_install(void *param) { linux_szsigcode = (&_binary_linux_locore_o_end - &_binary_linux_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("Linux invalid vdso size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec, SHAREDPAGE); bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; linux_kplatform = linux_shared_page_mapping + (linux_platform - (caddr_t)SHAREDPAGE); } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)linux_vdso_install, NULL); static void linux_vdso_deinstall(void *param) { __elfN(linux_shared_page_fini)(linux_shared_page_obj); }; SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)linux_vdso_deinstall, NULL); static char GNULINUX_ABI_VENDOR[] = "GNU"; static int GNULINUX_ABI_DESC = 0; static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNULINUX_ABI_DESC) return (FALSE); /* * For linux we encode osrel as follows (see linux_mib.c): * VVVMMMIII (version, major, minor), see linux_mib.c. */ *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; return (TRUE); } static Elf_Brandnote linux64_brandnote = { .hdr.n_namesz = sizeof(GNULINUX_ABI_VENDOR), .hdr.n_descsz = 16, .hdr.n_type = 1, .vendor = GNULINUX_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux_trans_osrel }; static Elf64_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_X86_64, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib64/ld-linux-x86-64.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf64_Brandinfo linux_glibc2brandshort = { .brand = ELFOSABI_LINUX, .machine = EM_X86_64, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib64/ld-linux.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf64_Brandinfo *linux_brandlist[] = { &linux_glibc2brand, &linux_glibc2brandshort, NULL }; static int linux64_elf_modevent(module_t mod, int type, void *data) { Elf64_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_register_handler(*lihp); LIST_INIT(&futex_list); mtx_init(&futex_mtx, "ftllk64", NULL, MTX_DEF); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux x86-64 ELF exec handler installed\n"); } else printf("cannot insert Linux x86-64 ELF brand handler\n"); break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_unregister_handler(*lihp); mtx_destroy(&futex_mtx); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux64_elf_mod = { "linux64elf", linux64_elf_modevent, 0 }; DECLARE_MODULE_TIED(linux64elf, linux64_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(linux64elf, linux_common, 1, 1, 1); Index: projects/clang380-import/sys/amd64/linux32/linux32_sysvec.c =================================================================== --- projects/clang380-import/sys/amd64/linux32/linux32_sysvec.c (revision 293686) +++ projects/clang380-import/sys/amd64/linux32/linux32_sysvec.c (revision 293687) @@ -1,1204 +1,1205 @@ /*- * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2003 Peter Wemm * Copyright (c) 2002 Doug Rabson * Copyright (c) 1998-1999 Andrew Gallatin * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #ifndef COMPAT_FREEBSD32 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!" #endif #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux, 1); #define AUXARGS_ENTRY_32(pos, id, val) \ do { \ suword32(pos++, id); \ suword32(pos++, val); \ } while (0) #if BYTE_ORDER == LITTLE_ENDIAN #define SHELLMAGIC 0x2123 /* #! */ #else #define SHELLMAGIC 0x2321 #endif /* * Allow the sendsig functions to use the ldebug() facility * even though they are not syscalls themselves. Map them * to syscall 0. This is slightly less bogus than using * ldebug(sigreturn). */ #define LINUX32_SYS_linux_rt_sendsig 0 #define LINUX32_SYS_linux_sendsig 0 const char *linux_kplatform; static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux32_locore_o_start; extern char _binary_linux32_locore_o_end; extern struct sysent linux32_sysent[LINUX32_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static int elf_linux_fixup(register_t **stack_base, struct image_params *iparams); static register_t *linux_copyout_strings(struct image_params *imgp); static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); static void exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack); static void linux32_fixlimit(struct rlimit *rl, int which); static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); /* * Linux syscalls return negative errno's, we do positive and map them * Reference: * FreeBSD: src/sys/sys/errno.h * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h * linux-2.6.17.8/include/asm-generic/errno.h */ static int bsd_to_linux_errno[ELAST + 1] = { -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, -72, -67, -71 }; #define LINUX_T_UNKNOWN 255 static int _bsd_to_linux_trapcode[] = { LINUX_T_UNKNOWN, /* 0 */ 6, /* 1 T_PRIVINFLT */ LINUX_T_UNKNOWN, /* 2 */ 3, /* 3 T_BPTFLT */ LINUX_T_UNKNOWN, /* 4 */ LINUX_T_UNKNOWN, /* 5 */ 16, /* 6 T_ARITHTRAP */ 254, /* 7 T_ASTFLT */ LINUX_T_UNKNOWN, /* 8 */ 13, /* 9 T_PROTFLT */ 1, /* 10 T_TRCTRAP */ LINUX_T_UNKNOWN, /* 11 */ 14, /* 12 T_PAGEFLT */ LINUX_T_UNKNOWN, /* 13 */ 17, /* 14 T_ALIGNFLT */ LINUX_T_UNKNOWN, /* 15 */ LINUX_T_UNKNOWN, /* 16 */ LINUX_T_UNKNOWN, /* 17 */ 0, /* 18 T_DIVIDE */ 2, /* 19 T_NMI */ 4, /* 20 T_OFLOW */ 5, /* 21 T_BOUND */ 7, /* 22 T_DNA */ 8, /* 23 T_DOUBLEFLT */ 9, /* 24 T_FPOPFLT */ 10, /* 25 T_TSSFLT */ 11, /* 26 T_SEGNPFLT */ 12, /* 27 T_STKFLT */ 18, /* 28 T_MCHK */ 19, /* 29 T_XMMFLT */ 15 /* 30 T_RESERVED */ }; #define bsd_to_linux_trapcode(code) \ ((code)td_proc == imgp->proc, ("unsafe elf_linux_fixup(), should be curproc")); base = (Elf32_Addr *)*stack_base; args = (Elf32_Auxargs *)imgp->auxargs; pos = base + (imgp->args->argc + imgp->args->envc + 2); AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall); AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature); /* * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, * as it has appeared in the 2.4.0-rc7 first time. * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), * glibc falls back to the hard-coded CLK_TCK value when aux entry * is not present. * Also see linux_times() implementation. */ if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY_32(pos, AT_BASE, args->base); AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0); AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, PTROUT(imgp->canary)); if (imgp->execpathp != 0) AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, PTROUT(imgp->execpathp)); if (args->execfd != -1) AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY_32(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; base--; suword32(base, (uint32_t)imgp->args->argc); *stack_base = (register_t *)base; return (0); } static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_rt_sigframe *fp, frame; int oonstack; int sig; int code; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); #ifdef DEBUG if (ldebug(rt_sendsig)) printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), catcher, sig, (void*)mask, code); #endif /* * Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); } else fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; mtx_unlock(&psp->ps_mtx); /* * Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = PTROUT(catcher); frame.sf_sig = sig; frame.sf_siginfo = PTROUT(&fp->sf_si); frame.sf_ucontext = PTROUT(&fp->sf_sc); /* Fill in POSIX parts */ ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); /* * Build the signal context to be used by sigreturn * and libgcc unwind. */ frame.sf_sc.uc_flags = 0; /* XXX ??? */ frame.sf_sc.uc_link = 0; /* XXX ??? */ frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__mask; frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp; frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); #ifdef DEBUG if (ldebug(rt_sendsig)) printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); #endif if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ #ifdef DEBUG if (ldebug(rt_sendsig)) printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), fp, oonstack); #endif PROC_LOCK(p); sigexit(td, SIGILL); } /* * Build context to run handler in. */ regs->tf_rsp = PTROUT(fp); regs->tf_rip = linux32_rt_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * in u. to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_sigframe *fp, frame; l_sigset_t lmask; int oonstack; int sig, code; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ linux_rt_sendsig(catcher, ksi, mask); return; } regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); #ifdef DEBUG if (ldebug(sendsig)) printf(ARGS(sendsig, "%p, %d, %p, %u"), catcher, sig, (void*)mask, code); #endif /* * Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_sigframe)); } else fp = (struct l_sigframe *)regs->tf_rsp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = PTROUT(catcher); frame.sf_sig = sig; bsd_to_linux_sigset(mask, &lmask); /* * Build the signal context to be used by sigreturn. */ frame.sf_sc.sc_mask = lmask.__mask; frame.sf_sc.sc_gs = regs->tf_gs; frame.sf_sc.sc_fs = regs->tf_fs; frame.sf_sc.sc_es = regs->tf_es; frame.sf_sc.sc_ds = regs->tf_ds; frame.sf_sc.sc_edi = regs->tf_rdi; frame.sf_sc.sc_esi = regs->tf_rsi; frame.sf_sc.sc_ebp = regs->tf_rbp; frame.sf_sc.sc_ebx = regs->tf_rbx; frame.sf_sc.sc_esp = regs->tf_rsp; frame.sf_sc.sc_edx = regs->tf_rdx; frame.sf_sc.sc_ecx = regs->tf_rcx; frame.sf_sc.sc_eax = regs->tf_rax; frame.sf_sc.sc_eip = regs->tf_rip; frame.sf_sc.sc_cs = regs->tf_cs; frame.sf_sc.sc_eflags = regs->tf_rflags; frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; frame.sf_sc.sc_ss = regs->tf_ss; frame.sf_sc.sc_err = regs->tf_err; frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); frame.sf_extramask[0] = lmask.__mask; if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); } /* * Build context to run handler in. */ regs->tf_rsp = PTROUT(fp); regs->tf_rip = linux32_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) { struct l_sigframe frame; struct trapframe *regs; sigset_t bmask; l_sigset_t lmask; int eflags; ksiginfo_t ksi; regs = td->td_frame; #ifdef DEBUG if (ldebug(sigreturn)) printf(ARGS(sigreturn, "%p"), (void *)args->sfp); #endif /* * The trampoline code hands us the sigframe. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->sfp, &frame, sizeof(frame)) != 0) return (EFAULT); /* * Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = frame.sf_sc.sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) return(EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(frame.sf_sc.sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return(EINVAL); } lmask.__mask = frame.sf_sc.sc_mask; lmask.__mask = frame.sf_extramask[0]; linux_to_bsd_sigset(&lmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* * Restore signal context. */ regs->tf_rdi = frame.sf_sc.sc_edi; regs->tf_rsi = frame.sf_sc.sc_esi; regs->tf_rbp = frame.sf_sc.sc_ebp; regs->tf_rbx = frame.sf_sc.sc_ebx; regs->tf_rdx = frame.sf_sc.sc_edx; regs->tf_rcx = frame.sf_sc.sc_ecx; regs->tf_rax = frame.sf_sc.sc_eax; regs->tf_rip = frame.sf_sc.sc_eip; regs->tf_cs = frame.sf_sc.sc_cs; regs->tf_ds = frame.sf_sc.sc_ds; regs->tf_es = frame.sf_sc.sc_es; regs->tf_fs = frame.sf_sc.sc_fs; regs->tf_gs = frame.sf_sc.sc_gs; regs->tf_rflags = eflags; regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; regs->tf_ss = frame.sf_sc.sc_ss; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (EJUSTRETURN); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by rt_sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { struct l_ucontext uc; struct l_sigcontext *context; sigset_t bmask; l_stack_t *lss; stack_t ss; struct trapframe *regs; int eflags; ksiginfo_t ksi; regs = td->td_frame; #ifdef DEBUG if (ldebug(rt_sigreturn)) printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); #endif /* * The trampoline code hands us the ucontext. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->ucp, &uc, sizeof(uc)) != 0) return (EFAULT); context = &uc.uc_mcontext; /* * Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = context->sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) return(EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(context->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return(EINVAL); } linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* * Restore signal context */ regs->tf_gs = context->sc_gs; regs->tf_fs = context->sc_fs; regs->tf_es = context->sc_es; regs->tf_ds = context->sc_ds; regs->tf_rdi = context->sc_edi; regs->tf_rsi = context->sc_esi; regs->tf_rbp = context->sc_ebp; regs->tf_rbx = context->sc_ebx; regs->tf_rdx = context->sc_edx; regs->tf_rcx = context->sc_ecx; regs->tf_rax = context->sc_eax; regs->tf_rip = context->sc_eip; regs->tf_cs = context->sc_cs; regs->tf_rflags = eflags; regs->tf_rsp = context->sc_esp_at_signal; regs->tf_ss = context->sc_ss; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); /* * call sigaltstack & ignore results.. */ lss = &uc.uc_stack; ss.ss_sp = PTRIN(lss->ss_sp); ss.ss_size = lss->ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); #ifdef DEBUG if (ldebug(rt_sigreturn)) printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); #endif (void)kern_sigaltstack(td, &ss, NULL); return (EJUSTRETURN); } static int linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) { struct proc *p; struct trapframe *frame; p = td->td_proc; frame = td->td_frame; sa->args[0] = frame->tf_rbx; sa->args[1] = frame->tf_rcx; sa->args[2] = frame->tf_rdx; sa->args[3] = frame->tf_rsi; sa->args[4] = frame->tf_rdi; sa->args[5] = frame->tf_rbp; /* Unconfirmed */ sa->code = frame->tf_rax; if (sa->code >= p->p_sysent->sv_size) /* nosys */ sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; return (0); } /* * If a linux binary is exec'ing something, try this image activator * first. We override standard shell script execution in order to * be able to modify the interpreter path. We only do this if a linux * binary is doing the exec, so we do not create an EXEC module for it. */ static int exec_linux_imgact_try(struct image_params *iparams); static int exec_linux_imgact_try(struct image_params *imgp) { const char *head = (const char *)imgp->image_header; char *rpath; int error = -1; /* * The interpreter for shell scripts run from a linux binary needs * to be located in /compat/linux if possible in order to recursively * maintain linux path emulation. */ if (((const short *)head)[0] == SHELLMAGIC) { /* * Run our normal shell image activator. If it succeeds attempt * to use the alternate path for the interpreter. If an * alternate * path is found, use our stringspace to store it. */ if ((error = exec_shell_imgact(imgp)) == 0) { linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD); if (rpath != NULL) imgp->args->fname_buf = imgp->interpreter_name = rpath; } } return (error); } /* * Clear registers on exec * XXX copied from ia32_signal.c. */ static void exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; mtx_lock(&dt_lock); if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); else mtx_unlock(&dt_lock); critical_enter(); wrmsr(MSR_FSBASE, 0); wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; critical_exit(); pcb->pcb_initial_fpucw = __LINUX_NPXCW__; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = stack; regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); regs->tf_gs = _ugssel; regs->tf_fs = _ufssel; regs->tf_es = _udatasel; regs->tf_ds = _udatasel; regs->tf_ss = _udatasel; regs->tf_flags = TF_HASSEGS; regs->tf_cs = _ucode32sel; regs->tf_rbx = imgp->ps_strings; fpstate_drop(td); /* Do full restore on return so that we can change to a different %cs */ set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET); td->td_retval[1] = 0; } /* * XXX copied from ia32_sysvec.c. */ static register_t * linux_copyout_strings(struct image_params *imgp) { int argc, envc; u_int32_t *vectp; char *stringp, *destp; u_int32_t *stack_base; struct linux32_ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; /* * Calculate string base and vector table pointers. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; destp = (caddr_t)arginfo - SPARE_USRSPACE - roundup(sizeof(canary), sizeof(char *)) - roundup(execpath_len, sizeof(char *)) - roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); if (execpath_len != 0) { imgp->execpathp = (uintptr_t)arginfo - execpath_len; copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); imgp->canary = (uintptr_t)arginfo - roundup(execpath_len, sizeof(char *)) - roundup(sizeof(canary), sizeof(char *)); copyout(canary, (void *)imgp->canary, sizeof(canary)); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for * lower compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (LINUX_AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets,and imgp->auxarg_size is room * for argument of Runtime loader. */ vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(u_int32_t)); } else /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (u_int32_t *)(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t)); /* * vectp also becomes our initial stack base */ stack_base = vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp); suword32(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword32(vectp++, (uint32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword32(vectp++, 0); suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp); suword32(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword32(vectp++, (uint32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword32(vectp, 0); return ((register_t *)stack_base); } static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, "32-bit Linux emulation"); static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, &linux32_maxdsiz, 0, ""); static u_long linux32_maxssiz = LINUX32_MAXSSIZ; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, &linux32_maxssiz, 0, ""); static u_long linux32_maxvmem = LINUX32_MAXVMEM; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, &linux32_maxvmem, 0, ""); #if defined(DEBUG) SYSCTL_PROC(_compat_linux32, OID_AUTO, debug, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, linux_sysctl_debug, "A", "Linux debugging control"); #endif static void linux32_fixlimit(struct rlimit *rl, int which) { switch (which) { case RLIMIT_DATA: if (linux32_maxdsiz != 0) { if (rl->rlim_cur > linux32_maxdsiz) rl->rlim_cur = linux32_maxdsiz; if (rl->rlim_max > linux32_maxdsiz) rl->rlim_max = linux32_maxdsiz; } break; case RLIMIT_STACK: if (linux32_maxssiz != 0) { if (rl->rlim_cur > linux32_maxssiz) rl->rlim_cur = linux32_maxssiz; if (rl->rlim_max > linux32_maxssiz) rl->rlim_max = linux32_maxssiz; } break; case RLIMIT_VMEM: if (linux32_maxvmem != 0) { if (rl->rlim_cur > linux32_maxvmem) rl->rlim_cur = linux32_maxvmem; if (rl->rlim_max > linux32_maxvmem) rl->rlim_max = linux32_maxvmem; } break; } } struct sysentvec elf_linux_sysvec = { .sv_size = LINUX32_SYS_MAXSYSCALL, .sv_table = linux32_sysent, .sv_mask = 0, .sv_errsize = ELAST + 1, .sv_errtbl = bsd_to_linux_errno, .sv_transtrap = translate_traps, .sv_fixup = elf_linux_fixup, .sv_sendsig = linux_sendsig, .sv_sigcode = &_binary_linux32_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF32", .sv_coredump = elf32_coredump, .sv_imgact_try = exec_linux_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = LINUX32_MAXUSER, .sv_usrstack = LINUX32_USRSTACK, .sv_psstrings = LINUX32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = exec_linux_setregs, .sv_fixlimit = linux32_fixlimit, .sv_maxssiz = &linux32_maxssiz, .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = linux32_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = LINUX32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, + .sv_trap = NULL, }; static void linux_vdso_install(void *param) { linux_szsigcode = (&_binary_linux32_locore_o_end - &_binary_linux32_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("Linux invalid vdso size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE); bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; linux_kplatform = linux_shared_page_mapping + (linux_platform - (caddr_t)LINUX32_SHAREDPAGE); } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)linux_vdso_install, NULL); static void linux_vdso_deinstall(void *param) { __elfN(linux_shared_page_fini)(linux_shared_page_obj); }; SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)linux_vdso_deinstall, NULL); static char GNU_ABI_VENDOR[] = "GNU"; static int GNULINUX_ABI_DESC = 0; static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNULINUX_ABI_DESC) return (FALSE); /* * For linux we encode osrel as follows (see linux_mib.c): * VVVMMMIII (version, major, minor), see linux_mib.c. */ *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; return (TRUE); } static Elf_Brandnote linux32_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux32_trans_osrel }; static Elf32_Brandinfo linux_brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib/ld-linux.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux32_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf32_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib/ld-linux.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux32_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf32_Brandinfo *linux_brandlist[] = { &linux_brand, &linux_glibc2brand, NULL }; static int linux_elf_modevent(module_t mod, int type, void *data) { Elf32_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_register_handler(*lihp); LIST_INIT(&futex_list); mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux ELF exec handler installed\n"); } else printf("cannot insert Linux ELF brand handler\n"); break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_unregister_handler(*lihp); mtx_destroy(&futex_mtx); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux_elf_mod = { "linuxelf", linux_elf_modevent, 0 }; DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(linuxelf, linux_common, 1, 1, 1); Index: projects/clang380-import/sys/arm/arm/elf_machdep.c =================================================================== --- projects/clang380-import/sys/arm/arm/elf_machdep.c (revision 293686) +++ projects/clang380-import/sys/arm/arm/elf_machdep.c (revision 293687) @@ -1,282 +1,283 @@ /*- * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static boolean_t elf32_arm_abi_supported(struct image_params *); struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = #if __ARM_ARCH >= 6 SV_SHP | SV_TIMEKEEP | #endif SV_ABI_FREEBSD | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_ARM, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported= elf32_arm_abi_supported, }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static boolean_t elf32_arm_abi_supported(struct image_params *imgp) { const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; /* * When configured for EABI, FreeBSD supports EABI vesions 4 and 5. */ if (EF_ARM_EABI_VERSION(hdr->e_flags) < EF_ARM_EABI_FREEBSD_MIN) { if (bootverbose) uprintf("Attempting to execute non EABI binary (rev %d) image %s", EF_ARM_EABI_VERSION(hdr->e_flags), imgp->args->fname); return (FALSE); } return (TRUE); } void elf32_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } /* * It is possible for the compiler to emit relocations for unaligned data. * We handle this situation with these inlines. */ #define RELOC_ALIGNED_P(x) \ (((uintptr_t)(x) & (sizeof(void *) - 1)) == 0) static __inline Elf_Addr load_ptr(Elf_Addr *where) { Elf_Addr res; if (RELOC_ALIGNED_P(where)) return *where; memcpy(&res, where, sizeof(res)); return (res); } static __inline void store_ptr(Elf_Addr *where, Elf_Addr val) { if (RELOC_ALIGNED_P(where)) *where = val; else memcpy(where, &val, sizeof(val)); } #undef RELOC_ALIGNED_P /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where; Elf_Addr addr; Elf_Addr addend; Elf_Word rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); addend = load_ptr(where); rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if (local) { if (rtype == R_ARM_RELATIVE) { /* A + B */ addr = elf_relocaddr(lf, relocbase + addend); if (load_ptr(where) != addr) store_ptr(where, addr); } return (0); } switch (rtype) { case R_ARM_NONE: /* none */ break; case R_ARM_ABS32: error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; store_ptr(where, addr + load_ptr(where)); break; case R_ARM_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation\n"); return -1; break; case R_ARM_JUMP_SLOT: error = lookup(lf, symidx, 1, &addr); if (error == 0) { store_ptr(where, addr); return (0); } return (-1); case R_ARM_RELATIVE: break; default: printf("kldload: unexpected relocation type %d\n", rtype); return -1; } return(0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf __unused) { /* * The pmap code does not do an icache sync upon establishing executable * mappings in the kernel pmap. It's an optimization based on the fact * that kernel memory allocations always have EXECUTABLE protection even * when the memory isn't going to hold executable code. The only time * kernel memory holding instructions does need a sync is after loading * a kernel module, and that's when this function gets called. Normal * data cache maintenance has already been done by the IO code, and TLB * maintenance has been done by the pmap code, so all we have to do here * is invalidate the instruction cache (which also invalidates the * branch predictor cache on platforms that have one). */ cpu_icache_sync_all(); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: projects/clang380-import/sys/arm64/arm64/elf_machdep.c =================================================================== --- projects/clang380-import/sys/arm64/arm64/elf_machdep.c (revision 293686) +++ projects/clang380-import/sys/arm64/arm64/elf_machdep.c (revision 293687) @@ -1,215 +1,217 @@ /*- * Copyright (c) 2014, 2015 The FreeBSD Foundation. * Copyright (c) 2014 Andrew Turner. * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "linker_if.h" static struct sysentvec elf64_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_SHP | SV_TIMEKEEP | SV_ABI_FREEBSD | SV_LP64, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, + .sv_thread_detach = NULL, + .sv_trap = NULL, }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); static Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_AARCH64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info); static Elf64_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_AARCH64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_oinfo); void elf64_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where, addr, addend; Elf_Word rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); addend = *where; rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if (local) { if (rtype == R_AARCH64_RELATIVE) *where = elf_relocaddr(lf, relocbase + addend); return (0); } switch (rtype) { case R_AARCH64_NONE: case R_AARCH64_RELATIVE: break; case R_AARCH64_ABS64: case R_AARCH64_GLOB_DAT: case R_AARCH64_JUMP_SLOT: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); *where = addr + addend; break; default: printf("kldload: unexpected relocation type %d\n", rtype); return (-1); } return (0); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } /* Process one elf relocation with addend. */ int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_cpu_load_file(linker_file_t lf) { if (lf->id != 1) cpu_icache_sync_range((vm_offset_t)lf->address, lf->size); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: projects/clang380-import/sys/boot/common/ufsread.c =================================================================== --- projects/clang380-import/sys/boot/common/ufsread.c (revision 293686) +++ projects/clang380-import/sys/boot/common/ufsread.c (revision 293687) @@ -1,304 +1,303 @@ /*- * Copyright (c) 2002 McAfee, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and McAfee Research,, the Security Research Division of * McAfee, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as * part of the DARPA CHATS research program * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 1998 Robert Nordier * All rights reserved. * * Redistribution and use in source and binary forms are freely * permitted provided that the above copyright notice and this * paragraph and the following disclaimer are duplicated in all * such forms. * * This software is provided "AS IS" and without any express or * implied warranties, including, without limitation, the implied * warranties of merchantability and fitness for a particular * purpose. */ #include __FBSDID("$FreeBSD$"); #include #include #include #ifdef UFS_SMALL_CGBASE /* XXX: Revert to old (broken for over 1.5Tb filesystems) version of cgbase (see sys/ufs/ffs/fs.h rev 1.39) so that small boot loaders (e.g. boot2) can support both UFS1 and UFS2. */ #undef cgbase #define cgbase(fs, c) ((ufs2_daddr_t)((fs)->fs_fpg * (c))) #endif typedef uint32_t ufs_ino_t; /* * We use 4k `virtual' blocks for filesystem data, whatever the actual * filesystem block size. FFS blocks are always a multiple of 4k. */ #define VBLKSHIFT 12 #define VBLKSIZE (1 << VBLKSHIFT) #define VBLKMASK (VBLKSIZE - 1) #define DBPERVBLK (VBLKSIZE / DEV_BSIZE) #define INDIRPERVBLK(fs) (NINDIR(fs) / ((fs)->fs_bsize >> VBLKSHIFT)) #define IPERVBLK(fs) (INOPB(fs) / ((fs)->fs_bsize >> VBLKSHIFT)) #define INO_TO_VBA(fs, ipervblk, x) \ (fsbtodb(fs, cgimin(fs, ino_to_cg(fs, x))) + \ (((x) % (fs)->fs_ipg) / (ipervblk) * DBPERVBLK)) #define INO_TO_VBO(ipervblk, x) ((x) % ipervblk) #define FS_TO_VBA(fs, fsb, off) (fsbtodb(fs, fsb) + \ ((off) / VBLKSIZE) * DBPERVBLK) #define FS_TO_VBO(fs, fsb, off) ((off) & VBLKMASK) /* Buffers that must not span a 64k boundary. */ struct dmadat { char blkbuf[VBLKSIZE]; /* filesystem blocks */ char indbuf[VBLKSIZE]; /* indir blocks */ char sbbuf[SBLOCKSIZE]; /* superblock */ char secbuf[DEV_BSIZE]; /* for MBR/disklabel */ }; static struct dmadat *dmadat; static ufs_ino_t lookup(const char *); static ssize_t fsread(ufs_ino_t, void *, size_t); static uint8_t ls, dsk_meta; static uint32_t fs_off; static __inline uint8_t fsfind(const char *name, ufs_ino_t * ino) { static char buf[DEV_BSIZE]; struct direct *d; char *s; ssize_t n; fs_off = 0; while ((n = fsread(*ino, buf, DEV_BSIZE)) > 0) for (s = buf; s < buf + DEV_BSIZE;) { d = (void *)s; if (ls) printf("%s ", d->d_name); else if (!strcmp(name, d->d_name)) { *ino = d->d_ino; return d->d_type; } s += d->d_reclen; } if (n != -1 && ls) printf("\n"); return 0; } static ufs_ino_t lookup(const char *path) { static char name[MAXNAMLEN + 1]; const char *s; ufs_ino_t ino; ssize_t n; uint8_t dt; ino = ROOTINO; dt = DT_DIR; for (;;) { if (*path == '/') path++; if (!*path) break; for (s = path; *s && *s != '/'; s++); if ((n = s - path) > MAXNAMLEN) return 0; ls = *path == '?' && n == 1 && !*s; memcpy(name, path, n); name[n] = 0; if (dt != DT_DIR) { printf("%s: not a directory.\n", name); return (0); } if ((dt = fsfind(name, &ino)) <= 0) break; path = s; } return dt == DT_REG ? ino : 0; } /* * Possible superblock locations ordered from most to least likely. */ static int sblock_try[] = SBLOCKSEARCH; #if defined(UFS2_ONLY) #define DIP(field) dp2.field #elif defined(UFS1_ONLY) #define DIP(field) dp1.field #else #define DIP(field) fs.fs_magic == FS_UFS1_MAGIC ? dp1.field : dp2.field #endif static ssize_t fsread(ufs_ino_t inode, void *buf, size_t nbyte) { #ifndef UFS2_ONLY static struct ufs1_dinode dp1; ufs1_daddr_t addr1; #endif #ifndef UFS1_ONLY static struct ufs2_dinode dp2; #endif static struct fs fs; static ufs_ino_t inomap; char *blkbuf; void *indbuf; char *s; size_t n, nb, size, off, vboff; ufs_lbn_t lbn; ufs2_daddr_t addr2, vbaddr; static ufs2_daddr_t blkmap, indmap; u_int u; blkbuf = dmadat->blkbuf; indbuf = dmadat->indbuf; if (!dsk_meta) { inomap = 0; for (n = 0; sblock_try[n] != -1; n++) { if (dskread(dmadat->sbbuf, sblock_try[n] / DEV_BSIZE, SBLOCKSIZE / DEV_BSIZE)) return -1; memcpy(&fs, dmadat->sbbuf, sizeof(struct fs)); if (( #if defined(UFS1_ONLY) fs.fs_magic == FS_UFS1_MAGIC #elif defined(UFS2_ONLY) (fs.fs_magic == FS_UFS2_MAGIC && fs.fs_sblockloc == sblock_try[n]) #else fs.fs_magic == FS_UFS1_MAGIC || (fs.fs_magic == FS_UFS2_MAGIC && fs.fs_sblockloc == sblock_try[n]) #endif ) && fs.fs_bsize <= MAXBSIZE && fs.fs_bsize >= sizeof(struct fs)) break; } if (sblock_try[n] == -1) { - printf("Not ufs\n"); return -1; } dsk_meta++; } else memcpy(&fs, dmadat->sbbuf, sizeof(struct fs)); if (!inode) return 0; if (inomap != inode) { n = IPERVBLK(&fs); if (dskread(blkbuf, INO_TO_VBA(&fs, n, inode), DBPERVBLK)) return -1; n = INO_TO_VBO(n, inode); #if defined(UFS1_ONLY) memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n, sizeof(struct ufs1_dinode)); #elif defined(UFS2_ONLY) memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n, sizeof(struct ufs2_dinode)); #else if (fs.fs_magic == FS_UFS1_MAGIC) memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n, sizeof(struct ufs1_dinode)); else memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n, sizeof(struct ufs2_dinode)); #endif inomap = inode; fs_off = 0; blkmap = indmap = 0; } s = buf; size = DIP(di_size); n = size - fs_off; if (nbyte > n) nbyte = n; nb = nbyte; while (nb) { lbn = lblkno(&fs, fs_off); off = blkoff(&fs, fs_off); if (lbn < NDADDR) { addr2 = DIP(di_db[lbn]); } else if (lbn < NDADDR + NINDIR(&fs)) { n = INDIRPERVBLK(&fs); addr2 = DIP(di_ib[0]); u = (u_int)(lbn - NDADDR) / n * DBPERVBLK; vbaddr = fsbtodb(&fs, addr2) + u; if (indmap != vbaddr) { if (dskread(indbuf, vbaddr, DBPERVBLK)) return -1; indmap = vbaddr; } n = (lbn - NDADDR) & (n - 1); #if defined(UFS1_ONLY) memcpy(&addr1, (ufs1_daddr_t *)indbuf + n, sizeof(ufs1_daddr_t)); addr2 = addr1; #elif defined(UFS2_ONLY) memcpy(&addr2, (ufs2_daddr_t *)indbuf + n, sizeof(ufs2_daddr_t)); #else if (fs.fs_magic == FS_UFS1_MAGIC) { memcpy(&addr1, (ufs1_daddr_t *)indbuf + n, sizeof(ufs1_daddr_t)); addr2 = addr1; } else memcpy(&addr2, (ufs2_daddr_t *)indbuf + n, sizeof(ufs2_daddr_t)); #endif } else return -1; vbaddr = fsbtodb(&fs, addr2) + (off >> VBLKSHIFT) * DBPERVBLK; vboff = off & VBLKMASK; n = sblksize(&fs, size, lbn) - (off & ~VBLKMASK); if (n > VBLKSIZE) n = VBLKSIZE; if (blkmap != vbaddr) { if (dskread(blkbuf, vbaddr, n >> DEV_BSHIFT)) return -1; blkmap = vbaddr; } n -= vboff; if (n > nb) n = nb; memcpy(s, blkbuf + vboff, n); s += n; fs_off += n; nb -= n; } return nbyte; } Index: projects/clang380-import/sys/boot/efi/boot1/Makefile =================================================================== --- projects/clang380-import/sys/boot/efi/boot1/Makefile (revision 293686) +++ projects/clang380-import/sys/boot/efi/boot1/Makefile (revision 293687) @@ -1,116 +1,115 @@ # $FreeBSD$ MAN= .include # In-tree GCC does not support __attribute__((ms_abi)). .if ${COMPILER_TYPE} != "gcc" MK_SSP= no PROG= boot1.sym INTERNALPROG= # architecture-specific loader code SRCS= boot1.c self_reloc.c start.S CFLAGS+= -I. CFLAGS+= -I${.CURDIR}/../include CFLAGS+= -I${.CURDIR}/../include/${MACHINE} CFLAGS+= -I${.CURDIR}/../../../contrib/dev/acpica/include CFLAGS+= -I${.CURDIR}/../../.. # Always add MI sources and REGULAR efi loader bits .PATH: ${.CURDIR}/../loader/arch/${MACHINE} .PATH: ${.CURDIR}/../loader .PATH: ${.CURDIR}/../../common CFLAGS+= -I${.CURDIR}/../../common FILES= boot1.efi boot1.efifat FILESMODE_boot1.efi= ${BINMODE} LDSCRIPT= ${.CURDIR}/../loader/arch/${MACHINE}/ldscript.${MACHINE} LDFLAGS= -Wl,-T${LDSCRIPT} -Wl,-Bsymbolic -shared .if ${MACHINE_CPUARCH} == "aarch64" CFLAGS+= -msoft-float -mgeneral-regs-only .endif .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" CFLAGS+= -fPIC LDFLAGS+= -Wl,-znocombreloc .endif -.if ${MACHINE_CPUARCH} == "arm" || ${MACHINE_CPUARCH} == "i386" # # Add libstand for the runtime functions used by the compiler - for example # __aeabi_* (arm) or __divdi3 (i386). +# as well as required string and memory functions for all platforms. # DPADD+= ${LIBSTAND} LDADD+= -lstand -.endif DPADD+= ${LDSCRIPT} OBJCOPY?= objcopy OBJDUMP?= objdump .if ${MACHINE_CPUARCH} == "amd64" EFI_TARGET= efi-app-x86_64 .elif ${MACHINE_CPUARCH} == "i386" EFI_TARGET= efi-app-ia32 .else EFI_TARGET= binary .endif boot1.efi: ${PROG} if [ `${OBJDUMP} -t ${.ALLSRC} | fgrep '*UND*' | wc -l` != 0 ]; then \ ${OBJDUMP} -t ${.ALLSRC} | fgrep '*UND*'; \ exit 1; \ fi ${OBJCOPY} -j .peheader -j .text -j .sdata -j .data \ -j .dynamic -j .dynsym -j .rel.dyn \ -j .rela.dyn -j .reloc -j .eh_frame \ --output-target=${EFI_TARGET} ${.ALLSRC} ${.TARGET} boot1.o: ${.CURDIR}/../../common/ufsread.c # The following inserts our objects into a template FAT file system # created by generate-fat.sh .include "${.CURDIR}/Makefile.fat" BOOT1_MAXSIZE?= 131072 boot1.efifat: boot1.efi @set -- `ls -l boot1.efi`; \ x=$$(($$5-${BOOT1_MAXSIZE})); \ if [ $$x -ge 0 ]; then \ echo "boot1 $$x bytes too large; regenerate FAT templates?" >&2 ;\ exit 1; \ fi echo ${.OBJDIR} uudecode ${.CURDIR}/fat-${MACHINE}.tmpl.bz2.uu mv fat-${MACHINE}.tmpl.bz2 ${.TARGET}.bz2 bzip2 -f -d ${.TARGET}.bz2 dd if=boot1.efi of=${.TARGET} seek=${BOOT1_OFFSET} conv=notrunc CLEANFILES= boot1.efi boot1.efifat .endif # ${COMPILER_TYPE} != "gcc" .include beforedepend ${OBJS}: machine CLEANFILES+= machine machine: ln -sf ${.CURDIR}/../../../${MACHINE}/include machine .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" beforedepend ${OBJS}: x86 CLEANFILES+= x86 x86: ln -sf ${.CURDIR}/../../../x86/include x86 .endif Index: projects/clang380-import/sys/boot/efi/boot1/boot1.c =================================================================== --- projects/clang380-import/sys/boot/efi/boot1/boot1.c (revision 293686) +++ projects/clang380-import/sys/boot/efi/boot1/boot1.c (revision 293687) @@ -1,575 +1,323 @@ /*- * Copyright (c) 1998 Robert Nordier * All rights reserved. * Copyright (c) 2001 Robert Drehmel * All rights reserved. * Copyright (c) 2014 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms are freely * permitted provided that the above copyright notice and this * paragraph and the following disclaimer are duplicated in all * such forms. * * This software is provided "AS IS" and without any express or * implied warranties, including, without limitation, the implied * warranties of merchantability and fitness for a particular * purpose. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include +#include #include #include #define _PATH_LOADER "/boot/loader.efi" #define _PATH_KERNEL "/boot/kernel/kernel" #define BSIZEMAX 16384 -typedef int putc_func_t(char c, void *arg); +void panic(const char *fmt, ...) __dead2; +void putchar(int c); -struct sp_data { - char *sp_buf; - u_int sp_len; - u_int sp_size; -}; - -static const char digits[] = "0123456789abcdef"; - -static void panic(const char *fmt, ...) __dead2; -static int printf(const char *fmt, ...); -static int putchar(char c, void *arg); -static int vprintf(const char *fmt, va_list ap); -static int vsnprintf(char *str, size_t sz, const char *fmt, va_list ap); - -static int __printf(const char *fmt, putc_func_t *putc, void *arg, va_list ap); -static int __putc(char c, void *arg); -static int __puts(const char *s, putc_func_t *putc, void *arg); -static int __sputc(char c, void *arg); -static char *__uitoa(char *buf, u_int val, int base); -static char *__ultoa(char *buf, u_long val, int base); - static int domount(EFI_DEVICE_PATH *device, EFI_BLOCK_IO *blkio, int quiet); static void load(const char *fname); static EFI_SYSTEM_TABLE *systab; static EFI_HANDLE *image; -static void -bcopy(const void *src, void *dst, size_t len) -{ - const char *s = src; - char *d = dst; - - while (len-- != 0) - *d++ = *s++; -} - -static void -memcpy(void *dst, const void *src, size_t len) -{ - bcopy(src, dst, len); -} - -static void -bzero(void *b, size_t len) -{ - char *p = b; - - while (len-- != 0) - *p++ = 0; -} - -static int -strcmp(const char *s1, const char *s2) -{ - for (; *s1 == *s2 && *s1; s1++, s2++) - ; - return ((u_char)*s1 - (u_char)*s2); -} - static EFI_GUID BlockIoProtocolGUID = BLOCK_IO_PROTOCOL; static EFI_GUID DevicePathGUID = DEVICE_PATH_PROTOCOL; static EFI_GUID LoadedImageGUID = LOADED_IMAGE_PROTOCOL; static EFI_GUID ConsoleControlGUID = EFI_CONSOLE_CONTROL_PROTOCOL_GUID; static EFI_BLOCK_IO *bootdev; static EFI_DEVICE_PATH *bootdevpath; static EFI_HANDLE *bootdevhandle; EFI_STATUS efi_main(EFI_HANDLE Ximage, EFI_SYSTEM_TABLE* Xsystab) { EFI_HANDLE handles[128]; EFI_BLOCK_IO *blkio; UINTN i, nparts = sizeof(handles), cols, rows, max_dim, best_mode; EFI_STATUS status; EFI_DEVICE_PATH *devpath; EFI_BOOT_SERVICES *BS; EFI_CONSOLE_CONTROL_PROTOCOL *ConsoleControl = NULL; SIMPLE_TEXT_OUTPUT_INTERFACE *conout = NULL; char *path = _PATH_LOADER; systab = Xsystab; image = Ximage; BS = systab->BootServices; status = BS->LocateProtocol(&ConsoleControlGUID, NULL, (VOID **)&ConsoleControl); if (status == EFI_SUCCESS) (void)ConsoleControl->SetMode(ConsoleControl, EfiConsoleControlScreenText); /* * Reset the console and find the best text mode. */ conout = systab->ConOut; conout->Reset(conout, TRUE); max_dim = best_mode = 0; for (i = 0; ; i++) { status = conout->QueryMode(conout, i, &cols, &rows); if (EFI_ERROR(status)) break; if (cols * rows > max_dim) { max_dim = cols * rows; best_mode = i; } } if (max_dim > 0) conout->SetMode(conout, best_mode); conout->EnableCursor(conout, TRUE); conout->ClearScreen(conout); printf("\n" ">> FreeBSD EFI boot block\n"); printf(" Loader path: %s\n", path); status = systab->BootServices->LocateHandle(ByProtocol, &BlockIoProtocolGUID, NULL, &nparts, handles); nparts /= sizeof(handles[0]); for (i = 0; i < nparts; i++) { status = systab->BootServices->HandleProtocol(handles[i], &DevicePathGUID, (void **)&devpath); if (EFI_ERROR(status)) continue; while (!IsDevicePathEnd(NextDevicePathNode(devpath))) devpath = NextDevicePathNode(devpath); status = systab->BootServices->HandleProtocol(handles[i], &BlockIoProtocolGUID, (void **)&blkio); if (EFI_ERROR(status)) continue; if (!blkio->Media->LogicalPartition) continue; if (domount(devpath, blkio, 1) >= 0) break; } if (i == nparts) panic("No bootable partition found"); bootdevhandle = handles[i]; load(path); panic("Load failed"); return EFI_SUCCESS; } static int dskread(void *buf, u_int64_t lba, int nblk) { EFI_STATUS status; int size; lba = lba / (bootdev->Media->BlockSize / DEV_BSIZE); size = nblk * DEV_BSIZE; status = bootdev->ReadBlocks(bootdev, bootdev->Media->MediaId, lba, size, buf); if (EFI_ERROR(status)) return (-1); return (0); } #include "ufsread.c" static ssize_t fsstat(ufs_ino_t inode) { #ifndef UFS2_ONLY static struct ufs1_dinode dp1; ufs1_daddr_t addr1; #endif #ifndef UFS1_ONLY static struct ufs2_dinode dp2; #endif static struct fs fs; static ufs_ino_t inomap; char *blkbuf; void *indbuf; size_t n, nb, size, off, vboff; ufs_lbn_t lbn; ufs2_daddr_t addr2, vbaddr; static ufs2_daddr_t blkmap, indmap; u_int u; blkbuf = dmadat->blkbuf; indbuf = dmadat->indbuf; if (!dsk_meta) { inomap = 0; for (n = 0; sblock_try[n] != -1; n++) { if (dskread(dmadat->sbbuf, sblock_try[n] / DEV_BSIZE, SBLOCKSIZE / DEV_BSIZE)) return -1; memcpy(&fs, dmadat->sbbuf, sizeof(struct fs)); if (( #if defined(UFS1_ONLY) fs.fs_magic == FS_UFS1_MAGIC #elif defined(UFS2_ONLY) (fs.fs_magic == FS_UFS2_MAGIC && fs.fs_sblockloc == sblock_try[n]) #else fs.fs_magic == FS_UFS1_MAGIC || (fs.fs_magic == FS_UFS2_MAGIC && fs.fs_sblockloc == sblock_try[n]) #endif ) && fs.fs_bsize <= MAXBSIZE && fs.fs_bsize >= sizeof(struct fs)) break; } if (sblock_try[n] == -1) { - printf("Not ufs\n"); return -1; } dsk_meta++; } else memcpy(&fs, dmadat->sbbuf, sizeof(struct fs)); if (!inode) return 0; if (inomap != inode) { n = IPERVBLK(&fs); if (dskread(blkbuf, INO_TO_VBA(&fs, n, inode), DBPERVBLK)) return -1; n = INO_TO_VBO(n, inode); #if defined(UFS1_ONLY) memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n, sizeof(struct ufs1_dinode)); #elif defined(UFS2_ONLY) memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n, sizeof(struct ufs2_dinode)); #else if (fs.fs_magic == FS_UFS1_MAGIC) memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n, sizeof(struct ufs1_dinode)); else memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n, sizeof(struct ufs2_dinode)); #endif inomap = inode; fs_off = 0; blkmap = indmap = 0; } size = DIP(di_size); n = size - fs_off; return (n); } static struct dmadat __dmadat; static int domount(EFI_DEVICE_PATH *device, EFI_BLOCK_IO *blkio, int quiet) { dmadat = &__dmadat; bootdev = blkio; bootdevpath = device; if (fsread(0, NULL, 0)) { if (!quiet) printf("domount: can't read superblock\n"); return (-1); } if (!quiet) printf("Succesfully mounted UFS filesystem\n"); return (0); } static void load(const char *fname) { ufs_ino_t ino; EFI_STATUS status; EFI_HANDLE loaderhandle; EFI_LOADED_IMAGE *loaded_image; void *buffer; size_t bufsize; if ((ino = lookup(fname)) == 0) { printf("File %s not found\n", fname); return; } bufsize = fsstat(ino); status = systab->BootServices->AllocatePool(EfiLoaderData, bufsize, &buffer); fsread(ino, buffer, bufsize); /* XXX: For secure boot, we need our own loader here */ status = systab->BootServices->LoadImage(TRUE, image, bootdevpath, buffer, bufsize, &loaderhandle); if (EFI_ERROR(status)) printf("LoadImage failed with error %lu\n", EFI_ERROR_CODE(status)); status = systab->BootServices->HandleProtocol(loaderhandle, &LoadedImageGUID, (VOID**)&loaded_image); if (EFI_ERROR(status)) printf("HandleProtocol failed with error %lu\n", EFI_ERROR_CODE(status)); loaded_image->DeviceHandle = bootdevhandle; status = systab->BootServices->StartImage(loaderhandle, NULL, NULL); if (EFI_ERROR(status)) printf("StartImage failed with error %lu\n", EFI_ERROR_CODE(status)); } -static void +void panic(const char *fmt, ...) { - char buf[128]; va_list ap; + printf("panic: "); va_start(ap, fmt); - vsnprintf(buf, sizeof buf, fmt, ap); - printf("panic: %s\n", buf); + vprintf(fmt, ap); va_end(ap); + printf("\n"); while (1) {} } -static int -printf(const char *fmt, ...) +void +putchar(int c) { - va_list ap; - int ret; - - /* Don't annoy the user as we probe for partitions */ - if (strcmp(fmt,"Not ufs\n") == 0) - return 0; - - va_start(ap, fmt); - ret = vprintf(fmt, ap); - va_end(ap); - return (ret); -} - -static int -putchar(char c, void *arg) -{ CHAR16 buf[2]; if (c == '\n') { buf[0] = '\r'; buf[1] = 0; systab->ConOut->OutputString(systab->ConOut, buf); } buf[0] = c; buf[1] = 0; systab->ConOut->OutputString(systab->ConOut, buf); - return (1); -} - -static int -vprintf(const char *fmt, va_list ap) -{ - int ret; - - ret = __printf(fmt, putchar, 0, ap); - return (ret); -} - -static int -vsnprintf(char *str, size_t sz, const char *fmt, va_list ap) -{ - struct sp_data sp; - int ret; - - sp.sp_buf = str; - sp.sp_len = 0; - sp.sp_size = sz; - ret = __printf(fmt, __sputc, &sp, ap); - return (ret); -} - -static int -__printf(const char *fmt, putc_func_t *putc, void *arg, va_list ap) -{ - char buf[(sizeof(long) * 8) + 1]; - char *nbuf; - u_long ul; - u_int ui; - int lflag; - int sflag; - char *s; - int pad; - int ret; - int c; - - nbuf = &buf[sizeof buf - 1]; - ret = 0; - while ((c = *fmt++) != 0) { - if (c != '%') { - ret += putc(c, arg); - continue; - } - lflag = 0; - sflag = 0; - pad = 0; -reswitch: c = *fmt++; - switch (c) { - case '#': - sflag = 1; - goto reswitch; - case '%': - ret += putc('%', arg); - break; - case 'c': - c = va_arg(ap, int); - ret += putc(c, arg); - break; - case 'd': - if (lflag == 0) { - ui = (u_int)va_arg(ap, int); - if (ui < (int)ui) { - ui = -ui; - ret += putc('-', arg); - } - s = __uitoa(nbuf, ui, 10); - } else { - ul = (u_long)va_arg(ap, long); - if (ul < (long)ul) { - ul = -ul; - ret += putc('-', arg); - } - s = __ultoa(nbuf, ul, 10); - } - ret += __puts(s, putc, arg); - break; - case 'l': - lflag = 1; - goto reswitch; - case 'o': - if (lflag == 0) { - ui = (u_int)va_arg(ap, u_int); - s = __uitoa(nbuf, ui, 8); - } else { - ul = (u_long)va_arg(ap, u_long); - s = __ultoa(nbuf, ul, 8); - } - ret += __puts(s, putc, arg); - break; - case 'p': - ul = (u_long)va_arg(ap, void *); - s = __ultoa(nbuf, ul, 16); - ret += __puts("0x", putc, arg); - ret += __puts(s, putc, arg); - break; - case 's': - s = va_arg(ap, char *); - ret += __puts(s, putc, arg); - break; - case 'u': - if (lflag == 0) { - ui = va_arg(ap, u_int); - s = __uitoa(nbuf, ui, 10); - } else { - ul = va_arg(ap, u_long); - s = __ultoa(nbuf, ul, 10); - } - ret += __puts(s, putc, arg); - break; - case 'x': - if (lflag == 0) { - ui = va_arg(ap, u_int); - s = __uitoa(nbuf, ui, 16); - } else { - ul = va_arg(ap, u_long); - s = __ultoa(nbuf, ul, 16); - } - if (sflag) - ret += __puts("0x", putc, arg); - ret += __puts(s, putc, arg); - break; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - pad = pad * 10 + c - '0'; - goto reswitch; - default: - break; - } - } - return (ret); -} - -static int -__sputc(char c, void *arg) -{ - struct sp_data *sp; - - sp = arg; - if (sp->sp_len < sp->sp_size) - sp->sp_buf[sp->sp_len++] = c; - sp->sp_buf[sp->sp_len] = '\0'; - return (1); -} - -static int -__puts(const char *s, putc_func_t *putc, void *arg) -{ - const char *p; - int ret; - - ret = 0; - for (p = s; *p != '\0'; p++) - ret += putc(*p, arg); - return (ret); -} - -static char * -__uitoa(char *buf, u_int ui, int base) -{ - char *p; - - p = buf; - *p = '\0'; - do - *--p = digits[ui % base]; - while ((ui /= base) != 0); - return (p); -} - -static char * -__ultoa(char *buf, u_long ul, int base) -{ - char *p; - - p = buf; - *p = '\0'; - do - *--p = digits[ul % base]; - while ((ul /= base) != 0); - return (p); } Index: projects/clang380-import/sys/boot/i386/loader/main.c =================================================================== --- projects/clang380-import/sys/boot/i386/loader/main.c (revision 293686) +++ projects/clang380-import/sys/boot/i386/loader/main.c (revision 293687) @@ -1,451 +1,461 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * MD bootstrap main() and assorted miscellaneous * commands. */ #include #include #include #include #include #include #include #include "bootstrap.h" #include "common/bootargs.h" #include "libi386/libi386.h" #include "libi386/smbios.h" #include "btxv86.h" #ifdef LOADER_ZFS_SUPPORT #include "../zfs/libzfs.h" #endif CTASSERT(sizeof(struct bootargs) == BOOTARGS_SIZE); CTASSERT(offsetof(struct bootargs, bootinfo) == BA_BOOTINFO); CTASSERT(offsetof(struct bootargs, bootflags) == BA_BOOTFLAGS); CTASSERT(offsetof(struct bootinfo, bi_size) == BI_SIZE); /* Arguments passed in from the boot1/boot2 loader */ static struct bootargs *kargs; static u_int32_t initial_howto; static u_int32_t initial_bootdev; static struct bootinfo *initial_bootinfo; struct arch_switch archsw; /* MI/MD interface boundary */ static void extract_currdev(void); static int isa_inb(int port); static void isa_outb(int port, int value); void exit(int code); #ifdef LOADER_ZFS_SUPPORT static void init_zfs_bootenv(char *currdev); static void i386_zfs_probe(void); #endif /* from vers.c */ extern char bootprog_name[], bootprog_rev[], bootprog_date[], bootprog_maker[]; /* XXX debugging */ extern char end[]; static void *heap_top; static void *heap_bottom; int main(void) { int i; /* Pick up arguments */ kargs = (void *)__args; initial_howto = kargs->howto; initial_bootdev = kargs->bootdev; initial_bootinfo = kargs->bootinfo ? (struct bootinfo *)PTOV(kargs->bootinfo) : NULL; /* Initialize the v86 register set to a known-good state. */ bzero(&v86, sizeof(v86)); v86.efl = PSL_RESERVED_DEFAULT | PSL_I; /* * Initialise the heap as early as possible. Once this is done, malloc() is usable. */ bios_getmem(); #if defined(LOADER_BZIP2_SUPPORT) || defined(LOADER_FIREWIRE_SUPPORT) || \ defined(LOADER_GPT_SUPPORT) || defined(LOADER_ZFS_SUPPORT) if (high_heap_size > 0) { heap_top = PTOV(high_heap_base + high_heap_size); heap_bottom = PTOV(high_heap_base); if (high_heap_base < memtop_copyin) memtop_copyin = high_heap_base; } else #endif { heap_top = (void *)PTOV(bios_basemem); heap_bottom = (void *)end; } setheap(heap_bottom, heap_top); /* * XXX Chicken-and-egg problem; we want to have console output early, but some * console attributes may depend on reading from eg. the boot device, which we * can't do yet. * * We can use printf() etc. once this is done. * If the previous boot stage has requested a serial console, prefer that. */ bi_setboothowto(initial_howto); if (initial_howto & RB_MULTIPLE) { if (initial_howto & RB_SERIAL) setenv("console", "comconsole vidconsole", 1); else setenv("console", "vidconsole comconsole", 1); } else if (initial_howto & RB_SERIAL) setenv("console", "comconsole", 1); else if (initial_howto & RB_MUTE) setenv("console", "nullconsole", 1); cons_probe(); /* * Initialise the block cache */ bcache_init(32, 512); /* 16k cache XXX tune this */ /* * Special handling for PXE and CD booting. */ if (kargs->bootinfo == 0) { /* * We only want the PXE disk to try to init itself in the below * walk through devsw if we actually booted off of PXE. */ if (kargs->bootflags & KARGS_FLAGS_PXE) pxe_enable(kargs->pxeinfo ? PTOV(kargs->pxeinfo) : NULL); else if (kargs->bootflags & KARGS_FLAGS_CD) bc_add(initial_bootdev); } archsw.arch_autoload = i386_autoload; archsw.arch_getdev = i386_getdev; archsw.arch_copyin = i386_copyin; archsw.arch_copyout = i386_copyout; archsw.arch_readin = i386_readin; archsw.arch_isainb = isa_inb; archsw.arch_isaoutb = isa_outb; #ifdef LOADER_ZFS_SUPPORT archsw.arch_zfs_probe = i386_zfs_probe; #endif /* * March through the device switch probing for things. */ for (i = 0; devsw[i] != NULL; i++) if (devsw[i]->dv_init != NULL) (devsw[i]->dv_init)(); printf("BIOS %dkB/%dkB available memory\n", bios_basemem / 1024, bios_extmem / 1024); if (initial_bootinfo != NULL) { initial_bootinfo->bi_basemem = bios_basemem / 1024; initial_bootinfo->bi_extmem = bios_extmem / 1024; } /* detect ACPI for future reference */ biosacpi_detect(); /* detect SMBIOS for future reference */ smbios_detect(NULL); /* detect PCI BIOS for future reference */ biospci_detect(); printf("\n"); printf("%s, Revision %s\n", bootprog_name, bootprog_rev); printf("(%s, %s)\n", bootprog_maker, bootprog_date); extract_currdev(); /* set $currdev and $loaddev */ setenv("LINES", "24", 1); /* optional */ bios_getsmap(); interact(NULL); /* if we ever get here, it is an error */ return (1); } /* * Set the 'current device' by (if possible) recovering the boot device as * supplied by the initial bootstrap. * * XXX should be extended for netbooting. */ static void extract_currdev(void) { struct i386_devdesc new_currdev; #ifdef LOADER_ZFS_SUPPORT char buf[20]; struct zfs_boot_args *zargs; #endif int biosdev = -1; /* Assume we are booting from a BIOS disk by default */ new_currdev.d_dev = &biosdisk; /* new-style boot loaders such as pxeldr and cdldr */ if (kargs->bootinfo == 0) { if ((kargs->bootflags & KARGS_FLAGS_CD) != 0) { /* we are booting from a CD with cdboot */ new_currdev.d_dev = &bioscd; new_currdev.d_unit = bc_bios2unit(initial_bootdev); } else if ((kargs->bootflags & KARGS_FLAGS_PXE) != 0) { /* we are booting from pxeldr */ new_currdev.d_dev = &pxedisk; new_currdev.d_unit = 0; } else { /* we don't know what our boot device is */ new_currdev.d_kind.biosdisk.slice = -1; new_currdev.d_kind.biosdisk.partition = 0; biosdev = -1; } #ifdef LOADER_ZFS_SUPPORT } else if ((kargs->bootflags & KARGS_FLAGS_ZFS) != 0) { zargs = NULL; /* check for new style extended argument */ if ((kargs->bootflags & KARGS_FLAGS_EXTARG) != 0) zargs = (struct zfs_boot_args *)(kargs + 1); if (zargs != NULL && zargs->size >= offsetof(struct zfs_boot_args, primary_pool)) { /* sufficient data is provided */ new_currdev.d_kind.zfs.pool_guid = zargs->pool; new_currdev.d_kind.zfs.root_guid = zargs->root; if (zargs->size >= sizeof(*zargs) && zargs->primary_vdev != 0) { sprintf(buf, "%llu", zargs->primary_pool); setenv("vfs.zfs.boot.primary_pool", buf, 1); sprintf(buf, "%llu", zargs->primary_vdev); setenv("vfs.zfs.boot.primary_vdev", buf, 1); } } else { /* old style zfsboot block */ new_currdev.d_kind.zfs.pool_guid = kargs->zfspool; new_currdev.d_kind.zfs.root_guid = 0; } new_currdev.d_dev = &zfs_dev; #endif } else if ((initial_bootdev & B_MAGICMASK) != B_DEVMAGIC) { /* The passed-in boot device is bad */ new_currdev.d_kind.biosdisk.slice = -1; new_currdev.d_kind.biosdisk.partition = 0; biosdev = -1; } else { new_currdev.d_kind.biosdisk.slice = B_SLICE(initial_bootdev) - 1; new_currdev.d_kind.biosdisk.partition = B_PARTITION(initial_bootdev); biosdev = initial_bootinfo->bi_bios_dev; /* * If we are booted by an old bootstrap, we have to guess at the BIOS * unit number. We will lose if there is more than one disk type * and we are not booting from the lowest-numbered disk type * (ie. SCSI when IDE also exists). */ if ((biosdev == 0) && (B_TYPE(initial_bootdev) != 2)) /* biosdev doesn't match major */ biosdev = 0x80 + B_UNIT(initial_bootdev); /* assume harddisk */ } new_currdev.d_type = new_currdev.d_dev->dv_type; /* * If we are booting off of a BIOS disk and we didn't succeed in determining * which one we booted off of, just use disk0: as a reasonable default. */ if ((new_currdev.d_type == biosdisk.dv_type) && ((new_currdev.d_unit = bd_bios2unit(biosdev)) == -1)) { printf("Can't work out which disk we are booting from.\n" "Guessed BIOS device 0x%x not found by probes, defaulting to disk0:\n", biosdev); new_currdev.d_unit = 0; } #ifdef LOADER_ZFS_SUPPORT - init_zfs_bootenv(zfs_fmtdev(&new_currdev)); + if (new_currdev.d_type == DEVT_ZFS) + init_zfs_bootenv(zfs_fmtdev(&new_currdev)); #endif env_setenv("currdev", EV_VOLATILE, i386_fmtdev(&new_currdev), i386_setcurrdev, env_nounset); env_setenv("loaddev", EV_VOLATILE, i386_fmtdev(&new_currdev), env_noset, env_nounset); } #ifdef LOADER_ZFS_SUPPORT static void init_zfs_bootenv(char *currdev) { char *beroot; + if (strlen(currdev) == 0) + return; + if(strncmp(currdev, "zfs:", 4) != 0) + return; /* Remove the trailing : */ currdev[strlen(currdev) - 1] = '\0'; setenv("zfs_be_active", currdev, 1); + setenv("zfs_be_currpage", "1", 1); /* Do not overwrite if already set */ setenv("vfs.root.mountfrom", currdev, 0); /* Forward past zfs: */ currdev = strchr(currdev, ':'); currdev++; /* Remove the last element (current bootenv) */ beroot = strrchr(currdev, '/'); if (beroot != NULL) beroot[0] = '\0'; - beroot = currdev; - setenv("zfs_be_root", beroot, 1); } #endif COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot); static int command_reboot(int argc, char *argv[]) { int i; for (i = 0; devsw[i] != NULL; ++i) if (devsw[i]->dv_cleanup != NULL) (devsw[i]->dv_cleanup)(); printf("Rebooting...\n"); delay(1000000); __exit(0); } /* provide this for panic, as it's not in the startup code */ void exit(int code) { __exit(code); } COMMAND_SET(heap, "heap", "show heap usage", command_heap); static int command_heap(int argc, char *argv[]) { mallocstats(); printf("heap base at %p, top at %p, upper limit at %p\n", heap_bottom, sbrk(0), heap_top); return(CMD_OK); } #ifdef LOADER_ZFS_SUPPORT COMMAND_SET(lszfs, "lszfs", "list child datasets of a zfs dataset", command_lszfs); static int command_lszfs(int argc, char *argv[]) { int err; if (argc != 2) { command_errmsg = "wrong number of arguments"; return (CMD_ERROR); } err = zfs_list(argv[1]); if (err != 0) { command_errmsg = strerror(err); return (CMD_ERROR); } return (CMD_OK); } COMMAND_SET(reloadbe, "reloadbe", "refresh the list of ZFS Boot Environments", command_reloadbe); static int command_reloadbe(int argc, char *argv[]) { int err; + char *root; if (argc > 2) { command_errmsg = "wrong number of arguments"; return (CMD_ERROR); } if (argc == 2) { err = zfs_bootenv(argv[1]); } else { + root = getenv("zfs_be_root"); + if (root == NULL) { + /* There does not appear to be a ZFS pool here, exit without error */ + return (CMD_OK); + } err = zfs_bootenv(getenv("zfs_be_root")); } if (err != 0) { command_errmsg = strerror(err); return (CMD_ERROR); } return (CMD_OK); } #endif /* ISA bus access functions for PnP. */ static int isa_inb(int port) { return (inb(port)); } static void isa_outb(int port, int value) { outb(port, value); } #ifdef LOADER_ZFS_SUPPORT static void i386_zfs_probe(void) { char devname[32]; int unit; /* * Open all the disks we can find and see if we can reconstruct * ZFS pools from them. */ for (unit = 0; unit < MAXBDDEV; unit++) { if (bd_unit2bios(unit) == -1) break; sprintf(devname, "disk%d:", unit); zfs_probe_dev(devname, NULL); } } #endif Index: projects/clang380-import/sys/boot/powerpc/boot1.chrp/boot1.c =================================================================== --- projects/clang380-import/sys/boot/powerpc/boot1.chrp/boot1.c (revision 293686) +++ projects/clang380-import/sys/boot/powerpc/boot1.chrp/boot1.c (revision 293687) @@ -1,771 +1,767 @@ /*- * Copyright (c) 1998 Robert Nordier * All rights reserved. * Copyright (c) 2001 Robert Drehmel * All rights reserved. * * Redistribution and use in source and binary forms are freely * permitted provided that the above copyright notice and this * paragraph and the following disclaimer are duplicated in all * such forms. * * This software is provided "AS IS" and without any express or * implied warranties, including, without limitation, the implied * warranties of merchantability and fitness for a particular * purpose. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #define _PATH_LOADER "/boot/loader" #define _PATH_KERNEL "/boot/kernel/kernel" #define BSIZEMAX 16384 typedef int putc_func_t(char c, void *arg); typedef int32_t ofwh_t; struct sp_data { char *sp_buf; u_int sp_len; u_int sp_size; }; static const char digits[] = "0123456789abcdef"; static char bootpath[128]; static char bootargs[128]; static ofwh_t bootdev; static struct fs fs; static char blkbuf[BSIZEMAX]; static unsigned int fsblks; static uint32_t fs_off; int main(int ac, char **av); static void exit(int) __dead2; static void load(const char *); static int dskread(void *, u_int64_t, int); static void usage(void); static void bcopy(const void *src, void *dst, size_t len); static void bzero(void *b, size_t len); static int domount(const char *device, int quiet); static void panic(const char *fmt, ...) __dead2; static int printf(const char *fmt, ...); static int putchar(char c, void *arg); static int vprintf(const char *fmt, va_list ap); static int vsnprintf(char *str, size_t sz, const char *fmt, va_list ap); static int __printf(const char *fmt, putc_func_t *putc, void *arg, va_list ap); static int __putc(char c, void *arg); static int __puts(const char *s, putc_func_t *putc, void *arg); static int __sputc(char c, void *arg); static char *__uitoa(char *buf, u_int val, int base); static char *__ultoa(char *buf, u_long val, int base); void __syncicache(void *, int); /* * Open Firmware interface functions */ typedef u_int32_t ofwcell_t; typedef u_int32_t u_ofwh_t; typedef int (*ofwfp_t)(void *); ofwfp_t ofw; /* the prom Open Firmware entry */ ofwh_t chosenh; void ofw_init(void *, int, int (*)(void *), char *, int); static ofwh_t ofw_finddevice(const char *); static ofwh_t ofw_open(const char *); static int ofw_close(ofwh_t); static int ofw_getprop(ofwh_t, const char *, void *, size_t); static int ofw_setprop(ofwh_t, const char *, void *, size_t); static int ofw_read(ofwh_t, void *, size_t); static int ofw_write(ofwh_t, const void *, size_t); static int ofw_claim(void *virt, size_t len, u_int align); static int ofw_seek(ofwh_t, u_int64_t); static void ofw_exit(void) __dead2; ofwh_t bootdevh; ofwh_t stdinh, stdouth; __asm(" \n\ .data \n\ .align 4 \n\ stack: \n\ .space 16384 \n\ \n\ .text \n\ .globl _start \n\ _start: \n\ lis %r1,stack@ha \n\ addi %r1,%r1,stack@l \n\ addi %r1,%r1,8192 \n\ \n\ b ofw_init \n\ "); void ofw_init(void *vpd, int res, int (*openfirm)(void *), char *arg, int argl) { char *av[16]; char *p; int ac; ofw = openfirm; chosenh = ofw_finddevice("/chosen"); ofw_getprop(chosenh, "stdin", &stdinh, sizeof(stdinh)); ofw_getprop(chosenh, "stdout", &stdouth, sizeof(stdouth)); ofw_getprop(chosenh, "bootargs", bootargs, sizeof(bootargs)); ofw_getprop(chosenh, "bootpath", bootpath, sizeof(bootpath)); bootargs[sizeof(bootargs) - 1] = '\0'; bootpath[sizeof(bootpath) - 1] = '\0'; p = bootpath; while (*p != '\0') { if (*p == ':') { *(++p) = '\0'; break; } p++; } ac = 0; p = bootargs; for (;;) { while (*p == ' ' && *p != '\0') p++; if (*p == '\0' || ac >= 16) break; av[ac++] = p; while (*p != ' ' && *p != '\0') p++; if (*p != '\0') *p++ = '\0'; } exit(main(ac, av)); } static ofwh_t ofw_finddevice(const char *name) { ofwcell_t args[] = { (ofwcell_t)"finddevice", 1, 1, (ofwcell_t)name, 0 }; if ((*ofw)(args)) { printf("ofw_finddevice: name=\"%s\"\n", name); return (1); } return (args[4]); } static int ofw_getprop(ofwh_t ofwh, const char *name, void *buf, size_t len) { ofwcell_t args[] = { (ofwcell_t)"getprop", 4, 1, (u_ofwh_t)ofwh, (ofwcell_t)name, (ofwcell_t)buf, len, 0 }; if ((*ofw)(args)) { printf("ofw_getprop: ofwh=0x%x buf=%p len=%u\n", ofwh, buf, len); return (1); } return (0); } static int ofw_setprop(ofwh_t ofwh, const char *name, void *buf, size_t len) { ofwcell_t args[] = { (ofwcell_t)"setprop", 4, 1, (u_ofwh_t)ofwh, (ofwcell_t)name, (ofwcell_t)buf, len, 0 }; if ((*ofw)(args)) { printf("ofw_setprop: ofwh=0x%x buf=%p len=%u\n", ofwh, buf, len); return (1); } return (0); } static ofwh_t ofw_open(const char *path) { ofwcell_t args[] = { (ofwcell_t)"open", 1, 1, (ofwcell_t)path, 0 }; if ((*ofw)(args)) { printf("ofw_open: path=\"%s\"\n", path); return (-1); } return (args[4]); } static int ofw_close(ofwh_t devh) { ofwcell_t args[] = { (ofwcell_t)"close", 1, 0, (u_ofwh_t)devh }; if ((*ofw)(args)) { printf("ofw_close: devh=0x%x\n", devh); return (1); } return (0); } static int ofw_claim(void *virt, size_t len, u_int align) { ofwcell_t args[] = { (ofwcell_t)"claim", 3, 1, (ofwcell_t)virt, len, align, 0, 0 }; if ((*ofw)(args)) { printf("ofw_claim: virt=%p len=%u\n", virt, len); return (1); } return (0); } static int ofw_read(ofwh_t devh, void *buf, size_t len) { ofwcell_t args[] = { (ofwcell_t)"read", 3, 1, (u_ofwh_t)devh, (ofwcell_t)buf, len, 0 }; if ((*ofw)(args)) { printf("ofw_read: devh=0x%x buf=%p len=%u\n", devh, buf, len); return (1); } return (0); } static int ofw_write(ofwh_t devh, const void *buf, size_t len) { ofwcell_t args[] = { (ofwcell_t)"write", 3, 1, (u_ofwh_t)devh, (ofwcell_t)buf, len, 0 }; if ((*ofw)(args)) { printf("ofw_write: devh=0x%x buf=%p len=%u\n", devh, buf, len); return (1); } return (0); } static int ofw_seek(ofwh_t devh, u_int64_t off) { ofwcell_t args[] = { (ofwcell_t)"seek", 3, 1, (u_ofwh_t)devh, off >> 32, off, 0 }; if ((*ofw)(args)) { printf("ofw_seek: devh=0x%x off=0x%lx\n", devh, off); return (1); } return (0); } static void ofw_exit(void) { ofwcell_t args[3]; args[0] = (ofwcell_t)"exit"; args[1] = 0; args[2] = 0; for (;;) (*ofw)(args); } static void bcopy(const void *src, void *dst, size_t len) { const char *s = src; char *d = dst; while (len-- != 0) *d++ = *s++; } static void memcpy(void *dst, const void *src, size_t len) { bcopy(src, dst, len); } static void bzero(void *b, size_t len) { char *p = b; while (len-- != 0) *p++ = 0; } static int strcmp(const char *s1, const char *s2) { for (; *s1 == *s2 && *s1; s1++, s2++) ; return ((u_char)*s1 - (u_char)*s2); } #include "ufsread.c" int main(int ac, char **av) { const char *path; char bootpath_full[255]; int i, len; path = _PATH_LOADER; for (i = 0; i < ac; i++) { switch (av[i][0]) { case '-': switch (av[i][1]) { default: usage(); } break; default: path = av[i]; break; } } printf(" \n>> FreeBSD/powerpc Open Firmware boot block\n" " Boot path: %s\n" " Boot loader: %s\n", bootpath, path); len = 0; while (bootpath[len] != '\0') len++; memcpy(bootpath_full,bootpath,len+1); if (bootpath_full[len-1] == ':') { for (i = 0; i < 16; i++) { if (i < 10) { bootpath_full[len] = i + '0'; bootpath_full[len+1] = '\0'; } else { bootpath_full[len] = '1'; bootpath_full[len+1] = i - 10 + '0'; bootpath_full[len+2] = '\0'; } if (domount(bootpath_full,1) >= 0) break; if (bootdev > 0) ofw_close(bootdev); } if (i >= 16) panic("domount"); } else { if (domount(bootpath_full,0) == -1) panic("domount"); } printf(" Boot volume: %s\n",bootpath_full); ofw_setprop(chosenh, "bootargs", bootpath_full, len+2); load(path); return (1); } static void usage(void) { printf("usage: boot device [/path/to/loader]\n"); exit(1); } static void exit(int code) { ofw_exit(); } static struct dmadat __dmadat; static int domount(const char *device, int quiet) { dmadat = &__dmadat; if ((bootdev = ofw_open(device)) == -1) { printf("domount: can't open device\n"); return (-1); } if (fsread(0, NULL, 0)) { if (!quiet) printf("domount: can't read superblock\n"); return (-1); } return (0); } static void load(const char *fname) { Elf32_Ehdr eh; Elf32_Phdr ph; caddr_t p; ufs_ino_t ino; int i; if ((ino = lookup(fname)) == 0) { printf("File %s not found\n", fname); return; } if (fsread(ino, &eh, sizeof(eh)) != sizeof(eh)) { printf("Can't read elf header\n"); return; } if (!IS_ELF(eh)) { printf("Not an ELF file\n"); return; } for (i = 0; i < eh.e_phnum; i++) { fs_off = eh.e_phoff + i * eh.e_phentsize; if (fsread(ino, &ph, sizeof(ph)) != sizeof(ph)) { printf("Can't read program header %d\n", i); return; } if (ph.p_type != PT_LOAD) continue; fs_off = ph.p_offset; p = (caddr_t)ph.p_vaddr; ofw_claim(p,(ph.p_filesz > ph.p_memsz) ? ph.p_filesz : ph.p_memsz,0); if (fsread(ino, p, ph.p_filesz) != ph.p_filesz) { printf("Can't read content of section %d\n", i); return; } if (ph.p_filesz != ph.p_memsz) bzero(p + ph.p_filesz, ph.p_memsz - ph.p_filesz); __syncicache(p, ph.p_memsz); } ofw_close(bootdev); (*(void (*)(void *, int, ofwfp_t, char *, int))eh.e_entry)(NULL, 0, ofw,NULL,0); } static int dskread(void *buf, u_int64_t lba, int nblk) { /* * The Open Firmware should open the correct partition for us. * That means, if we read from offset zero on an open instance handle, * we should read from offset zero of that partition. */ ofw_seek(bootdev, lba * DEV_BSIZE); ofw_read(bootdev, buf, nblk * DEV_BSIZE); return (0); } static void panic(const char *fmt, ...) { char buf[128]; va_list ap; va_start(ap, fmt); vsnprintf(buf, sizeof buf, fmt, ap); printf("panic: %s\n", buf); va_end(ap); exit(1); } static int printf(const char *fmt, ...) { va_list ap; int ret; - /* Don't annoy the user as we probe for partitions */ - if (strcmp(fmt,"Not ufs\n") == 0) - return 0; - va_start(ap, fmt); ret = vprintf(fmt, ap); va_end(ap); return (ret); } static int putchar(char c, void *arg) { char buf; if (c == '\n') { buf = '\r'; ofw_write(stdouth, &buf, 1); } buf = c; ofw_write(stdouth, &buf, 1); return (1); } static int vprintf(const char *fmt, va_list ap) { int ret; ret = __printf(fmt, putchar, 0, ap); return (ret); } static int vsnprintf(char *str, size_t sz, const char *fmt, va_list ap) { struct sp_data sp; int ret; sp.sp_buf = str; sp.sp_len = 0; sp.sp_size = sz; ret = __printf(fmt, __sputc, &sp, ap); return (ret); } static int __printf(const char *fmt, putc_func_t *putc, void *arg, va_list ap) { char buf[(sizeof(long) * 8) + 1]; char *nbuf; u_long ul; u_int ui; int lflag; int sflag; char *s; int pad; int ret; int c; nbuf = &buf[sizeof buf - 1]; ret = 0; while ((c = *fmt++) != 0) { if (c != '%') { ret += putc(c, arg); continue; } lflag = 0; sflag = 0; pad = 0; reswitch: c = *fmt++; switch (c) { case '#': sflag = 1; goto reswitch; case '%': ret += putc('%', arg); break; case 'c': c = va_arg(ap, int); ret += putc(c, arg); break; case 'd': if (lflag == 0) { ui = (u_int)va_arg(ap, int); if (ui < (int)ui) { ui = -ui; ret += putc('-', arg); } s = __uitoa(nbuf, ui, 10); } else { ul = (u_long)va_arg(ap, long); if (ul < (long)ul) { ul = -ul; ret += putc('-', arg); } s = __ultoa(nbuf, ul, 10); } ret += __puts(s, putc, arg); break; case 'l': lflag = 1; goto reswitch; case 'o': if (lflag == 0) { ui = (u_int)va_arg(ap, u_int); s = __uitoa(nbuf, ui, 8); } else { ul = (u_long)va_arg(ap, u_long); s = __ultoa(nbuf, ul, 8); } ret += __puts(s, putc, arg); break; case 'p': ul = (u_long)va_arg(ap, void *); s = __ultoa(nbuf, ul, 16); ret += __puts("0x", putc, arg); ret += __puts(s, putc, arg); break; case 's': s = va_arg(ap, char *); ret += __puts(s, putc, arg); break; case 'u': if (lflag == 0) { ui = va_arg(ap, u_int); s = __uitoa(nbuf, ui, 10); } else { ul = va_arg(ap, u_long); s = __ultoa(nbuf, ul, 10); } ret += __puts(s, putc, arg); break; case 'x': if (lflag == 0) { ui = va_arg(ap, u_int); s = __uitoa(nbuf, ui, 16); } else { ul = va_arg(ap, u_long); s = __ultoa(nbuf, ul, 16); } if (sflag) ret += __puts("0x", putc, arg); ret += __puts(s, putc, arg); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': pad = pad * 10 + c - '0'; goto reswitch; default: break; } } return (ret); } static int __sputc(char c, void *arg) { struct sp_data *sp; sp = arg; if (sp->sp_len < sp->sp_size) sp->sp_buf[sp->sp_len++] = c; sp->sp_buf[sp->sp_len] = '\0'; return (1); } static int __puts(const char *s, putc_func_t *putc, void *arg) { const char *p; int ret; ret = 0; for (p = s; *p != '\0'; p++) ret += putc(*p, arg); return (ret); } static char * __uitoa(char *buf, u_int ui, int base) { char *p; p = buf; *p = '\0'; do *--p = digits[ui % base]; while ((ui /= base) != 0); return (p); } static char * __ultoa(char *buf, u_long ul, int base) { char *p; p = buf; *p = '\0'; do *--p = digits[ul % base]; while ((ul /= base) != 0); return (p); } Index: projects/clang380-import/sys/boot/powerpc/boot1.chrp =================================================================== --- projects/clang380-import/sys/boot/powerpc/boot1.chrp (revision 293686) +++ projects/clang380-import/sys/boot/powerpc/boot1.chrp (revision 293687) Property changes on: projects/clang380-import/sys/boot/powerpc/boot1.chrp ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/boot/powerpc/boot1.chrp:r292913-293685 Index: projects/clang380-import/sys/boot/userboot/libstand/Makefile =================================================================== --- projects/clang380-import/sys/boot/userboot/libstand/Makefile (revision 293686) +++ projects/clang380-import/sys/boot/userboot/libstand/Makefile (revision 293687) @@ -1,136 +1,12 @@ # $FreeBSD$ -# Originally from $NetBSD: Makefile,v 1.21 1997/10/26 22:08:38 lukem Exp $ -# -# Notes: -# - We don't use the libc strerror/sys_errlist because the string table is -# quite large. -# -MAN= +.include -.include -MK_SSP= no - LIBSTAND_SRC= ${.CURDIR}/../../../../lib/libstand -LIBC_SRC= ${LIBSTAND_SRC}/../libc -.PATH: ${LIBSTAND_SRC} -LIB= stand INTERNALLIB= -MK_PROFILE= no -NO_PIC= +INCS= +MAN= +.PATH: ${LIBSTAND_SRC} -WARNS?= 0 - -# standalone components and stuff we have modified locally -SRCS+= gzguts.h zutil.h __main.c assert.c bcd.c bswap.c environment.c getopt.c gets.c \ - globals.c pager.c printf.c strdup.c strerror.c strtol.c strtoul.c random.c \ - sbrk.c twiddle.c zalloc.c zalloc_malloc.c - -# private (pruned) versions of libc string functions -SRCS+= strcasecmp.c - -.PATH: ${LIBC_SRC}/net - -SRCS+= ntoh.c - -# string functions from libc -.PATH: ${LIBC_SRC}/string -.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "powerpc" || \ - ${MACHINE_CPUARCH} == "sparc64" || ${MACHINE_CPUARCH} == "amd64" || \ - ${MACHINE_CPUARCH} == "arm" -SRCS+= bcmp.c bcopy.c bzero.c ffs.c memccpy.c memchr.c memcmp.c memcpy.c \ - memmove.c memset.c qdivrem.c strcat.c strchr.c strcmp.c strcpy.c \ - strcspn.c strlen.c strncat.c strncmp.c strncpy.c strpbrk.c \ - strrchr.c strsep.c strspn.c strstr.c strtok.c swab.c -.endif -.if ${MACHINE_CPUARCH} == "arm" -.PATH: ${LIBC_SRC}/arm/gen -SRCS+= divsi3.S -.endif -.if ${MACHINE_CPUARCH} == "powerpc" -.PATH: ${LIBC_SRC}/quad -SRCS+= ashldi3.c ashrdi3.c -.PATH: ${LIBC_SRC}/powerpc/gen -SRCS+= syncicache.c -.endif - -# uuid functions from libc -.PATH: ${LIBC_SRC}/uuid -SRCS+= uuid_equal.c uuid_is_nil.c - -# _setjmp/_longjmp -.if ${MACHINE_CPUARCH} == "amd64" -.PATH: ${LIBSTAND_SRC}/amd64 -.elif ${MACHINE_ARCH} == "powerpc64" -.PATH: ${LIBSTAND_SRC}/powerpc -.else -.PATH: ${LIBSTAND_SRC}/${MACHINE_CPUARCH} -.endif -SRCS+= _setjmp.S - -# decompression functionality from libbz2 -# NOTE: to actually test this functionality after libbz2 upgrade compile -# loader(8) with LOADER_BZIP2_SUPPORT defined -.PATH: ${LIBSTAND_SRC}/../../contrib/bzip2 -CFLAGS+= -DBZ_NO_STDIO -DBZ_NO_COMPRESS -SRCS+= libstand_bzlib_private.h - -.for file in bzlib.c crctable.c decompress.c huffman.c randtable.c -SRCS+= _${file} -CLEANFILES+= _${file} - -_${file}: ${file} - sed "s|bzlib_private\.h|libstand_bzlib_private.h|" \ - ${.ALLSRC} > ${.TARGET} -.endfor - -CLEANFILES+= libstand_bzlib_private.h -libstand_bzlib_private.h: bzlib_private.h - sed -e 's||"stand.h"|' \ - ${.ALLSRC} > ${.TARGET} - -# decompression functionality from libz -.PATH: ${LIBSTAND_SRC}/../libz -CFLAGS+=-DHAVE_MEMCPY -I${LIBSTAND_SRC}/../libz -SRCS+= adler32.c crc32.c libstand_zutil.h libstand_gzguts.h - -.for file in infback.c inffast.c inflate.c inftrees.c zutil.c -SRCS+= _${file} -CLEANFILES+= _${file} - -_${file}: ${file} - sed -e "s|zutil\.h|libstand_zutil.h|" \ - -e "s|gzguts\.h|libstand_gzguts.h|" \ - ${.ALLSRC} > ${.TARGET} -.endfor - -# depend on stand.h being able to be included multiple times -.for file in zutil.h gzguts.h -CLEANFILES+= libstand_${file} -libstand_${file}: ${file} - sed -e 's||"stand.h"|' \ - -e 's||"stand.h"|' \ - -e 's||"stand.h"|' \ - -e 's||"stand.h"|' \ - -e 's||"stand.h"|' \ - ${.ALLSRC} > ${.TARGET} -.endfor - -# io routines -SRCS+= closeall.c dev.c ioctl.c nullfs.c stat.c \ - fstat.c close.c lseek.c open.c read.c write.c readdir.c - -# network routines -SRCS+= arp.c ether.c inet_ntoa.c in_cksum.c net.c udp.c netif.c rpc.c - -# network info services: -SRCS+= bootp.c rarp.c bootparam.c - -# boot filesystems -SRCS+= ufs.c nfs.c cd9660.c tftp.c gzipfs.c bzipfs.c -SRCS+= dosfs.c ext2fs.c -SRCS+= splitfs.c - -.include -.include +.include "${LIBSTAND_SRC}/Makefile" Index: projects/clang380-import/sys/boot/userboot/libstand/Makefile.depend =================================================================== --- projects/clang380-import/sys/boot/userboot/libstand/Makefile.depend (revision 293686) +++ projects/clang380-import/sys/boot/userboot/libstand/Makefile.depend (revision 293687) @@ -1,58 +1,57 @@ # $FreeBSD$ # Autogenerated - do NOT edit! DIRDEPS = \ include \ include/arpa \ include/xlocale \ lib/libbz2 \ - lib/libstand \ .include .if ${DEP_RELDIR} == ${_DEP_RELDIR} # local dependencies - needed for -jN in clean tree _bzlib.o: _bzlib.c _bzlib.o: libstand_bzlib_private.h _bzlib.po: _bzlib.c _bzlib.po: libstand_bzlib_private.h _crctable.o: _crctable.c _crctable.o: libstand_bzlib_private.h _crctable.po: _crctable.c _crctable.po: libstand_bzlib_private.h _decompress.o: _decompress.c _decompress.o: libstand_bzlib_private.h _decompress.po: _decompress.c _decompress.po: libstand_bzlib_private.h _huffman.o: _huffman.c _huffman.o: libstand_bzlib_private.h _huffman.po: _huffman.c _huffman.po: libstand_bzlib_private.h _infback.o: _infback.c _infback.o: libstand_zutil.h _infback.po: _infback.c _infback.po: libstand_zutil.h _inffast.o: _inffast.c _inffast.o: libstand_zutil.h _inffast.po: _inffast.c _inffast.po: libstand_zutil.h _inflate.o: _inflate.c _inflate.o: libstand_zutil.h _inflate.po: _inflate.c _inflate.po: libstand_zutil.h _inftrees.o: _inftrees.c _inftrees.o: libstand_zutil.h _inftrees.po: _inftrees.c _inftrees.po: libstand_zutil.h _randtable.o: _randtable.c _randtable.o: libstand_bzlib_private.h _randtable.po: _randtable.c _randtable.po: libstand_bzlib_private.h _zutil.o: _zutil.c _zutil.o: libstand_gzguts.h _zutil.o: libstand_zutil.h _zutil.po: _zutil.c _zutil.po: libstand_gzguts.h _zutil.po: libstand_zutil.h .endif Index: projects/clang380-import/sys/boot/userboot/userboot/main.c =================================================================== --- projects/clang380-import/sys/boot/userboot/userboot/main.c (revision 293686) +++ projects/clang380-import/sys/boot/userboot/userboot/main.c (revision 293687) @@ -1,315 +1,320 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright (c) 1998,2000 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include "bootstrap.h" #include "disk.h" #include "libuserboot.h" #if defined(USERBOOT_ZFS_SUPPORT) #include "../zfs/libzfs.h" static void userboot_zfs_probe(void); static int userboot_zfs_found; static void init_zfs_bootenv(char *currdev); #endif #define USERBOOT_VERSION USERBOOT_VERSION_3 #define MALLOCSZ (10*1024*1024) struct loader_callbacks *callbacks; void *callbacks_arg; extern char bootprog_name[]; extern char bootprog_rev[]; extern char bootprog_date[]; extern char bootprog_maker[]; static jmp_buf jb; struct arch_switch archsw; /* MI/MD interface boundary */ static void extract_currdev(void); void delay(int usec) { CALLBACK(delay, usec); } void exit(int v) { CALLBACK(exit, v); longjmp(jb, 1); } void loader_main(struct loader_callbacks *cb, void *arg, int version, int ndisks) { static char mallocbuf[MALLOCSZ]; const char *var; int i; if (version != USERBOOT_VERSION) abort(); callbacks = cb; callbacks_arg = arg; userboot_disk_maxunit = ndisks; /* * initialise the heap as early as possible. Once this is done, * alloc() is usable. */ setheap((void *)mallocbuf, (void *)(mallocbuf + sizeof(mallocbuf))); /* * Hook up the console */ cons_probe(); printf("\n"); printf("%s, Revision %s\n", bootprog_name, bootprog_rev); printf("(%s, %s)\n", bootprog_maker, bootprog_date); #if 0 printf("Memory: %ld k\n", memsize() / 1024); #endif setenv("LINES", "24", 1); /* optional */ /* * Set custom environment variables */ i = 0; while (1) { var = CALLBACK(getenv, i++); if (var == NULL) break; putenv(var); } archsw.arch_autoload = userboot_autoload; archsw.arch_getdev = userboot_getdev; archsw.arch_copyin = userboot_copyin; archsw.arch_copyout = userboot_copyout; archsw.arch_readin = userboot_readin; #if defined(USERBOOT_ZFS_SUPPORT) archsw.arch_zfs_probe = userboot_zfs_probe; #endif /* * March through the device switch probing for things. */ for (i = 0; devsw[i] != NULL; i++) if (devsw[i]->dv_init != NULL) (devsw[i]->dv_init)(); extract_currdev(); if (setjmp(jb)) return; interact(NULL); /* doesn't return */ exit(0); } /* * Set the 'current device' by (if possible) recovering the boot device as * supplied by the initial bootstrap. */ static void extract_currdev(void) { struct disk_devdesc dev; //bzero(&dev, sizeof(dev)); #if defined(USERBOOT_ZFS_SUPPORT) if (userboot_zfs_found) { struct zfs_devdesc zdev; /* Leave the pool/root guid's unassigned */ bzero(&zdev, sizeof(zdev)); zdev.d_dev = &zfs_dev; zdev.d_type = zdev.d_dev->dv_type; dev = *(struct disk_devdesc *)&zdev; + init_zfs_bootenv(zfs_fmtdev(&dev)); } else #endif if (userboot_disk_maxunit > 0) { dev.d_dev = &userboot_disk; dev.d_type = dev.d_dev->dv_type; dev.d_unit = 0; dev.d_slice = 0; dev.d_partition = 0; /* * If we cannot auto-detect the partition type then * access the disk as a raw device. */ if (dev.d_dev->dv_open(NULL, &dev)) { dev.d_slice = -1; dev.d_partition = -1; } } else { dev.d_dev = &host_dev; dev.d_type = dev.d_dev->dv_type; dev.d_unit = 0; } -#if defined(USERBOOT_ZFS_SUPPORT) - init_zfs_bootenv(zfs_fmtdev(&dev)); -#endif - env_setenv("currdev", EV_VOLATILE, userboot_fmtdev(&dev), userboot_setcurrdev, env_nounset); env_setenv("loaddev", EV_VOLATILE, userboot_fmtdev(&dev), env_noset, env_nounset); } #if defined(USERBOOT_ZFS_SUPPORT) static void init_zfs_bootenv(char *currdev) { char *beroot; + if (strlen(currdev) == 0) + return; + if(strncmp(currdev, "zfs:", 4) != 0) + return; /* Remove the trailing : */ currdev[strlen(currdev) - 1] = '\0'; setenv("zfs_be_active", currdev, 1); + setenv("zfs_be_currpage", "1", 1); /* Do not overwrite if already set */ setenv("vfs.root.mountfrom", currdev, 0); /* Forward past zfs: */ currdev = strchr(currdev, ':'); currdev++; /* Remove the last element (current bootenv) */ beroot = strrchr(currdev, '/'); if (beroot != NULL) beroot[0] = '\0'; - beroot = currdev; - setenv("zfs_be_root", beroot, 1); } static void userboot_zfs_probe(void) { char devname[32]; uint64_t pool_guid; int unit; /* * Open all the disks we can find and see if we can reconstruct * ZFS pools from them. Record if any were found. */ for (unit = 0; unit < userboot_disk_maxunit; unit++) { sprintf(devname, "disk%d:", unit); pool_guid = 0; zfs_probe_dev(devname, &pool_guid); if (pool_guid != 0) userboot_zfs_found = 1; } } COMMAND_SET(lszfs, "lszfs", "list child datasets of a zfs dataset", command_lszfs); static int command_lszfs(int argc, char *argv[]) { int err; if (argc != 2) { command_errmsg = "a single dataset must be supplied"; return (CMD_ERROR); } err = zfs_list(argv[1]); if (err != 0) { command_errmsg = strerror(err); return (CMD_ERROR); } return (CMD_OK); } COMMAND_SET(reloadbe, "reloadbe", "refresh the list of ZFS Boot Environments", command_reloadbe); static int command_reloadbe(int argc, char *argv[]) { int err; + char *root; if (argc > 2) { command_errmsg = "wrong number of arguments"; return (CMD_ERROR); } if (argc == 2) { err = zfs_bootenv(argv[1]); } else { - err = zfs_bootenv(getenv("zfs_be_root")); + root = getenv("zfs_be_root"); + if (root == NULL) { + return (CMD_OK); + } + err = zfs_bootenv(root); } if (err != 0) { command_errmsg = strerror(err); return (CMD_ERROR); } return (CMD_OK); } #endif /* USERBOOT_ZFS_SUPPORT */ COMMAND_SET(quit, "quit", "exit the loader", command_quit); static int command_quit(int argc, char *argv[]) { exit(USERBOOT_EXIT_QUIT); return (CMD_OK); } COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot); static int command_reboot(int argc, char *argv[]) { exit(USERBOOT_EXIT_REBOOT); return (CMD_OK); } Index: projects/clang380-import/sys/boot/zfs/zfs.c =================================================================== --- projects/clang380-import/sys/boot/zfs/zfs.c (revision 293686) +++ projects/clang380-import/sys/boot/zfs/zfs.c (revision 293687) @@ -1,860 +1,865 @@ /*- * Copyright (c) 2007 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); /* * Stand-alone file reading package. */ #include #include #include #include #include #include #include #include #include #include #include "libzfs.h" #include "zfsimpl.c" /* Define the range of indexes to be populated with ZFS Boot Environments */ #define ZFS_BE_FIRST 4 #define ZFS_BE_LAST 8 static int zfs_open(const char *path, struct open_file *f); static int zfs_write(struct open_file *f, void *buf, size_t size, size_t *resid); static int zfs_close(struct open_file *f); static int zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid); static off_t zfs_seek(struct open_file *f, off_t offset, int where); static int zfs_stat(struct open_file *f, struct stat *sb); static int zfs_readdir(struct open_file *f, struct dirent *d); struct devsw zfs_dev; struct fs_ops zfs_fsops = { "zfs", zfs_open, zfs_close, zfs_read, zfs_write, zfs_seek, zfs_stat, zfs_readdir }; /* * In-core open file. */ struct file { off_t f_seekp; /* seek pointer */ dnode_phys_t f_dnode; uint64_t f_zap_type; /* zap type for readdir */ uint64_t f_num_leafs; /* number of fzap leaf blocks */ zap_leaf_phys_t *f_zap_leaf; /* zap leaf buffer */ }; static int zfs_env_index; static int zfs_env_count; SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head); struct zfs_be_list *zfs_be_headp; struct zfs_be_entry { const char *name; SLIST_ENTRY(zfs_be_entry) entries; } *zfs_be, *zfs_be_tmp; /* * Open a file. */ static int zfs_open(const char *upath, struct open_file *f) { struct zfsmount *mount = (struct zfsmount *)f->f_devdata; struct file *fp; int rc; if (f->f_dev != &zfs_dev) return (EINVAL); /* allocate file system specific data structure */ fp = malloc(sizeof(struct file)); bzero(fp, sizeof(struct file)); f->f_fsdata = (void *)fp; rc = zfs_lookup(mount, upath, &fp->f_dnode); fp->f_seekp = 0; if (rc) { f->f_fsdata = NULL; free(fp); } return (rc); } static int zfs_close(struct open_file *f) { struct file *fp = (struct file *)f->f_fsdata; dnode_cache_obj = 0; f->f_fsdata = (void *)0; if (fp == (struct file *)0) return (0); free(fp); return (0); } /* * Copy a portion of a file into kernel memory. * Cross block boundaries when necessary. */ static int zfs_read(struct open_file *f, void *start, size_t size, size_t *resid /* out */) { const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; struct file *fp = (struct file *)f->f_fsdata; struct stat sb; size_t n; int rc; rc = zfs_stat(f, &sb); if (rc) return (rc); n = size; if (fp->f_seekp + n > sb.st_size) n = sb.st_size - fp->f_seekp; rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n); if (rc) return (rc); if (0) { int i; for (i = 0; i < n; i++) putchar(((char*) start)[i]); } fp->f_seekp += n; if (resid) *resid = size - n; return (0); } /* * Don't be silly - the bootstrap has no business writing anything. */ static int zfs_write(struct open_file *f, void *start, size_t size, size_t *resid /* out */) { return (EROFS); } static off_t zfs_seek(struct open_file *f, off_t offset, int where) { struct file *fp = (struct file *)f->f_fsdata; switch (where) { case SEEK_SET: fp->f_seekp = offset; break; case SEEK_CUR: fp->f_seekp += offset; break; case SEEK_END: { struct stat sb; int error; error = zfs_stat(f, &sb); if (error != 0) { errno = error; return (-1); } fp->f_seekp = sb.st_size - offset; break; } default: errno = EINVAL; return (-1); } return (fp->f_seekp); } static int zfs_stat(struct open_file *f, struct stat *sb) { const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; struct file *fp = (struct file *)f->f_fsdata; return (zfs_dnode_stat(spa, &fp->f_dnode, sb)); } static int zfs_readdir(struct open_file *f, struct dirent *d) { const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; struct file *fp = (struct file *)f->f_fsdata; mzap_ent_phys_t mze; struct stat sb; size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT; int rc; rc = zfs_stat(f, &sb); if (rc) return (rc); if (!S_ISDIR(sb.st_mode)) return (ENOTDIR); /* * If this is the first read, get the zap type. */ if (fp->f_seekp == 0) { rc = dnode_read(spa, &fp->f_dnode, 0, &fp->f_zap_type, sizeof(fp->f_zap_type)); if (rc) return (rc); if (fp->f_zap_type == ZBT_MICRO) { fp->f_seekp = offsetof(mzap_phys_t, mz_chunk); } else { rc = dnode_read(spa, &fp->f_dnode, offsetof(zap_phys_t, zap_num_leafs), &fp->f_num_leafs, sizeof(fp->f_num_leafs)); if (rc) return (rc); fp->f_seekp = bsize; fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize); rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, fp->f_zap_leaf, bsize); if (rc) return (rc); } } if (fp->f_zap_type == ZBT_MICRO) { mzap_next: if (fp->f_seekp >= bsize) return (ENOENT); rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, &mze, sizeof(mze)); if (rc) return (rc); fp->f_seekp += sizeof(mze); if (!mze.mze_name[0]) goto mzap_next; d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value); d->d_type = ZFS_DIRENT_TYPE(mze.mze_value); strcpy(d->d_name, mze.mze_name); d->d_namlen = strlen(d->d_name); return (0); } else { zap_leaf_t zl; zap_leaf_chunk_t *zc, *nc; int chunk; size_t namelen; char *p; uint64_t value; /* * Initialise this so we can use the ZAP size * calculating macros. */ zl.l_bs = ilog2(bsize); zl.l_phys = fp->f_zap_leaf; /* * Figure out which chunk we are currently looking at * and consider seeking to the next leaf. We use the * low bits of f_seekp as a simple chunk index. */ fzap_next: chunk = fp->f_seekp & (bsize - 1); if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) { fp->f_seekp = (fp->f_seekp & ~(bsize - 1)) + bsize; chunk = 0; /* * Check for EOF and read the new leaf. */ if (fp->f_seekp >= bsize * fp->f_num_leafs) return (ENOENT); rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, fp->f_zap_leaf, bsize); if (rc) return (rc); } zc = &ZAP_LEAF_CHUNK(&zl, chunk); fp->f_seekp++; if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY) goto fzap_next; namelen = zc->l_entry.le_name_numints; if (namelen > sizeof(d->d_name)) namelen = sizeof(d->d_name); /* * Paste the name back together. */ nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk); p = d->d_name; while (namelen > 0) { int len; len = namelen; if (len > ZAP_LEAF_ARRAY_BYTES) len = ZAP_LEAF_ARRAY_BYTES; memcpy(p, nc->l_array.la_array, len); p += len; namelen -= len; nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next); } d->d_name[sizeof(d->d_name) - 1] = 0; /* * Assume the first eight bytes of the value are * a uint64_t. */ value = fzap_leaf_value(&zl, zc); d->d_fileno = ZFS_DIRENT_OBJ(value); d->d_type = ZFS_DIRENT_TYPE(value); d->d_namlen = strlen(d->d_name); return (0); } } static int vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t size) { int fd; fd = (uintptr_t) priv; lseek(fd, offset, SEEK_SET); if (read(fd, buf, size) == size) { return 0; } else { return (EIO); } } static int zfs_dev_init(void) { spa_t *spa; spa_t *next; spa_t *prev; zfs_init(); if (archsw.arch_zfs_probe == NULL) return (ENXIO); archsw.arch_zfs_probe(); prev = NULL; spa = STAILQ_FIRST(&zfs_pools); while (spa != NULL) { next = STAILQ_NEXT(spa, spa_link); if (zfs_spa_init(spa)) { if (prev == NULL) STAILQ_REMOVE_HEAD(&zfs_pools, spa_link); else STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link); } else prev = spa; spa = next; } return (0); } struct zfs_probe_args { int fd; const char *devname; uint64_t *pool_guid; - uint16_t secsz; + u_int secsz; }; static int zfs_diskread(void *arg, void *buf, size_t blocks, off_t offset) { struct zfs_probe_args *ppa; ppa = (struct zfs_probe_args *)arg; return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd, offset * ppa->secsz, buf, blocks * ppa->secsz)); } static int zfs_probe(int fd, uint64_t *pool_guid) { spa_t *spa; int ret; ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa); if (ret == 0 && pool_guid != NULL) *pool_guid = spa->spa_guid; return (ret); } static void zfs_probe_partition(void *arg, const char *partname, const struct ptable_entry *part) { struct zfs_probe_args *ppa, pa; struct ptable *table; char devname[32]; int ret; /* Probe only freebsd-zfs and freebsd partitions */ if (part->type != PART_FREEBSD && part->type != PART_FREEBSD_ZFS) return; ppa = (struct zfs_probe_args *)arg; strncpy(devname, ppa->devname, strlen(ppa->devname) - 1); devname[strlen(ppa->devname) - 1] = '\0'; sprintf(devname, "%s%s:", devname, partname); pa.fd = open(devname, O_RDONLY); if (pa.fd == -1) return; ret = zfs_probe(pa.fd, ppa->pool_guid); if (ret == 0) return; /* Do we have BSD label here? */ if (part->type == PART_FREEBSD) { pa.devname = devname; pa.pool_guid = ppa->pool_guid; pa.secsz = ppa->secsz; table = ptable_open(&pa, part->end - part->start + 1, ppa->secsz, zfs_diskread); if (table != NULL) { ptable_iterate(table, &pa, zfs_probe_partition); ptable_close(table); } } close(pa.fd); } int zfs_probe_dev(const char *devname, uint64_t *pool_guid) { struct ptable *table; struct zfs_probe_args pa; off_t mediasz; int ret; pa.fd = open(devname, O_RDONLY); if (pa.fd == -1) return (ENXIO); /* Probe the whole disk */ ret = zfs_probe(pa.fd, pool_guid); if (ret == 0) return (0); /* Probe each partition */ ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz); if (ret == 0) ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz); if (ret == 0) { pa.devname = devname; pa.pool_guid = pool_guid; table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz, zfs_diskread); if (table != NULL) { ptable_iterate(table, &pa, zfs_probe_partition); ptable_close(table); } } close(pa.fd); return (ret); } /* * Print information about ZFS pools */ static void zfs_dev_print(int verbose) { spa_t *spa; char line[80]; if (verbose) { spa_all_status(); return; } STAILQ_FOREACH(spa, &zfs_pools, spa_link) { sprintf(line, " zfs:%s\n", spa->spa_name); pager_output(line); } } /* * Attempt to open the pool described by (dev) for use by (f). */ static int zfs_dev_open(struct open_file *f, ...) { va_list args; struct zfs_devdesc *dev; struct zfsmount *mount; spa_t *spa; int rv; va_start(args, f); dev = va_arg(args, struct zfs_devdesc *); va_end(args); if (dev->pool_guid == 0) spa = STAILQ_FIRST(&zfs_pools); else spa = spa_find_by_guid(dev->pool_guid); if (!spa) return (ENXIO); mount = malloc(sizeof(*mount)); rv = zfs_mount(spa, dev->root_guid, mount); if (rv != 0) { free(mount); return (rv); } if (mount->objset.os_type != DMU_OST_ZFS) { printf("Unexpected object set type %ju\n", (uintmax_t)mount->objset.os_type); free(mount); return (EIO); } f->f_devdata = mount; free(dev); return (0); } static int zfs_dev_close(struct open_file *f) { free(f->f_devdata); f->f_devdata = NULL; return (0); } static int zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize) { return (ENOSYS); } struct devsw zfs_dev = { .dv_name = "zfs", .dv_type = DEVT_ZFS, .dv_init = zfs_dev_init, .dv_strategy = zfs_dev_strategy, .dv_open = zfs_dev_open, .dv_close = zfs_dev_close, .dv_ioctl = noioctl, .dv_print = zfs_dev_print, .dv_cleanup = NULL }; int zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path) { static char rootname[ZFS_MAXNAMELEN]; static char poolname[ZFS_MAXNAMELEN]; spa_t *spa; const char *end; const char *np; const char *sep; int rv; np = devspec; if (*np != ':') return (EINVAL); np++; end = strchr(np, ':'); if (end == NULL) return (EINVAL); sep = strchr(np, '/'); if (sep == NULL || sep >= end) sep = end; memcpy(poolname, np, sep - np); poolname[sep - np] = '\0'; if (sep < end) { sep++; memcpy(rootname, sep, end - sep); rootname[end - sep] = '\0'; } else rootname[0] = '\0'; spa = spa_find_by_name(poolname); if (!spa) return (ENXIO); dev->pool_guid = spa->spa_guid; rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid); if (rv != 0) return (rv); if (path != NULL) *path = (*end == '\0') ? end : end + 1; dev->d_dev = &zfs_dev; dev->d_type = zfs_dev.dv_type; return (0); } char * zfs_fmtdev(void *vdev) { static char rootname[ZFS_MAXNAMELEN]; static char buf[2 * ZFS_MAXNAMELEN + 8]; struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; spa_t *spa; buf[0] = '\0'; if (dev->d_type != DEVT_ZFS) return (buf); if (dev->pool_guid == 0) { spa = STAILQ_FIRST(&zfs_pools); dev->pool_guid = spa->spa_guid; } else spa = spa_find_by_guid(dev->pool_guid); if (spa == NULL) { printf("ZFS: can't find pool by guid\n"); return (buf); } if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) { printf("ZFS: can't find root filesystem\n"); return (buf); } if (zfs_rlookup(spa, dev->root_guid, rootname)) { printf("ZFS: can't find filesystem by guid\n"); return (buf); } if (rootname[0] == '\0') sprintf(buf, "%s:%s:", dev->d_dev->dv_name, spa->spa_name); else sprintf(buf, "%s:%s/%s:", dev->d_dev->dv_name, spa->spa_name, rootname); return (buf); } int zfs_list(const char *name) { static char poolname[ZFS_MAXNAMELEN]; uint64_t objid; spa_t *spa; const char *dsname; int len; int rv; len = strlen(name); dsname = strchr(name, '/'); if (dsname != NULL) { len = dsname - name; dsname++; } else dsname = ""; memcpy(poolname, name, len); poolname[len] = '\0'; spa = spa_find_by_name(poolname); if (!spa) return (ENXIO); rv = zfs_lookup_dataset(spa, dsname, &objid); if (rv != 0) return (rv); return (zfs_list_dataset(spa, objid)); } int zfs_bootenv(const char *name) { - static char poolname[ZFS_MAXNAMELEN], *dsname; + static char poolname[ZFS_MAXNAMELEN], *dsname, *root; char becount[4]; uint64_t objid; spa_t *spa; int len, rv, pages, perpage, currpage; - if (strcmp(name, getenv("zfs_be_root")) != 0) { + if (name == NULL) + return (EINVAL); + if ((root = getenv("zfs_be_root")) == NULL) + return (EINVAL); + + if (strcmp(name, root) != 0) { if (setenv("zfs_be_root", name, 1) != 0) return (ENOMEM); } SLIST_INIT(&zfs_be_head); zfs_env_count = 0; len = strlen(name); dsname = strchr(name, '/'); if (dsname != NULL) { len = dsname - name; dsname++; } else dsname = ""; memcpy(poolname, name, len); poolname[len] = '\0'; spa = spa_find_by_name(poolname); if (!spa) return (ENXIO); rv = zfs_lookup_dataset(spa, dsname, &objid); if (rv != 0) return (rv); rv = zfs_callback_dataset(spa, objid, zfs_belist_add); /* Calculate and store the number of pages of BEs */ perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1); pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0); snprintf(becount, 4, "%d", pages); if (setenv("zfs_be_pages", becount, 1) != 0) return (ENOMEM); /* Roll over the page counter if it has exceeded the maximum */ currpage = strtol(getenv("zfs_be_currpage"), NULL, 10); if (currpage > pages) { if (setenv("zfs_be_currpage", "1", 1) != 0) return (ENOMEM); } /* Populate the menu environment variables */ zfs_set_env(); /* Clean up the SLIST of ZFS BEs */ while (!SLIST_EMPTY(&zfs_be_head)) { zfs_be = SLIST_FIRST(&zfs_be_head); SLIST_REMOVE_HEAD(&zfs_be_head, entries); free(zfs_be); } return (rv); } int zfs_belist_add(const char *name) { /* Add the boot environment to the head of the SLIST */ zfs_be = malloc(sizeof(struct zfs_be_entry)); zfs_be->name = name; SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries); zfs_env_count++; return (0); } int zfs_set_env(void) { char envname[32], envval[256]; char *beroot, *pagenum; int rv, page, ctr; beroot = getenv("zfs_be_root"); if (beroot == NULL) { return (1); } pagenum = getenv("zfs_be_currpage"); if (pagenum != NULL) { page = strtol(pagenum, NULL, 10); } else { page = 1; } ctr = 1; rv = 0; zfs_env_index = ZFS_BE_FIRST; SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) { /* Skip to the requested page number */ if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) { ctr++; continue; } snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index); snprintf(envval, sizeof(envval), "%s", zfs_be->name); rv = setenv(envname, envval, 1); if (rv != 0) { break; } snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index); rv = setenv(envname, envval, 1); if (rv != 0){ break; } snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index); rv = setenv(envname, "set_bootenv", 1); if (rv != 0){ break; } snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index); snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name); rv = setenv(envname, envval, 1); if (rv != 0){ break; } zfs_env_index++; if (zfs_env_index > ZFS_BE_LAST) { break; } } for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) { snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index); (void)unsetenv(envname); snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index); (void)unsetenv(envname); snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index); (void)unsetenv(envname); snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index); (void)unsetenv(envname); } return (rv); } \ No newline at end of file Index: projects/clang380-import/sys/boot =================================================================== --- projects/clang380-import/sys/boot (revision 293686) +++ projects/clang380-import/sys/boot (revision 293687) Property changes on: projects/clang380-import/sys/boot ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/boot:r293430-293685 Index: projects/clang380-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c =================================================================== --- projects/clang380-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c (revision 293686) +++ projects/clang380-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c (revision 293687) @@ -1,917 +1,983 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2006 Pawel Jakub Dawidek * All rights reserved. * * Portions Copyright (c) 2012 Martin Matuska */ #include #include #include #include #include #include #include #include #include #include #include #include /* * Virtual device vector for GEOM. */ static g_attrchanged_t vdev_geom_attrchanged; struct g_class zfs_vdev_class = { .name = "ZFS::VDEV", .version = G_VERSION, .attrchanged = vdev_geom_attrchanged, }; DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev); SYSCTL_DECL(_vfs_zfs_vdev); /* Don't send BIO_FLUSH. */ static int vdev_geom_bio_flush_disable; SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RWTUN, &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH"); /* Don't send BIO_DELETE. */ static int vdev_geom_bio_delete_disable; SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RWTUN, &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE"); static void vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp) { int error; uint16_t rate; error = g_getattr("GEOM::rotation_rate", cp, &rate); if (error == 0) vd->vdev_rotation_rate = rate; else vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN; } static void vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) { vdev_t *vd; + spa_t *spa; + char *physpath; + int error, physpath_len; vd = cp->private; if (vd == NULL) return; if (strcmp(attr, "GEOM::rotation_rate") == 0) { vdev_geom_set_rotation_rate(vd, cp); return; } + + if (strcmp(attr, "GEOM::physpath") != 0) + return; + + if (g_access(cp, 1, 0, 0) != 0) + return; + + /* + * Record/Update physical path information for this device. + */ + spa = vd->vdev_spa; + physpath_len = MAXPATHLEN; + physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); + error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); + g_access(cp, -1, 0, 0); + if (error == 0) { + char *old_physpath; + + old_physpath = vd->vdev_physpath; + vd->vdev_physpath = spa_strdup(physpath); + spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); + + if (old_physpath != NULL) { + int held_lock; + + held_lock = spa_config_held(spa, SCL_STATE, RW_WRITER); + if (held_lock == 0) { + g_topology_unlock(); + spa_config_enter(spa, SCL_STATE, FTAG, + RW_WRITER); + } + + spa_strfree(old_physpath); + + if (held_lock == 0) { + spa_config_exit(spa, SCL_STATE, FTAG); + g_topology_lock(); + } + } + } + g_free(physpath); } static void vdev_geom_orphan(struct g_consumer *cp) { vdev_t *vd; g_topology_assert(); vd = cp->private; - if (vd == NULL) + if (vd == NULL) { + /* Vdev close in progress. Ignore the event. */ return; + } /* * Orphan callbacks occur from the GEOM event thread. * Concurrent with this call, new I/O requests may be * working their way through GEOM about to find out * (only once executed by the g_down thread) that we've * been orphaned from our disk provider. These I/Os * must be retired before we can detach our consumer. * This is most easily achieved by acquiring the * SPA ZIO configuration lock as a writer, but doing * so with the GEOM topology lock held would cause * a lock order reversal. Instead, rely on the SPA's * async removal support to invoke a close on this * vdev once it is safe to do so. */ zfs_post_remove(vd->vdev_spa, vd); vd->vdev_remove_wanted = B_TRUE; spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); } static struct g_consumer * -vdev_geom_attach(struct g_provider *pp) +vdev_geom_attach(struct g_provider *pp, vdev_t *vd) { struct g_geom *gp; struct g_consumer *cp; g_topology_assert(); ZFS_LOG(1, "Attaching to %s.", pp->name); /* Do we have geom already? No? Create one. */ LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) { if (gp->flags & G_GEOM_WITHER) continue; if (strcmp(gp->name, "zfs::vdev") != 0) continue; break; } if (gp == NULL) { gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev"); gp->orphan = vdev_geom_orphan; + gp->attrchanged = vdev_geom_attrchanged; cp = g_new_consumer(gp); if (g_attach(cp, pp) != 0) { g_wither_geom(gp, ENXIO); return (NULL); } if (g_access(cp, 1, 0, 1) != 0) { g_wither_geom(gp, ENXIO); return (NULL); } ZFS_LOG(1, "Created geom and consumer for %s.", pp->name); } else { /* Check if we are already connected to this provider. */ LIST_FOREACH(cp, &gp->consumer, consumer) { if (cp->provider == pp) { ZFS_LOG(1, "Found consumer for %s.", pp->name); break; } } if (cp == NULL) { cp = g_new_consumer(gp); if (g_attach(cp, pp) != 0) { g_destroy_consumer(cp); return (NULL); } if (g_access(cp, 1, 0, 1) != 0) { g_detach(cp); g_destroy_consumer(cp); return (NULL); } ZFS_LOG(1, "Created consumer for %s.", pp->name); } else { if (g_access(cp, 1, 0, 1) != 0) return (NULL); ZFS_LOG(1, "Used existing consumer for %s.", pp->name); } } + + /* + * BUG: cp may already belong to a vdev. This could happen if: + * 1) That vdev is a shared spare, or + * 2) We are trying to reopen a missing vdev and we are scanning by + * guid. In that case, we'll ultimately fail to open this consumer, + * but not until after setting the private field. + * The solution is to: + * 1) Don't set the private field until after the open succeeds, and + * 2) Set it to a linked list of vdevs, not just a single vdev + */ + cp->private = vd; + vd->vdev_tsd = cp; + + /* Fetch initial physical path information for this device. */ + vdev_geom_attrchanged(cp, "GEOM::physpath"); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; return (cp); } static void -vdev_geom_detach(void *arg, int flag __unused) +vdev_geom_close_locked(vdev_t *vd) { struct g_geom *gp; struct g_consumer *cp; g_topology_assert(); - cp = arg; - gp = cp->geom; + cp = vd->vdev_tsd; + if (cp == NULL) + return; + ZFS_LOG(1, "Closing access to %s.", cp->provider->name); + KASSERT(vd->vdev_tsd == cp, ("%s: vdev_tsd is not cp", __func__)); + vd->vdev_tsd = NULL; + vd->vdev_delayed_close = B_FALSE; + cp->private = NULL; + + gp = cp->geom; g_access(cp, -1, 0, -1); /* Destroy consumer on last close. */ if (cp->acr == 0 && cp->ace == 0) { - ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name); if (cp->acw > 0) g_access(cp, 0, -cp->acw, 0); - g_detach(cp); + if (cp->provider != NULL) { + ZFS_LOG(1, "Destroyed consumer to %s.", + cp->provider->name); + g_detach(cp); + } g_destroy_consumer(cp); } /* Destroy geom if there are no consumers left. */ if (LIST_EMPTY(&gp->consumer)) { ZFS_LOG(1, "Destroyed geom %s.", gp->name); g_wither_geom(gp, ENXIO); } } static void nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid) { nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid); nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid); } static int vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size) { struct bio *bp; u_char *p; off_t off, maxio; int error; ASSERT((offset % cp->provider->sectorsize) == 0); ASSERT((size % cp->provider->sectorsize) == 0); bp = g_alloc_bio(); off = offset; offset += size; p = data; maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize); error = 0; for (; off < offset; off += maxio, p += maxio, size -= maxio) { bzero(bp, sizeof(*bp)); bp->bio_cmd = cmd; bp->bio_done = NULL; bp->bio_offset = off; bp->bio_length = MIN(size, maxio); bp->bio_data = p; g_io_request(bp, cp); error = biowait(bp, "vdev_geom_io"); if (error != 0) break; } g_destroy_bio(bp); return (error); } static void vdev_geom_taste_orphan(struct g_consumer *cp) { KASSERT(1 == 0, ("%s called while tasting %s.", __func__, cp->provider->name)); } static int vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) { struct g_provider *pp; vdev_label_t *label; char *p, *buf; size_t buflen; uint64_t psize; off_t offset, size; uint64_t state, txg; int error, l, len; g_topology_assert_not(); pp = cp->provider; ZFS_LOG(1, "Reading config from %s...", pp->name); psize = pp->mediasize; psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t)); size = sizeof(*label) + pp->sectorsize - ((sizeof(*label) - 1) % pp->sectorsize) - 1; label = kmem_alloc(size, KM_SLEEP); buflen = sizeof(label->vl_vdev_phys.vp_nvlist); *config = NULL; for (l = 0; l < VDEV_LABELS; l++) { offset = vdev_label_offset(psize, l, 0); if ((offset % pp->sectorsize) != 0) continue; if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0) continue; buf = label->vl_vdev_phys.vp_nvlist; if (nvlist_unpack(buf, buflen, config, 0) != 0) continue; if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, &state) != 0 || state > POOL_STATE_L2CACHE) { nvlist_free(*config); *config = NULL; continue; } if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, &txg) != 0 || txg == 0)) { nvlist_free(*config); *config = NULL; continue; } break; } kmem_free(label, size); return (*config == NULL ? ENOENT : 0); } static void resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id) { nvlist_t **new_configs; uint64_t i; if (id < *count) return; new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *), KM_SLEEP); for (i = 0; i < *count; i++) new_configs[i] = (*configs)[i]; if (*configs != NULL) kmem_free(*configs, *count * sizeof(void *)); *configs = new_configs; *count = id + 1; } static void process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, const char *name, uint64_t* known_pool_guid) { nvlist_t *vdev_tree; uint64_t pool_guid; uint64_t vdev_guid, known_guid; uint64_t id, txg, known_txg; char *pname; int i; if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 || strcmp(pname, name) != 0) goto ignore; if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) goto ignore; if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0) goto ignore; if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) goto ignore; if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0) goto ignore; VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); if (*known_pool_guid != 0) { if (pool_guid != *known_pool_guid) goto ignore; } else *known_pool_guid = pool_guid; resize_configs(configs, count, id); if ((*configs)[id] != NULL) { VERIFY(nvlist_lookup_uint64((*configs)[id], ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0); if (txg <= known_txg) goto ignore; nvlist_free((*configs)[id]); } (*configs)[id] = cfg; return; ignore: nvlist_free(cfg); } static int vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp) { int error; if (pp->flags & G_PF_WITHER) return (EINVAL); g_attach(cp, pp); error = g_access(cp, 1, 0, 0); if (error == 0) { if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) error = EINVAL; else if (pp->mediasize < SPA_MINDEVSIZE) error = EINVAL; if (error != 0) g_access(cp, -1, 0, 0); } if (error != 0) g_detach(cp); return (error); } static void vdev_geom_detach_taster(struct g_consumer *cp) { g_access(cp, -1, 0, 0); g_detach(cp); } int vdev_geom_read_pool_label(const char *name, nvlist_t ***configs, uint64_t *count) { struct g_class *mp; struct g_geom *gp, *zgp; struct g_provider *pp; struct g_consumer *zcp; nvlist_t *vdev_cfg; uint64_t pool_guid; int error; DROP_GIANT(); g_topology_lock(); zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); /* This orphan function should be never called. */ zgp->orphan = vdev_geom_taste_orphan; zcp = g_new_consumer(zgp); *configs = NULL; *count = 0; pool_guid = 0; LIST_FOREACH(mp, &g_classes, class) { if (mp == &zfs_vdev_class) continue; LIST_FOREACH(gp, &mp->geom, geom) { if (gp->flags & G_GEOM_WITHER) continue; LIST_FOREACH(pp, &gp->provider, provider) { if (pp->flags & G_PF_WITHER) continue; if (vdev_geom_attach_taster(zcp, pp) != 0) continue; g_topology_unlock(); error = vdev_geom_read_config(zcp, &vdev_cfg); g_topology_lock(); vdev_geom_detach_taster(zcp); if (error) continue; ZFS_LOG(1, "successfully read vdev config"); process_vdev_config(configs, count, vdev_cfg, name, &pool_guid); } } } g_destroy_consumer(zcp); g_destroy_geom(zgp); g_topology_unlock(); PICKUP_GIANT(); return (*count > 0 ? 0 : ENOENT); } static void vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid) { nvlist_t *config; g_topology_assert_not(); *pguid = 0; *vguid = 0; if (vdev_geom_read_config(cp, &config) == 0) { nvlist_get_guids(config, pguid, vguid); nvlist_free(config); } } static struct g_consumer * -vdev_geom_attach_by_guids(uint64_t pool_guid, uint64_t vdev_guid) +vdev_geom_attach_by_guids(vdev_t *vd) { struct g_class *mp; struct g_geom *gp, *zgp; struct g_provider *pp; struct g_consumer *cp, *zcp; uint64_t pguid, vguid; g_topology_assert(); zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); /* This orphan function should be never called. */ zgp->orphan = vdev_geom_taste_orphan; zcp = g_new_consumer(zgp); cp = NULL; LIST_FOREACH(mp, &g_classes, class) { if (mp == &zfs_vdev_class) continue; LIST_FOREACH(gp, &mp->geom, geom) { if (gp->flags & G_GEOM_WITHER) continue; LIST_FOREACH(pp, &gp->provider, provider) { if (vdev_geom_attach_taster(zcp, pp) != 0) continue; g_topology_unlock(); vdev_geom_read_guids(zcp, &pguid, &vguid); g_topology_lock(); vdev_geom_detach_taster(zcp); - if (pguid != pool_guid || vguid != vdev_guid) + if (pguid != spa_guid(vd->vdev_spa) || + vguid != vd->vdev_guid) continue; - cp = vdev_geom_attach(pp); + cp = vdev_geom_attach(pp, vd); if (cp == NULL) { printf("ZFS WARNING: Unable to " "attach to %s.\n", pp->name); continue; } break; } if (cp != NULL) break; } if (cp != NULL) break; } end: g_destroy_consumer(zcp); g_destroy_geom(zgp); return (cp); } static struct g_consumer * vdev_geom_open_by_guids(vdev_t *vd) { struct g_consumer *cp; char *buf; size_t len; g_topology_assert(); ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid); - cp = vdev_geom_attach_by_guids(spa_guid(vd->vdev_spa), vd->vdev_guid); + cp = vdev_geom_attach_by_guids(vd); if (cp != NULL) { len = strlen(cp->provider->name) + strlen("/dev/") + 1; buf = kmem_alloc(len, KM_SLEEP); snprintf(buf, len, "/dev/%s", cp->provider->name); spa_strfree(vd->vdev_path); vd->vdev_path = buf; ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.", (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid, vd->vdev_path); } else { ZFS_LOG(1, "Search by guid [%ju:%ju] failed.", (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid); } return (cp); } static struct g_consumer * vdev_geom_open_by_path(vdev_t *vd, int check_guid) { struct g_provider *pp; struct g_consumer *cp; uint64_t pguid, vguid; g_topology_assert(); cp = NULL; pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); if (pp != NULL) { ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); - cp = vdev_geom_attach(pp); + cp = vdev_geom_attach(pp, vd); if (cp != NULL && check_guid && ISP2(pp->sectorsize) && pp->sectorsize <= VDEV_PAD_SIZE) { g_topology_unlock(); vdev_geom_read_guids(cp, &pguid, &vguid); g_topology_lock(); if (pguid != spa_guid(vd->vdev_spa) || vguid != vd->vdev_guid) { - vdev_geom_detach(cp, 0); + vdev_geom_close_locked(vd); cp = NULL; ZFS_LOG(1, "guid mismatch for provider %s: " "%ju:%ju != %ju:%ju.", vd->vdev_path, (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid, (uintmax_t)pguid, (uintmax_t)vguid); } else { ZFS_LOG(1, "guid match for provider %s.", vd->vdev_path); } } } return (cp); } static int vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, uint64_t *logical_ashift, uint64_t *physical_ashift) { struct g_provider *pp; struct g_consumer *cp; size_t bufsize; int error; /* * We must have a pathname, and it must be absolute. */ if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return (EINVAL); } vd->vdev_tsd = NULL; DROP_GIANT(); g_topology_lock(); error = 0; if (vd->vdev_spa->spa_splitting_newspa || (vd->vdev_prevstate == VDEV_STATE_UNKNOWN && vd->vdev_spa->spa_load_state == SPA_LOAD_NONE)) { /* * We are dealing with a vdev that hasn't been previously * opened (since boot), and we are not loading an * existing pool configuration. This looks like a * vdev add operation to a new or existing pool. * Assume the user knows what he/she is doing and find * GEOM provider by its name, ignoring GUID mismatches. * * XXPOLICY: It would be safer to only allow a device * that is unlabeled or labeled but missing * GUID information to be opened in this fashion, * unless we are doing a split, in which case we * should allow any guid. */ cp = vdev_geom_open_by_path(vd, 0); } else { /* * Try using the recorded path for this device, but only * accept it if its label data contains the expected GUIDs. */ cp = vdev_geom_open_by_path(vd, 1); if (cp == NULL) { /* * The device at vd->vdev_path doesn't have the * expected GUIDs. The disks might have merely * moved around so try all other GEOM providers * to find one with the right GUIDs. */ cp = vdev_geom_open_by_guids(vd); } } if (cp == NULL) { ZFS_LOG(1, "Provider %s not found.", vd->vdev_path); error = ENOENT; } else if (cp->provider->sectorsize > VDEV_PAD_SIZE || !ISP2(cp->provider->sectorsize)) { ZFS_LOG(1, "Provider %s has unsupported sectorsize.", vd->vdev_path); - vdev_geom_detach(cp, 0); + + vdev_geom_close_locked(vd); error = EINVAL; cp = NULL; } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) { int i; for (i = 0; i < 5; i++) { error = g_access(cp, 0, 1, 0); if (error == 0) break; g_topology_unlock(); tsleep(vd, 0, "vdev", hz / 2); g_topology_lock(); } if (error != 0) { printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n", vd->vdev_path, error); - vdev_geom_detach(cp, 0); + vdev_geom_close_locked(vd); cp = NULL; } } + g_topology_unlock(); PICKUP_GIANT(); if (cp == NULL) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (error); } - - cp->private = vd; - vd->vdev_tsd = cp; pp = cp->provider; /* * Determine the actual size of the device. */ *max_psize = *psize = pp->mediasize; /* * Determine the device's minimum transfer size and preferred * transfer size. */ *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; *physical_ashift = 0; if (pp->stripesize) *physical_ashift = highbit(pp->stripesize) - 1; /* * Clear the nowritecache settings, so that on a vdev_reopen() * we will try again. */ vd->vdev_nowritecache = B_FALSE; - if (vd->vdev_physpath != NULL) - spa_strfree(vd->vdev_physpath); - bufsize = sizeof("/dev/") + strlen(pp->name); - vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP); - snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name); - /* * Determine the device's rotation rate. */ vdev_geom_set_rotation_rate(vd, cp); return (0); } static void vdev_geom_close(vdev_t *vd) { - struct g_consumer *cp; - cp = vd->vdev_tsd; - if (cp == NULL) - return; - vd->vdev_tsd = NULL; - vd->vdev_delayed_close = B_FALSE; - cp->private = NULL; /* XXX locking */ - g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL); + DROP_GIANT(); + g_topology_lock(); + vdev_geom_close_locked(vd); + g_topology_unlock(); + PICKUP_GIANT(); } static void vdev_geom_io_intr(struct bio *bp) { vdev_t *vd; zio_t *zio; zio = bp->bio_caller1; vd = zio->io_vd; zio->io_error = bp->bio_error; if (zio->io_error == 0 && bp->bio_resid != 0) zio->io_error = SET_ERROR(EIO); switch(zio->io_error) { case ENOTSUP: /* * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know * that future attempts will never succeed. In this case * we set a persistent flag so that we don't bother with * requests in the future. */ switch(bp->bio_cmd) { case BIO_FLUSH: vd->vdev_nowritecache = B_TRUE; break; case BIO_DELETE: vd->vdev_notrim = B_TRUE; break; } break; case ENXIO: if (!vd->vdev_remove_wanted) { /* * If provider's error is set we assume it is being * removed. */ if (bp->bio_to->error != 0) { vd->vdev_remove_wanted = B_TRUE; spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); } else if (!vd->vdev_delayed_close) { vd->vdev_delayed_close = B_TRUE; } } break; } g_destroy_bio(bp); zio_interrupt(zio); } static void vdev_geom_io_start(zio_t *zio) { vdev_t *vd; struct g_consumer *cp; struct bio *bp; int error; vd = zio->io_vd; switch (zio->io_type) { case ZIO_TYPE_IOCTL: /* XXPOLICY */ if (!vdev_readable(vd)) { zio->io_error = SET_ERROR(ENXIO); zio_interrupt(zio); return; } else { switch (zio->io_cmd) { case DKIOCFLUSHWRITECACHE: if (zfs_nocacheflush || vdev_geom_bio_flush_disable) break; if (vd->vdev_nowritecache) { zio->io_error = SET_ERROR(ENOTSUP); break; } goto sendreq; default: zio->io_error = SET_ERROR(ENOTSUP); } } zio_execute(zio); return; case ZIO_TYPE_FREE: if (vd->vdev_notrim) { zio->io_error = SET_ERROR(ENOTSUP); } else if (!vdev_geom_bio_delete_disable) { goto sendreq; } zio_execute(zio); return; } sendreq: ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE || zio->io_type == ZIO_TYPE_FREE || zio->io_type == ZIO_TYPE_IOCTL); cp = vd->vdev_tsd; if (cp == NULL) { zio->io_error = SET_ERROR(ENXIO); zio_interrupt(zio); return; } bp = g_alloc_bio(); bp->bio_caller1 = zio; switch (zio->io_type) { case ZIO_TYPE_READ: case ZIO_TYPE_WRITE: bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE; bp->bio_data = zio->io_data; bp->bio_offset = zio->io_offset; bp->bio_length = zio->io_size; break; case ZIO_TYPE_FREE: bp->bio_cmd = BIO_DELETE; bp->bio_data = NULL; bp->bio_offset = zio->io_offset; bp->bio_length = zio->io_size; break; case ZIO_TYPE_IOCTL: bp->bio_cmd = BIO_FLUSH; bp->bio_flags |= BIO_ORDERED; bp->bio_data = NULL; bp->bio_offset = cp->provider->mediasize; bp->bio_length = 0; break; } bp->bio_done = vdev_geom_io_intr; g_io_request(bp, cp); } static void vdev_geom_io_done(zio_t *zio) { } static void vdev_geom_hold(vdev_t *vd) { } static void vdev_geom_rele(vdev_t *vd) { } vdev_ops_t vdev_geom_ops = { vdev_geom_open, vdev_geom_close, vdev_default_asize, vdev_geom_io_start, vdev_geom_io_done, NULL, vdev_geom_hold, vdev_geom_rele, VDEV_TYPE_DISK, /* name of this vdev type */ B_TRUE /* leaf vdev */ }; Index: projects/clang380-import/sys/cddl/contrib/opensolaris =================================================================== --- projects/clang380-import/sys/cddl/contrib/opensolaris (revision 293686) +++ projects/clang380-import/sys/cddl/contrib/opensolaris (revision 293687) Property changes on: projects/clang380-import/sys/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/cddl/contrib/opensolaris:r292913-293685 Index: projects/clang380-import/sys/compat/ia32/ia32_sysvec.c =================================================================== --- projects/clang380-import/sys/compat/ia32/ia32_sysvec.c (revision 293686) +++ projects/clang380-import/sys/compat/ia32/ia32_sysvec.c (revision 293687) @@ -1,239 +1,240 @@ /*- * Copyright (c) 2002 Doug Rabson * Copyright (c) 2003 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include CTASSERT(sizeof(struct ia32_mcontext) == 640); CTASSERT(sizeof(struct ia32_ucontext) == 704); CTASSERT(sizeof(struct ia32_sigframe) == 800); CTASSERT(sizeof(struct siginfo32) == 64); #ifdef COMPAT_FREEBSD4 CTASSERT(sizeof(struct ia32_mcontext4) == 260); CTASSERT(sizeof(struct ia32_ucontext4) == 324); CTASSERT(sizeof(struct ia32_sigframe4) == 408); #endif extern const char *freebsd32_syscallnames[]; static SYSCTL_NODE(_compat, OID_AUTO, ia32, CTLFLAG_RW, 0, "ia32 mode"); static u_long ia32_maxdsiz = IA32_MAXDSIZ; SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxdsiz, CTLFLAG_RWTUN, &ia32_maxdsiz, 0, ""); u_long ia32_maxssiz = IA32_MAXSSIZ; SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxssiz, CTLFLAG_RWTUN, &ia32_maxssiz, 0, ""); static u_long ia32_maxvmem = IA32_MAXVMEM; SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxvmem, CTLFLAG_RWTUN, &ia32_maxvmem, 0, ""); struct sysentvec ia32_freebsd_sysvec = { .sv_size = FREEBSD32_SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = elf32_freebsd_fixup, .sv_sendsig = ia32_sendsig, .sv_sigcode = ia32_sigcode, .sv_szsigcode = &sz_ia32_sigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = elf32_coredump, .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = IA32_PAGE_SIZE, .sv_minuser = FREEBSD32_MINUSER, .sv_maxuser = FREEBSD32_MAXUSER, .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = ia32_setregs, .sv_fixlimit = ia32_fixlimit, .sv_maxssiz = &ia32_maxssiz, .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | #ifdef __amd64__ SV_SHP | SV_TIMEKEEP #else 0 #endif , .sv_set_syscall_retval = ia32_set_syscall_retval, .sv_fetch_syscall_args = ia32_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; INIT_SYSENTVEC(elf_ia32_sysvec, &ia32_freebsd_sysvec); static Elf32_Brandinfo ia32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &ia32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(ia32, SI_SUB_EXEC, SI_ORDER_MIDDLE, (sysinit_cfunc_t) elf32_insert_brand_entry, &ia32_brand_info); static Elf32_Brandinfo ia32_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &ia32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oia32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &ia32_brand_oinfo); static Elf32_Brandinfo kia32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/lib/ld.so.1", .sysvec = &ia32_freebsd_sysvec, .brand_note = &elf32_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kia32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &kia32_brand_info); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { void *buf; size_t len; len = 0; if (use_xsave) { if (dst != NULL) { fpugetregs(td); len += elf32_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf32_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } *off = len; } void ia32_fixlimit(struct rlimit *rl, int which) { switch (which) { case RLIMIT_DATA: if (ia32_maxdsiz != 0) { if (rl->rlim_cur > ia32_maxdsiz) rl->rlim_cur = ia32_maxdsiz; if (rl->rlim_max > ia32_maxdsiz) rl->rlim_max = ia32_maxdsiz; } break; case RLIMIT_STACK: if (ia32_maxssiz != 0) { if (rl->rlim_cur > ia32_maxssiz) rl->rlim_cur = ia32_maxssiz; if (rl->rlim_max > ia32_maxssiz) rl->rlim_max = ia32_maxssiz; } break; case RLIMIT_VMEM: if (ia32_maxvmem != 0) { if (rl->rlim_cur > ia32_maxvmem) rl->rlim_cur = ia32_maxvmem; if (rl->rlim_max > ia32_maxvmem) rl->rlim_max = ia32_maxvmem; } break; } } Index: projects/clang380-import/sys/compat/linux/linux_futex.c =================================================================== --- projects/clang380-import/sys/compat/linux/linux_futex.c (revision 293686) +++ projects/clang380-import/sys/compat/linux/linux_futex.c (revision 293687) @@ -1,1276 +1,1280 @@ /* $NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */ /*- * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Emmanuel Dreyfus * 4. The name of the author may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #if 0 __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $"); #endif #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #include #include #include /* DTrace init */ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); /** * Futex part for the special DTrace module "locks". */ LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, locked, "struct mtx *"); LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, unlock, "struct mtx *"); /** * Per futex probes. */ LIN_SDT_PROBE_DEFINE1(futex, futex, create, "struct sx *"); LIN_SDT_PROBE_DEFINE1(futex, futex, destroy, "struct sx *"); /** * DTrace probes in this module. */ LIN_SDT_PROBE_DEFINE2(futex, futex_put, entry, "struct futex *", "struct waiting_proc *"); LIN_SDT_PROBE_DEFINE3(futex, futex_put, destroy, "uint32_t *", "uint32_t", "int"); LIN_SDT_PROBE_DEFINE3(futex, futex_put, unlock, "uint32_t *", "uint32_t", "int"); LIN_SDT_PROBE_DEFINE0(futex, futex_put, return); LIN_SDT_PROBE_DEFINE3(futex, futex_get0, entry, "uint32_t *", "struct futex **", "uint32_t"); LIN_SDT_PROBE_DEFINE1(futex, futex_get0, umtx_key_get_error, "int"); LIN_SDT_PROBE_DEFINE3(futex, futex_get0, shared, "uint32_t *", "uint32_t", "int"); LIN_SDT_PROBE_DEFINE1(futex, futex_get0, null, "uint32_t *"); LIN_SDT_PROBE_DEFINE3(futex, futex_get0, new, "uint32_t *", "uint32_t", "int"); LIN_SDT_PROBE_DEFINE1(futex, futex_get0, return, "int"); LIN_SDT_PROBE_DEFINE3(futex, futex_get, entry, "uint32_t *", "struct waiting_proc **", "struct futex **"); LIN_SDT_PROBE_DEFINE0(futex, futex_get, error); LIN_SDT_PROBE_DEFINE1(futex, futex_get, return, "int"); LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, entry, "struct futex *", "struct waiting_proc **", "int"); LIN_SDT_PROBE_DEFINE5(futex, futex_sleep, requeue_error, "int", "uint32_t *", "struct waiting_proc *", "uint32_t *", "uint32_t"); LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, sleep_error, "int", "uint32_t *", "struct waiting_proc *"); LIN_SDT_PROBE_DEFINE1(futex, futex_sleep, return, "int"); LIN_SDT_PROBE_DEFINE3(futex, futex_wake, entry, "struct futex *", "int", "uint32_t"); LIN_SDT_PROBE_DEFINE3(futex, futex_wake, iterate, "uint32_t", "struct waiting_proc *", "uint32_t"); LIN_SDT_PROBE_DEFINE1(futex, futex_wake, wakeup, "struct waiting_proc *"); LIN_SDT_PROBE_DEFINE1(futex, futex_wake, return, "int"); LIN_SDT_PROBE_DEFINE4(futex, futex_requeue, entry, "struct futex *", "int", "struct futex *", "int"); LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, wakeup, "struct waiting_proc *"); LIN_SDT_PROBE_DEFINE3(futex, futex_requeue, requeue, "uint32_t *", "struct waiting_proc *", "uint32_t"); LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, return, "int"); LIN_SDT_PROBE_DEFINE4(futex, futex_wait, entry, "struct futex *", "struct waiting_proc **", "int", "uint32_t"); LIN_SDT_PROBE_DEFINE1(futex, futex_wait, sleep_error, "int"); LIN_SDT_PROBE_DEFINE1(futex, futex_wait, return, "int"); LIN_SDT_PROBE_DEFINE3(futex, futex_atomic_op, entry, "struct thread *", "int", "uint32_t"); LIN_SDT_PROBE_DEFINE4(futex, futex_atomic_op, decoded_op, "int", "int", "int", "int"); LIN_SDT_PROBE_DEFINE0(futex, futex_atomic_op, missing_access_check); LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_op, "int"); LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_cmp, "int"); LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, return, "int"); LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, entry, "struct thread *", "struct linux_sys_futex_args *"); LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_clockswitch); LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, itimerfix_error, "int"); LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, copyin_error, "int"); LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, invalid_cmp_requeue_use); LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wait, "uint32_t *", "uint32_t", "uint32_t"); LIN_SDT_PROBE_DEFINE4(futex, linux_sys_futex, debug_wait_value_neq, "uint32_t *", "uint32_t", "int", "uint32_t"); LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wake, "uint32_t *", "uint32_t", "uint32_t"); LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_cmp_requeue, "uint32_t *", "uint32_t", "uint32_t", "uint32_t *", "struct l_timespec *"); LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, debug_cmp_requeue_value_neq, "uint32_t", "int"); LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_wake_op, "uint32_t *", "int", "uint32_t", "uint32_t *", "uint32_t"); LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unhandled_efault); LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_lock_pi); LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_unlock_pi); LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_trylock_pi); LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, deprecated_requeue); LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_wait_requeue_pi); LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_cmp_requeue_pi); LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, unknown_operation, "int"); LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, return, "int"); LIN_SDT_PROBE_DEFINE2(futex, linux_set_robust_list, entry, "struct thread *", "struct linux_set_robust_list_args *"); LIN_SDT_PROBE_DEFINE0(futex, linux_set_robust_list, size_error); LIN_SDT_PROBE_DEFINE1(futex, linux_set_robust_list, return, "int"); LIN_SDT_PROBE_DEFINE2(futex, linux_get_robust_list, entry, "struct thread *", "struct linux_get_robust_list_args *"); LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int"); LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int"); LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry, "struct linux_emuldata *", "uint32_t *", "unsigned int"); LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int"); LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int"); LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry, "struct linux_robust_list **", "struct linux_robust_list **", "unsigned int *"); LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int"); LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int"); LIN_SDT_PROBE_DEFINE2(futex, release_futexes, entry, "struct thread *", "struct linux_emuldata *"); LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int"); LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return); struct futex; struct waiting_proc { uint32_t wp_flags; struct futex *wp_futex; TAILQ_ENTRY(waiting_proc) wp_list; }; struct futex { struct sx f_lck; uint32_t *f_uaddr; /* user-supplied value, for debug */ struct umtx_key f_key; uint32_t f_refcount; uint32_t f_bitset; LIST_ENTRY(futex) f_list; TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc; }; struct futex_list futex_list; #define FUTEX_LOCK(f) sx_xlock(&(f)->f_lck) #define FUTEX_UNLOCK(f) sx_xunlock(&(f)->f_lck) #define FUTEX_INIT(f) do { \ sx_init_flags(&(f)->f_lck, "ftlk", \ SX_DUPOK); \ LIN_SDT_PROBE1(futex, futex, create, \ &(f)->f_lck); \ } while (0) #define FUTEX_DESTROY(f) do { \ LIN_SDT_PROBE1(futex, futex, destroy, \ &(f)->f_lck); \ sx_destroy(&(f)->f_lck); \ } while (0) #define FUTEX_ASSERT_LOCKED(f) sx_assert(&(f)->f_lck, SA_XLOCKED) struct mtx futex_mtx; /* protects the futex list */ #define FUTEXES_LOCK do { \ mtx_lock(&futex_mtx); \ LIN_SDT_PROBE1(locks, futex_mtx, \ locked, &futex_mtx); \ } while (0) #define FUTEXES_UNLOCK do { \ LIN_SDT_PROBE1(locks, futex_mtx, \ unlock, &futex_mtx); \ mtx_unlock(&futex_mtx); \ } while (0) /* flags for futex_get() */ #define FUTEX_CREATE_WP 0x1 /* create waiting_proc */ #define FUTEX_DONTCREATE 0x2 /* don't create futex if not exists */ #define FUTEX_DONTEXISTS 0x4 /* return EINVAL if futex exists */ #define FUTEX_SHARED 0x8 /* shared futex */ /* wp_flags */ #define FUTEX_WP_REQUEUED 0x1 /* wp requeued - wp moved from wp_list * of futex where thread sleep to wp_list * of another futex. */ #define FUTEX_WP_REMOVED 0x2 /* wp is woken up and removed from futex * wp_list to prevent double wakeup. */ static void futex_put(struct futex *, struct waiting_proc *); static int futex_get0(uint32_t *, struct futex **f, uint32_t); static int futex_get(uint32_t *, struct waiting_proc **, struct futex **, uint32_t); static int futex_sleep(struct futex *, struct waiting_proc *, int); static int futex_wake(struct futex *, int, uint32_t); static int futex_requeue(struct futex *, int, struct futex *, int); static int futex_wait(struct futex *, struct waiting_proc *, int, uint32_t); static int futex_atomic_op(struct thread *, int, uint32_t *); static int handle_futex_death(struct linux_emuldata *, uint32_t *, unsigned int); static int fetch_robust_entry(struct linux_robust_list **, struct linux_robust_list **, unsigned int *); /* support.s */ int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval); int futex_addl(int oparg, uint32_t *uaddr, int *oldval); int futex_orl(int oparg, uint32_t *uaddr, int *oldval); int futex_andl(int oparg, uint32_t *uaddr, int *oldval); int futex_xorl(int oparg, uint32_t *uaddr, int *oldval); static void futex_put(struct futex *f, struct waiting_proc *wp) { LIN_SDT_PROBE2(futex, futex_put, entry, f, wp); FUTEX_ASSERT_LOCKED(f); if (wp != NULL) { if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0) TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); free(wp, M_FUTEX_WP); } FUTEXES_LOCK; if (--f->f_refcount == 0) { LIST_REMOVE(f, f_list); FUTEXES_UNLOCK; FUTEX_UNLOCK(f); LIN_SDT_PROBE3(futex, futex_put, destroy, f->f_uaddr, f->f_refcount, f->f_key.shared); LINUX_CTR3(sys_futex, "futex_put destroy uaddr %p ref %d " "shared %d", f->f_uaddr, f->f_refcount, f->f_key.shared); umtx_key_release(&f->f_key); FUTEX_DESTROY(f); free(f, M_FUTEX); LIN_SDT_PROBE0(futex, futex_put, return); return; } LIN_SDT_PROBE3(futex, futex_put, unlock, f->f_uaddr, f->f_refcount, f->f_key.shared); LINUX_CTR3(sys_futex, "futex_put uaddr %p ref %d shared %d", f->f_uaddr, f->f_refcount, f->f_key.shared); FUTEXES_UNLOCK; FUTEX_UNLOCK(f); LIN_SDT_PROBE0(futex, futex_put, return); } static int futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags) { struct futex *f, *tmpf; struct umtx_key key; int error; LIN_SDT_PROBE3(futex, futex_get0, entry, uaddr, newf, flags); *newf = tmpf = NULL; error = umtx_key_get(uaddr, TYPE_FUTEX, (flags & FUTEX_SHARED) ? AUTO_SHARE : THREAD_SHARE, &key); if (error) { LIN_SDT_PROBE1(futex, futex_get0, umtx_key_get_error, error); LIN_SDT_PROBE1(futex, futex_get0, return, error); return (error); } retry: FUTEXES_LOCK; LIST_FOREACH(f, &futex_list, f_list) { if (umtx_key_match(&f->f_key, &key)) { if (tmpf != NULL) { FUTEX_UNLOCK(tmpf); FUTEX_DESTROY(tmpf); free(tmpf, M_FUTEX); } if (flags & FUTEX_DONTEXISTS) { FUTEXES_UNLOCK; umtx_key_release(&key); LIN_SDT_PROBE1(futex, futex_get0, return, EINVAL); return (EINVAL); } /* * Increment refcount of the found futex to * prevent it from deallocation before FUTEX_LOCK() */ ++f->f_refcount; FUTEXES_UNLOCK; umtx_key_release(&key); FUTEX_LOCK(f); *newf = f; LIN_SDT_PROBE3(futex, futex_get0, shared, uaddr, f->f_refcount, f->f_key.shared); LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d", uaddr, f->f_refcount, f->f_key.shared); LIN_SDT_PROBE1(futex, futex_get0, return, 0); return (0); } } if (flags & FUTEX_DONTCREATE) { FUTEXES_UNLOCK; umtx_key_release(&key); LIN_SDT_PROBE1(futex, futex_get0, null, uaddr); LINUX_CTR1(sys_futex, "futex_get uaddr %p null", uaddr); LIN_SDT_PROBE1(futex, futex_get0, return, 0); return (0); } if (tmpf == NULL) { FUTEXES_UNLOCK; tmpf = malloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO); tmpf->f_uaddr = uaddr; tmpf->f_key = key; tmpf->f_refcount = 1; tmpf->f_bitset = FUTEX_BITSET_MATCH_ANY; FUTEX_INIT(tmpf); TAILQ_INIT(&tmpf->f_waiting_proc); /* * Lock the new futex before an insert into the futex_list * to prevent futex usage by other. */ FUTEX_LOCK(tmpf); goto retry; } LIST_INSERT_HEAD(&futex_list, tmpf, f_list); FUTEXES_UNLOCK; LIN_SDT_PROBE3(futex, futex_get0, new, uaddr, tmpf->f_refcount, tmpf->f_key.shared); LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d new", uaddr, tmpf->f_refcount, tmpf->f_key.shared); *newf = tmpf; LIN_SDT_PROBE1(futex, futex_get0, return, 0); return (0); } static int futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f, uint32_t flags) { int error; LIN_SDT_PROBE3(futex, futex_get, entry, uaddr, wp, f); if (flags & FUTEX_CREATE_WP) { *wp = malloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK); (*wp)->wp_flags = 0; } error = futex_get0(uaddr, f, flags); if (error) { LIN_SDT_PROBE0(futex, futex_get, error); if (flags & FUTEX_CREATE_WP) free(*wp, M_FUTEX_WP); LIN_SDT_PROBE1(futex, futex_get, return, error); return (error); } if (flags & FUTEX_CREATE_WP) { TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list); (*wp)->wp_futex = *f; } LIN_SDT_PROBE1(futex, futex_get, return, error); return (error); } static int futex_sleep(struct futex *f, struct waiting_proc *wp, int timeout) { int error; FUTEX_ASSERT_LOCKED(f); LIN_SDT_PROBE3(futex, futex_sleep, entry, f, wp, timeout); LINUX_CTR4(sys_futex, "futex_sleep enter uaddr %p wp %p timo %d ref %d", f->f_uaddr, wp, timeout, f->f_refcount); error = sx_sleep(wp, &f->f_lck, PCATCH, "futex", timeout); if (wp->wp_flags & FUTEX_WP_REQUEUED) { KASSERT(f != wp->wp_futex, ("futex != wp_futex")); if (error) { LIN_SDT_PROBE5(futex, futex_sleep, requeue_error, error, f->f_uaddr, wp, wp->wp_futex->f_uaddr, wp->wp_futex->f_refcount); } LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p wp" " %p requeued uaddr %p ref %d", error, f->f_uaddr, wp, wp->wp_futex->f_uaddr, wp->wp_futex->f_refcount); futex_put(f, NULL); f = wp->wp_futex; FUTEX_LOCK(f); } else { if (error) { LIN_SDT_PROBE3(futex, futex_sleep, sleep_error, error, f->f_uaddr, wp); } LINUX_CTR3(sys_futex, "futex_sleep out error %d uaddr %p wp %p", error, f->f_uaddr, wp); } futex_put(f, wp); LIN_SDT_PROBE1(futex, futex_sleep, return, error); return (error); } static int futex_wake(struct futex *f, int n, uint32_t bitset) { struct waiting_proc *wp, *wpt; int count = 0; LIN_SDT_PROBE3(futex, futex_wake, entry, f, n, bitset); if (bitset == 0) { LIN_SDT_PROBE1(futex, futex_wake, return, EINVAL); return (EINVAL); } FUTEX_ASSERT_LOCKED(f); TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) { LIN_SDT_PROBE3(futex, futex_wake, iterate, f->f_uaddr, wp, f->f_refcount); LINUX_CTR3(sys_futex, "futex_wake uaddr %p wp %p ref %d", f->f_uaddr, wp, f->f_refcount); /* * Unless we find a matching bit in * the bitset, continue searching. */ if (!(wp->wp_futex->f_bitset & bitset)) continue; wp->wp_flags |= FUTEX_WP_REMOVED; TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); LIN_SDT_PROBE1(futex, futex_wake, wakeup, wp); wakeup_one(wp); if (++count == n) break; } LIN_SDT_PROBE1(futex, futex_wake, return, count); return (count); } static int futex_requeue(struct futex *f, int n, struct futex *f2, int n2) { struct waiting_proc *wp, *wpt; int count = 0; LIN_SDT_PROBE4(futex, futex_requeue, entry, f, n, f2, n2); FUTEX_ASSERT_LOCKED(f); FUTEX_ASSERT_LOCKED(f2); TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) { if (++count <= n) { LINUX_CTR2(sys_futex, "futex_req_wake uaddr %p wp %p", f->f_uaddr, wp); wp->wp_flags |= FUTEX_WP_REMOVED; TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); LIN_SDT_PROBE1(futex, futex_requeue, wakeup, wp); wakeup_one(wp); } else { LIN_SDT_PROBE3(futex, futex_requeue, requeue, f->f_uaddr, wp, f2->f_uaddr); LINUX_CTR3(sys_futex, "futex_requeue uaddr %p wp %p to %p", f->f_uaddr, wp, f2->f_uaddr); wp->wp_flags |= FUTEX_WP_REQUEUED; /* Move wp to wp_list of f2 futex */ TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list); /* * Thread which sleeps on wp after waking should * acquire f2 lock, so increment refcount of f2 to * prevent it from premature deallocation. */ wp->wp_futex = f2; FUTEXES_LOCK; ++f2->f_refcount; FUTEXES_UNLOCK; if (count - n >= n2) break; } } LIN_SDT_PROBE1(futex, futex_requeue, return, count); return (count); } static int futex_wait(struct futex *f, struct waiting_proc *wp, int timeout_hz, uint32_t bitset) { int error; LIN_SDT_PROBE4(futex, futex_wait, entry, f, wp, timeout_hz, bitset); if (bitset == 0) { LIN_SDT_PROBE1(futex, futex_wait, return, EINVAL); return (EINVAL); } f->f_bitset = bitset; error = futex_sleep(f, wp, timeout_hz); if (error) LIN_SDT_PROBE1(futex, futex_wait, sleep_error, error); if (error == EWOULDBLOCK) error = ETIMEDOUT; LIN_SDT_PROBE1(futex, futex_wait, return, error); return (error); } static int futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; int oldval = 0, ret; LIN_SDT_PROBE3(futex, futex_atomic_op, entry, td, encoded_op, uaddr); if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; LIN_SDT_PROBE4(futex, futex_atomic_op, decoded_op, op, cmp, oparg, cmparg); /* XXX: Linux verifies access here and returns EFAULT */ LIN_SDT_PROBE0(futex, futex_atomic_op, missing_access_check); switch (op) { case FUTEX_OP_SET: ret = futex_xchgl(oparg, uaddr, &oldval); break; case FUTEX_OP_ADD: ret = futex_addl(oparg, uaddr, &oldval); break; case FUTEX_OP_OR: ret = futex_orl(oparg, uaddr, &oldval); break; case FUTEX_OP_ANDN: ret = futex_andl(~oparg, uaddr, &oldval); break; case FUTEX_OP_XOR: ret = futex_xorl(oparg, uaddr, &oldval); break; default: LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_op, op); ret = -ENOSYS; break; } if (ret) { LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret); return (ret); } switch (cmp) { case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; default: LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_cmp, cmp); ret = -ENOSYS; } LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret); return (ret); } int linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) { int clockrt, nrwake, op_ret, ret; struct linux_pemuldata *pem; struct waiting_proc *wp; struct futex *f, *f2; struct l_timespec ltimeout; struct timespec timeout; struct timeval utv, ctv; int timeout_hz; int error; uint32_t flags, val; LIN_SDT_PROBE2(futex, linux_sys_futex, entry, td, args); if (args->op & LINUX_FUTEX_PRIVATE_FLAG) { flags = 0; args->op &= ~LINUX_FUTEX_PRIVATE_FLAG; } else flags = FUTEX_SHARED; /* * Currently support for switching between CLOCK_MONOTONIC and * CLOCK_REALTIME is not present. However Linux forbids the use of * FUTEX_CLOCK_REALTIME with any op except FUTEX_WAIT_BITSET and * FUTEX_WAIT_REQUEUE_PI. */ clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME; args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME; if (clockrt && args->op != LINUX_FUTEX_WAIT_BITSET && args->op != LINUX_FUTEX_WAIT_REQUEUE_PI) { LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_clockswitch); LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); } error = 0; f = f2 = NULL; switch (args->op) { case LINUX_FUTEX_WAIT: args->val3 = FUTEX_BITSET_MATCH_ANY; /* FALLTHROUGH */ case LINUX_FUTEX_WAIT_BITSET: LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wait, args->uaddr, args->val, args->val3); LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x", args->uaddr, args->val, args->val3); if (args->timeout != NULL) { error = copyin(args->timeout, <imeout, sizeof(ltimeout)); if (error) { LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, error); LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } error = linux_to_native_timespec(&timeout, <imeout); if (error) return (error); TIMESPEC_TO_TIMEVAL(&utv, &timeout); error = itimerfix(&utv); if (error) { LIN_SDT_PROBE1(futex, linux_sys_futex, itimerfix_error, error); LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } if (clockrt) { microtime(&ctv); timevalsub(&utv, &ctv); } else if (args->op == LINUX_FUTEX_WAIT_BITSET) { microuptime(&ctv); timevalsub(&utv, &ctv); } if (utv.tv_sec < 0) timevalclear(&utv); timeout_hz = tvtohz(&utv); } else timeout_hz = 0; error = futex_get(args->uaddr, &wp, &f, flags | FUTEX_CREATE_WP); if (error) { LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } error = copyin(args->uaddr, &val, sizeof(val)); if (error) { LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, error); LINUX_CTR1(sys_futex, "WAIT copyin failed %d", error); futex_put(f, wp); LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } if (val != args->val) { LIN_SDT_PROBE4(futex, linux_sys_futex, debug_wait_value_neq, args->uaddr, args->val, val, args->val3); LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x != uval 0x%x", args->uaddr, args->val, val); futex_put(f, wp); LIN_SDT_PROBE1(futex, linux_sys_futex, return, EWOULDBLOCK); return (EWOULDBLOCK); } error = futex_wait(f, wp, timeout_hz, args->val3); break; case LINUX_FUTEX_WAKE: args->val3 = FUTEX_BITSET_MATCH_ANY; /* FALLTHROUGH */ case LINUX_FUTEX_WAKE_BITSET: LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wake, args->uaddr, args->val, args->val3); LINUX_CTR3(sys_futex, "WAKE uaddr %p nrwake 0x%x bitset 0x%x", args->uaddr, args->val, args->val3); error = futex_get(args->uaddr, NULL, &f, flags | FUTEX_DONTCREATE); if (error) { LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } if (f == NULL) { td->td_retval[0] = 0; LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } td->td_retval[0] = futex_wake(f, args->val, args->val3); futex_put(f, NULL); break; case LINUX_FUTEX_CMP_REQUEUE: LIN_SDT_PROBE5(futex, linux_sys_futex, debug_cmp_requeue, args->uaddr, args->val, args->val3, args->uaddr2, args->timeout); LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p " "nrwake 0x%x uval 0x%x uaddr2 %p nrequeue 0x%x", args->uaddr, args->val, args->val3, args->uaddr2, args->timeout); /* * Linux allows this, we would not, it is an incorrect * usage of declared ABI, so return EINVAL. */ if (args->uaddr == args->uaddr2) { LIN_SDT_PROBE0(futex, linux_sys_futex, invalid_cmp_requeue_use); LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL); return (EINVAL); } error = futex_get(args->uaddr, NULL, &f, flags); if (error) { LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } /* * To avoid deadlocks return EINVAL if second futex * exists at this time. * * Glibc fall back to FUTEX_WAKE in case of any error * returned by FUTEX_CMP_REQUEUE. */ error = futex_get(args->uaddr2, NULL, &f2, flags | FUTEX_DONTEXISTS); if (error) { futex_put(f, NULL); LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } error = copyin(args->uaddr, &val, sizeof(val)); if (error) { LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, error); LINUX_CTR1(sys_futex, "CMP_REQUEUE copyin failed %d", error); futex_put(f2, NULL); futex_put(f, NULL); LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } if (val != args->val3) { LIN_SDT_PROBE2(futex, linux_sys_futex, debug_cmp_requeue_value_neq, args->val, val); LINUX_CTR2(sys_futex, "CMP_REQUEUE val 0x%x != uval 0x%x", args->val, val); futex_put(f2, NULL); futex_put(f, NULL); LIN_SDT_PROBE1(futex, linux_sys_futex, return, EAGAIN); return (EAGAIN); } nrwake = (int)(unsigned long)args->timeout; td->td_retval[0] = futex_requeue(f, args->val, f2, nrwake); futex_put(f2, NULL); futex_put(f, NULL); break; case LINUX_FUTEX_WAKE_OP: LIN_SDT_PROBE5(futex, linux_sys_futex, debug_wake_op, args->uaddr, args->op, args->val, args->uaddr2, args->val3); LINUX_CTR5(sys_futex, "WAKE_OP " "uaddr %p nrwake 0x%x uaddr2 %p op 0x%x nrwake2 0x%x", args->uaddr, args->val, args->uaddr2, args->val3, args->timeout); error = futex_get(args->uaddr, NULL, &f, flags); if (error) { LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } if (args->uaddr != args->uaddr2) error = futex_get(args->uaddr2, NULL, &f2, flags); if (error) { futex_put(f, NULL); LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } /* * This function returns positive number as results and * negative as errors */ op_ret = futex_atomic_op(td, args->val3, args->uaddr2); LINUX_CTR2(sys_futex, "WAKE_OP atomic_op uaddr %p ret 0x%x", args->uaddr, op_ret); if (op_ret < 0) { /* XXX: We don't handle the EFAULT yet. */ if (op_ret != -EFAULT) { if (f2 != NULL) futex_put(f2, NULL); futex_put(f, NULL); LIN_SDT_PROBE1(futex, linux_sys_futex, return, -op_ret); return (-op_ret); } else { LIN_SDT_PROBE0(futex, linux_sys_futex, unhandled_efault); } if (f2 != NULL) futex_put(f2, NULL); futex_put(f, NULL); LIN_SDT_PROBE1(futex, linux_sys_futex, return, EFAULT); return (EFAULT); } ret = futex_wake(f, args->val, args->val3); if (op_ret > 0) { op_ret = 0; nrwake = (int)(unsigned long)args->timeout; if (f2 != NULL) op_ret += futex_wake(f2, nrwake, args->val3); else op_ret += futex_wake(f, nrwake, args->val3); ret += op_ret; } if (f2 != NULL) futex_put(f2, NULL); futex_put(f, NULL); td->td_retval[0] = ret; break; case LINUX_FUTEX_LOCK_PI: /* not yet implemented */ pem = pem_find(td->td_proc); if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { linux_msg(td, "linux_sys_futex: " "unsupported futex_pi op\n"); pem->flags |= LINUX_XUNSUP_FUTEXPIOP; LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_lock_pi); } LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); case LINUX_FUTEX_UNLOCK_PI: /* not yet implemented */ pem = pem_find(td->td_proc); if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { linux_msg(td, "linux_sys_futex: " "unsupported futex_pi op\n"); pem->flags |= LINUX_XUNSUP_FUTEXPIOP; LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_unlock_pi); } LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); case LINUX_FUTEX_TRYLOCK_PI: /* not yet implemented */ pem = pem_find(td->td_proc); if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { linux_msg(td, "linux_sys_futex: " "unsupported futex_pi op\n"); pem->flags |= LINUX_XUNSUP_FUTEXPIOP; LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_trylock_pi); } LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); case LINUX_FUTEX_REQUEUE: /* * Glibc does not use this operation since version 2.3.3, * as it is racy and replaced by FUTEX_CMP_REQUEUE operation. * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when * FUTEX_REQUEUE returned EINVAL. */ pem = pem_find(td->td_proc); if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) { linux_msg(td, "linux_sys_futex: " "unsupported futex_requeue op\n"); pem->flags |= LINUX_XDEPR_REQUEUEOP; LIN_SDT_PROBE0(futex, linux_sys_futex, deprecated_requeue); } LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL); return (EINVAL); case LINUX_FUTEX_WAIT_REQUEUE_PI: /* not yet implemented */ pem = pem_find(td->td_proc); if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { linux_msg(td, "linux_sys_futex: " "unsupported futex_pi op\n"); pem->flags |= LINUX_XUNSUP_FUTEXPIOP; LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_wait_requeue_pi); } LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); case LINUX_FUTEX_CMP_REQUEUE_PI: /* not yet implemented */ pem = pem_find(td->td_proc); if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { linux_msg(td, "linux_sys_futex: " "unsupported futex_pi op\n"); pem->flags |= LINUX_XUNSUP_FUTEXPIOP; LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_cmp_requeue_pi); } LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); default: linux_msg(td, "linux_sys_futex: unknown op %d\n", args->op); LIN_SDT_PROBE1(futex, linux_sys_futex, unknown_operation, args->op); LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); } LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); return (error); } int linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args) { struct linux_emuldata *em; LIN_SDT_PROBE2(futex, linux_set_robust_list, entry, td, args); if (args->len != sizeof(struct linux_robust_list_head)) { LIN_SDT_PROBE0(futex, linux_set_robust_list, size_error); LIN_SDT_PROBE1(futex, linux_set_robust_list, return, EINVAL); return (EINVAL); } em = em_find(td); em->robust_futexes = args->head; LIN_SDT_PROBE1(futex, linux_set_robust_list, return, 0); return (0); } int linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args) { struct linux_emuldata *em; struct linux_robust_list_head *head; l_size_t len = sizeof(struct linux_robust_list_head); struct thread *td2; int error = 0; LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args); if (!args->pid) { em = em_find(td); KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); head = em->robust_futexes; } else { td2 = tdfind(args->pid, -1); if (td2 == NULL) { LIN_SDT_PROBE1(futex, linux_get_robust_list, return, ESRCH); return (ESRCH); } - if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) + if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) { + LIN_SDT_PROBE1(futex, linux_get_robust_list, return, + EPERM); + PROC_UNLOCK(td2->td_proc); return (EPERM); + } em = em_find(td2); KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); /* XXX: ptrace? */ if (priv_check(td, PRIV_CRED_SETUID) || priv_check(td, PRIV_CRED_SETEUID) || p_candebug(td, td2->td_proc)) { PROC_UNLOCK(td2->td_proc); LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EPERM); return (EPERM); } head = em->robust_futexes; PROC_UNLOCK(td2->td_proc); } error = copyout(&len, args->len, sizeof(l_size_t)); if (error) { LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error, error); LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EFAULT); return (EFAULT); } error = copyout(head, args->head, sizeof(struct linux_robust_list_head)); if (error) { LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error, error); } LIN_SDT_PROBE1(futex, linux_get_robust_list, return, error); return (error); } static int handle_futex_death(struct linux_emuldata *em, uint32_t *uaddr, unsigned int pi) { uint32_t uval, nval, mval; struct futex *f; int error; LIN_SDT_PROBE3(futex, handle_futex_death, entry, em, uaddr, pi); retry: error = copyin(uaddr, &uval, 4); if (error) { LIN_SDT_PROBE1(futex, handle_futex_death, copyin_error, error); LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT); return (EFAULT); } if ((uval & FUTEX_TID_MASK) == em->em_tid) { mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; nval = casuword32(uaddr, uval, mval); if (nval == -1) { LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT); return (EFAULT); } if (nval != uval) goto retry; if (!pi && (uval & FUTEX_WAITERS)) { error = futex_get(uaddr, NULL, &f, FUTEX_DONTCREATE | FUTEX_SHARED); if (error) { LIN_SDT_PROBE1(futex, handle_futex_death, return, error); return (error); } if (f != NULL) { futex_wake(f, 1, FUTEX_BITSET_MATCH_ANY); futex_put(f, NULL); } } } LIN_SDT_PROBE1(futex, handle_futex_death, return, 0); return (0); } static int fetch_robust_entry(struct linux_robust_list **entry, struct linux_robust_list **head, unsigned int *pi) { l_ulong uentry; int error; LIN_SDT_PROBE3(futex, fetch_robust_entry, entry, entry, head, pi); error = copyin((const void *)head, &uentry, sizeof(l_ulong)); if (error) { LIN_SDT_PROBE1(futex, fetch_robust_entry, copyin_error, error); LIN_SDT_PROBE1(futex, fetch_robust_entry, return, EFAULT); return (EFAULT); } *entry = (void *)(uentry & ~1UL); *pi = uentry & 1; LIN_SDT_PROBE1(futex, fetch_robust_entry, return, 0); return (0); } /* This walks the list of robust futexes releasing them. */ void release_futexes(struct thread *td, struct linux_emuldata *em) { struct linux_robust_list_head *head = NULL; struct linux_robust_list *entry, *next_entry, *pending; unsigned int limit = 2048, pi, next_pi, pip; l_long futex_offset; int rc, error; LIN_SDT_PROBE2(futex, release_futexes, entry, td, em); head = em->robust_futexes; if (head == NULL) { LIN_SDT_PROBE0(futex, release_futexes, return); return; } if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi)) { LIN_SDT_PROBE0(futex, release_futexes, return); return; } error = copyin(&head->futex_offset, &futex_offset, sizeof(futex_offset)); if (error) { LIN_SDT_PROBE1(futex, release_futexes, copyin_error, error); LIN_SDT_PROBE0(futex, release_futexes, return); return; } if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip)) { LIN_SDT_PROBE0(futex, release_futexes, return); return; } while (entry != &head->list) { rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi); if (entry != pending) if (handle_futex_death(em, (uint32_t *)((caddr_t)entry + futex_offset), pi)) { LIN_SDT_PROBE0(futex, release_futexes, return); return; } if (rc) { LIN_SDT_PROBE0(futex, release_futexes, return); return; } entry = next_entry; pi = next_pi; if (!--limit) break; sched_relinquish(curthread); } if (pending) handle_futex_death(em, (uint32_t *)((caddr_t)pending + futex_offset), pip); LIN_SDT_PROBE0(futex, release_futexes, return); } Index: projects/clang380-import/sys/compat/svr4/svr4_sysvec.c =================================================================== --- projects/clang380-import/sys/compat/svr4/svr4_sysvec.c (revision 293686) +++ projects/clang380-import/sys/compat/svr4/svr4_sysvec.c (revision 293687) @@ -1,312 +1,313 @@ /*- * Copyright (c) 1998 Mark Newton * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christos Zoulas. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* XXX we use functions that might not exist. */ #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int bsd_to_svr4_errno[ELAST+1] = { 0, SVR4_EPERM, SVR4_ENOENT, SVR4_ESRCH, SVR4_EINTR, SVR4_EIO, SVR4_ENXIO, SVR4_E2BIG, SVR4_ENOEXEC, SVR4_EBADF, SVR4_ECHILD, SVR4_EDEADLK, SVR4_ENOMEM, SVR4_EACCES, SVR4_EFAULT, SVR4_ENOTBLK, SVR4_EBUSY, SVR4_EEXIST, SVR4_EXDEV, SVR4_ENODEV, SVR4_ENOTDIR, SVR4_EISDIR, SVR4_EINVAL, SVR4_ENFILE, SVR4_EMFILE, SVR4_ENOTTY, SVR4_ETXTBSY, SVR4_EFBIG, SVR4_ENOSPC, SVR4_ESPIPE, SVR4_EROFS, SVR4_EMLINK, SVR4_EPIPE, SVR4_EDOM, SVR4_ERANGE, SVR4_EAGAIN, SVR4_EINPROGRESS, SVR4_EALREADY, SVR4_ENOTSOCK, SVR4_EDESTADDRREQ, SVR4_EMSGSIZE, SVR4_EPROTOTYPE, SVR4_ENOPROTOOPT, SVR4_EPROTONOSUPPORT, SVR4_ESOCKTNOSUPPORT, SVR4_EOPNOTSUPP, SVR4_EPFNOSUPPORT, SVR4_EAFNOSUPPORT, SVR4_EADDRINUSE, SVR4_EADDRNOTAVAIL, SVR4_ENETDOWN, SVR4_ENETUNREACH, SVR4_ENETRESET, SVR4_ECONNABORTED, SVR4_ECONNRESET, SVR4_ENOBUFS, SVR4_EISCONN, SVR4_ENOTCONN, SVR4_ESHUTDOWN, SVR4_ETOOMANYREFS, SVR4_ETIMEDOUT, SVR4_ECONNREFUSED, SVR4_ELOOP, SVR4_ENAMETOOLONG, SVR4_EHOSTDOWN, SVR4_EHOSTUNREACH, SVR4_ENOTEMPTY, SVR4_EPROCLIM, SVR4_EUSERS, SVR4_EDQUOT, SVR4_ESTALE, SVR4_EREMOTE, SVR4_EBADRPC, SVR4_ERPCMISMATCH, SVR4_EPROGUNAVAIL, SVR4_EPROGMISMATCH, SVR4_EPROCUNAVAIL, SVR4_ENOLCK, SVR4_ENOSYS, SVR4_EFTYPE, SVR4_EAUTH, SVR4_ENEEDAUTH, SVR4_EIDRM, SVR4_ENOMSG, }; static int svr4_fixup(register_t **stack_base, struct image_params *imgp); extern struct sysent svr4_sysent[]; #undef szsigcode #undef sigcode extern int svr4_szsigcode; extern char svr4_sigcode[]; struct sysentvec svr4_sysvec = { .sv_size = SVR4_SYS_MAXSYSCALL, .sv_table = svr4_sysent, .sv_mask = 0xff, .sv_errsize = ELAST, /* ELAST */ .sv_errtbl = bsd_to_svr4_errno, .sv_transtrap = NULL, .sv_fixup = svr4_fixup, .sv_sendsig = svr4_sendsig, .sv_sigcode = svr4_sigcode, .sv_szsigcode = &svr4_szsigcode, .sv_name = "SVR4", .sv_coredump = elf32_coredump, .sv_imgact_try = NULL, .sv_minsigstksz = SVR4_MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_UNDEF | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = NULL, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; const char svr4_emul_path[] = "/compat/svr4"; Elf32_Brandinfo svr4_brand = { .brand = ELFOSABI_SYSV, .machine = EM_386, /* XXX only implemented for x86 so far. */ .compat_3_brand = "SVR4", .emul_path = svr4_emul_path, .interp_path = "/lib/libc.so.1", .sysvec = &svr4_sysvec, .interp_newpath = NULL, .brand_note = NULL, .flags = 0 }; static int svr4_fixup(register_t **stack_base, struct image_params *imgp) { Elf32_Auxargs *args; register_t *pos; KASSERT(curthread->td_proc == imgp->proc, ("unsafe svr4_fixup(), should be curproc")); args = (Elf32_Auxargs *)imgp->auxargs; pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; (*stack_base)--; **stack_base = (register_t)imgp->args->argc; return 0; } /* * Search an alternate path before passing pathname arguments on * to system calls. Useful for keeping a separate 'emulation tree'. * * If cflag is set, we check if an attempt can be made to create * the named file, i.e. we check if the directory it should * be in exists. */ int svr4_emul_find(struct thread *td, char *path, enum uio_seg pathseg, char **pbuf, int create) { return (kern_alternate_path(td, svr4_emul_path, path, pathseg, pbuf, create, AT_FDCWD)); } static int svr4_elf_modevent(module_t mod, int type, void *data) { int error; error = 0; switch(type) { case MOD_LOAD: if (elf32_insert_brand_entry(&svr4_brand) < 0) { printf("cannot insert svr4 elf brand handler\n"); error = EINVAL; break; } if (bootverbose) printf("svr4 ELF exec handler installed\n"); svr4_sockcache_init(); break; case MOD_UNLOAD: /* Only allow the emulator to be removed if it isn't in use. */ if (elf32_brand_inuse(&svr4_brand) != 0) { error = EBUSY; } else if (elf32_remove_brand_entry(&svr4_brand) < 0) { error = EINVAL; } if (error) { printf("Could not deinstall ELF interpreter entry (error %d)\n", error); break; } if (bootverbose) printf("svr4 ELF exec handler removed\n"); svr4_sockcache_destroy(); break; default: return (EOPNOTSUPP); break; } return error; } static moduledata_t svr4_elf_mod = { "svr4elf", svr4_elf_modevent, 0 }; DECLARE_MODULE_TIED(svr4elf, svr4_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(svr4elf, streams, 1, 1, 1); Index: projects/clang380-import/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c =================================================================== --- projects/clang380-import/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c (revision 293686) +++ projects/clang380-import/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c (revision 293687) @@ -1,1464 +1,1453 @@ /* $FreeBSD$ */ /* * Copyright (C) 2012 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. */ #if !defined(lint) static const char sccsid[] = "@(#)ip_fil.c 2.41 6/5/96 (C) 1993-2000 Darren Reed"; static const char rcsid[] = "@(#)$Id$"; #endif #if defined(KERNEL) || defined(_KERNEL) # undef KERNEL # undef _KERNEL # define KERNEL 1 # define _KERNEL 1 #endif #if defined(__FreeBSD_version) && (__FreeBSD_version >= 400000) && \ !defined(KLD_MODULE) && !defined(IPFILTER_LKM) # include "opt_inet6.h" #endif #if defined(__FreeBSD_version) && (__FreeBSD_version >= 440000) && \ !defined(KLD_MODULE) && !defined(IPFILTER_LKM) # include "opt_random_ip_id.h" #endif #include #include #include #include # include # include #include #include # include #if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000) #include #endif # include # include #if !defined(__hpux) # include #endif #include # include # include #include # include # include #include #include +#include #include #include #include #include #include #if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000) #include #else #define CURVNET_SET(arg) #define CURVNET_RESTORE() #endif #if defined(__osf__) # include #endif #include #include #include #include "netinet/ip_compat.h" #ifdef USE_INET6 # include #endif #include "netinet/ip_fil.h" #include "netinet/ip_nat.h" #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_proxy.h" #include "netinet/ip_auth.h" #include "netinet/ip_sync.h" #include "netinet/ip_lookup.h" #include "netinet/ip_dstlist.h" #ifdef IPFILTER_SCAN #include "netinet/ip_scan.h" #endif #include "netinet/ip_pool.h" # include #include #ifdef CSUM_DATA_VALID #include #endif extern int ip_optcopy __P((struct ip *, struct ip *)); # ifdef IPFILTER_M_IPFILTER MALLOC_DEFINE(M_IPFILTER, "ipfilter", "IP Filter packet filter data structures"); # endif static int (*ipf_savep) __P((void *, ip_t *, int, void *, int, struct mbuf **)); static int ipf_send_ip __P((fr_info_t *, mb_t *)); static void ipf_timer_func __P((void *arg)); int ipf_locks_done = 0; ipf_main_softc_t ipfmain; # include # if defined(NETBSD_PF) # include # endif /* NETBSD_PF */ /* * We provide the ipf_checkp name just to minimize changes later. */ int (*ipf_checkp) __P((void *, ip_t *ip, int hlen, void *ifp, int out, mb_t **mp)); static eventhandler_tag ipf_arrivetag, ipf_departtag, ipf_clonetag; static void ipf_ifevent(void *arg); static void ipf_ifevent(arg) void *arg; { ipf_sync(arg, NULL); } static int ipf_check_wrapper(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir) { struct ip *ip = mtod(*mp, struct ip *); int rv; /* * IPFilter expects evreything in network byte order */ #if (__FreeBSD_version < 1000019) ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); #endif rv = ipf_check(&ipfmain, ip, ip->ip_hl << 2, ifp, (dir == PFIL_OUT), mp); #if (__FreeBSD_version < 1000019) if ((rv == 0) && (*mp != NULL)) { ip = mtod(*mp, struct ip *); ip->ip_len = ntohs(ip->ip_len); ip->ip_off = ntohs(ip->ip_off); } #endif return rv; } # ifdef USE_INET6 # include static int ipf_check_wrapper6(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir) { return (ipf_check(&ipfmain, mtod(*mp, struct ip *), sizeof(struct ip6_hdr), ifp, (dir == PFIL_OUT), mp)); } # endif #if defined(IPFILTER_LKM) int ipf_identify(s) char *s; { if (strcmp(s, "ipl") == 0) return 1; return 0; } #endif /* IPFILTER_LKM */ static void ipf_timer_func(arg) void *arg; { ipf_main_softc_t *softc = arg; SPL_INT(s); SPL_NET(s); READ_ENTER(&softc->ipf_global); if (softc->ipf_running > 0) ipf_slowtimer(softc); if (softc->ipf_running == -1 || softc->ipf_running == 1) { #if 0 softc->ipf_slow_ch = timeout(ipf_timer_func, softc, hz/2); #endif callout_init(&softc->ipf_slow_ch, 1); callout_reset(&softc->ipf_slow_ch, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT, ipf_timer_func, softc); } RWLOCK_EXIT(&softc->ipf_global); SPL_X(s); } int ipfattach(softc) ipf_main_softc_t *softc; { #ifdef USE_SPL int s; #endif SPL_NET(s); if (softc->ipf_running > 0) { SPL_X(s); return EBUSY; } if (ipf_init_all(softc) < 0) { SPL_X(s); return EIO; } if (ipf_checkp != ipf_check) { ipf_savep = ipf_checkp; ipf_checkp = ipf_check; } bzero((char *)ipfmain.ipf_selwait, sizeof(ipfmain.ipf_selwait)); softc->ipf_running = 1; if (softc->ipf_control_forwarding & 1) V_ipforwarding = 1; SPL_X(s); #if 0 softc->ipf_slow_ch = timeout(ipf_timer_func, softc, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT); #endif callout_init(&softc->ipf_slow_ch, 1); callout_reset(&softc->ipf_slow_ch, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT, ipf_timer_func, softc); return 0; } /* * Disable the filter by removing the hooks from the IP input/output * stream. */ int ipfdetach(softc) ipf_main_softc_t *softc; { #ifdef USE_SPL int s; #endif if (softc->ipf_control_forwarding & 2) V_ipforwarding = 0; SPL_NET(s); #if 0 if (softc->ipf_slow_ch.callout != NULL) untimeout(ipf_timer_func, softc, softc->ipf_slow_ch); bzero(&softc->ipf_slow, sizeof(softc->ipf_slow)); #endif callout_drain(&softc->ipf_slow_ch); #ifndef NETBSD_PF if (ipf_checkp != NULL) ipf_checkp = ipf_savep; ipf_savep = NULL; #endif ipf_fini_all(softc); softc->ipf_running = -2; SPL_X(s); return 0; } /* * Filter ioctl interface. */ int ipfioctl(dev, cmd, data, mode , p) struct thread *p; # define p_cred td_ucred # define p_uid td_ucred->cr_ruid struct cdev *dev; ioctlcmd_t cmd; caddr_t data; int mode; { int error = 0, unit = 0; SPL_INT(s); #if (BSD >= 199306) if (securelevel_ge(p->p_cred, 3) && (mode & FWRITE)) { ipfmain.ipf_interror = 130001; return EPERM; } #endif unit = GET_MINOR(dev); if ((IPL_LOGMAX < unit) || (unit < 0)) { ipfmain.ipf_interror = 130002; return ENXIO; } if (ipfmain.ipf_running <= 0) { if (unit != IPL_LOGIPF && cmd != SIOCIPFINTERROR) { ipfmain.ipf_interror = 130003; return EIO; } if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET && cmd != SIOCIPFSET && cmd != SIOCFRENB && cmd != SIOCGETFS && cmd != SIOCGETFF && cmd != SIOCIPFINTERROR) { ipfmain.ipf_interror = 130004; return EIO; } } SPL_NET(s); CURVNET_SET(TD_TO_VNET(p)); error = ipf_ioctlswitch(&ipfmain, unit, data, cmd, mode, p->p_uid, p); CURVNET_RESTORE(); if (error != -1) { SPL_X(s); return error; } SPL_X(s); return error; } /* * ipf_send_reset - this could conceivably be a call to tcp_respond(), but that * requires a large amount of setting up and isn't any more efficient. */ int ipf_send_reset(fin) fr_info_t *fin; { struct tcphdr *tcp, *tcp2; int tlen = 0, hlen; struct mbuf *m; #ifdef USE_INET6 ip6_t *ip6; #endif ip_t *ip; tcp = fin->fin_dp; if (tcp->th_flags & TH_RST) return -1; /* feedback loop */ if (ipf_checkl4sum(fin) == -1) return -1; tlen = fin->fin_dlen - (TCP_OFF(tcp) << 2) + ((tcp->th_flags & TH_SYN) ? 1 : 0) + ((tcp->th_flags & TH_FIN) ? 1 : 0); #ifdef USE_INET6 hlen = (fin->fin_v == 6) ? sizeof(ip6_t) : sizeof(ip_t); #else hlen = sizeof(ip_t); #endif #ifdef MGETHDR MGETHDR(m, M_NOWAIT, MT_HEADER); #else MGET(m, M_NOWAIT, MT_HEADER); #endif if (m == NULL) return -1; if (sizeof(*tcp2) + hlen > MLEN) { if (!(MCLGET(m, M_NOWAIT))) { FREE_MB_T(m); return -1; } } m->m_len = sizeof(*tcp2) + hlen; #if (BSD >= 199103) m->m_data += max_linkhdr; m->m_pkthdr.len = m->m_len; m->m_pkthdr.rcvif = (struct ifnet *)0; #endif ip = mtod(m, struct ip *); bzero((char *)ip, hlen); #ifdef USE_INET6 ip6 = (ip6_t *)ip; #endif tcp2 = (struct tcphdr *)((char *)ip + hlen); tcp2->th_sport = tcp->th_dport; tcp2->th_dport = tcp->th_sport; if (tcp->th_flags & TH_ACK) { tcp2->th_seq = tcp->th_ack; tcp2->th_flags = TH_RST; tcp2->th_ack = 0; } else { tcp2->th_seq = 0; tcp2->th_ack = ntohl(tcp->th_seq); tcp2->th_ack += tlen; tcp2->th_ack = htonl(tcp2->th_ack); tcp2->th_flags = TH_RST|TH_ACK; } TCP_X2_A(tcp2, 0); TCP_OFF_A(tcp2, sizeof(*tcp2) >> 2); tcp2->th_win = tcp->th_win; tcp2->th_sum = 0; tcp2->th_urp = 0; #ifdef USE_INET6 if (fin->fin_v == 6) { ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; ip6->ip6_plen = htons(sizeof(struct tcphdr)); ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_hlim = 0; ip6->ip6_src = fin->fin_dst6.in6; ip6->ip6_dst = fin->fin_src6.in6; tcp2->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*ip6), sizeof(*tcp2)); return ipf_send_ip(fin, m); } #endif ip->ip_p = IPPROTO_TCP; ip->ip_len = htons(sizeof(struct tcphdr)); ip->ip_src.s_addr = fin->fin_daddr; ip->ip_dst.s_addr = fin->fin_saddr; tcp2->th_sum = in_cksum(m, hlen + sizeof(*tcp2)); ip->ip_len = htons(hlen + sizeof(*tcp2)); return ipf_send_ip(fin, m); } /* * ip_len must be in network byte order when called. */ static int ipf_send_ip(fin, m) fr_info_t *fin; mb_t *m; { fr_info_t fnew; ip_t *ip, *oip; int hlen; ip = mtod(m, ip_t *); bzero((char *)&fnew, sizeof(fnew)); fnew.fin_main_soft = fin->fin_main_soft; IP_V_A(ip, fin->fin_v); switch (fin->fin_v) { case 4 : oip = fin->fin_ip; hlen = sizeof(*oip); fnew.fin_v = 4; fnew.fin_p = ip->ip_p; fnew.fin_plen = ntohs(ip->ip_len); IP_HL_A(ip, sizeof(*oip) >> 2); ip->ip_tos = oip->ip_tos; ip->ip_id = fin->fin_ip->ip_id; #if defined(FreeBSD) && (__FreeBSD_version > 460000) ip->ip_off = htons(path_mtu_discovery ? IP_DF : 0); #else ip->ip_off = 0; #endif ip->ip_ttl = V_ip_defttl; ip->ip_sum = 0; break; #ifdef USE_INET6 case 6 : { ip6_t *ip6 = (ip6_t *)ip; ip6->ip6_vfc = 0x60; ip6->ip6_hlim = IPDEFTTL; hlen = sizeof(*ip6); fnew.fin_p = ip6->ip6_nxt; fnew.fin_v = 6; fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen; break; } #endif default : return EINVAL; } #ifdef IPSEC m->m_pkthdr.rcvif = NULL; #endif fnew.fin_ifp = fin->fin_ifp; fnew.fin_flx = FI_NOCKSUM; fnew.fin_m = m; fnew.fin_ip = ip; fnew.fin_mp = &m; fnew.fin_hlen = hlen; fnew.fin_dp = (char *)ip + hlen; (void) ipf_makefrip(hlen, ip, &fnew); return ipf_fastroute(m, &m, &fnew, NULL); } int ipf_send_icmp_err(type, fin, dst) int type; fr_info_t *fin; int dst; { int err, hlen, xtra, iclen, ohlen, avail, code; struct in_addr dst4; struct icmp *icmp; struct mbuf *m; i6addr_t dst6; void *ifp; #ifdef USE_INET6 ip6_t *ip6; #endif ip_t *ip, *ip2; if ((type < 0) || (type >= ICMP_MAXTYPE)) return -1; code = fin->fin_icode; #ifdef USE_INET6 #if 0 /* XXX Fix an off by one error: s/>/>=/ was: if ((code < 0) || (code > sizeof(icmptoicmp6unreach)/sizeof(int))) Fix obtained from NetBSD ip_fil_netbsd.c r1.4: */ #endif if ((code < 0) || (code >= sizeof(icmptoicmp6unreach)/sizeof(int))) return -1; #endif if (ipf_checkl4sum(fin) == -1) return -1; #ifdef MGETHDR MGETHDR(m, M_NOWAIT, MT_HEADER); #else MGET(m, M_NOWAIT, MT_HEADER); #endif if (m == NULL) return -1; avail = MHLEN; xtra = 0; hlen = 0; ohlen = 0; dst4.s_addr = 0; ifp = fin->fin_ifp; if (fin->fin_v == 4) { if ((fin->fin_p == IPPROTO_ICMP) && !(fin->fin_flx & FI_SHORT)) switch (ntohs(fin->fin_data[0]) >> 8) { case ICMP_ECHO : case ICMP_TSTAMP : case ICMP_IREQ : case ICMP_MASKREQ : break; default : FREE_MB_T(m); return 0; } if (dst == 0) { if (ipf_ifpaddr(&ipfmain, 4, FRI_NORMAL, ifp, &dst6, NULL) == -1) { FREE_MB_T(m); return -1; } dst4 = dst6.in4; } else dst4.s_addr = fin->fin_daddr; hlen = sizeof(ip_t); ohlen = fin->fin_hlen; iclen = hlen + offsetof(struct icmp, icmp_ip) + ohlen; if (fin->fin_hlen < fin->fin_plen) xtra = MIN(fin->fin_dlen, 8); else xtra = 0; } #ifdef USE_INET6 else if (fin->fin_v == 6) { hlen = sizeof(ip6_t); ohlen = sizeof(ip6_t); iclen = hlen + offsetof(struct icmp, icmp_ip) + ohlen; type = icmptoicmp6types[type]; if (type == ICMP6_DST_UNREACH) code = icmptoicmp6unreach[code]; if (iclen + max_linkhdr + fin->fin_plen > avail) { if (!(MCLGET(m, M_NOWAIT))) { FREE_MB_T(m); return -1; } avail = MCLBYTES; } xtra = MIN(fin->fin_plen, avail - iclen - max_linkhdr); xtra = MIN(xtra, IPV6_MMTU - iclen); if (dst == 0) { if (ipf_ifpaddr(&ipfmain, 6, FRI_NORMAL, ifp, &dst6, NULL) == -1) { FREE_MB_T(m); return -1; } } else dst6 = fin->fin_dst6; } #endif else { FREE_MB_T(m); return -1; } avail -= (max_linkhdr + iclen); if (avail < 0) { FREE_MB_T(m); return -1; } if (xtra > avail) xtra = avail; iclen += xtra; m->m_data += max_linkhdr; m->m_pkthdr.rcvif = (struct ifnet *)0; m->m_pkthdr.len = iclen; m->m_len = iclen; ip = mtod(m, ip_t *); icmp = (struct icmp *)((char *)ip + hlen); ip2 = (ip_t *)&icmp->icmp_ip; icmp->icmp_type = type; icmp->icmp_code = fin->fin_icode; icmp->icmp_cksum = 0; #ifdef icmp_nextmtu if (type == ICMP_UNREACH && fin->fin_icode == ICMP_UNREACH_NEEDFRAG) { if (fin->fin_mtu != 0) { icmp->icmp_nextmtu = htons(fin->fin_mtu); } else if (ifp != NULL) { icmp->icmp_nextmtu = htons(GETIFMTU_4(ifp)); } else { /* make up a number... */ icmp->icmp_nextmtu = htons(fin->fin_plen - 20); } } #endif bcopy((char *)fin->fin_ip, (char *)ip2, ohlen); #ifdef USE_INET6 ip6 = (ip6_t *)ip; if (fin->fin_v == 6) { ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; ip6->ip6_plen = htons(iclen - hlen); ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 0; ip6->ip6_src = dst6.in6; ip6->ip6_dst = fin->fin_src6.in6; if (xtra > 0) bcopy((char *)fin->fin_ip + ohlen, (char *)&icmp->icmp_ip + ohlen, xtra); icmp->icmp_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), iclen - hlen); } else #endif { ip->ip_p = IPPROTO_ICMP; ip->ip_src.s_addr = dst4.s_addr; ip->ip_dst.s_addr = fin->fin_saddr; if (xtra > 0) bcopy((char *)fin->fin_ip + ohlen, (char *)&icmp->icmp_ip + ohlen, xtra); icmp->icmp_cksum = ipf_cksum((u_short *)icmp, sizeof(*icmp) + 8); ip->ip_len = htons(iclen); ip->ip_p = IPPROTO_ICMP; } err = ipf_send_ip(fin, m); return err; } /* * m0 - pointer to mbuf where the IP packet starts * mpp - pointer to the mbuf pointer that is the start of the mbuf chain */ int ipf_fastroute(m0, mpp, fin, fdp) mb_t *m0, **mpp; fr_info_t *fin; frdest_t *fdp; { register struct ip *ip, *mhip; register struct mbuf *m = *mpp; - register struct route *ro; int len, off, error = 0, hlen, code; struct ifnet *ifp, *sifp; - struct sockaddr_in *dst; - struct route iproute; + struct sockaddr_in dst; + struct nhop4_extended nh4; + int has_nhop = 0; + u_long fibnum = 0; u_short ip_off; frdest_t node; frentry_t *fr; - ro = NULL; - #ifdef M_WRITABLE /* * HOT FIX/KLUDGE: * * If the mbuf we're about to send is not writable (because of * a cluster reference, for example) we'll need to make a copy * of it since this routine modifies the contents. * * If you have non-crappy network hardware that can transmit data * from the mbuf, rather than making a copy, this is gonna be a * problem. */ if (M_WRITABLE(m) == 0) { m0 = m_dup(m, M_NOWAIT); if (m0 != 0) { FREE_MB_T(m); m = m0; *mpp = m; } else { error = ENOBUFS; FREE_MB_T(m); goto done; } } #endif #ifdef USE_INET6 if (fin->fin_v == 6) { /* * currently "to " and "to :ip#" are not supported * for IPv6 */ return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); } #endif hlen = fin->fin_hlen; ip = mtod(m0, struct ip *); ifp = NULL; /* * Route packet. */ - ro = &iproute; - bzero(ro, sizeof (*ro)); - dst = (struct sockaddr_in *)&ro->ro_dst; - dst->sin_family = AF_INET; - dst->sin_addr = ip->ip_dst; + bzero(&dst, sizeof (dst)); + dst.sin_family = AF_INET; + dst.sin_addr = ip->ip_dst; + dst.sin_len = sizeof(dst); fr = fin->fin_fr; if ((fr != NULL) && !(fr->fr_flags & FR_KEEPSTATE) && (fdp != NULL) && (fdp->fd_type == FRD_DSTLIST)) { if (ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL, &node) == 0) fdp = &node; } if (fdp != NULL) ifp = fdp->fd_ptr; else ifp = fin->fin_ifp; if ((ifp == NULL) && ((fr == NULL) || !(fr->fr_flags & FR_FASTROUTE))) { error = -2; goto bad; } if ((fdp != NULL) && (fdp->fd_ip.s_addr != 0)) - dst->sin_addr = fdp->fd_ip; + dst.sin_addr = fdp->fd_ip; - dst->sin_len = sizeof(*dst); - in_rtalloc(ro, M_GETFIB(m0)); - - if ((ifp == NULL) && (ro->ro_rt != NULL)) - ifp = ro->ro_rt->rt_ifp; - - if ((ro->ro_rt == NULL) || (ifp == NULL)) { + fibnum = M_GETFIB(m0); + if (fib4_lookup_nh_ext(fibnum, dst.sin_addr, NHR_REF, 0, &nh4) != 0) { if (in_localaddr(ip->ip_dst)) error = EHOSTUNREACH; else error = ENETUNREACH; goto bad; } - if (ro->ro_rt->rt_flags & RTF_GATEWAY) - dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; - if (ro->ro_rt) - counter_u64_add(ro->ro_rt->rt_pksent, 1); + has_nhop = 1; + if (ifp == NULL) + ifp = nh4.nh_ifp; + if (nh4.nh_flags & NHF_GATEWAY) + dst.sin_addr = nh4.nh_addr; + /* * For input packets which are being "fastrouted", they won't * go back through output filtering and miss their chance to get * NAT'd and counted. Duplicated packets aren't considered to be * part of the normal packet stream, so do not NAT them or pass * them through stateful checking, etc. */ if ((fdp != &fr->fr_dif) && (fin->fin_out == 0)) { sifp = fin->fin_ifp; fin->fin_ifp = ifp; fin->fin_out = 1; (void) ipf_acctpkt(fin, NULL); fin->fin_fr = NULL; if (!fr || !(fr->fr_flags & FR_RETMASK)) { u_32_t pass; (void) ipf_state_check(fin, &pass); } switch (ipf_nat_checkout(fin, NULL)) { case 0 : break; case 1 : ip->ip_sum = 0; break; case -1 : error = -1; goto bad; break; } fin->fin_ifp = sifp; fin->fin_out = 0; } else ip->ip_sum = 0; /* * If small enough for interface, can just send directly. */ if (ntohs(ip->ip_len) <= ifp->if_mtu) { if (!ip->ip_sum) ip->ip_sum = in_cksum(m, hlen); - error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, - ro + error = (*ifp->if_output)(ifp, m, (struct sockaddr *)&dst, + NULL ); goto done; } /* * Too large for interface; fragment if possible. * Must be able to put at least 8 bytes per fragment. */ ip_off = ntohs(ip->ip_off); if (ip_off & IP_DF) { error = EMSGSIZE; goto bad; } len = (ifp->if_mtu - hlen) &~ 7; if (len < 8) { error = EMSGSIZE; goto bad; } { int mhlen, firstlen = len; struct mbuf **mnext = &m->m_act; /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto chain. */ m0 = m; mhlen = sizeof (struct ip); for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { #ifdef MGETHDR MGETHDR(m, M_NOWAIT, MT_HEADER); #else MGET(m, M_NOWAIT, MT_HEADER); #endif if (m == 0) { m = m0; error = ENOBUFS; goto bad; } m->m_data += max_linkhdr; mhip = mtod(m, struct ip *); bcopy((char *)ip, (char *)mhip, sizeof(*ip)); if (hlen > sizeof (struct ip)) { mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); IP_HL_A(mhip, mhlen >> 2); } m->m_len = mhlen; mhip->ip_off = ((off - hlen) >> 3) + ip_off; if (off + len >= ntohs(ip->ip_len)) len = ntohs(ip->ip_len) - off; else mhip->ip_off |= IP_MF; mhip->ip_len = htons((u_short)(len + mhlen)); *mnext = m; m->m_next = m_copy(m0, off, len); if (m->m_next == 0) { error = ENOBUFS; /* ??? */ goto sendorfree; } m->m_pkthdr.len = mhlen + len; m->m_pkthdr.rcvif = NULL; mhip->ip_off = htons((u_short)mhip->ip_off); mhip->ip_sum = 0; mhip->ip_sum = in_cksum(m, mhlen); mnext = &m->m_act; } /* * Update first fragment by trimming what's been copied out * and updating header, then send each fragment (in order). */ m_adj(m0, hlen + firstlen - ip->ip_len); ip->ip_len = htons((u_short)(hlen + firstlen)); ip->ip_off = htons((u_short)IP_MF); ip->ip_sum = 0; ip->ip_sum = in_cksum(m0, hlen); sendorfree: for (m = m0; m; m = m0) { m0 = m->m_act; m->m_act = 0; if (error == 0) error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)dst, - ro + (struct sockaddr *)&dst, + NULL ); else FREE_MB_T(m); } } done: if (!error) ipfmain.ipf_frouteok[0]++; else ipfmain.ipf_frouteok[1]++; - if ((ro != NULL) && (ro->ro_rt != NULL)) { - RTFREE(ro->ro_rt); - } + if (has_nhop) + fib4_free_nh_ext(fibnum, &nh4); + return 0; bad: if (error == EMSGSIZE) { sifp = fin->fin_ifp; code = fin->fin_icode; fin->fin_icode = ICMP_UNREACH_NEEDFRAG; fin->fin_ifp = ifp; (void) ipf_send_icmp_err(ICMP_UNREACH, fin, 1); fin->fin_ifp = sifp; fin->fin_icode = code; } FREE_MB_T(m); goto done; } int ipf_verifysrc(fin) fr_info_t *fin; { - struct sockaddr_in *dst; - struct route iproute; + struct nhop4_basic nh4; - bzero((char *)&iproute, sizeof(iproute)); - dst = (struct sockaddr_in *)&iproute.ro_dst; - dst->sin_len = sizeof(*dst); - dst->sin_family = AF_INET; - dst->sin_addr = fin->fin_src; - in_rtalloc(&iproute, 0); - if (iproute.ro_rt == NULL) - return 0; - return (fin->fin_ifp == iproute.ro_rt->rt_ifp); + if (fib4_lookup_nh_basic(0, fin->fin_src, 0, 0, &nh4) != 0) + return (0); + return (fin->fin_ifp == nh4.nh_ifp); } /* * return the first IP Address associated with an interface */ int ipf_ifpaddr(softc, v, atype, ifptr, inp, inpmask) ipf_main_softc_t *softc; int v, atype; void *ifptr; i6addr_t *inp, *inpmask; { #ifdef USE_INET6 struct in6_addr *inp6 = NULL; #endif struct sockaddr *sock, *mask; struct sockaddr_in *sin; struct ifaddr *ifa; struct ifnet *ifp; if ((ifptr == NULL) || (ifptr == (void *)-1)) return -1; sin = NULL; ifp = ifptr; if (v == 4) inp->in4.s_addr = 0; #ifdef USE_INET6 else if (v == 6) bzero((char *)inp, sizeof(*inp)); #endif ifa = TAILQ_FIRST(&ifp->if_addrhead); sock = ifa->ifa_addr; while (sock != NULL && ifa != NULL) { sin = (struct sockaddr_in *)sock; if ((v == 4) && (sin->sin_family == AF_INET)) break; #ifdef USE_INET6 if ((v == 6) && (sin->sin_family == AF_INET6)) { inp6 = &((struct sockaddr_in6 *)sin)->sin6_addr; if (!IN6_IS_ADDR_LINKLOCAL(inp6) && !IN6_IS_ADDR_LOOPBACK(inp6)) break; } #endif ifa = TAILQ_NEXT(ifa, ifa_link); if (ifa != NULL) sock = ifa->ifa_addr; } if (ifa == NULL || sin == NULL) return -1; mask = ifa->ifa_netmask; if (atype == FRI_BROADCAST) sock = ifa->ifa_broadaddr; else if (atype == FRI_PEERADDR) sock = ifa->ifa_dstaddr; if (sock == NULL) return -1; #ifdef USE_INET6 if (v == 6) { return ipf_ifpfillv6addr(atype, (struct sockaddr_in6 *)sock, (struct sockaddr_in6 *)mask, inp, inpmask); } #endif return ipf_ifpfillv4addr(atype, (struct sockaddr_in *)sock, (struct sockaddr_in *)mask, &inp->in4, &inpmask->in4); } u_32_t ipf_newisn(fin) fr_info_t *fin; { u_32_t newiss; newiss = arc4random(); return newiss; } INLINE int ipf_checkv4sum(fin) fr_info_t *fin; { #ifdef CSUM_DATA_VALID int manual = 0; u_short sum; ip_t *ip; mb_t *m; if ((fin->fin_flx & FI_NOCKSUM) != 0) return 0; if ((fin->fin_flx & FI_SHORT) != 0) return 1; if (fin->fin_cksum != FI_CK_NEEDED) return (fin->fin_cksum > FI_CK_NEEDED) ? 0 : -1; m = fin->fin_m; if (m == NULL) { manual = 1; goto skipauto; } ip = fin->fin_ip; if ((m->m_pkthdr.csum_flags & (CSUM_IP_CHECKED|CSUM_IP_VALID)) == CSUM_IP_CHECKED) { fin->fin_cksum = FI_CK_BAD; fin->fin_flx |= FI_BAD; return -1; } if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { /* Depending on the driver, UDP may have zero checksum */ if (fin->fin_p == IPPROTO_UDP && (fin->fin_flx & (FI_FRAG|FI_SHORT|FI_BAD)) == 0) { udphdr_t *udp = fin->fin_dp; if (udp->uh_sum == 0) { /* * we're good no matter what the hardware * checksum flags and csum_data say (handling * of csum_data for zero UDP checksum is not * consistent across all drivers) */ fin->fin_cksum = 1; return 0; } } if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) sum = m->m_pkthdr.csum_data; else sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl(m->m_pkthdr.csum_data + fin->fin_dlen + fin->fin_p)); sum ^= 0xffff; if (sum != 0) { fin->fin_cksum = FI_CK_BAD; fin->fin_flx |= FI_BAD; } else { fin->fin_cksum = FI_CK_SUMOK; return 0; } } else { if (m->m_pkthdr.csum_flags == CSUM_DELAY_DATA) { fin->fin_cksum = FI_CK_L4FULL; return 0; } else if (m->m_pkthdr.csum_flags == CSUM_TCP || m->m_pkthdr.csum_flags == CSUM_UDP) { fin->fin_cksum = FI_CK_L4PART; return 0; } else if (m->m_pkthdr.csum_flags == CSUM_IP) { fin->fin_cksum = FI_CK_L4PART; return 0; } else { manual = 1; } } skipauto: if (manual != 0) { if (ipf_checkl4sum(fin) == -1) { fin->fin_flx |= FI_BAD; return -1; } } #else if (ipf_checkl4sum(fin) == -1) { fin->fin_flx |= FI_BAD; return -1; } #endif return 0; } #ifdef USE_INET6 INLINE int ipf_checkv6sum(fin) fr_info_t *fin; { if ((fin->fin_flx & FI_NOCKSUM) != 0) return 0; if ((fin->fin_flx & FI_SHORT) != 0) return 1; if (fin->fin_cksum != FI_CK_NEEDED) return (fin->fin_cksum > FI_CK_NEEDED) ? 0 : -1; if (ipf_checkl4sum(fin) == -1) { fin->fin_flx |= FI_BAD; return -1; } return 0; } #endif /* USE_INET6 */ size_t mbufchainlen(m0) struct mbuf *m0; { size_t len; if ((m0->m_flags & M_PKTHDR) != 0) { len = m0->m_pkthdr.len; } else { struct mbuf *m; for (m = m0, len = 0; m != NULL; m = m->m_next) len += m->m_len; } return len; } /* ------------------------------------------------------------------------ */ /* Function: ipf_pullup */ /* Returns: NULL == pullup failed, else pointer to protocol header */ /* Parameters: xmin(I)- pointer to buffer where data packet starts */ /* fin(I) - pointer to packet information */ /* len(I) - number of bytes to pullup */ /* */ /* Attempt to move at least len bytes (from the start of the buffer) into a */ /* single buffer for ease of access. Operating system native functions are */ /* used to manage buffers - if necessary. If the entire packet ends up in */ /* a single buffer, set the FI_COALESCE flag even though ipf_coalesce() has */ /* not been called. Both fin_ip and fin_dp are updated before exiting _IF_ */ /* and ONLY if the pullup succeeds. */ /* */ /* We assume that 'xmin' is a pointer to a buffer that is part of the chain */ /* of buffers that starts at *fin->fin_mp. */ /* ------------------------------------------------------------------------ */ void * ipf_pullup(xmin, fin, len) mb_t *xmin; fr_info_t *fin; int len; { int dpoff, ipoff; mb_t *m = xmin; char *ip; if (m == NULL) return NULL; ip = (char *)fin->fin_ip; if ((fin->fin_flx & FI_COALESCE) != 0) return ip; ipoff = fin->fin_ipoff; if (fin->fin_dp != NULL) dpoff = (char *)fin->fin_dp - (char *)ip; else dpoff = 0; if (M_LEN(m) < len) { mb_t *n = *fin->fin_mp; /* * Assume that M_PKTHDR is set and just work with what is left * rather than check.. * Should not make any real difference, anyway. */ if (m != n) { /* * Record the mbuf that points to the mbuf that we're * about to go to work on so that we can update the * m_next appropriately later. */ for (; n->m_next != m; n = n->m_next) ; } else { n = NULL; } #ifdef MHLEN if (len > MHLEN) #else if (len > MLEN) #endif { #ifdef HAVE_M_PULLDOWN if (m_pulldown(m, 0, len, NULL) == NULL) m = NULL; #else FREE_MB_T(*fin->fin_mp); m = NULL; n = NULL; #endif } else { m = m_pullup(m, len); } if (n != NULL) n->m_next = m; if (m == NULL) { /* * When n is non-NULL, it indicates that m pointed to * a sub-chain (tail) of the mbuf and that the head * of this chain has not yet been free'd. */ if (n != NULL) { FREE_MB_T(*fin->fin_mp); } *fin->fin_mp = NULL; fin->fin_m = NULL; return NULL; } if (n == NULL) *fin->fin_mp = m; while (M_LEN(m) == 0) { m = m->m_next; } fin->fin_m = m; ip = MTOD(m, char *) + ipoff; fin->fin_ip = (ip_t *)ip; if (fin->fin_dp != NULL) fin->fin_dp = (char *)fin->fin_ip + dpoff; if (fin->fin_fraghdr != NULL) fin->fin_fraghdr = (char *)ip + ((char *)fin->fin_fraghdr - (char *)fin->fin_ip); } if (len == fin->fin_plen) fin->fin_flx |= FI_COALESCE; return ip; } int ipf_inject(fin, m) fr_info_t *fin; mb_t *m; { int error = 0; if (fin->fin_out == 0) { netisr_dispatch(NETISR_IP, m); } else { fin->fin_ip->ip_len = ntohs(fin->fin_ip->ip_len); fin->fin_ip->ip_off = ntohs(fin->fin_ip->ip_off); error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); } return error; } int ipf_pfil_unhook(void) { #if defined(NETBSD_PF) && (__FreeBSD_version >= 500011) struct pfil_head *ph_inet; # ifdef USE_INET6 struct pfil_head *ph_inet6; # endif #endif #ifdef NETBSD_PF ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); if (ph_inet != NULL) pfil_remove_hook((void *)ipf_check_wrapper, NULL, PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet); # ifdef USE_INET6 ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); if (ph_inet6 != NULL) pfil_remove_hook((void *)ipf_check_wrapper6, NULL, PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6); # endif #endif return (0); } int ipf_pfil_hook(void) { #if defined(NETBSD_PF) && (__FreeBSD_version >= 500011) struct pfil_head *ph_inet; # ifdef USE_INET6 struct pfil_head *ph_inet6; # endif #endif # ifdef NETBSD_PF ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); # ifdef USE_INET6 ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); # endif if (ph_inet == NULL # ifdef USE_INET6 && ph_inet6 == NULL # endif ) { return ENODEV; } if (ph_inet != NULL) pfil_add_hook((void *)ipf_check_wrapper, NULL, PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet); # ifdef USE_INET6 if (ph_inet6 != NULL) pfil_add_hook((void *)ipf_check_wrapper6, NULL, PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6); # endif # endif return (0); } void ipf_event_reg(void) { ipf_arrivetag = EVENTHANDLER_REGISTER(ifnet_arrival_event, \ ipf_ifevent, &ipfmain, \ EVENTHANDLER_PRI_ANY); ipf_departtag = EVENTHANDLER_REGISTER(ifnet_departure_event, \ ipf_ifevent, &ipfmain, \ EVENTHANDLER_PRI_ANY); ipf_clonetag = EVENTHANDLER_REGISTER(if_clone_event, ipf_ifevent, \ &ipfmain, EVENTHANDLER_PRI_ANY); } void ipf_event_dereg(void) { if (ipf_arrivetag != NULL) { EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ipf_arrivetag); } if (ipf_departtag != NULL) { EVENTHANDLER_DEREGISTER(ifnet_departure_event, ipf_departtag); } if (ipf_clonetag != NULL) { EVENTHANDLER_DEREGISTER(if_clone_event, ipf_clonetag); } } u_32_t ipf_random() { return arc4random(); } u_int ipf_pcksum(fin, hlen, sum) fr_info_t *fin; int hlen; u_int sum; { struct mbuf *m; u_int sum2; int off; m = fin->fin_m; off = (char *)fin->fin_dp - (char *)fin->fin_ip; m->m_data += hlen; m->m_len -= hlen; sum2 = in_cksum(fin->fin_m, fin->fin_plen - off); m->m_len += hlen; m->m_data -= hlen; /* * Both sum and sum2 are partial sums, so combine them together. */ sum += ~sum2 & 0xffff; while (sum > 0xffff) sum = (sum & 0xffff) + (sum >> 16); sum2 = ~sum & 0xffff; return sum2; } Property changes on: projects/clang380-import/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c:r292913-293685 Index: projects/clang380-import/sys/contrib/ipfilter =================================================================== --- projects/clang380-import/sys/contrib/ipfilter (revision 293686) +++ projects/clang380-import/sys/contrib/ipfilter (revision 293687) Property changes on: projects/clang380-import/sys/contrib/ipfilter ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/contrib/ipfilter:r292913-293685 Index: projects/clang380-import/sys/dev/cxgbe/t4_main.c =================================================================== --- projects/clang380-import/sys/dev/cxgbe/t4_main.c (revision 293686) +++ projects/clang380-import/sys/dev/cxgbe/t4_main.c (revision 293687) @@ -1,9222 +1,9229 @@ /*- * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #endif #if defined(__i386__) || defined(__amd64__) #include #include #endif #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "t4_ioctl.h" #include "t4_l2t.h" #include "t4_mp_ring.h" /* T4 bus driver interface */ static int t4_probe(device_t); static int t4_attach(device_t); static int t4_detach(device_t); static device_method_t t4_methods[] = { DEVMETHOD(device_probe, t4_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD_END }; static driver_t t4_driver = { "t4nex", t4_methods, sizeof(struct adapter) }; /* T4 port (cxgbe) interface */ static int cxgbe_probe(device_t); static int cxgbe_attach(device_t); static int cxgbe_detach(device_t); static device_method_t cxgbe_methods[] = { DEVMETHOD(device_probe, cxgbe_probe), DEVMETHOD(device_attach, cxgbe_attach), DEVMETHOD(device_detach, cxgbe_detach), { 0, 0 } }; static driver_t cxgbe_driver = { "cxgbe", cxgbe_methods, sizeof(struct port_info) }; /* T4 VI (vcxgbe) interface */ static int vcxgbe_probe(device_t); static int vcxgbe_attach(device_t); static int vcxgbe_detach(device_t); static device_method_t vcxgbe_methods[] = { DEVMETHOD(device_probe, vcxgbe_probe), DEVMETHOD(device_attach, vcxgbe_attach), DEVMETHOD(device_detach, vcxgbe_detach), { 0, 0 } }; static driver_t vcxgbe_driver = { "vcxgbe", vcxgbe_methods, sizeof(struct vi_info) }; static d_ioctl_t t4_ioctl; static d_open_t t4_open; static d_close_t t4_close; static struct cdevsw t4_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = t4_open, .d_close = t4_close, .d_ioctl = t4_ioctl, .d_name = "t4nex", }; /* T5 bus driver interface */ static int t5_probe(device_t); static device_method_t t5_methods[] = { DEVMETHOD(device_probe, t5_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD_END }; static driver_t t5_driver = { "t5nex", t5_methods, sizeof(struct adapter) }; /* T5 port (cxl) interface */ static driver_t cxl_driver = { "cxl", cxgbe_methods, sizeof(struct port_info) }; /* T5 VI (vcxl) interface */ static driver_t vcxl_driver = { "vcxl", vcxgbe_methods, sizeof(struct vi_info) }; static struct cdevsw t5_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = t4_open, .d_close = t4_close, .d_ioctl = t4_ioctl, .d_name = "t5nex", }; /* ifnet + media interface */ static void cxgbe_init(void *); static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t); static int cxgbe_transmit(struct ifnet *, struct mbuf *); static void cxgbe_qflush(struct ifnet *); static int cxgbe_media_change(struct ifnet *); static void cxgbe_media_status(struct ifnet *, struct ifmediareq *); MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services"); /* * Correct lock order when you need to acquire multiple locks is t4_list_lock, * then ADAPTER_LOCK, then t4_uld_list_lock. */ static struct sx t4_list_lock; SLIST_HEAD(, adapter) t4_list; #ifdef TCP_OFFLOAD static struct sx t4_uld_list_lock; SLIST_HEAD(, uld_info) t4_uld_list; #endif /* * Tunables. See tweak_tunables() too. * * Each tunable is set to a default value here if it's known at compile-time. * Otherwise it is set to -1 as an indication to tweak_tunables() that it should * provide a reasonable default when the driver is loaded. * * Tunables applicable to both T4 and T5 are under hw.cxgbe. Those specific to * T5 are under hw.cxl. */ /* * Number of queues for tx and rx, 10G and 1G, NIC and offload. */ #define NTXQ_10G 16 static int t4_ntxq10g = -1; TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq10g); #define NRXQ_10G 8 static int t4_nrxq10g = -1; TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq10g); #define NTXQ_1G 4 static int t4_ntxq1g = -1; TUNABLE_INT("hw.cxgbe.ntxq1g", &t4_ntxq1g); #define NRXQ_1G 2 static int t4_nrxq1g = -1; TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g); static int t4_rsrv_noflowq = 0; TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq); #ifdef TCP_OFFLOAD #define NOFLDTXQ_10G 8 static int t4_nofldtxq10g = -1; TUNABLE_INT("hw.cxgbe.nofldtxq10g", &t4_nofldtxq10g); #define NOFLDRXQ_10G 2 static int t4_nofldrxq10g = -1; TUNABLE_INT("hw.cxgbe.nofldrxq10g", &t4_nofldrxq10g); #define NOFLDTXQ_1G 2 static int t4_nofldtxq1g = -1; TUNABLE_INT("hw.cxgbe.nofldtxq1g", &t4_nofldtxq1g); #define NOFLDRXQ_1G 1 static int t4_nofldrxq1g = -1; TUNABLE_INT("hw.cxgbe.nofldrxq1g", &t4_nofldrxq1g); #endif #ifdef DEV_NETMAP #define NNMTXQ_10G 2 static int t4_nnmtxq10g = -1; TUNABLE_INT("hw.cxgbe.nnmtxq10g", &t4_nnmtxq10g); #define NNMRXQ_10G 2 static int t4_nnmrxq10g = -1; TUNABLE_INT("hw.cxgbe.nnmrxq10g", &t4_nnmrxq10g); #define NNMTXQ_1G 1 static int t4_nnmtxq1g = -1; TUNABLE_INT("hw.cxgbe.nnmtxq1g", &t4_nnmtxq1g); #define NNMRXQ_1G 1 static int t4_nnmrxq1g = -1; TUNABLE_INT("hw.cxgbe.nnmrxq1g", &t4_nnmrxq1g); #endif /* * Holdoff parameters for 10G and 1G ports. */ #define TMR_IDX_10G 1 static int t4_tmr_idx_10g = TMR_IDX_10G; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx_10g); #define PKTC_IDX_10G (-1) static int t4_pktc_idx_10g = PKTC_IDX_10G; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx_10g); #define TMR_IDX_1G 1 static int t4_tmr_idx_1g = TMR_IDX_1G; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &t4_tmr_idx_1g); #define PKTC_IDX_1G (-1) static int t4_pktc_idx_1g = PKTC_IDX_1G; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &t4_pktc_idx_1g); /* * Size (# of entries) of each tx and rx queue. */ static unsigned int t4_qsize_txq = TX_EQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq); static unsigned int t4_qsize_rxq = RX_IQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq); /* * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively). */ static int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX; TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types); /* * Configuration file. */ #define DEFAULT_CF "default" #define FLASH_CF "flash" #define UWIRE_CF "uwire" #define FPGA_CF "fpga" static char t4_cfg_file[32] = DEFAULT_CF; TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file)); /* * PAUSE settings (bit 0, 1 = rx_pause, tx_pause respectively). * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them. * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water * mark or when signalled to do so, 0 to never emit PAUSE. */ static int t4_pause_settings = PAUSE_TX | PAUSE_RX; TUNABLE_INT("hw.cxgbe.pause_settings", &t4_pause_settings); /* * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed, * encouraged respectively). */ static unsigned int t4_fw_install = 1; TUNABLE_INT("hw.cxgbe.fw_install", &t4_fw_install); /* * ASIC features that will be used. Disable the ones you don't want so that the * chip resources aren't wasted on features that will not be used. */ static int t4_linkcaps_allowed = 0; /* No DCBX, PPP, etc. by default */ TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed); static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC; TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed); static int t4_toecaps_allowed = -1; TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed); static int t4_rdmacaps_allowed = 0; TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed); static int t4_iscsicaps_allowed = 0; TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed); static int t4_fcoecaps_allowed = 0; TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed); static int t5_write_combine = 0; TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine); static int t4_num_vis = 1; TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis); /* Functions used by extra VIs to obtain unique MAC addresses for each VI. */ static int vi_mac_funcs[] = { FW_VI_FUNC_OFLD, FW_VI_FUNC_IWARP, FW_VI_FUNC_OPENISCSI, FW_VI_FUNC_OPENFCOE, FW_VI_FUNC_FOISCSI, FW_VI_FUNC_FOFCOE, }; struct intrs_and_queues { uint16_t intr_type; /* INTx, MSI, or MSI-X */ uint16_t nirq; /* Total # of vectors */ uint16_t intr_flags_10g;/* Interrupt flags for each 10G port */ uint16_t intr_flags_1g; /* Interrupt flags for each 1G port */ uint16_t ntxq10g; /* # of NIC txq's for each 10G port */ uint16_t nrxq10g; /* # of NIC rxq's for each 10G port */ uint16_t ntxq1g; /* # of NIC txq's for each 1G port */ uint16_t nrxq1g; /* # of NIC rxq's for each 1G port */ uint16_t rsrv_noflowq; /* Flag whether to reserve queue 0 */ #ifdef TCP_OFFLOAD uint16_t nofldtxq10g; /* # of TOE txq's for each 10G port */ uint16_t nofldrxq10g; /* # of TOE rxq's for each 10G port */ uint16_t nofldtxq1g; /* # of TOE txq's for each 1G port */ uint16_t nofldrxq1g; /* # of TOE rxq's for each 1G port */ #endif #ifdef DEV_NETMAP uint16_t nnmtxq10g; /* # of netmap txq's for each 10G port */ uint16_t nnmrxq10g; /* # of netmap rxq's for each 10G port */ uint16_t nnmtxq1g; /* # of netmap txq's for each 1G port */ uint16_t nnmrxq1g; /* # of netmap rxq's for each 1G port */ #endif }; struct filter_entry { uint32_t valid:1; /* filter allocated and valid */ uint32_t locked:1; /* filter is administratively locked */ uint32_t pending:1; /* filter action is pending firmware reply */ uint32_t smtidx:8; /* Source MAC Table index for smac */ struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ struct t4_filter_specification fs; }; static int map_bars_0_and_4(struct adapter *); static int map_bar_2(struct adapter *); static void setup_memwin(struct adapter *); static int validate_mem_range(struct adapter *, uint32_t, int); static int fwmtype_to_hwmtype(int); static int validate_mt_off_len(struct adapter *, int, uint32_t, int, uint32_t *); static void memwin_info(struct adapter *, int, uint32_t *, uint32_t *); static uint32_t position_memwin(struct adapter *, int, uint32_t); static int cfg_itype_and_nqueues(struct adapter *, int, int, int, struct intrs_and_queues *); static int prep_firmware(struct adapter *); static int partition_resources(struct adapter *, const struct firmware *, const char *); static int get_params__pre_init(struct adapter *); static int get_params__post_init(struct adapter *); static int set_params__post_init(struct adapter *); static void t4_set_desc(struct adapter *); static void build_medialist(struct port_info *, struct ifmedia *); static int cxgbe_init_synchronized(struct vi_info *); static int cxgbe_uninit_synchronized(struct vi_info *); static int setup_intr_handlers(struct adapter *); static void quiesce_txq(struct adapter *, struct sge_txq *); static void quiesce_wrq(struct adapter *, struct sge_wrq *); static void quiesce_iq(struct adapter *, struct sge_iq *); static void quiesce_fl(struct adapter *, struct sge_fl *); static int t4_alloc_irq(struct adapter *, struct irq *, int rid, driver_intr_t *, void *, char *); static int t4_free_irq(struct adapter *, struct irq *); static void reg_block_dump(struct adapter *, uint8_t *, unsigned int, unsigned int); static void t4_get_regs(struct adapter *, struct t4_regdump *, uint8_t *); static void vi_refresh_stats(struct adapter *, struct vi_info *); static void cxgbe_refresh_stats(struct adapter *, struct port_info *); static void cxgbe_tick(void *); static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t); static int cpl_not_handled(struct sge_iq *, const struct rss_header *, struct mbuf *); static int an_not_handled(struct sge_iq *, const struct rsp_ctrl *); static int fw_msg_not_handled(struct adapter *, const __be64 *); static void t4_sysctls(struct adapter *); static void cxgbe_sysctls(struct port_info *); static int sysctl_int_array(SYSCTL_HANDLER_ARGS); static int sysctl_bitfield(SYSCTL_HANDLER_ARGS); static int sysctl_btphy(SYSCTL_HANDLER_ARGS); static int sysctl_noflowq(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS); static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS); static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS); static int sysctl_temperature(SYSCTL_HANDLER_ARGS); #ifdef SBUF_DRAIN static int sysctl_cctrl(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS); static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS); static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_devlog(SYSCTL_HANDLER_ARGS); static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS); static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS); static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS); static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS); static int sysctl_meminfo(SYSCTL_HANDLER_ARGS); static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS); static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS); static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS); static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tids(SYSCTL_HANDLER_ARGS); static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tp_la(SYSCTL_HANDLER_ARGS); static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS); static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS); static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS); #endif static uint32_t fconf_to_mode(uint32_t); static uint32_t mode_to_fconf(uint32_t); static uint32_t fspec_to_fconf(struct t4_filter_specification *); static int get_filter_mode(struct adapter *, uint32_t *); static int set_filter_mode(struct adapter *, uint32_t); static inline uint64_t get_filter_hits(struct adapter *, uint32_t); static int get_filter(struct adapter *, struct t4_filter *); static int set_filter(struct adapter *, struct t4_filter *); static int del_filter(struct adapter *, struct t4_filter *); static void clear_filter(struct filter_entry *); static int set_filter_wr(struct adapter *, int); static int del_filter_wr(struct adapter *, int); static int get_sge_context(struct adapter *, struct t4_sge_context *); static int load_fw(struct adapter *, struct t4_data *); static int read_card_mem(struct adapter *, int, struct t4_mem_range *); static int read_i2c(struct adapter *, struct t4_i2c_data *); static int set_sched_class(struct adapter *, struct t4_sched_params *); static int set_sched_queue(struct adapter *, struct t4_sched_queue *); #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *, int); #endif static int mod_event(module_t, int, void *); struct { uint16_t device; char *desc; } t4_pciids[] = { {0xa000, "Chelsio Terminator 4 FPGA"}, {0x4400, "Chelsio T440-dbg"}, {0x4401, "Chelsio T420-CR"}, {0x4402, "Chelsio T422-CR"}, {0x4403, "Chelsio T440-CR"}, {0x4404, "Chelsio T420-BCH"}, {0x4405, "Chelsio T440-BCH"}, {0x4406, "Chelsio T440-CH"}, {0x4407, "Chelsio T420-SO"}, {0x4408, "Chelsio T420-CX"}, {0x4409, "Chelsio T420-BT"}, {0x440a, "Chelsio T404-BT"}, {0x440e, "Chelsio T440-LP-CR"}, }, t5_pciids[] = { {0xb000, "Chelsio Terminator 5 FPGA"}, {0x5400, "Chelsio T580-dbg"}, {0x5401, "Chelsio T520-CR"}, /* 2 x 10G */ {0x5402, "Chelsio T522-CR"}, /* 2 x 10G, 2 X 1G */ {0x5403, "Chelsio T540-CR"}, /* 4 x 10G */ {0x5407, "Chelsio T520-SO"}, /* 2 x 10G, nomem */ {0x5409, "Chelsio T520-BT"}, /* 2 x 10GBaseT */ {0x540a, "Chelsio T504-BT"}, /* 4 x 1G */ {0x540d, "Chelsio T580-CR"}, /* 2 x 40G */ {0x540e, "Chelsio T540-LP-CR"}, /* 4 x 10G */ {0x5410, "Chelsio T580-LP-CR"}, /* 2 x 40G */ {0x5411, "Chelsio T520-LL-CR"}, /* 2 x 10G */ {0x5412, "Chelsio T560-CR"}, /* 1 x 40G, 2 x 10G */ {0x5414, "Chelsio T580-LP-SO-CR"}, /* 2 x 40G, nomem */ {0x5415, "Chelsio T502-BT"}, /* 2 x 1G */ #ifdef notyet {0x5404, "Chelsio T520-BCH"}, {0x5405, "Chelsio T540-BCH"}, {0x5406, "Chelsio T540-CH"}, {0x5408, "Chelsio T520-CX"}, {0x540b, "Chelsio B520-SR"}, {0x540c, "Chelsio B504-BT"}, {0x540f, "Chelsio Amsterdam"}, {0x5413, "Chelsio T580-CHR"}, #endif }; #ifdef TCP_OFFLOAD /* * service_iq() has an iq and needs the fl. Offset of fl from the iq should be * exactly the same for both rxq and ofld_rxq. */ CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq)); CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl)); #endif /* No easy way to include t4_msg.h before adapter.h so we check this way */ CTASSERT(nitems(((struct adapter *)0)->cpl_handler) == NUM_CPL_CMDS); CTASSERT(nitems(((struct adapter *)0)->fw_msg_handler) == NUM_FW6_TYPES); CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE); static int t4_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xa000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t4_pciids); i++) { if (d == t4_pciids[i].device) { device_set_desc(dev, t4_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int t5_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xb000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t5_pciids); i++) { if (d == t5_pciids[i].device) { device_set_desc(dev, t5_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static void t5_attribute_workaround(device_t dev) { device_t root_port; uint32_t v; /* * The T5 chips do not properly echo the No Snoop and Relaxed * Ordering attributes when replying to a TLP from a Root * Port. As a workaround, find the parent Root Port and * disable No Snoop and Relaxed Ordering. Note that this * affects all devices under this root port. */ root_port = pci_find_pcie_root_port(dev); if (root_port == NULL) { device_printf(dev, "Unable to find parent root port\n"); return; } v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL, PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2); if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) != 0) device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n", device_get_nameunit(root_port)); } static int t4_attach(device_t dev) { struct adapter *sc; int rc = 0, i, j, n10g, n1g, rqidx, tqidx; struct intrs_and_queues iaq; struct sge *s; #ifdef TCP_OFFLOAD int ofld_rqidx, ofld_tqidx; #endif #ifdef DEV_NETMAP int nm_rqidx, nm_tqidx; #endif int num_vis; sc = device_get_softc(dev); sc->dev = dev; TUNABLE_INT_FETCH("hw.cxgbe.debug_flags", &sc->debug_flags); if ((pci_get_device(dev) & 0xff00) == 0x5400) t5_attribute_workaround(dev); pci_enable_busmaster(dev); if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) { uint32_t v; pci_set_max_read_req(dev, 4096); v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2); v |= PCIEM_CTL_RELAXED_ORD_ENABLE; pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2); sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5); } sc->traceq = -1; mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF); snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer", device_get_nameunit(dev)); snprintf(sc->lockname, sizeof(sc->lockname), "%s", device_get_nameunit(dev)); mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF); sx_xlock(&t4_list_lock); SLIST_INSERT_HEAD(&t4_list, sc, link); sx_xunlock(&t4_list_lock); mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF); TAILQ_INIT(&sc->sfl); callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0); mtx_init(&sc->regwin_lock, "register and memory window", 0, MTX_DEF); rc = map_bars_0_and_4(sc); if (rc != 0) goto done; /* error message displayed already */ /* * This is the real PF# to which we're attaching. Works from within PCI * passthrough environments too, where pci_get_function() could return a * different PF# depending on the passthrough configuration. We need to * use the real PF# in all our communication with the firmware. */ sc->pf = G_SOURCEPF(t4_read_reg(sc, A_PL_WHOAMI)); sc->mbox = sc->pf; memset(sc->chan_map, 0xff, sizeof(sc->chan_map)); sc->an_handler = an_not_handled; for (i = 0; i < nitems(sc->cpl_handler); i++) sc->cpl_handler[i] = cpl_not_handled; for (i = 0; i < nitems(sc->fw_msg_handler); i++) sc->fw_msg_handler[i] = fw_msg_not_handled; t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl); t4_register_cpl_handler(sc, CPL_TRACE_PKT, t4_trace_pkt); t4_register_cpl_handler(sc, CPL_TRACE_PKT_T5, t5_trace_pkt); t4_init_sge_cpl_handlers(sc); /* Prepare the adapter for operation */ rc = -t4_prep_adapter(sc); if (rc != 0) { device_printf(dev, "failed to prepare adapter: %d.\n", rc); goto done; } /* * Do this really early, with the memory windows set up even before the * character device. The userland tool's register i/o and mem read * will work even in "recovery mode". */ setup_memwin(sc); sc->cdev = make_dev(is_t4(sc) ? &t4_cdevsw : &t5_cdevsw, device_get_unit(dev), UID_ROOT, GID_WHEEL, 0600, "%s", device_get_nameunit(dev)); if (sc->cdev == NULL) device_printf(dev, "failed to create nexus char device.\n"); else sc->cdev->si_drv1 = sc; /* Go no further if recovery mode has been requested. */ if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) { device_printf(dev, "recovery mode.\n"); goto done; } #if defined(__i386__) if ((cpu_feature & CPUID_CX8) == 0) { device_printf(dev, "64 bit atomics not available.\n"); rc = ENOTSUP; goto done; } #endif /* Prepare the firmware for operation */ rc = prep_firmware(sc); if (rc != 0) goto done; /* error message displayed already */ rc = get_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = set_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = map_bar_2(sc); if (rc != 0) goto done; /* error message displayed already */ rc = t4_create_dma_tag(sc); if (rc != 0) goto done; /* error message displayed already */ /* * Number of VIs to create per-port. The first VI is the * "main" regular VI for the port. The second VI is used for * netmap if present, and any remaining VIs are used for * additional virtual interfaces. * * Limit the number of VIs per port to the number of available * MAC addresses per port. */ if (t4_num_vis >= 1) num_vis = t4_num_vis; else num_vis = 1; #ifdef DEV_NETMAP num_vis++; #endif if (num_vis > nitems(vi_mac_funcs)) { num_vis = nitems(vi_mac_funcs); device_printf(dev, "Number of VIs limited to %d\n", num_vis); } /* * First pass over all the ports - allocate VIs and initialize some * basic parameters like mac address, port type, etc. We also figure * out whether a port is 10G or 1G and use that information when * calculating how many interrupts to attempt to allocate. */ n10g = n1g = 0; for_each_port(sc, i) { struct port_info *pi; struct vi_info *vi; pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK); sc->port[i] = pi; /* These must be set before t4_port_init */ pi->adapter = sc; pi->port_id = i; pi->nvi = num_vis; pi->vi = malloc(sizeof(struct vi_info) * num_vis, M_CXGBE, M_ZERO | M_WAITOK); /* * Allocate the "main" VI and initialize parameters * like mac addr. */ rc = -t4_port_init(pi, sc->mbox, sc->pf, 0); if (rc != 0) { device_printf(dev, "unable to initialize port %d: %d\n", i, rc); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); sc->port[i] = NULL; goto done; } pi->link_cfg.requested_fc &= ~(PAUSE_TX | PAUSE_RX); pi->link_cfg.requested_fc |= t4_pause_settings; pi->link_cfg.fc &= ~(PAUSE_TX | PAUSE_RX); pi->link_cfg.fc |= t4_pause_settings; rc = -t4_link_start(sc, sc->mbox, pi->tx_chan, &pi->link_cfg); if (rc != 0) { device_printf(dev, "port %d l1cfg failed: %d\n", i, rc); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); sc->port[i] = NULL; goto done; } snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d", device_get_nameunit(dev), i); mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF); sc->chan_map[pi->tx_chan] = i; if (is_10G_port(pi) || is_40G_port(pi)) { n10g++; for_each_vi(pi, j, vi) { vi->tmr_idx = t4_tmr_idx_10g; vi->pktc_idx = t4_pktc_idx_10g; } } else { n1g++; for_each_vi(pi, j, vi) { vi->tmr_idx = t4_tmr_idx_1g; vi->pktc_idx = t4_pktc_idx_1g; } } pi->linkdnrc = -1; for_each_vi(pi, j, vi) { vi->qsize_rxq = t4_qsize_rxq; vi->qsize_txq = t4_qsize_txq; vi->pi = pi; } pi->dev = device_add_child(dev, is_t4(sc) ? "cxgbe" : "cxl", -1); if (pi->dev == NULL) { device_printf(dev, "failed to add device for port %d.\n", i); rc = ENXIO; goto done; } pi->vi[0].dev = pi->dev; device_set_softc(pi->dev, pi); } /* * Interrupt type, # of interrupts, # of rx/tx queues, etc. */ #ifdef DEV_NETMAP num_vis--; #endif rc = cfg_itype_and_nqueues(sc, n10g, n1g, num_vis, &iaq); if (rc != 0) goto done; /* error message displayed already */ sc->intr_type = iaq.intr_type; sc->intr_count = iaq.nirq; s = &sc->sge; s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g; s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g; if (num_vis > 1) { s->nrxq += (n10g + n1g) * (num_vis - 1); s->ntxq += (n10g + n1g) * (num_vis - 1); } s->neq = s->ntxq + s->nrxq; /* the free list in an rxq is an eq */ s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */ s->niq = s->nrxq + 1; /* 1 extra for firmware event queue */ #ifdef TCP_OFFLOAD if (is_offload(sc)) { s->nofldrxq = n10g * iaq.nofldrxq10g + n1g * iaq.nofldrxq1g; s->nofldtxq = n10g * iaq.nofldtxq10g + n1g * iaq.nofldtxq1g; if (num_vis > 1) { s->nofldrxq += (n10g + n1g) * (num_vis - 1); s->nofldtxq += (n10g + n1g) * (num_vis - 1); } s->neq += s->nofldtxq + s->nofldrxq; s->niq += s->nofldrxq; s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); } #endif #ifdef DEV_NETMAP s->nnmrxq = n10g * iaq.nnmrxq10g + n1g * iaq.nnmrxq1g; s->nnmtxq = n10g * iaq.nnmtxq10g + n1g * iaq.nnmtxq1g; s->neq += s->nnmtxq + s->nnmrxq; s->niq += s->nnmrxq; s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq), M_CXGBE, M_ZERO | M_WAITOK); #endif s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE, M_ZERO | M_WAITOK); s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE, M_ZERO | M_WAITOK); s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE, M_ZERO | M_WAITOK); sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE, M_ZERO | M_WAITOK); t4_init_l2t(sc, M_WAITOK); /* * Second pass over the ports. This time we know the number of rx and * tx queues that each port should get. */ rqidx = tqidx = 0; #ifdef TCP_OFFLOAD ofld_rqidx = ofld_tqidx = 0; #endif #ifdef DEV_NETMAP nm_rqidx = nm_tqidx = 0; #endif for_each_port(sc, i) { struct port_info *pi = sc->port[i]; struct vi_info *vi; if (pi == NULL) continue; for_each_vi(pi, j, vi) { #ifdef DEV_NETMAP if (j == 1) { vi->flags |= VI_NETMAP | INTR_RXQ; vi->first_rxq = nm_rqidx; vi->first_txq = nm_tqidx; if (is_10G_port(pi) || is_40G_port(pi)) { vi->nrxq = iaq.nnmrxq10g; vi->ntxq = iaq.nnmtxq10g; } else { vi->nrxq = iaq.nnmrxq1g; vi->ntxq = iaq.nnmtxq1g; } nm_rqidx += vi->nrxq; nm_tqidx += vi->ntxq; continue; } #endif vi->first_rxq = rqidx; vi->first_txq = tqidx; if (is_10G_port(pi) || is_40G_port(pi)) { vi->flags |= iaq.intr_flags_10g & INTR_RXQ; vi->nrxq = j == 0 ? iaq.nrxq10g : 1; vi->ntxq = j == 0 ? iaq.ntxq10g : 1; } else { vi->flags |= iaq.intr_flags_1g & INTR_RXQ; vi->nrxq = j == 0 ? iaq.nrxq1g : 1; vi->ntxq = j == 0 ? iaq.ntxq1g : 1; } if (vi->ntxq > 1) vi->rsrv_noflowq = iaq.rsrv_noflowq ? 1 : 0; else vi->rsrv_noflowq = 0; rqidx += vi->nrxq; tqidx += vi->ntxq; #ifdef TCP_OFFLOAD if (!is_offload(sc)) continue; vi->first_ofld_rxq = ofld_rqidx; vi->first_ofld_txq = ofld_tqidx; if (is_10G_port(pi) || is_40G_port(pi)) { vi->flags |= iaq.intr_flags_10g & INTR_OFLD_RXQ; vi->nofldrxq = j == 0 ? iaq.nofldrxq10g : 1; vi->nofldtxq = j == 0 ? iaq.nofldtxq10g : 1; } else { vi->flags |= iaq.intr_flags_1g & INTR_OFLD_RXQ; vi->nofldrxq = j == 0 ? iaq.nofldrxq1g : 1; vi->nofldtxq = j == 0 ? iaq.nofldtxq1g : 1; } ofld_rqidx += vi->nofldrxq; ofld_tqidx += vi->nofldtxq; #endif } } rc = setup_intr_handlers(sc); if (rc != 0) { device_printf(dev, "failed to setup interrupt handlers: %d\n", rc); goto done; } rc = bus_generic_attach(dev); if (rc != 0) { device_printf(dev, "failed to attach all child ports: %d\n", rc); goto done; } device_printf(dev, "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n", sc->params.pci.speed, sc->params.pci.width, sc->params.nports, sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" : (sc->intr_type == INTR_MSI ? "MSI" : "INTx"), sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq); t4_set_desc(sc); done: if (rc != 0 && sc->cdev) { /* cdev was created and so cxgbetool works; recover that way. */ device_printf(dev, "error during attach, adapter is now in recovery mode.\n"); rc = 0; } if (rc != 0) t4_detach(dev); else t4_sysctls(sc); return (rc); } /* * Idempotent */ static int t4_detach(device_t dev) { struct adapter *sc; struct port_info *pi; int i, rc; sc = device_get_softc(dev); if (sc->flags & FULL_INIT_DONE) t4_intr_disable(sc); if (sc->cdev) { destroy_dev(sc->cdev); sc->cdev = NULL; } rc = bus_generic_detach(dev); if (rc) { device_printf(dev, "failed to detach child devices: %d\n", rc); return (rc); } for (i = 0; i < sc->intr_count; i++) t4_free_irq(sc, &sc->irq[i]); for (i = 0; i < MAX_NPORTS; i++) { pi = sc->port[i]; if (pi) { t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid); if (pi->dev) device_delete_child(dev, pi->dev); mtx_destroy(&pi->pi_lock); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); } } if (sc->flags & FULL_INIT_DONE) adapter_full_uninit(sc); if (sc->flags & FW_OK) t4_fw_bye(sc, sc->mbox); if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX) pci_release_msi(dev); if (sc->regs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid, sc->regs_res); if (sc->udbs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid, sc->udbs_res); if (sc->msix_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid, sc->msix_res); if (sc->l2t) t4_free_l2t(sc->l2t); #ifdef TCP_OFFLOAD free(sc->sge.ofld_rxq, M_CXGBE); free(sc->sge.ofld_txq, M_CXGBE); #endif #ifdef DEV_NETMAP free(sc->sge.nm_rxq, M_CXGBE); free(sc->sge.nm_txq, M_CXGBE); #endif free(sc->irq, M_CXGBE); free(sc->sge.rxq, M_CXGBE); free(sc->sge.txq, M_CXGBE); free(sc->sge.ctrlq, M_CXGBE); free(sc->sge.iqmap, M_CXGBE); free(sc->sge.eqmap, M_CXGBE); free(sc->tids.ftid_tab, M_CXGBE); t4_destroy_dma_tag(sc); if (mtx_initialized(&sc->sc_lock)) { sx_xlock(&t4_list_lock); SLIST_REMOVE(&t4_list, sc, adapter, link); sx_xunlock(&t4_list_lock); mtx_destroy(&sc->sc_lock); } callout_drain(&sc->sfl_callout); if (mtx_initialized(&sc->tids.ftid_lock)) mtx_destroy(&sc->tids.ftid_lock); if (mtx_initialized(&sc->sfl_lock)) mtx_destroy(&sc->sfl_lock); if (mtx_initialized(&sc->ifp_lock)) mtx_destroy(&sc->ifp_lock); if (mtx_initialized(&sc->regwin_lock)) mtx_destroy(&sc->regwin_lock); bzero(sc, sizeof(*sc)); return (0); } static int cxgbe_probe(device_t dev) { char buf[128]; struct port_info *pi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d", pi->port_id); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } #define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \ IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \ IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS) #define T4_CAP_ENABLE (T4_CAP) static int cxgbe_vi_attach(device_t dev, struct vi_info *vi) { struct ifnet *ifp; struct sbuf *sb; vi->xact_addr_filt = -1; callout_init(&vi->tick, 1); /* Allocate an ifnet and set it up */ ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Cannot allocate ifnet\n"); return (ENOMEM); } vi->ifp = ifp; ifp->if_softc = vi; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = cxgbe_init; ifp->if_ioctl = cxgbe_ioctl; ifp->if_transmit = cxgbe_transmit; ifp->if_qflush = cxgbe_qflush; ifp->if_get_counter = cxgbe_get_counter; ifp->if_capabilities = T4_CAP; #ifdef TCP_OFFLOAD if (vi->nofldrxq != 0) ifp->if_capabilities |= IFCAP_TOE; #endif ifp->if_capenable = T4_CAP_ENABLE; ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO | CSUM_UDP_IPV6 | CSUM_TCP_IPV6; ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS; ifp->if_hw_tsomaxsegsize = 65536; /* Initialize ifmedia for this VI */ ifmedia_init(&vi->media, IFM_IMASK, cxgbe_media_change, cxgbe_media_status); build_medialist(vi->pi, &vi->media); vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp, EVENTHANDLER_PRI_ANY); ether_ifattach(ifp, vi->hw_addr); sb = sbuf_new_auto(); sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq); #ifdef TCP_OFFLOAD if (ifp->if_capabilities & IFCAP_TOE) sbuf_printf(sb, "; %d txq, %d rxq (TOE)", vi->nofldtxq, vi->nofldrxq); #endif sbuf_finish(sb); device_printf(dev, "%s\n", sbuf_data(sb)); sbuf_delete(sb); vi_sysctls(vi); return (0); } static int cxgbe_attach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct vi_info *vi; int i, rc; callout_init_mtx(&pi->tick, &pi->pi_lock, 0); rc = cxgbe_vi_attach(dev, &pi->vi[0]); if (rc) return (rc); for_each_vi(pi, i, vi) { if (i == 0) continue; #ifdef DEV_NETMAP if (vi->flags & VI_NETMAP) { /* * media handled here to keep * implementation private to this file */ ifmedia_init(&vi->media, IFM_IMASK, cxgbe_media_change, cxgbe_media_status); build_medialist(pi, &vi->media); vi->dev = device_add_child(dev, is_t4(pi->adapter) ? "ncxgbe" : "ncxl", device_get_unit(dev)); } else #endif vi->dev = device_add_child(dev, is_t4(pi->adapter) ? "vcxgbe" : "vcxl", -1); if (vi->dev == NULL) { device_printf(dev, "failed to add VI %d\n", i); continue; } device_set_softc(vi->dev, vi); } cxgbe_sysctls(pi); bus_generic_attach(dev); return (0); } static void cxgbe_vi_detach(struct vi_info *vi) { struct ifnet *ifp = vi->ifp; ether_ifdetach(ifp); if (vi->vlan_c) EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c); /* Let detach proceed even if these fail. */ cxgbe_uninit_synchronized(vi); callout_drain(&vi->tick); vi_full_uninit(vi); ifmedia_removeall(&vi->media); if_free(vi->ifp); vi->ifp = NULL; } static int cxgbe_detach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct adapter *sc = pi->adapter; int rc; /* Detach the extra VIs first. */ rc = bus_generic_detach(dev); if (rc) return (rc); device_delete_children(dev); doom_vi(sc, &pi->vi[0]); if (pi->flags & HAS_TRACEQ) { sc->traceq = -1; /* cloner should not create ifnet */ t4_tracer_port_detach(sc); } cxgbe_vi_detach(&pi->vi[0]); callout_drain(&pi->tick); end_synchronized_op(sc, 0); return (0); } static void cxgbe_init(void *arg) { struct vi_info *vi = arg; struct adapter *sc = vi->pi->adapter; if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0) return; cxgbe_init_synchronized(vi); end_synchronized_op(sc, 0); } static int cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) { int rc = 0, mtu, flags, can_sleep; struct vi_info *vi = ifp->if_softc; struct adapter *sc = vi->pi->adapter; struct ifreq *ifr = (struct ifreq *)data; uint32_t mask; switch (cmd) { case SIOCSIFMTU: mtu = ifr->ifr_mtu; if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) return (EINVAL); rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu"); if (rc) return (rc); ifp->if_mtu = mtu; if (vi->flags & VI_INIT_DONE) { t4_update_fl_bufsize(ifp); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MTU); } end_synchronized_op(sc, 0); break; case SIOCSIFFLAGS: can_sleep = 0; redo_sifflags: rc = begin_synchronized_op(sc, vi, can_sleep ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4flg"); if (rc) return (rc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { flags = vi->if_flags; if ((ifp->if_flags ^ flags) & (IFF_PROMISC | IFF_ALLMULTI)) { if (can_sleep == 1) { end_synchronized_op(sc, 0); can_sleep = 0; goto redo_sifflags; } rc = update_mac_settings(ifp, XGMAC_PROMISC | XGMAC_ALLMULTI); } } else { if (can_sleep == 0) { end_synchronized_op(sc, LOCK_HELD); can_sleep = 1; goto redo_sifflags; } rc = cxgbe_init_synchronized(vi); } vi->if_flags = ifp->if_flags; } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (can_sleep == 0) { end_synchronized_op(sc, LOCK_HELD); can_sleep = 1; goto redo_sifflags; } rc = cxgbe_uninit_synchronized(vi); } end_synchronized_op(sc, can_sleep ? 0 : LOCK_HELD); break; case SIOCADDMULTI: case SIOCDELMULTI: /* these two are called with a mutex held :-( */ rc = begin_synchronized_op(sc, vi, HOLD_LOCK, "t4multi"); if (rc) return (rc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MCADDRS); end_synchronized_op(sc, LOCK_HELD); break; case SIOCSIFCAP: rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap"); if (rc) return (rc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO4; if_printf(ifp, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & ifp->if_capenable && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO6; if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; /* * Note that we leave CSUM_TSO alone (it is always set). The * kernel takes both IFCAP_TSOx and CSUM_TSO into account before * sending a TSO request our way, so it's sufficient to toggle * IFCAP_TSOx only. */ if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & ifp->if_capenable) && !(IFCAP_TXCSUM & ifp->if_capenable)) { if_printf(ifp, "enable txcsum first.\n"); rc = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO4; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & ifp->if_capenable) && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { if_printf(ifp, "enable txcsum6 first.\n"); rc = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO6; } if (mask & IFCAP_LRO) { #if defined(INET) || defined(INET6) int i; struct sge_rxq *rxq; ifp->if_capenable ^= IFCAP_LRO; for_each_rxq(vi, i, rxq) { if (ifp->if_capenable & IFCAP_LRO) rxq->iq.flags |= IQ_LRO_ENABLED; else rxq->iq.flags &= ~IQ_LRO_ENABLED; } #endif } #ifdef TCP_OFFLOAD if (mask & IFCAP_TOE) { int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE; rc = toe_capability(vi, enable); if (rc != 0) goto fail; ifp->if_capenable ^= mask; } #endif if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_VLANEX); } if (mask & IFCAP_VLAN_MTU) { ifp->if_capenable ^= IFCAP_VLAN_MTU; /* Need to find out how to disable auto-mtu-inflation */ } if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWCSUM) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); #endif fail: end_synchronized_op(sc, 0); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: ifmedia_ioctl(ifp, ifr, &vi->media, cmd); break; case SIOCGI2C: { struct ifi2creq i2c; rc = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); if (rc != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { rc = EPERM; break; } if (i2c.len > sizeof(i2c.data)) { rc = EINVAL; break; } rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, vi->pi->port_id, i2c.dev_addr, i2c.offset, i2c.len, &i2c.data[0]); end_synchronized_op(sc, 0); if (rc == 0) rc = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); break; } default: rc = ether_ioctl(ifp, cmd, data); } return (rc); } static int cxgbe_transmit(struct ifnet *ifp, struct mbuf *m) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_txq *txq; void *items[1]; int rc; M_ASSERTPKTHDR(m); MPASS(m->m_nextpkt == NULL); /* not quite ready for this yet */ if (__predict_false(pi->link_cfg.link_ok == 0)) { m_freem(m); return (ENETDOWN); } rc = parse_pkt(&m); if (__predict_false(rc != 0)) { MPASS(m == NULL); /* was freed already */ atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */ return (rc); } /* Select a txq. */ txq = &sc->sge.txq[vi->first_txq]; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) + vi->rsrv_noflowq); items[0] = m; rc = mp_ring_enqueue(txq->r, items, 1, 4096); if (__predict_false(rc != 0)) m_freem(m); return (rc); } static void cxgbe_qflush(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; struct sge_txq *txq; int i; /* queues do not exist if !VI_INIT_DONE. */ if (vi->flags & VI_INIT_DONE) { for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags &= ~EQ_ENABLED; TXQ_UNLOCK(txq); while (!mp_ring_is_idle(txq->r)) { mp_ring_check_drainage(txq->r, 0); pause("qflush", 1); } } } if_qflush(ifp); } static uint64_t vi_get_counter(struct ifnet *ifp, ift_counter c) { struct vi_info *vi = ifp->if_softc; struct fw_vi_stats_vf *s = &vi->stats; vi_refresh_stats(vi->pi->adapter, vi); switch (c) { case IFCOUNTER_IPACKETS: return (s->rx_bcast_frames + s->rx_mcast_frames + s->rx_ucast_frames); case IFCOUNTER_IERRORS: return (s->rx_err_frames); case IFCOUNTER_OPACKETS: return (s->tx_bcast_frames + s->tx_mcast_frames + s->tx_ucast_frames + s->tx_offload_frames); case IFCOUNTER_OERRORS: return (s->tx_drop_frames); case IFCOUNTER_IBYTES: return (s->rx_bcast_bytes + s->rx_mcast_bytes + s->rx_ucast_bytes); case IFCOUNTER_OBYTES: return (s->tx_bcast_bytes + s->tx_mcast_bytes + s->tx_ucast_bytes + s->tx_offload_bytes); case IFCOUNTER_IMCASTS: return (s->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (s->tx_mcast_frames); case IFCOUNTER_OQDROPS: { uint64_t drops; drops = 0; if ((vi->flags & (VI_INIT_DONE | VI_NETMAP)) == VI_INIT_DONE) { int i; struct sge_txq *txq; for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); } return (drops); } default: return (if_get_counter_default(ifp, c)); } } uint64_t cxgbe_get_counter(struct ifnet *ifp, ift_counter c) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct port_stats *s = &pi->stats; if (pi->nvi > 1) return (vi_get_counter(ifp, c)); cxgbe_refresh_stats(sc, pi); switch (c) { case IFCOUNTER_IPACKETS: return (s->rx_frames - s->rx_pause); case IFCOUNTER_IERRORS: return (s->rx_jabber + s->rx_runt + s->rx_too_long + s->rx_fcs_err + s->rx_len_err); case IFCOUNTER_OPACKETS: return (s->tx_frames - s->tx_pause); case IFCOUNTER_OERRORS: return (s->tx_error_frames); case IFCOUNTER_IBYTES: return (s->rx_octets - s->rx_pause * 64); case IFCOUNTER_OBYTES: return (s->tx_octets - s->tx_pause * 64); case IFCOUNTER_IMCASTS: return (s->rx_mcast_frames - s->rx_pause); case IFCOUNTER_OMCASTS: return (s->tx_mcast_frames - s->tx_pause); case IFCOUNTER_IQDROPS: return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 + s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 + s->rx_trunc3 + pi->tnl_cong_drops); case IFCOUNTER_OQDROPS: { uint64_t drops; drops = s->tx_drop; if (vi->flags & VI_INIT_DONE) { int i; struct sge_txq *txq; for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); } return (drops); } default: return (if_get_counter_default(ifp, c)); } } static int cxgbe_media_change(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; device_printf(vi->dev, "%s unimplemented.\n", __func__); return (EOPNOTSUPP); } static void cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct ifmedia_entry *cur; int speed = pi->link_cfg.speed; cur = vi->media.ifm_cur; ifmr->ifm_status = IFM_AVALID; if (!pi->link_cfg.link_ok) return; ifmr->ifm_status |= IFM_ACTIVE; /* active and current will differ iff current media is autoselect. */ if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO) return; ifmr->ifm_active = IFM_ETHER | IFM_FDX; if (speed == SPEED_10000) ifmr->ifm_active |= IFM_10G_T; else if (speed == SPEED_1000) ifmr->ifm_active |= IFM_1000_T; else if (speed == SPEED_100) ifmr->ifm_active |= IFM_100_TX; else if (speed == SPEED_10) ifmr->ifm_active |= IFM_10_T; else KASSERT(0, ("%s: link up but speed unknown (%u)", __func__, speed)); } static int vcxgbe_probe(device_t dev) { char buf[128]; struct vi_info *vi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id, vi - vi->pi->vi); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } static int vcxgbe_attach(device_t dev) { struct vi_info *vi; struct port_info *pi; struct adapter *sc; int func, index, rc; u32 param, val; vi = device_get_softc(dev); pi = vi->pi; sc = pi->adapter; index = vi - pi->vi; KASSERT(index < nitems(vi_mac_funcs), ("%s: VI %s doesn't have a MAC func", __func__, device_get_nameunit(dev))); func = vi_mac_funcs[index]; rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1, vi->hw_addr, &vi->rss_size, func, 0); if (rc < 0) { device_printf(dev, "Failed to allocate virtual interface " "for port %d: %d\n", pi->port_id, -rc); return (-rc); } vi->viid = rc; param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) | V_FW_PARAMS_PARAM_YZ(vi->viid); rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc) vi->rss_base = 0xffff; else { /* MPASS((val >> 16) == rss_size); */ vi->rss_base = val & 0xffff; } rc = cxgbe_vi_attach(dev, vi); if (rc) { t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); return (rc); } return (0); } static int vcxgbe_detach(device_t dev) { struct vi_info *vi; struct adapter *sc; vi = device_get_softc(dev); sc = vi->pi->adapter; doom_vi(sc, vi); cxgbe_vi_detach(vi); t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); end_synchronized_op(sc, 0); return (0); } void t4_fatal_err(struct adapter *sc) { t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0); t4_intr_disable(sc); log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n", device_get_nameunit(sc->dev)); } static int map_bars_0_and_4(struct adapter *sc) { sc->regs_rid = PCIR_BAR(0); sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->regs_rid, RF_ACTIVE); if (sc->regs_res == NULL) { device_printf(sc->dev, "cannot map registers.\n"); return (ENXIO); } sc->bt = rman_get_bustag(sc->regs_res); sc->bh = rman_get_bushandle(sc->regs_res); sc->mmio_len = rman_get_size(sc->regs_res); setbit(&sc->doorbells, DOORBELL_KDB); sc->msix_rid = PCIR_BAR(4); sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->msix_rid, RF_ACTIVE); if (sc->msix_res == NULL) { device_printf(sc->dev, "cannot map MSI-X BAR.\n"); return (ENXIO); } return (0); } static int map_bar_2(struct adapter *sc) { /* * T4: only iWARP driver uses the userspace doorbells. There is no need * to map it if RDMA is disabled. */ if (is_t4(sc) && sc->rdmacaps == 0) return (0); sc->udbs_rid = PCIR_BAR(2); sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->udbs_rid, RF_ACTIVE); if (sc->udbs_res == NULL) { device_printf(sc->dev, "cannot map doorbell BAR.\n"); return (ENXIO); } sc->udbs_base = rman_get_virtual(sc->udbs_res); if (is_t5(sc)) { setbit(&sc->doorbells, DOORBELL_UDB); #if defined(__i386__) || defined(__amd64__) if (t5_write_combine) { int rc; /* * Enable write combining on BAR2. This is the * userspace doorbell BAR and is split into 128B * (UDBS_SEG_SIZE) doorbell regions, each associated * with an egress queue. The first 64B has the doorbell * and the second 64B can be used to submit a tx work * request with an implicit doorbell. */ rc = pmap_change_attr((vm_offset_t)sc->udbs_base, rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING); if (rc == 0) { clrbit(&sc->doorbells, DOORBELL_UDB); setbit(&sc->doorbells, DOORBELL_WCWR); setbit(&sc->doorbells, DOORBELL_UDBWC); } else { device_printf(sc->dev, "couldn't enable write combining: %d\n", rc); } t4_write_reg(sc, A_SGE_STAT_CFG, V_STATSOURCE_T5(7) | V_STATMODE(0)); } #endif } return (0); } static const struct memwin t4_memwin[] = { { MEMWIN0_BASE, MEMWIN0_APERTURE }, { MEMWIN1_BASE, MEMWIN1_APERTURE }, { MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 } }; static const struct memwin t5_memwin[] = { { MEMWIN0_BASE, MEMWIN0_APERTURE }, { MEMWIN1_BASE, MEMWIN1_APERTURE }, { MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 }, }; static void setup_memwin(struct adapter *sc) { const struct memwin *mw; int i, n; uint32_t bar0; if (is_t4(sc)) { /* * Read low 32b of bar0 indirectly via the hardware backdoor * mechanism. Works from within PCI passthrough environments * too, where rman_get_start() can return a different value. We * need to program the T4 memory window decoders with the actual * addresses that will be coming across the PCIe link. */ bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0)); bar0 &= (uint32_t) PCIM_BAR_MEM_BASE; mw = &t4_memwin[0]; n = nitems(t4_memwin); } else { /* T5 uses the relative offset inside the PCIe BAR */ bar0 = 0; mw = &t5_memwin[0]; n = nitems(t5_memwin); } for (i = 0; i < n; i++, mw++) { t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i), (mw->base + bar0) | V_BIR(0) | V_WINDOW(ilog2(mw->aperture) - 10)); } /* flush */ t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2)); } /* * Verify that the memory range specified by the addr/len pair is valid and lies * entirely within a single region (EDCx or MCx). */ static int validate_mem_range(struct adapter *sc, uint32_t addr, int len) { uint32_t em, addr_len, maddr, mlen; /* Memory can only be accessed in naturally aligned 4 byte units */ if (addr & 3 || len & 3 || len == 0) return (EINVAL); /* Enabled memories */ em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); if (em & F_EDRAM0_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR); maddr = G_EDRAM0_BASE(addr_len) << 20; mlen = G_EDRAM0_SIZE(addr_len) << 20; if (mlen > 0 && addr >= maddr && addr < maddr + mlen && addr + len <= maddr + mlen) return (0); } if (em & F_EDRAM1_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR); maddr = G_EDRAM1_BASE(addr_len) << 20; mlen = G_EDRAM1_SIZE(addr_len) << 20; if (mlen > 0 && addr >= maddr && addr < maddr + mlen && addr + len <= maddr + mlen) return (0); } if (em & F_EXT_MEM_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); maddr = G_EXT_MEM_BASE(addr_len) << 20; mlen = G_EXT_MEM_SIZE(addr_len) << 20; if (mlen > 0 && addr >= maddr && addr < maddr + mlen && addr + len <= maddr + mlen) return (0); } if (!is_t4(sc) && em & F_EXT_MEM1_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); maddr = G_EXT_MEM1_BASE(addr_len) << 20; mlen = G_EXT_MEM1_SIZE(addr_len) << 20; if (mlen > 0 && addr >= maddr && addr < maddr + mlen && addr + len <= maddr + mlen) return (0); } return (EFAULT); } static int fwmtype_to_hwmtype(int mtype) { switch (mtype) { case FW_MEMTYPE_EDC0: return (MEM_EDC0); case FW_MEMTYPE_EDC1: return (MEM_EDC1); case FW_MEMTYPE_EXTMEM: return (MEM_MC0); case FW_MEMTYPE_EXTMEM1: return (MEM_MC1); default: panic("%s: cannot translate fw mtype %d.", __func__, mtype); } } /* * Verify that the memory range specified by the memtype/offset/len pair is * valid and lies entirely within the memtype specified. The global address of * the start of the range is returned in addr. */ static int validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, int len, uint32_t *addr) { uint32_t em, addr_len, maddr, mlen; /* Memory can only be accessed in naturally aligned 4 byte units */ if (off & 3 || len & 3 || len == 0) return (EINVAL); em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); switch (fwmtype_to_hwmtype(mtype)) { case MEM_EDC0: if (!(em & F_EDRAM0_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR); maddr = G_EDRAM0_BASE(addr_len) << 20; mlen = G_EDRAM0_SIZE(addr_len) << 20; break; case MEM_EDC1: if (!(em & F_EDRAM1_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR); maddr = G_EDRAM1_BASE(addr_len) << 20; mlen = G_EDRAM1_SIZE(addr_len) << 20; break; case MEM_MC: if (!(em & F_EXT_MEM_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); maddr = G_EXT_MEM_BASE(addr_len) << 20; mlen = G_EXT_MEM_SIZE(addr_len) << 20; break; case MEM_MC1: if (is_t4(sc) || !(em & F_EXT_MEM1_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); maddr = G_EXT_MEM1_BASE(addr_len) << 20; mlen = G_EXT_MEM1_SIZE(addr_len) << 20; break; default: return (EINVAL); } if (mlen > 0 && off < mlen && off + len <= mlen) { *addr = maddr + off; /* global address */ return (0); } return (EFAULT); } static void memwin_info(struct adapter *sc, int win, uint32_t *base, uint32_t *aperture) { const struct memwin *mw; if (is_t4(sc)) { KASSERT(win >= 0 && win < nitems(t4_memwin), ("%s: incorrect memwin# (%d)", __func__, win)); mw = &t4_memwin[win]; } else { KASSERT(win >= 0 && win < nitems(t5_memwin), ("%s: incorrect memwin# (%d)", __func__, win)); mw = &t5_memwin[win]; } if (base != NULL) *base = mw->base; if (aperture != NULL) *aperture = mw->aperture; } /* * Positions the memory window such that it can be used to access the specified * address in the chip's address space. The return value is the offset of addr * from the start of the window. */ static uint32_t position_memwin(struct adapter *sc, int n, uint32_t addr) { uint32_t start, pf; uint32_t reg; KASSERT(n >= 0 && n <= 3, ("%s: invalid window %d.", __func__, n)); KASSERT((addr & 3) == 0, ("%s: addr (0x%x) is not at a 4B boundary.", __func__, addr)); if (is_t4(sc)) { pf = 0; start = addr & ~0xf; /* start must be 16B aligned */ } else { pf = V_PFNUM(sc->pf); start = addr & ~0x7f; /* start must be 128B aligned */ } reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, n); t4_write_reg(sc, reg, start | pf); t4_read_reg(sc, reg); return (addr - start); } static int cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, int num_vis, struct intrs_and_queues *iaq) { int rc, itype, navail, nrxq10g, nrxq1g, n; int nofldrxq10g = 0, nofldrxq1g = 0; int nnmrxq10g = 0, nnmrxq1g = 0; bzero(iaq, sizeof(*iaq)); iaq->ntxq10g = t4_ntxq10g; iaq->ntxq1g = t4_ntxq1g; iaq->nrxq10g = nrxq10g = t4_nrxq10g; iaq->nrxq1g = nrxq1g = t4_nrxq1g; iaq->rsrv_noflowq = t4_rsrv_noflowq; #ifdef TCP_OFFLOAD if (is_offload(sc)) { iaq->nofldtxq10g = t4_nofldtxq10g; iaq->nofldtxq1g = t4_nofldtxq1g; iaq->nofldrxq10g = nofldrxq10g = t4_nofldrxq10g; iaq->nofldrxq1g = nofldrxq1g = t4_nofldrxq1g; } #endif #ifdef DEV_NETMAP iaq->nnmtxq10g = t4_nnmtxq10g; iaq->nnmtxq1g = t4_nnmtxq1g; iaq->nnmrxq10g = nnmrxq10g = t4_nnmrxq10g; iaq->nnmrxq1g = nnmrxq1g = t4_nnmrxq1g; #endif for (itype = INTR_MSIX; itype; itype >>= 1) { if ((itype & t4_intr_types) == 0) continue; /* not allowed */ if (itype == INTR_MSIX) navail = pci_msix_count(sc->dev); else if (itype == INTR_MSI) navail = pci_msi_count(sc->dev); else navail = 1; restart: if (navail == 0) continue; iaq->intr_type = itype; iaq->intr_flags_10g = 0; iaq->intr_flags_1g = 0; /* * Best option: an interrupt vector for errors, one for the * firmware event queue, and one for every rxq (NIC, TOE, and * netmap). */ iaq->nirq = T4_EXTRA_INTR; iaq->nirq += n10g * (nrxq10g + nofldrxq10g + nnmrxq10g); iaq->nirq += n10g * 2 * (num_vis - 1); iaq->nirq += n1g * (nrxq1g + nofldrxq1g + nnmrxq1g); iaq->nirq += n1g * 2 * (num_vis - 1); if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { iaq->intr_flags_10g = INTR_ALL; iaq->intr_flags_1g = INTR_ALL; goto allocate; } /* * Second best option: a vector for errors, one for the firmware * event queue, and vectors for either all the NIC rx queues or * all the TOE rx queues. The queues that don't get vectors * will forward their interrupts to those that do. * * Note: netmap rx queues cannot be created early and so they * can't be setup to receive forwarded interrupts for others. */ iaq->nirq = T4_EXTRA_INTR; if (nrxq10g >= nofldrxq10g) { iaq->intr_flags_10g = INTR_RXQ; iaq->nirq += n10g * nrxq10g; iaq->nirq += n10g * (num_vis - 1); #ifdef DEV_NETMAP iaq->nnmrxq10g = min(nnmrxq10g, nrxq10g); #endif } else { iaq->intr_flags_10g = INTR_OFLD_RXQ; iaq->nirq += n10g * nofldrxq10g; #ifdef DEV_NETMAP iaq->nnmrxq10g = min(nnmrxq10g, nofldrxq10g); #endif } if (nrxq1g >= nofldrxq1g) { iaq->intr_flags_1g = INTR_RXQ; iaq->nirq += n1g * nrxq1g; iaq->nirq += n1g * (num_vis - 1); #ifdef DEV_NETMAP iaq->nnmrxq1g = min(nnmrxq1g, nrxq1g); #endif } else { iaq->intr_flags_1g = INTR_OFLD_RXQ; iaq->nirq += n1g * nofldrxq1g; #ifdef DEV_NETMAP iaq->nnmrxq1g = min(nnmrxq1g, nofldrxq1g); #endif } if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) goto allocate; /* * Next best option: an interrupt vector for errors, one for the * firmware event queue, and at least one per VI. At this * point we know we'll have to downsize nrxq and/or nofldrxq * and/or nnmrxq to fit what's available to us. */ iaq->nirq = T4_EXTRA_INTR; iaq->nirq += (n10g + n1g) * num_vis; if (iaq->nirq <= navail) { int leftover = navail - iaq->nirq; if (n10g > 0) { int target = max(nrxq10g, nofldrxq10g); iaq->intr_flags_10g = nrxq10g >= nofldrxq10g ? INTR_RXQ : INTR_OFLD_RXQ; n = 1; while (n < target && leftover >= n10g) { leftover -= n10g; iaq->nirq += n10g; n++; } iaq->nrxq10g = min(n, nrxq10g); #ifdef TCP_OFFLOAD iaq->nofldrxq10g = min(n, nofldrxq10g); #endif #ifdef DEV_NETMAP iaq->nnmrxq10g = min(n, nnmrxq10g); #endif } if (n1g > 0) { int target = max(nrxq1g, nofldrxq1g); iaq->intr_flags_1g = nrxq1g >= nofldrxq1g ? INTR_RXQ : INTR_OFLD_RXQ; n = 1; while (n < target && leftover >= n1g) { leftover -= n1g; iaq->nirq += n1g; n++; } iaq->nrxq1g = min(n, nrxq1g); #ifdef TCP_OFFLOAD iaq->nofldrxq1g = min(n, nofldrxq1g); #endif #ifdef DEV_NETMAP iaq->nnmrxq1g = min(n, nnmrxq1g); #endif } if (itype != INTR_MSI || powerof2(iaq->nirq)) goto allocate; } /* * Least desirable option: one interrupt vector for everything. */ iaq->nirq = iaq->nrxq10g = iaq->nrxq1g = 1; iaq->intr_flags_10g = iaq->intr_flags_1g = 0; #ifdef TCP_OFFLOAD if (is_offload(sc)) iaq->nofldrxq10g = iaq->nofldrxq1g = 1; #endif #ifdef DEV_NETMAP iaq->nnmrxq10g = iaq->nnmrxq1g = 1; #endif allocate: navail = iaq->nirq; rc = 0; if (itype == INTR_MSIX) rc = pci_alloc_msix(sc->dev, &navail); else if (itype == INTR_MSI) rc = pci_alloc_msi(sc->dev, &navail); if (rc == 0) { if (navail == iaq->nirq) return (0); /* * Didn't get the number requested. Use whatever number * the kernel is willing to allocate (it's in navail). */ device_printf(sc->dev, "fewer vectors than requested, " "type=%d, req=%d, rcvd=%d; will downshift req.\n", itype, iaq->nirq, navail); pci_release_msi(sc->dev); goto restart; } device_printf(sc->dev, "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n", itype, rc, iaq->nirq, navail); } device_printf(sc->dev, "failed to find a usable interrupt type. " "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types, pci_msix_count(sc->dev), pci_msi_count(sc->dev)); return (ENXIO); } #define FW_VERSION(chip) ( \ V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \ V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \ V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \ V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD)) #define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf) struct fw_info { uint8_t chip; char *kld_name; char *fw_mod_name; struct fw_hdr fw_hdr; /* XXX: waste of space, need a sparse struct */ } fw_info[] = { { .chip = CHELSIO_T4, .kld_name = "t4fw_cfg", .fw_mod_name = "t4fw", .fw_hdr = { .chip = FW_HDR_CHIP_T4, .fw_ver = htobe32_const(FW_VERSION(T4)), .intfver_nic = FW_INTFVER(T4, NIC), .intfver_vnic = FW_INTFVER(T4, VNIC), .intfver_ofld = FW_INTFVER(T4, OFLD), .intfver_ri = FW_INTFVER(T4, RI), .intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T4, ISCSI), .intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU), .intfver_fcoe = FW_INTFVER(T4, FCOE), }, }, { .chip = CHELSIO_T5, .kld_name = "t5fw_cfg", .fw_mod_name = "t5fw", .fw_hdr = { .chip = FW_HDR_CHIP_T5, .fw_ver = htobe32_const(FW_VERSION(T5)), .intfver_nic = FW_INTFVER(T5, NIC), .intfver_vnic = FW_INTFVER(T5, VNIC), .intfver_ofld = FW_INTFVER(T5, OFLD), .intfver_ri = FW_INTFVER(T5, RI), .intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T5, ISCSI), .intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU), .intfver_fcoe = FW_INTFVER(T5, FCOE), }, } }; static struct fw_info * find_fw_info(int chip) { int i; for (i = 0; i < nitems(fw_info); i++) { if (fw_info[i].chip == chip) return (&fw_info[i]); } return (NULL); } /* * Is the given firmware API compatible with the one the driver was compiled * with? */ static int fw_compatible(const struct fw_hdr *hdr1, const struct fw_hdr *hdr2) { /* short circuit if it's the exact same firmware version */ if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver) return (1); /* * XXX: Is this too conservative? Perhaps I should limit this to the * features that are supported in the driver. */ #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x) if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) && SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) && SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe)) return (1); #undef SAME_INTF return (0); } /* * The firmware in the KLD is usable, but should it be installed? This routine * explains itself in detail if it indicates the KLD firmware should be * installed. */ static int should_install_kld_fw(struct adapter *sc, int card_fw_usable, int k, int c) { const char *reason; if (!card_fw_usable) { reason = "incompatible or unusable"; goto install; } if (k > c) { reason = "older than the version bundled with this driver"; goto install; } if (t4_fw_install == 2 && k != c) { reason = "different than the version bundled with this driver"; goto install; } return (0); install: if (t4_fw_install == 0) { device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "but the driver is prohibited from installing a different " "firmware on the card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason); return (0); } device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "installing firmware %u.%u.%u.%u on card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason, G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); return (1); } /* * Establish contact with the firmware and determine if we are the master driver * or not, and whether we are responsible for chip initialization. */ static int prep_firmware(struct adapter *sc) { const struct firmware *fw = NULL, *default_cfg; int rc, pf, card_fw_usable, kld_fw_usable, need_fw_reset = 1; enum dev_state state; struct fw_info *fw_info; struct fw_hdr *card_fw; /* fw on the card */ const struct fw_hdr *kld_fw; /* fw in the KLD */ const struct fw_hdr *drv_fw; /* fw header the driver was compiled against */ /* Contact firmware. */ rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state); if (rc < 0 || state == DEV_STATE_ERR) { rc = -rc; device_printf(sc->dev, "failed to connect to the firmware: %d, %d.\n", rc, state); return (rc); } pf = rc; if (pf == sc->mbox) sc->flags |= MASTER_PF; else if (state == DEV_STATE_UNINIT) { /* * We didn't get to be the master so we definitely won't be * configuring the chip. It's a bug if someone else hasn't * configured it already. */ device_printf(sc->dev, "couldn't be master(%d), " "device not already initialized either(%d).\n", rc, state); return (EDOOFUS); } /* This is the firmware whose headers the driver was compiled against */ fw_info = find_fw_info(chip_id(sc)); if (fw_info == NULL) { device_printf(sc->dev, "unable to look up firmware information for chip %d.\n", chip_id(sc)); return (EINVAL); } drv_fw = &fw_info->fw_hdr; /* * The firmware KLD contains many modules. The KLD name is also the * name of the module that contains the default config file. */ default_cfg = firmware_get(fw_info->kld_name); /* Read the header of the firmware on the card */ card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_read_flash(sc, FLASH_FW_START, sizeof (*card_fw) / sizeof (uint32_t), (uint32_t *)card_fw, 1); if (rc == 0) card_fw_usable = fw_compatible(drv_fw, (const void*)card_fw); else { device_printf(sc->dev, "Unable to read card's firmware header: %d\n", rc); card_fw_usable = 0; } /* This is the firmware in the KLD */ fw = firmware_get(fw_info->fw_mod_name); if (fw != NULL) { kld_fw = (const void *)fw->data; kld_fw_usable = fw_compatible(drv_fw, kld_fw); } else { kld_fw = NULL; kld_fw_usable = 0; } if (card_fw_usable && card_fw->fw_ver == drv_fw->fw_ver && (!kld_fw_usable || kld_fw->fw_ver == drv_fw->fw_ver)) { /* * Common case: the firmware on the card is an exact match and * the KLD is an exact match too, or the KLD is * absent/incompatible. Note that t4_fw_install = 2 is ignored * here -- use cxgbetool loadfw if you want to reinstall the * same firmware as the one on the card. */ } else if (kld_fw_usable && state == DEV_STATE_UNINIT && should_install_kld_fw(sc, card_fw_usable, be32toh(kld_fw->fw_ver), be32toh(card_fw->fw_ver))) { rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0); if (rc != 0) { device_printf(sc->dev, "failed to install firmware: %d\n", rc); goto done; } /* Installed successfully, update the cached header too. */ memcpy(card_fw, kld_fw, sizeof(*card_fw)); card_fw_usable = 1; need_fw_reset = 0; /* already reset as part of load_fw */ } if (!card_fw_usable) { uint32_t d, c, k; d = ntohl(drv_fw->fw_ver); c = ntohl(card_fw->fw_ver); k = kld_fw ? ntohl(kld_fw->fw_ver) : 0; device_printf(sc->dev, "Cannot find a usable firmware: " "fw_install %d, chip state %d, " "driver compiled with %d.%d.%d.%d, " "card has %d.%d.%d.%d, KLD has %d.%d.%d.%d\n", t4_fw_install, state, G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d), G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d), G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); rc = EINVAL; goto done; } /* We're using whatever's on the card and it's known to be good. */ sc->params.fw_vers = ntohl(card_fw->fw_ver); snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers), G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers)); t4_get_tp_version(sc, &sc->params.tp_vers); /* Reset device */ if (need_fw_reset && (rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST)) != 0) { device_printf(sc->dev, "firmware reset failed: %d.\n", rc); if (rc != ETIMEDOUT && rc != EIO) t4_fw_bye(sc, sc->mbox); goto done; } sc->flags |= FW_OK; rc = get_params__pre_init(sc); if (rc != 0) goto done; /* error message displayed already */ /* Partition adapter resources as specified in the config file. */ if (state == DEV_STATE_UNINIT) { KASSERT(sc->flags & MASTER_PF, ("%s: trying to change chip settings when not master.", __func__)); rc = partition_resources(sc, default_cfg, fw_info->kld_name); if (rc != 0) goto done; /* error message displayed already */ t4_tweak_chip_settings(sc); /* get basic stuff going */ rc = -t4_fw_initialize(sc, sc->mbox); if (rc != 0) { device_printf(sc->dev, "fw init failed: %d.\n", rc); goto done; } } else { snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", pf); sc->cfcsum = 0; } done: free(card_fw, M_CXGBE); if (fw != NULL) firmware_put(fw, FIRMWARE_UNLOAD); if (default_cfg != NULL) firmware_put(default_cfg, FIRMWARE_UNLOAD); return (rc); } #define FW_PARAM_DEV(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param)) #define FW_PARAM_PFVF(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)) /* * Partition chip resources for use between various PFs, VFs, etc. */ static int partition_resources(struct adapter *sc, const struct firmware *default_cfg, const char *name_prefix) { const struct firmware *cfg = NULL; int rc = 0; struct fw_caps_config_cmd caps; uint32_t mtype, moff, finicsum, cfcsum; /* * Figure out what configuration file to use. Pick the default config * file for the card if the user hasn't specified one explicitly. */ snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", t4_cfg_file); if (strncmp(t4_cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) { /* Card specific overrides go here. */ if (pci_get_device(sc->dev) == 0x440a) snprintf(sc->cfg_file, sizeof(sc->cfg_file), UWIRE_CF); if (is_fpga(sc)) snprintf(sc->cfg_file, sizeof(sc->cfg_file), FPGA_CF); } /* * We need to load another module if the profile is anything except * "default" or "flash". */ if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) != 0 && strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { char s[32]; snprintf(s, sizeof(s), "%s_%s", name_prefix, sc->cfg_file); cfg = firmware_get(s); if (cfg == NULL) { if (default_cfg != NULL) { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the default config file instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", DEFAULT_CF); } else { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the config file on the card's flash " "instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } } } if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) == 0 && default_cfg == NULL) { device_printf(sc->dev, "default config file not available, will use the config " "file on the card's flash instead.\n"); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } if (strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { u_int cflen, i, n; const uint32_t *cfdata; uint32_t param, val, addr, off, mw_base, mw_aperture; KASSERT(cfg != NULL || default_cfg != NULL, ("%s: no config to upload", __func__)); /* * Ask the firmware where it wants us to upload the config file. */ param = FW_PARAM_DEV(CF); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc != 0) { /* No support for config file? Shouldn't happen. */ device_printf(sc->dev, "failed to query config file location: %d.\n", rc); goto done; } mtype = G_FW_PARAMS_PARAM_Y(val); moff = G_FW_PARAMS_PARAM_Z(val) << 16; /* * XXX: sheer laziness. We deliberately added 4 bytes of * useless stuffing/comments at the end of the config file so * it's ok to simply throw away the last remaining bytes when * the config file is not an exact multiple of 4. This also * helps with the validate_mt_off_len check. */ if (cfg != NULL) { cflen = cfg->datasize & ~3; cfdata = cfg->data; } else { cflen = default_cfg->datasize & ~3; cfdata = default_cfg->data; } if (cflen > FLASH_CFG_MAX_SIZE) { device_printf(sc->dev, "config file too long (%d, max allowed is %d). " "Will try to use the config on the card, if any.\n", cflen, FLASH_CFG_MAX_SIZE); goto use_config_on_flash; } rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr); if (rc != 0) { device_printf(sc->dev, "%s: addr (%d/0x%x) or len %d is not valid: %d. " "Will try to use the config on the card, if any.\n", __func__, mtype, moff, cflen, rc); goto use_config_on_flash; } memwin_info(sc, 2, &mw_base, &mw_aperture); while (cflen) { off = position_memwin(sc, 2, addr); n = min(cflen, mw_aperture - off); for (i = 0; i < n; i += 4) t4_write_reg(sc, mw_base + off + i, *cfdata++); cflen -= n; addr += n; } } else { use_config_on_flash: mtype = FW_MEMTYPE_FLASH; moff = t4_flash_cfg_addr(sc); } bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID | V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) | V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) | FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to pre-process config file: %d " "(mtype %d, moff 0x%x).\n", rc, mtype, moff); goto done; } finicsum = be32toh(caps.finicsum); cfcsum = be32toh(caps.cfcsum); if (finicsum != cfcsum) { device_printf(sc->dev, "WARNING: config file checksum mismatch: %08x %08x\n", finicsum, cfcsum); } sc->cfcsum = cfcsum; #define LIMIT_CAPS(x) do { \ caps.x &= htobe16(t4_##x##_allowed); \ } while (0) /* * Let the firmware know what features will (not) be used so it can tune * things accordingly. */ LIMIT_CAPS(linkcaps); LIMIT_CAPS(niccaps); LIMIT_CAPS(toecaps); LIMIT_CAPS(rdmacaps); LIMIT_CAPS(iscsicaps); LIMIT_CAPS(fcoecaps); #undef LIMIT_CAPS caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL); if (rc != 0) { device_printf(sc->dev, "failed to process config file: %d.\n", rc); } done: if (cfg != NULL) firmware_put(cfg, FIRMWARE_UNLOAD); return (rc); } /* * Retrieve parameters that are needed (or nice to have) very early. */ static int get_params__pre_init(struct adapter *sc) { int rc; uint32_t param[2], val[2]; struct fw_devlog_cmd cmd; struct devlog_params *dlog = &sc->params.devlog; param[0] = FW_PARAM_DEV(PORTVEC); param[1] = FW_PARAM_DEV(CCLK); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (pre_init): %d.\n", rc); return (rc); } sc->params.portvec = val[0]; sc->params.nports = bitcount32(val[0]); sc->params.vpd.cclk = val[1]; /* Read device log parameters. */ bzero(&cmd, sizeof(cmd)); cmd.op_to_write = htobe32(V_FW_CMD_OP(FW_DEVLOG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); cmd.retval_len16 = htobe32(FW_LEN16(cmd)); rc = -t4_wr_mbox(sc, sc->mbox, &cmd, sizeof(cmd), &cmd); if (rc != 0) { device_printf(sc->dev, "failed to get devlog parameters: %d.\n", rc); bzero(dlog, sizeof (*dlog)); rc = 0; /* devlog isn't critical for device operation */ } else { val[0] = be32toh(cmd.memtype_devlog_memaddr16_devlog); dlog->memtype = G_FW_DEVLOG_CMD_MEMTYPE_DEVLOG(val[0]); dlog->start = G_FW_DEVLOG_CMD_MEMADDR16_DEVLOG(val[0]) << 4; dlog->size = be32toh(cmd.memsize_devlog); } return (rc); } /* * Retrieve various parameters that are of interest to the driver. The device * has been initialized by the firmware at this point. */ static int get_params__post_init(struct adapter *sc) { int rc; uint32_t param[7], val[7]; struct fw_caps_config_cmd caps; param[0] = FW_PARAM_PFVF(IQFLINT_START); param[1] = FW_PARAM_PFVF(EQ_START); param[2] = FW_PARAM_PFVF(FILTER_START); param[3] = FW_PARAM_PFVF(FILTER_END); param[4] = FW_PARAM_PFVF(L2T_START); param[5] = FW_PARAM_PFVF(L2T_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (post_init): %d.\n", rc); return (rc); } sc->sge.iq_start = val[0]; sc->sge.eq_start = val[1]; sc->tids.ftid_base = val[2]; sc->tids.nftids = val[3] - val[2] + 1; sc->params.ftid_min = val[2]; sc->params.ftid_max = val[3]; sc->vres.l2t.start = val[4]; sc->vres.l2t.size = val[5] - val[4] + 1; KASSERT(sc->vres.l2t.size <= L2T_SIZE, ("%s: L2 table size (%u) larger than expected (%u)", __func__, sc->vres.l2t.size, L2T_SIZE)); /* get capabilites */ bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to get card capabilities: %d.\n", rc); return (rc); } #define READ_CAPS(x) do { \ sc->x = htobe16(caps.x); \ } while (0) READ_CAPS(linkcaps); READ_CAPS(niccaps); READ_CAPS(toecaps); READ_CAPS(rdmacaps); READ_CAPS(iscsicaps); READ_CAPS(fcoecaps); if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) { param[0] = FW_PARAM_PFVF(ETHOFLD_START); param[1] = FW_PARAM_PFVF(ETHOFLD_END); param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query NIC parameters: %d.\n", rc); return (rc); } sc->tids.etid_base = val[0]; sc->params.etid_min = val[0]; sc->tids.netids = val[1] - val[0] + 1; sc->params.netids = sc->tids.netids; sc->params.eo_wr_cred = val[2]; sc->params.ethoffload = 1; } if (sc->toecaps) { /* query offload-related parameters */ param[0] = FW_PARAM_DEV(NTID); param[1] = FW_PARAM_PFVF(SERVER_START); param[2] = FW_PARAM_PFVF(SERVER_END); param[3] = FW_PARAM_PFVF(TDDP_START); param[4] = FW_PARAM_PFVF(TDDP_END); param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query TOE parameters: %d.\n", rc); return (rc); } sc->tids.ntids = val[0]; sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS); sc->tids.stid_base = val[1]; sc->tids.nstids = val[2] - val[1] + 1; sc->vres.ddp.start = val[3]; sc->vres.ddp.size = val[4] - val[3] + 1; sc->params.ofldq_wr_cred = val[5]; sc->params.offload = 1; } if (sc->rdmacaps) { param[0] = FW_PARAM_PFVF(STAG_START); param[1] = FW_PARAM_PFVF(STAG_END); param[2] = FW_PARAM_PFVF(RQ_START); param[3] = FW_PARAM_PFVF(RQ_END); param[4] = FW_PARAM_PFVF(PBL_START); param[5] = FW_PARAM_PFVF(PBL_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(1): %d.\n", rc); return (rc); } sc->vres.stag.start = val[0]; sc->vres.stag.size = val[1] - val[0] + 1; sc->vres.rq.start = val[2]; sc->vres.rq.size = val[3] - val[2] + 1; sc->vres.pbl.start = val[4]; sc->vres.pbl.size = val[5] - val[4] + 1; param[0] = FW_PARAM_PFVF(SQRQ_START); param[1] = FW_PARAM_PFVF(SQRQ_END); param[2] = FW_PARAM_PFVF(CQ_START); param[3] = FW_PARAM_PFVF(CQ_END); param[4] = FW_PARAM_PFVF(OCQ_START); param[5] = FW_PARAM_PFVF(OCQ_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(2): %d.\n", rc); return (rc); } sc->vres.qp.start = val[0]; sc->vres.qp.size = val[1] - val[0] + 1; sc->vres.cq.start = val[2]; sc->vres.cq.size = val[3] - val[2] + 1; sc->vres.ocq.start = val[4]; sc->vres.ocq.size = val[5] - val[4] + 1; } if (sc->iscsicaps) { param[0] = FW_PARAM_PFVF(ISCSI_START); param[1] = FW_PARAM_PFVF(ISCSI_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query iSCSI parameters: %d.\n", rc); return (rc); } sc->vres.iscsi.start = val[0]; sc->vres.iscsi.size = val[1] - val[0] + 1; } /* * We've got the params we wanted to query via the firmware. Now grab * some others directly from the chip. */ rc = t4_read_chip_settings(sc); return (rc); } static int set_params__post_init(struct adapter *sc) { uint32_t param, val; /* ask for encapsulated CPLs */ param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP); val = 1; (void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); return (0); } #undef FW_PARAM_PFVF #undef FW_PARAM_DEV static void t4_set_desc(struct adapter *sc) { char buf[128]; struct adapter_params *p = &sc->params; snprintf(buf, sizeof(buf), "Chelsio %s %sNIC (rev %d), S/N:%s, " "P/N:%s, E/C:%s", p->vpd.id, is_offload(sc) ? "R" : "", chip_rev(sc), p->vpd.sn, p->vpd.pn, p->vpd.ec); device_set_desc_copy(sc->dev, buf); } static void build_medialist(struct port_info *pi, struct ifmedia *media) { int m; PORT_LOCK(pi); ifmedia_removeall(media); m = IFM_ETHER | IFM_FDX; switch(pi->port_type) { case FW_PORT_TYPE_BT_XFI: case FW_PORT_TYPE_BT_XAUI: ifmedia_add(media, m | IFM_10G_T, 0, NULL); /* fall through */ case FW_PORT_TYPE_BT_SGMII: ifmedia_add(media, m | IFM_1000_T, 0, NULL); ifmedia_add(media, m | IFM_100_TX, 0, NULL); ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(media, IFM_ETHER | IFM_AUTO); break; case FW_PORT_TYPE_CX4: ifmedia_add(media, m | IFM_10G_CX4, 0, NULL); ifmedia_set(media, m | IFM_10G_CX4); break; case FW_PORT_TYPE_QSFP_10G: case FW_PORT_TYPE_SFP: case FW_PORT_TYPE_FIBER_XFI: case FW_PORT_TYPE_FIBER_XAUI: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_10G_LR, 0, NULL); ifmedia_set(media, m | IFM_10G_LR); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_10G_SR, 0, NULL); ifmedia_set(media, m | IFM_10G_SR); break; case FW_PORT_MOD_TYPE_LRM: ifmedia_add(media, m | IFM_10G_LRM, 0, NULL); ifmedia_set(media, m | IFM_10G_LRM); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_10G_TWINAX, 0, NULL); ifmedia_set(media, m | IFM_10G_TWINAX); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; case FW_PORT_MOD_TYPE_NA: case FW_PORT_MOD_TYPE_ER: default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; case FW_PORT_TYPE_QSFP: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_40G_LR4, 0, NULL); ifmedia_set(media, m | IFM_40G_LR4); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_40G_SR4, 0, NULL); ifmedia_set(media, m | IFM_40G_SR4); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_40G_CR4, 0, NULL); ifmedia_set(media, m | IFM_40G_CR4); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } PORT_UNLOCK(pi); } #define FW_MAC_EXACT_CHUNK 7 /* * Program the port's XGMAC based on parameters in ifnet. The caller also * indicates which parameters should be programmed (the rest are left alone). */ int update_mac_settings(struct ifnet *ifp, int flags) { int rc = 0; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1; ASSERT_SYNCHRONIZED_OP(sc); KASSERT(flags, ("%s: not told what to update.", __func__)); if (flags & XGMAC_MTU) mtu = ifp->if_mtu; if (flags & XGMAC_PROMISC) promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0; if (flags & XGMAC_ALLMULTI) allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0; if (flags & XGMAC_VLANEX) vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0; if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) { rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc, allmulti, 1, vlanex, false); if (rc) { if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags, rc); return (rc); } } if (flags & XGMAC_UCADDR) { uint8_t ucaddr[ETHER_ADDR_LEN]; bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr)); rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt, ucaddr, true, true); if (rc < 0) { rc = -rc; if_printf(ifp, "change_mac failed: %d\n", rc); return (rc); } else { vi->xact_addr_filt = rc; rc = 0; } } if (flags & XGMAC_MCADDRS) { const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK]; int del = 1; uint64_t hash = 0; struct ifmultiaddr *ifma; int i = 0, j; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mcaddr[i] = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); MPASS(ETHER_IS_MULTICAST(mcaddr[i])); i++; if (i == FW_MAC_EXACT_CHUNK) { rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } del = 0; i = 0; } } if (i > 0) { rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } } rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0); if (rc != 0) if_printf(ifp, "failed to set mc address hash: %d", rc); mcfail: if_maddr_runlock(ifp); } return (rc); } /* * {begin|end}_synchronized_op must be called from the same thread. */ int begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags, char *wmesg) { int rc, pri; #ifdef WITNESS /* the caller thinks it's ok to sleep, but is it really? */ if (flags & SLEEP_OK) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "begin_synchronized_op"); #endif if (INTR_OK) pri = PCATCH; else pri = 0; ADAPTER_LOCK(sc); for (;;) { if (vi && IS_DOOMED(vi)) { rc = ENXIO; goto done; } if (!IS_BUSY(sc)) { rc = 0; break; } if (!(flags & SLEEP_OK)) { rc = EBUSY; goto done; } if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) { rc = EINTR; goto done; } } KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = wmesg; sc->last_op_thr = curthread; sc->last_op_flags = flags; #endif done: if (!(flags & HOLD_LOCK) || rc) ADAPTER_UNLOCK(sc); return (rc); } /* * Tell if_ioctl and if_init that the VI is going away. This is * special variant of begin_synchronized_op and must be paired with a * call to end_synchronized_op. */ void doom_vi(struct adapter *sc, struct vi_info *vi) { ADAPTER_LOCK(sc); SET_DOOMED(vi); wakeup(&sc->flags); while (IS_BUSY(sc)) mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = "t4detach"; sc->last_op_thr = curthread; sc->last_op_flags = 0; #endif ADAPTER_UNLOCK(sc); } /* * {begin|end}_synchronized_op must be called from the same thread. */ void end_synchronized_op(struct adapter *sc, int flags) { if (flags & LOCK_HELD) ADAPTER_LOCK_ASSERT_OWNED(sc); else ADAPTER_LOCK(sc); KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); wakeup(&sc->flags); ADAPTER_UNLOCK(sc); } static int cxgbe_init_synchronized(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; int rc = 0, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return (0); /* already running */ if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_full_init(sc)) != 0)) return (rc); /* error message displayed already */ if (!(vi->flags & VI_INIT_DONE) && ((rc = vi_full_init(vi)) != 0)) return (rc); /* error message displayed already */ rc = update_mac_settings(ifp, XGMAC_ALL); if (rc) goto done; /* error message displayed already */ rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true); if (rc != 0) { if_printf(ifp, "enable_vi failed: %d\n", rc); goto done; } /* * Can't fail from this point onwards. Review cxgbe_uninit_synchronized * if this changes. */ for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags |= EQ_ENABLED; TXQ_UNLOCK(txq); } /* * The first iq of the first port to come up is used for tracing. */ if (sc->traceq < 0 && IS_MAIN_VI(vi)) { sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id; t4_write_reg(sc, is_t4(sc) ? A_MPS_TRC_RSS_CONTROL : A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) | V_QUEUENUMBER(sc->traceq)); pi->flags |= HAS_TRACEQ; } /* all ok */ PORT_LOCK(pi); ifp->if_drv_flags |= IFF_DRV_RUNNING; pi->up_vis++; if (pi->nvi > 1) callout_reset(&vi->tick, hz, vi_tick, vi); else callout_reset(&pi->tick, hz, cxgbe_tick, pi); PORT_UNLOCK(pi); done: if (rc != 0) cxgbe_uninit_synchronized(vi); return (rc); } /* * Idempotent. */ static int cxgbe_uninit_synchronized(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; int rc, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); if (!(vi->flags & VI_INIT_DONE)) { KASSERT(!(ifp->if_drv_flags & IFF_DRV_RUNNING), ("uninited VI is running")); return (0); } /* * Disable the VI so that all its data in either direction is discarded * by the MPS. Leave everything else (the queues, interrupts, and 1Hz * tick) intact as the TP can deliver negative advice or data that it's * holding in its RAM (for an offloaded connection) even after the VI is * disabled. */ rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false); if (rc) { if_printf(ifp, "disable_vi failed: %d\n", rc); return (rc); } for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags &= ~EQ_ENABLED; TXQ_UNLOCK(txq); } PORT_LOCK(pi); if (pi->nvi == 1) callout_stop(&pi->tick); else callout_stop(&vi->tick); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { PORT_UNLOCK(pi); return (0); } ifp->if_drv_flags &= ~IFF_DRV_RUNNING; pi->up_vis--; if (pi->up_vis > 0) { PORT_UNLOCK(pi); return (0); } PORT_UNLOCK(pi); pi->link_cfg.link_ok = 0; pi->link_cfg.speed = 0; pi->linkdnrc = -1; t4_os_link_changed(sc, pi->port_id, 0, -1); return (0); } /* * It is ok for this function to fail midway and return right away. t4_detach * will walk the entire sc->irq list and clean up whatever is valid. */ static int setup_intr_handlers(struct adapter *sc) { int rc, rid, p, q, v; char s[8]; struct irq *irq; struct port_info *pi; struct vi_info *vi; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif #ifdef DEV_NETMAP struct sge_nm_rxq *nm_rxq; #endif +#ifdef RSS + int nbuckets = rss_getnumbuckets(); +#endif /* * Setup interrupts. */ irq = &sc->irq[0]; rid = sc->intr_type == INTR_INTX ? 0 : 1; if (sc->intr_count == 1) return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all")); /* Multiple interrupts. */ KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports, ("%s: too few intr.", __func__)); /* The first one is always error intr */ rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err"); if (rc != 0) return (rc); irq++; rid++; /* The second one is always the firmware event queue */ rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sc->sge.fwq, "evt"); if (rc != 0) return (rc); irq++; rid++; for_each_port(sc, p) { pi = sc->port[p]; for_each_vi(pi, v, vi) { vi->first_intr = rid - 1; #ifdef DEV_NETMAP if (vi->flags & VI_NETMAP) { for_each_nm_rxq(vi, q, nm_rxq) { snprintf(s, sizeof(s), "%d-%d", p, q); rc = t4_alloc_irq(sc, irq, rid, t4_nm_intr, nm_rxq, s); if (rc != 0) return (rc); irq++; rid++; vi->nintr++; } continue; } #endif if (vi->flags & INTR_RXQ) { for_each_rxq(vi, q, rxq) { if (v == 0) snprintf(s, sizeof(s), "%d.%d", p, q); else snprintf(s, sizeof(s), "%d(%d).%d", p, v, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, rxq, s); if (rc != 0) return (rc); +#ifdef RSS + bus_bind_intr(sc->dev, irq->res, + rss_getcpu(q % nbuckets)); +#endif irq++; rid++; vi->nintr++; } } #ifdef TCP_OFFLOAD if (vi->flags & INTR_OFLD_RXQ) { for_each_ofld_rxq(vi, q, ofld_rxq) { snprintf(s, sizeof(s), "%d,%d", p, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, ofld_rxq, s); if (rc != 0) return (rc); irq++; rid++; vi->nintr++; } } #endif } } MPASS(irq == &sc->irq[sc->intr_count]); return (0); } int adapter_full_init(struct adapter *sc) { int rc, i; ASSERT_SYNCHRONIZED_OP(sc); ADAPTER_LOCK_ASSERT_NOTOWNED(sc); KASSERT((sc->flags & FULL_INIT_DONE) == 0, ("%s: FULL_INIT_DONE already", __func__)); /* * queues that belong to the adapter (not any particular port). */ rc = t4_setup_adapter_queues(sc); if (rc != 0) goto done; for (i = 0; i < nitems(sc->tq); i++) { sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->tq[i]); if (sc->tq[i] == NULL) { device_printf(sc->dev, "failed to allocate task queue %d\n", i); rc = ENOMEM; goto done; } taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d", device_get_nameunit(sc->dev), i); } t4_intr_enable(sc); sc->flags |= FULL_INIT_DONE; done: if (rc != 0) adapter_full_uninit(sc); return (rc); } int adapter_full_uninit(struct adapter *sc) { int i; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); t4_teardown_adapter_queues(sc); for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) { taskqueue_free(sc->tq[i]); sc->tq[i] = NULL; } sc->flags &= ~FULL_INIT_DONE; return (0); } #ifdef RSS #define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \ RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \ RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \ RSS_HASHTYPE_RSS_UDP_IPV6) /* Translates kernel hash types to hardware. */ static int hashconfig_to_hashen(int hashconfig) { int hashen = 0; if (hashconfig & RSS_HASHTYPE_RSS_IPV4) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_IPV6) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) { hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; } if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) { hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; } if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; return (hashen); } /* Translates hardware hash types to kernel. */ static int hashen_to_hashconfig(int hashen) { int hashconfig = 0; if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) { /* * If UDP hashing was enabled it must have been enabled for * either IPv4 or IPv6 (inclusive or). Enabling UDP without * enabling any 4-tuple hash is nonsense configuration. */ MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)); if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6; } if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN) hashconfig |= RSS_HASHTYPE_RSS_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN) hashconfig |= RSS_HASHTYPE_RSS_IPV6; return (hashconfig); } #endif int vi_full_init(struct vi_info *vi) { struct adapter *sc = vi->pi->adapter; struct ifnet *ifp = vi->ifp; uint16_t *rss; struct sge_rxq *rxq; int rc, i, j, hashen; #ifdef RSS int nbuckets = rss_getnumbuckets(); int hashconfig = rss_gethashconfig(); int extra; uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; #endif ASSERT_SYNCHRONIZED_OP(sc); KASSERT((vi->flags & VI_INIT_DONE) == 0, ("%s: VI_INIT_DONE already", __func__)); sysctl_ctx_init(&vi->ctx); vi->flags |= VI_SYSCTL_CTX; /* * Allocate tx/rx/fl queues for this VI. */ rc = t4_setup_vi_queues(vi); if (rc != 0) goto done; /* error message displayed already */ #ifdef DEV_NETMAP /* Netmap VIs configure RSS when netmap is enabled. */ if (vi->flags & VI_NETMAP) { vi->flags |= VI_INIT_DONE; return (0); } #endif /* * Setup RSS for this VI. Save a copy of the RSS table for later use. */ if (vi->nrxq > vi->rss_size) { if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); " "some queues will never receive traffic.\n", vi->nrxq, vi->rss_size); } else if (vi->rss_size % vi->nrxq) { if_printf(ifp, "nrxq (%d), hw RSS table size (%d); " "expect uneven traffic distribution.\n", vi->nrxq, vi->rss_size); } #ifdef RSS MPASS(RSS_KEYSIZE == 40); if (vi->nrxq != nbuckets) { if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);" "performance will be impacted.\n", vi->nrxq, nbuckets); } rss_getkey((void *)&raw_rss_key[0]); for (i = 0; i < nitems(rss_key); i++) { rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]); } t4_write_rss_key(sc, (void *)&rss_key[0], -1); #endif rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK); for (i = 0; i < vi->rss_size;) { #ifdef RSS j = rss_get_indirection_to_bucket(i); j %= vi->nrxq; rxq = &sc->sge.rxq[vi->first_rxq + j]; rss[i++] = rxq->iq.abs_id; #else for_each_rxq(vi, j, rxq) { rss[i++] = rxq->iq.abs_id; if (i == vi->rss_size) break; } #endif } rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss, vi->rss_size); if (rc != 0) { if_printf(ifp, "rss_config failed: %d\n", rc); goto done; } #ifdef RSS hashen = hashconfig_to_hashen(hashconfig); /* * We may have had to enable some hashes even though the global config * wants them disabled. This is a potential problem that must be * reported to the user. */ extra = hashen_to_hashconfig(hashen) ^ hashconfig; /* * If we consider only the supported hash types, then the enabled hashes * are a superset of the requested hashes. In other words, there cannot * be any supported hash that was requested but not enabled, but there * can be hashes that were not requested but had to be enabled. */ extra &= SUPPORTED_RSS_HASHTYPES; MPASS((extra & hashconfig) == 0); if (extra) { if_printf(ifp, "global RSS config (0x%x) cannot be accomodated.\n", hashconfig); } if (extra & RSS_HASHTYPE_RSS_IPV4) if_printf(ifp, "IPv4 2-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_TCP_IPV4) if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_IPV6) if_printf(ifp, "IPv6 2-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_TCP_IPV6) if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_UDP_IPV4) if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_UDP_IPV6) if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n"); #else hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN; #endif rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, hashen, rss[0]); if (rc != 0) { if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc); goto done; } vi->rss = rss; vi->flags |= VI_INIT_DONE; done: if (rc != 0) vi_full_uninit(vi); return (rc); } /* * Idempotent. */ int vi_full_uninit(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int i; struct sge_rxq *rxq; struct sge_txq *txq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ofld_txq; #endif if (vi->flags & VI_INIT_DONE) { /* Need to quiesce queues. */ #ifdef DEV_NETMAP if (vi->flags & VI_NETMAP) goto skip; #endif /* XXX: Only for the first VI? */ if (IS_MAIN_VI(vi)) quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]); for_each_txq(vi, i, txq) { quiesce_txq(sc, txq); } #ifdef TCP_OFFLOAD for_each_ofld_txq(vi, i, ofld_txq) { quiesce_wrq(sc, ofld_txq); } #endif for_each_rxq(vi, i, rxq) { quiesce_iq(sc, &rxq->iq); quiesce_fl(sc, &rxq->fl); } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { quiesce_iq(sc, &ofld_rxq->iq); quiesce_fl(sc, &ofld_rxq->fl); } #endif free(vi->rss, M_CXGBE); } #ifdef DEV_NETMAP skip: #endif t4_teardown_vi_queues(vi); vi->flags &= ~VI_INIT_DONE; return (0); } static void quiesce_txq(struct adapter *sc, struct sge_txq *txq) { struct sge_eq *eq = &txq->eq; struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; (void) sc; /* unused */ #ifdef INVARIANTS TXQ_LOCK(txq); MPASS((eq->flags & EQ_ENABLED) == 0); TXQ_UNLOCK(txq); #endif /* Wait for the mp_ring to empty. */ while (!mp_ring_is_idle(txq->r)) { mp_ring_check_drainage(txq->r, 0); pause("rquiesce", 1); } /* Then wait for the hardware to finish. */ while (spg->cidx != htobe16(eq->pidx)) pause("equiesce", 1); /* Finally, wait for the driver to reclaim all descriptors. */ while (eq->cidx != eq->pidx) pause("dquiesce", 1); } static void quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq) { /* XXXTX */ } static void quiesce_iq(struct adapter *sc, struct sge_iq *iq) { (void) sc; /* unused */ /* Synchronize with the interrupt handler */ while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED)) pause("iqfree", 1); } static void quiesce_fl(struct adapter *sc, struct sge_fl *fl) { mtx_lock(&sc->sfl_lock); FL_LOCK(fl); fl->flags |= FL_DOOMED; FL_UNLOCK(fl); callout_stop(&sc->sfl_callout); mtx_unlock(&sc->sfl_lock); KASSERT((fl->flags & FL_STARVING) == 0, ("%s: still starving", __func__)); } static int t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid, driver_intr_t *handler, void *arg, char *name) { int rc; irq->rid = rid; irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid, RF_SHAREABLE | RF_ACTIVE); if (irq->res == NULL) { device_printf(sc->dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET, NULL, handler, arg, &irq->tag); if (rc != 0) { device_printf(sc->dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name, rc); } else if (name) bus_describe_intr(sc->dev, irq->res, irq->tag, name); return (rc); } static int t4_free_irq(struct adapter *sc, struct irq *irq) { if (irq->tag) bus_teardown_intr(sc->dev, irq->res, irq->tag); if (irq->res) bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res); bzero(irq, sizeof(*irq)); return (0); } static void reg_block_dump(struct adapter *sc, uint8_t *buf, unsigned int start, unsigned int end) { uint32_t *p = (uint32_t *)(buf + start); for ( ; start <= end; start += sizeof(uint32_t)) *p++ = t4_read_reg(sc, start); } static void t4_get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf) { int i, n; const unsigned int *reg_ranges; static const unsigned int t4_reg_ranges[] = { 0x1008, 0x1108, 0x1180, 0x11b4, 0x11fc, 0x123c, 0x1300, 0x173c, 0x1800, 0x18fc, 0x3000, 0x30d8, 0x30e0, 0x5924, 0x5960, 0x59d4, 0x5a00, 0x5af8, 0x6000, 0x6098, 0x6100, 0x6150, 0x6200, 0x6208, 0x6240, 0x6248, 0x6280, 0x6338, 0x6370, 0x638c, 0x6400, 0x643c, 0x6500, 0x6524, 0x6a00, 0x6a38, 0x6a60, 0x6a78, 0x6b00, 0x6b84, 0x6bf0, 0x6c84, 0x6cf0, 0x6d84, 0x6df0, 0x6e84, 0x6ef0, 0x6f84, 0x6ff0, 0x7084, 0x70f0, 0x7184, 0x71f0, 0x7284, 0x72f0, 0x7384, 0x73f0, 0x7450, 0x7500, 0x7530, 0x7600, 0x761c, 0x7680, 0x76cc, 0x7700, 0x7798, 0x77c0, 0x77fc, 0x7900, 0x79fc, 0x7b00, 0x7c38, 0x7d00, 0x7efc, 0x8dc0, 0x8e1c, 0x8e30, 0x8e78, 0x8ea0, 0x8f6c, 0x8fc0, 0x9074, 0x90fc, 0x90fc, 0x9400, 0x9458, 0x9600, 0x96bc, 0x9800, 0x9808, 0x9820, 0x983c, 0x9850, 0x9864, 0x9c00, 0x9c6c, 0x9c80, 0x9cec, 0x9d00, 0x9d6c, 0x9d80, 0x9dec, 0x9e00, 0x9e6c, 0x9e80, 0x9eec, 0x9f00, 0x9f6c, 0x9f80, 0x9fec, 0xd004, 0xd03c, 0xdfc0, 0xdfe0, 0xe000, 0xea7c, 0xf000, 0x11110, 0x11118, 0x11190, 0x19040, 0x1906c, 0x19078, 0x19080, 0x1908c, 0x19124, 0x19150, 0x191b0, 0x191d0, 0x191e8, 0x19238, 0x1924c, 0x193f8, 0x19474, 0x19490, 0x194f8, 0x19800, 0x19f30, 0x1a000, 0x1a06c, 0x1a0b0, 0x1a120, 0x1a128, 0x1a138, 0x1a190, 0x1a1c4, 0x1a1fc, 0x1a1fc, 0x1e040, 0x1e04c, 0x1e284, 0x1e28c, 0x1e2c0, 0x1e2c0, 0x1e2e0, 0x1e2e0, 0x1e300, 0x1e384, 0x1e3c0, 0x1e3c8, 0x1e440, 0x1e44c, 0x1e684, 0x1e68c, 0x1e6c0, 0x1e6c0, 0x1e6e0, 0x1e6e0, 0x1e700, 0x1e784, 0x1e7c0, 0x1e7c8, 0x1e840, 0x1e84c, 0x1ea84, 0x1ea8c, 0x1eac0, 0x1eac0, 0x1eae0, 0x1eae0, 0x1eb00, 0x1eb84, 0x1ebc0, 0x1ebc8, 0x1ec40, 0x1ec4c, 0x1ee84, 0x1ee8c, 0x1eec0, 0x1eec0, 0x1eee0, 0x1eee0, 0x1ef00, 0x1ef84, 0x1efc0, 0x1efc8, 0x1f040, 0x1f04c, 0x1f284, 0x1f28c, 0x1f2c0, 0x1f2c0, 0x1f2e0, 0x1f2e0, 0x1f300, 0x1f384, 0x1f3c0, 0x1f3c8, 0x1f440, 0x1f44c, 0x1f684, 0x1f68c, 0x1f6c0, 0x1f6c0, 0x1f6e0, 0x1f6e0, 0x1f700, 0x1f784, 0x1f7c0, 0x1f7c8, 0x1f840, 0x1f84c, 0x1fa84, 0x1fa8c, 0x1fac0, 0x1fac0, 0x1fae0, 0x1fae0, 0x1fb00, 0x1fb84, 0x1fbc0, 0x1fbc8, 0x1fc40, 0x1fc4c, 0x1fe84, 0x1fe8c, 0x1fec0, 0x1fec0, 0x1fee0, 0x1fee0, 0x1ff00, 0x1ff84, 0x1ffc0, 0x1ffc8, 0x20000, 0x2002c, 0x20100, 0x2013c, 0x20190, 0x201c8, 0x20200, 0x20318, 0x20400, 0x20528, 0x20540, 0x20614, 0x21000, 0x21040, 0x2104c, 0x21060, 0x210c0, 0x210ec, 0x21200, 0x21268, 0x21270, 0x21284, 0x212fc, 0x21388, 0x21400, 0x21404, 0x21500, 0x21518, 0x2152c, 0x2153c, 0x21550, 0x21554, 0x21600, 0x21600, 0x21608, 0x21628, 0x21630, 0x2163c, 0x21700, 0x2171c, 0x21780, 0x2178c, 0x21800, 0x21c38, 0x21c80, 0x21d7c, 0x21e00, 0x21e04, 0x22000, 0x2202c, 0x22100, 0x2213c, 0x22190, 0x221c8, 0x22200, 0x22318, 0x22400, 0x22528, 0x22540, 0x22614, 0x23000, 0x23040, 0x2304c, 0x23060, 0x230c0, 0x230ec, 0x23200, 0x23268, 0x23270, 0x23284, 0x232fc, 0x23388, 0x23400, 0x23404, 0x23500, 0x23518, 0x2352c, 0x2353c, 0x23550, 0x23554, 0x23600, 0x23600, 0x23608, 0x23628, 0x23630, 0x2363c, 0x23700, 0x2371c, 0x23780, 0x2378c, 0x23800, 0x23c38, 0x23c80, 0x23d7c, 0x23e00, 0x23e04, 0x24000, 0x2402c, 0x24100, 0x2413c, 0x24190, 0x241c8, 0x24200, 0x24318, 0x24400, 0x24528, 0x24540, 0x24614, 0x25000, 0x25040, 0x2504c, 0x25060, 0x250c0, 0x250ec, 0x25200, 0x25268, 0x25270, 0x25284, 0x252fc, 0x25388, 0x25400, 0x25404, 0x25500, 0x25518, 0x2552c, 0x2553c, 0x25550, 0x25554, 0x25600, 0x25600, 0x25608, 0x25628, 0x25630, 0x2563c, 0x25700, 0x2571c, 0x25780, 0x2578c, 0x25800, 0x25c38, 0x25c80, 0x25d7c, 0x25e00, 0x25e04, 0x26000, 0x2602c, 0x26100, 0x2613c, 0x26190, 0x261c8, 0x26200, 0x26318, 0x26400, 0x26528, 0x26540, 0x26614, 0x27000, 0x27040, 0x2704c, 0x27060, 0x270c0, 0x270ec, 0x27200, 0x27268, 0x27270, 0x27284, 0x272fc, 0x27388, 0x27400, 0x27404, 0x27500, 0x27518, 0x2752c, 0x2753c, 0x27550, 0x27554, 0x27600, 0x27600, 0x27608, 0x27628, 0x27630, 0x2763c, 0x27700, 0x2771c, 0x27780, 0x2778c, 0x27800, 0x27c38, 0x27c80, 0x27d7c, 0x27e00, 0x27e04 }; static const unsigned int t5_reg_ranges[] = { 0x1008, 0x1148, 0x1180, 0x11b4, 0x11fc, 0x123c, 0x1280, 0x173c, 0x1800, 0x18fc, 0x3000, 0x3028, 0x3060, 0x30d8, 0x30e0, 0x30fc, 0x3140, 0x357c, 0x35a8, 0x35cc, 0x35ec, 0x35ec, 0x3600, 0x5624, 0x56cc, 0x575c, 0x580c, 0x5814, 0x5890, 0x58bc, 0x5940, 0x59dc, 0x59fc, 0x5a18, 0x5a60, 0x5a9c, 0x5b94, 0x5bfc, 0x6000, 0x6040, 0x6058, 0x614c, 0x7700, 0x7798, 0x77c0, 0x78fc, 0x7b00, 0x7c54, 0x7d00, 0x7efc, 0x8dc0, 0x8de0, 0x8df8, 0x8e84, 0x8ea0, 0x8f84, 0x8fc0, 0x90f8, 0x9400, 0x9470, 0x9600, 0x96f4, 0x9800, 0x9808, 0x9820, 0x983c, 0x9850, 0x9864, 0x9c00, 0x9c6c, 0x9c80, 0x9cec, 0x9d00, 0x9d6c, 0x9d80, 0x9dec, 0x9e00, 0x9e6c, 0x9e80, 0x9eec, 0x9f00, 0x9f6c, 0x9f80, 0xa020, 0xd004, 0xd03c, 0xdfc0, 0xdfe0, 0xe000, 0x11088, 0x1109c, 0x11110, 0x11118, 0x1117c, 0x11190, 0x11204, 0x19040, 0x1906c, 0x19078, 0x19080, 0x1908c, 0x19124, 0x19150, 0x191b0, 0x191d0, 0x191e8, 0x19238, 0x19290, 0x193f8, 0x19474, 0x19490, 0x194cc, 0x194f0, 0x194f8, 0x19c00, 0x19c60, 0x19c94, 0x19e10, 0x19e50, 0x19f34, 0x19f40, 0x19f50, 0x19f90, 0x19fe4, 0x1a000, 0x1a06c, 0x1a0b0, 0x1a120, 0x1a128, 0x1a138, 0x1a190, 0x1a1c4, 0x1a1fc, 0x1a1fc, 0x1e008, 0x1e00c, 0x1e040, 0x1e04c, 0x1e284, 0x1e290, 0x1e2c0, 0x1e2c0, 0x1e2e0, 0x1e2e0, 0x1e300, 0x1e384, 0x1e3c0, 0x1e3c8, 0x1e408, 0x1e40c, 0x1e440, 0x1e44c, 0x1e684, 0x1e690, 0x1e6c0, 0x1e6c0, 0x1e6e0, 0x1e6e0, 0x1e700, 0x1e784, 0x1e7c0, 0x1e7c8, 0x1e808, 0x1e80c, 0x1e840, 0x1e84c, 0x1ea84, 0x1ea90, 0x1eac0, 0x1eac0, 0x1eae0, 0x1eae0, 0x1eb00, 0x1eb84, 0x1ebc0, 0x1ebc8, 0x1ec08, 0x1ec0c, 0x1ec40, 0x1ec4c, 0x1ee84, 0x1ee90, 0x1eec0, 0x1eec0, 0x1eee0, 0x1eee0, 0x1ef00, 0x1ef84, 0x1efc0, 0x1efc8, 0x1f008, 0x1f00c, 0x1f040, 0x1f04c, 0x1f284, 0x1f290, 0x1f2c0, 0x1f2c0, 0x1f2e0, 0x1f2e0, 0x1f300, 0x1f384, 0x1f3c0, 0x1f3c8, 0x1f408, 0x1f40c, 0x1f440, 0x1f44c, 0x1f684, 0x1f690, 0x1f6c0, 0x1f6c0, 0x1f6e0, 0x1f6e0, 0x1f700, 0x1f784, 0x1f7c0, 0x1f7c8, 0x1f808, 0x1f80c, 0x1f840, 0x1f84c, 0x1fa84, 0x1fa90, 0x1fac0, 0x1fac0, 0x1fae0, 0x1fae0, 0x1fb00, 0x1fb84, 0x1fbc0, 0x1fbc8, 0x1fc08, 0x1fc0c, 0x1fc40, 0x1fc4c, 0x1fe84, 0x1fe90, 0x1fec0, 0x1fec0, 0x1fee0, 0x1fee0, 0x1ff00, 0x1ff84, 0x1ffc0, 0x1ffc8, 0x30000, 0x30030, 0x30100, 0x30144, 0x30190, 0x301d0, 0x30200, 0x30318, 0x30400, 0x3052c, 0x30540, 0x3061c, 0x30800, 0x30834, 0x308c0, 0x30908, 0x30910, 0x309ac, 0x30a00, 0x30a2c, 0x30a44, 0x30a50, 0x30a74, 0x30c24, 0x30d00, 0x30d00, 0x30d08, 0x30d14, 0x30d1c, 0x30d20, 0x30d3c, 0x30d50, 0x31200, 0x3120c, 0x31220, 0x31220, 0x31240, 0x31240, 0x31600, 0x3160c, 0x31a00, 0x31a1c, 0x31e00, 0x31e20, 0x31e38, 0x31e3c, 0x31e80, 0x31e80, 0x31e88, 0x31ea8, 0x31eb0, 0x31eb4, 0x31ec8, 0x31ed4, 0x31fb8, 0x32004, 0x32200, 0x32200, 0x32208, 0x32240, 0x32248, 0x32280, 0x32288, 0x322c0, 0x322c8, 0x322fc, 0x32600, 0x32630, 0x32a00, 0x32abc, 0x32b00, 0x32b70, 0x33000, 0x33048, 0x33060, 0x3309c, 0x330f0, 0x33148, 0x33160, 0x3319c, 0x331f0, 0x332e4, 0x332f8, 0x333e4, 0x333f8, 0x33448, 0x33460, 0x3349c, 0x334f0, 0x33548, 0x33560, 0x3359c, 0x335f0, 0x336e4, 0x336f8, 0x337e4, 0x337f8, 0x337fc, 0x33814, 0x33814, 0x3382c, 0x3382c, 0x33880, 0x3388c, 0x338e8, 0x338ec, 0x33900, 0x33948, 0x33960, 0x3399c, 0x339f0, 0x33ae4, 0x33af8, 0x33b10, 0x33b28, 0x33b28, 0x33b3c, 0x33b50, 0x33bf0, 0x33c10, 0x33c28, 0x33c28, 0x33c3c, 0x33c50, 0x33cf0, 0x33cfc, 0x34000, 0x34030, 0x34100, 0x34144, 0x34190, 0x341d0, 0x34200, 0x34318, 0x34400, 0x3452c, 0x34540, 0x3461c, 0x34800, 0x34834, 0x348c0, 0x34908, 0x34910, 0x349ac, 0x34a00, 0x34a2c, 0x34a44, 0x34a50, 0x34a74, 0x34c24, 0x34d00, 0x34d00, 0x34d08, 0x34d14, 0x34d1c, 0x34d20, 0x34d3c, 0x34d50, 0x35200, 0x3520c, 0x35220, 0x35220, 0x35240, 0x35240, 0x35600, 0x3560c, 0x35a00, 0x35a1c, 0x35e00, 0x35e20, 0x35e38, 0x35e3c, 0x35e80, 0x35e80, 0x35e88, 0x35ea8, 0x35eb0, 0x35eb4, 0x35ec8, 0x35ed4, 0x35fb8, 0x36004, 0x36200, 0x36200, 0x36208, 0x36240, 0x36248, 0x36280, 0x36288, 0x362c0, 0x362c8, 0x362fc, 0x36600, 0x36630, 0x36a00, 0x36abc, 0x36b00, 0x36b70, 0x37000, 0x37048, 0x37060, 0x3709c, 0x370f0, 0x37148, 0x37160, 0x3719c, 0x371f0, 0x372e4, 0x372f8, 0x373e4, 0x373f8, 0x37448, 0x37460, 0x3749c, 0x374f0, 0x37548, 0x37560, 0x3759c, 0x375f0, 0x376e4, 0x376f8, 0x377e4, 0x377f8, 0x377fc, 0x37814, 0x37814, 0x3782c, 0x3782c, 0x37880, 0x3788c, 0x378e8, 0x378ec, 0x37900, 0x37948, 0x37960, 0x3799c, 0x379f0, 0x37ae4, 0x37af8, 0x37b10, 0x37b28, 0x37b28, 0x37b3c, 0x37b50, 0x37bf0, 0x37c10, 0x37c28, 0x37c28, 0x37c3c, 0x37c50, 0x37cf0, 0x37cfc, 0x38000, 0x38030, 0x38100, 0x38144, 0x38190, 0x381d0, 0x38200, 0x38318, 0x38400, 0x3852c, 0x38540, 0x3861c, 0x38800, 0x38834, 0x388c0, 0x38908, 0x38910, 0x389ac, 0x38a00, 0x38a2c, 0x38a44, 0x38a50, 0x38a74, 0x38c24, 0x38d00, 0x38d00, 0x38d08, 0x38d14, 0x38d1c, 0x38d20, 0x38d3c, 0x38d50, 0x39200, 0x3920c, 0x39220, 0x39220, 0x39240, 0x39240, 0x39600, 0x3960c, 0x39a00, 0x39a1c, 0x39e00, 0x39e20, 0x39e38, 0x39e3c, 0x39e80, 0x39e80, 0x39e88, 0x39ea8, 0x39eb0, 0x39eb4, 0x39ec8, 0x39ed4, 0x39fb8, 0x3a004, 0x3a200, 0x3a200, 0x3a208, 0x3a240, 0x3a248, 0x3a280, 0x3a288, 0x3a2c0, 0x3a2c8, 0x3a2fc, 0x3a600, 0x3a630, 0x3aa00, 0x3aabc, 0x3ab00, 0x3ab70, 0x3b000, 0x3b048, 0x3b060, 0x3b09c, 0x3b0f0, 0x3b148, 0x3b160, 0x3b19c, 0x3b1f0, 0x3b2e4, 0x3b2f8, 0x3b3e4, 0x3b3f8, 0x3b448, 0x3b460, 0x3b49c, 0x3b4f0, 0x3b548, 0x3b560, 0x3b59c, 0x3b5f0, 0x3b6e4, 0x3b6f8, 0x3b7e4, 0x3b7f8, 0x3b7fc, 0x3b814, 0x3b814, 0x3b82c, 0x3b82c, 0x3b880, 0x3b88c, 0x3b8e8, 0x3b8ec, 0x3b900, 0x3b948, 0x3b960, 0x3b99c, 0x3b9f0, 0x3bae4, 0x3baf8, 0x3bb10, 0x3bb28, 0x3bb28, 0x3bb3c, 0x3bb50, 0x3bbf0, 0x3bc10, 0x3bc28, 0x3bc28, 0x3bc3c, 0x3bc50, 0x3bcf0, 0x3bcfc, 0x3c000, 0x3c030, 0x3c100, 0x3c144, 0x3c190, 0x3c1d0, 0x3c200, 0x3c318, 0x3c400, 0x3c52c, 0x3c540, 0x3c61c, 0x3c800, 0x3c834, 0x3c8c0, 0x3c908, 0x3c910, 0x3c9ac, 0x3ca00, 0x3ca2c, 0x3ca44, 0x3ca50, 0x3ca74, 0x3cc24, 0x3cd00, 0x3cd00, 0x3cd08, 0x3cd14, 0x3cd1c, 0x3cd20, 0x3cd3c, 0x3cd50, 0x3d200, 0x3d20c, 0x3d220, 0x3d220, 0x3d240, 0x3d240, 0x3d600, 0x3d60c, 0x3da00, 0x3da1c, 0x3de00, 0x3de20, 0x3de38, 0x3de3c, 0x3de80, 0x3de80, 0x3de88, 0x3dea8, 0x3deb0, 0x3deb4, 0x3dec8, 0x3ded4, 0x3dfb8, 0x3e004, 0x3e200, 0x3e200, 0x3e208, 0x3e240, 0x3e248, 0x3e280, 0x3e288, 0x3e2c0, 0x3e2c8, 0x3e2fc, 0x3e600, 0x3e630, 0x3ea00, 0x3eabc, 0x3eb00, 0x3eb70, 0x3f000, 0x3f048, 0x3f060, 0x3f09c, 0x3f0f0, 0x3f148, 0x3f160, 0x3f19c, 0x3f1f0, 0x3f2e4, 0x3f2f8, 0x3f3e4, 0x3f3f8, 0x3f448, 0x3f460, 0x3f49c, 0x3f4f0, 0x3f548, 0x3f560, 0x3f59c, 0x3f5f0, 0x3f6e4, 0x3f6f8, 0x3f7e4, 0x3f7f8, 0x3f7fc, 0x3f814, 0x3f814, 0x3f82c, 0x3f82c, 0x3f880, 0x3f88c, 0x3f8e8, 0x3f8ec, 0x3f900, 0x3f948, 0x3f960, 0x3f99c, 0x3f9f0, 0x3fae4, 0x3faf8, 0x3fb10, 0x3fb28, 0x3fb28, 0x3fb3c, 0x3fb50, 0x3fbf0, 0x3fc10, 0x3fc28, 0x3fc28, 0x3fc3c, 0x3fc50, 0x3fcf0, 0x3fcfc, 0x40000, 0x4000c, 0x40040, 0x40068, 0x4007c, 0x40144, 0x40180, 0x4018c, 0x40200, 0x40298, 0x402ac, 0x4033c, 0x403f8, 0x403fc, 0x41304, 0x413c4, 0x41400, 0x4141c, 0x41480, 0x414d0, 0x44000, 0x44078, 0x440c0, 0x44278, 0x442c0, 0x44478, 0x444c0, 0x44678, 0x446c0, 0x44878, 0x448c0, 0x449fc, 0x45000, 0x45068, 0x45080, 0x45084, 0x450a0, 0x450b0, 0x45200, 0x45268, 0x45280, 0x45284, 0x452a0, 0x452b0, 0x460c0, 0x460e4, 0x47000, 0x4708c, 0x47200, 0x47250, 0x47400, 0x47420, 0x47600, 0x47618, 0x47800, 0x47814, 0x48000, 0x4800c, 0x48040, 0x48068, 0x4807c, 0x48144, 0x48180, 0x4818c, 0x48200, 0x48298, 0x482ac, 0x4833c, 0x483f8, 0x483fc, 0x49304, 0x493c4, 0x49400, 0x4941c, 0x49480, 0x494d0, 0x4c000, 0x4c078, 0x4c0c0, 0x4c278, 0x4c2c0, 0x4c478, 0x4c4c0, 0x4c678, 0x4c6c0, 0x4c878, 0x4c8c0, 0x4c9fc, 0x4d000, 0x4d068, 0x4d080, 0x4d084, 0x4d0a0, 0x4d0b0, 0x4d200, 0x4d268, 0x4d280, 0x4d284, 0x4d2a0, 0x4d2b0, 0x4e0c0, 0x4e0e4, 0x4f000, 0x4f08c, 0x4f200, 0x4f250, 0x4f400, 0x4f420, 0x4f600, 0x4f618, 0x4f800, 0x4f814, 0x50000, 0x500cc, 0x50400, 0x50400, 0x50800, 0x508cc, 0x50c00, 0x50c00, 0x51000, 0x5101c, 0x51300, 0x51308, }; if (is_t4(sc)) { reg_ranges = &t4_reg_ranges[0]; n = nitems(t4_reg_ranges); } else { reg_ranges = &t5_reg_ranges[0]; n = nitems(t5_reg_ranges); } regs->version = chip_id(sc) | chip_rev(sc) << 10; for (i = 0; i < n; i += 2) reg_block_dump(sc, buf, reg_ranges[i], reg_ranges[i + 1]); } #define A_PL_INDIR_CMD 0x1f8 #define S_PL_AUTOINC 31 #define M_PL_AUTOINC 0x1U #define V_PL_AUTOINC(x) ((x) << S_PL_AUTOINC) #define G_PL_AUTOINC(x) (((x) >> S_PL_AUTOINC) & M_PL_AUTOINC) #define S_PL_VFID 20 #define M_PL_VFID 0xffU #define V_PL_VFID(x) ((x) << S_PL_VFID) #define G_PL_VFID(x) (((x) >> S_PL_VFID) & M_PL_VFID) #define S_PL_ADDR 0 #define M_PL_ADDR 0xfffffU #define V_PL_ADDR(x) ((x) << S_PL_ADDR) #define G_PL_ADDR(x) (((x) >> S_PL_ADDR) & M_PL_ADDR) #define A_PL_INDIR_DATA 0x1fc static uint64_t read_vf_stat(struct adapter *sc, unsigned int viid, int reg) { u32 stats[2]; mtx_assert(&sc->regwin_lock, MA_OWNED); t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(reg))); stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA); stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA); return (((uint64_t)stats[1]) << 32 | stats[0]); } static void t4_get_vi_stats(struct adapter *sc, unsigned int viid, struct fw_vi_stats_vf *stats) { #define GET_STAT(name) \ read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L) stats->tx_bcast_bytes = GET_STAT(TX_VF_BCAST_BYTES); stats->tx_bcast_frames = GET_STAT(TX_VF_BCAST_FRAMES); stats->tx_mcast_bytes = GET_STAT(TX_VF_MCAST_BYTES); stats->tx_mcast_frames = GET_STAT(TX_VF_MCAST_FRAMES); stats->tx_ucast_bytes = GET_STAT(TX_VF_UCAST_BYTES); stats->tx_ucast_frames = GET_STAT(TX_VF_UCAST_FRAMES); stats->tx_drop_frames = GET_STAT(TX_VF_DROP_FRAMES); stats->tx_offload_bytes = GET_STAT(TX_VF_OFFLOAD_BYTES); stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES); stats->rx_bcast_bytes = GET_STAT(RX_VF_BCAST_BYTES); stats->rx_bcast_frames = GET_STAT(RX_VF_BCAST_FRAMES); stats->rx_mcast_bytes = GET_STAT(RX_VF_MCAST_BYTES); stats->rx_mcast_frames = GET_STAT(RX_VF_MCAST_FRAMES); stats->rx_ucast_bytes = GET_STAT(RX_VF_UCAST_BYTES); stats->rx_ucast_frames = GET_STAT(RX_VF_UCAST_FRAMES); stats->rx_err_frames = GET_STAT(RX_VF_ERR_FRAMES); #undef GET_STAT } static void t4_clr_vi_stats(struct adapter *sc, unsigned int viid) { int reg; t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L))); for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L; reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4) t4_write_reg(sc, A_PL_INDIR_DATA, 0); } static void vi_refresh_stats(struct adapter *sc, struct vi_info *vi) { struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ if (!(vi->flags & VI_INIT_DONE)) return; getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &vi->last_refreshed, <)) return; mtx_lock(&sc->regwin_lock); t4_get_vi_stats(sc, vi->viid, &vi->stats); getmicrotime(&vi->last_refreshed); mtx_unlock(&sc->regwin_lock); } static void cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi) { int i; u_int v, tnl_cong_drops; struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &pi->last_refreshed, <)) return; tnl_cong_drops = 0; t4_get_port_stats(sc, pi->tx_chan, &pi->stats); for (i = 0; i < NCHAN; i++) { if (pi->rx_chan_map & (1 << i)) { mtx_lock(&sc->regwin_lock); t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1, A_TP_MIB_TNL_CNG_DROP_0 + i); mtx_unlock(&sc->regwin_lock); tnl_cong_drops += v; } } pi->tnl_cong_drops = tnl_cong_drops; getmicrotime(&pi->last_refreshed); } static void cxgbe_tick(void *arg) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; PORT_LOCK_ASSERT_OWNED(pi); cxgbe_refresh_stats(sc, pi); callout_schedule(&pi->tick, hz); } void vi_tick(void *arg) { struct vi_info *vi = arg; struct adapter *sc = vi->pi->adapter; vi_refresh_stats(sc, vi); callout_schedule(&vi->tick, hz); } static void cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid) { struct ifnet *vlan; if (arg != ifp || ifp->if_type != IFT_ETHER) return; vlan = VLAN_DEVAT(ifp, vid); VLAN_SETCOOKIE(vlan, ifp); } static int cpl_not_handled(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { #ifdef INVARIANTS panic("%s: opcode 0x%02x on iq %p with payload %p", __func__, rss->opcode, iq, m); #else log(LOG_ERR, "%s: opcode 0x%02x on iq %p with payload %p\n", __func__, rss->opcode, iq, m); m_freem(m); #endif return (EDOOFUS); } int t4_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h) { uintptr_t *loc, new; if (opcode >= nitems(sc->cpl_handler)) return (EINVAL); new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled; loc = (uintptr_t *) &sc->cpl_handler[opcode]; atomic_store_rel_ptr(loc, new); return (0); } static int an_not_handled(struct sge_iq *iq, const struct rsp_ctrl *ctrl) { #ifdef INVARIANTS panic("%s: async notification on iq %p (ctrl %p)", __func__, iq, ctrl); #else log(LOG_ERR, "%s: async notification on iq %p (ctrl %p)\n", __func__, iq, ctrl); #endif return (EDOOFUS); } int t4_register_an_handler(struct adapter *sc, an_handler_t h) { uintptr_t *loc, new; new = h ? (uintptr_t)h : (uintptr_t)an_not_handled; loc = (uintptr_t *) &sc->an_handler; atomic_store_rel_ptr(loc, new); return (0); } static int fw_msg_not_handled(struct adapter *sc, const __be64 *rpl) { const struct cpl_fw6_msg *cpl = __containerof(rpl, struct cpl_fw6_msg, data[0]); #ifdef INVARIANTS panic("%s: fw_msg type %d", __func__, cpl->type); #else log(LOG_ERR, "%s: fw_msg type %d\n", __func__, cpl->type); #endif return (EDOOFUS); } int t4_register_fw_msg_handler(struct adapter *sc, int type, fw_msg_handler_t h) { uintptr_t *loc, new; if (type >= nitems(sc->fw_msg_handler)) return (EINVAL); /* * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL * handler dispatch table. Reject any attempt to install a handler for * this subtype. */ if (type == FW_TYPE_RSSCPL || type == FW6_TYPE_RSSCPL) return (EINVAL); new = h ? (uintptr_t)h : (uintptr_t)fw_msg_not_handled; loc = (uintptr_t *) &sc->fw_msg_handler[type]; atomic_store_rel_ptr(loc, new); return (0); } static void t4_sysctls(struct adapter *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children, *c0; static char *caps[] = { "\20\1PPP\2QFC\3DCBX", /* caps[0] linkcaps */ "\20\1NIC\2VM\3IDS\4UM\5UM_ISGL" /* caps[1] niccaps */ "\6HASHFILTER\7ETHOFLD", "\20\1TOE", /* caps[2] toecaps */ "\20\1RDDP\2RDMAC", /* caps[3] rdmacaps */ "\20\1INITIATOR_PDU\2TARGET_PDU" /* caps[4] iscsicaps */ "\3INITIATOR_CNXOFLD\4TARGET_CNXOFLD" "\5INITIATOR_SSNOFLD\6TARGET_SSNOFLD", "\20\1INITIATOR\2TARGET\3CTRL_OFLD" /* caps[5] fcoecaps */ "\4PO_INITIAOR\5PO_TARGET" }; static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"}; ctx = device_get_sysctl_ctx(sc->dev); /* * dev.t4nex.X. */ oid = device_get_sysctl_tree(sc->dev); c0 = children = SYSCTL_CHILDREN(oid); sc->sc_do_rxcopy = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW, &sc->sc_do_rxcopy, 1, "Do RX copy of small frames"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL, sc->params.nports, "# of ports"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD, NULL, chip_rev(sc), "chip hardware revision"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", CTLFLAG_RD, sc->fw_version, 0, "firmware version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf", CTLFLAG_RD, sc->cfg_file, 0, "configuration file"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL, sc->cfcsum, "config file checksum"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells", CTLTYPE_STRING | CTLFLAG_RD, doorbells, sc->doorbells, sysctl_bitfield, "A", "available doorbells"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkcaps", CTLTYPE_STRING | CTLFLAG_RD, caps[0], sc->linkcaps, sysctl_bitfield, "A", "available link capabilities"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "niccaps", CTLTYPE_STRING | CTLFLAG_RD, caps[1], sc->niccaps, sysctl_bitfield, "A", "available NIC capabilities"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "toecaps", CTLTYPE_STRING | CTLFLAG_RD, caps[2], sc->toecaps, sysctl_bitfield, "A", "available TCP offload capabilities"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdmacaps", CTLTYPE_STRING | CTLFLAG_RD, caps[3], sc->rdmacaps, sysctl_bitfield, "A", "available RDMA capabilities"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "iscsicaps", CTLTYPE_STRING | CTLFLAG_RD, caps[4], sc->iscsicaps, sysctl_bitfield, "A", "available iSCSI capabilities"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoecaps", CTLTYPE_STRING | CTLFLAG_RD, caps[5], sc->fcoecaps, sysctl_bitfield, "A", "available FCoE capabilities"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL, sc->params.vpd.cclk, "core clock frequency (in KHz)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers", CTLTYPE_STRING | CTLFLAG_RD, sc->sge.timer_val, sizeof(sc->sge.timer_val), sysctl_int_array, "A", "interrupt holdoff timer values (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts", CTLTYPE_STRING | CTLFLAG_RD, sc->sge.counter_val, sizeof(sc->sge.counter_val), sysctl_int_array, "A", "interrupt holdoff packet counter values"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD, NULL, sc->tids.nftids, "number of filters"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, sc, 0, sysctl_temperature, "I", "chip temperature (in Celsius)"); t4_sge_sysctls(sc, ctx, children); sc->lro_timeout = 100; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW, &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "debug_flags", CTLFLAG_RW, &sc->debug_flags, 0, "flags to enable runtime debugging"); #ifdef SBUF_DRAIN /* * dev.t4nex.X.misc. Marked CTLFLAG_SKIP to avoid information overload. */ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc", CTLFLAG_RD | CTLFLAG_SKIP, NULL, "logs and miscellaneous information"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cctrl, "A", "congestion control"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp", CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0", CTLTYPE_STRING | CTLFLAG_RD, sc, 3, sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1", CTLTYPE_STRING | CTLFLAG_RD, sc, 4, sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5, sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_la, "A", "CIM logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ma_la, "A", "CIM MA logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2", CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3", CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge", CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)"); if (is_t5(sc)) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_pif_la, "A", "CIM PIF logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_qcfg, "A", "CIM queue configuration"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cpl_stats, "A", "CPL statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ddp_stats, "A", "non-TCP DDP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_devlog, "A", "firmware's device log"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_fcoe_stats, "A", "FCoE statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_hw_sched, "A", "hardware scheduler "); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_l2t, "A", "hardware L2 table"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_lb_stats, "A", "loopback statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_meminfo, "A", "memory regions"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_mps_tcam, "A", "MPS TCAM entries"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_path_mtus, "A", "path MTUs"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_pm_stats, "A", "PM statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_rdma_stats, "A", "RDMA statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tcp_stats, "A", "TCP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tids, "A", "TID information"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_err_stats, "A", "TP error statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_la, "A", "TP logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tx_rate, "A", "Tx rate"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ulprx_la, "A", "ULPRX logic analyzer"); if (is_t5(sc)) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_wcwr_stats, "A", "write combined work requests"); } #endif #ifdef TCP_OFFLOAD if (is_offload(sc)) { /* * dev.t4nex.X.toe. */ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD, NULL, "TOE parameters"); children = SYSCTL_CHILDREN(oid); sc->tt.sndbuf = 256 * 1024; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW, &sc->tt.sndbuf, 0, "max hardware send buffer size"); sc->tt.ddp = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW, &sc->tt.ddp, 0, "DDP allowed"); sc->tt.indsz = G_INDICATESIZE(t4_read_reg(sc, A_TP_PARA_REG5)); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "indsz", CTLFLAG_RW, &sc->tt.indsz, 0, "DDP max indicate size allowed"); sc->tt.ddp_thres = G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp_thres", CTLFLAG_RW, &sc->tt.ddp_thres, 0, "DDP threshold"); sc->tt.rx_coalesce = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce", CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing"); sc->tt.tx_align = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align", CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload"); } #endif } void vi_sysctls(struct vi_info *vi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children; ctx = device_get_sysctl_ctx(vi->dev); /* * dev.[nv](cxgbe|cxl).X. */ oid = device_get_sysctl_tree(vi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL, vi->viid, "VI identifer"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD, &vi->nrxq, 0, "# of rx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD, &vi->ntxq, 0, "# of tx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD, &vi->first_rxq, 0, "index of first rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD, &vi->first_txq, 0, "index of first tx queue"); if (vi->flags & VI_NETMAP) return; SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU", "Reserve queue 0 for non-flowid packets"); #ifdef TCP_OFFLOAD if (vi->nofldrxq != 0) { SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD, &vi->nofldrxq, 0, "# of rx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD, &vi->nofldtxq, 0, "# of tx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq", CTLFLAG_RD, &vi->first_ofld_rxq, 0, "index of first TOE rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq", CTLFLAG_RD, &vi->first_ofld_txq, 0, "index of first TOE tx queue"); } #endif SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I", "holdoff timer index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I", "holdoff packet counter index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I", "rx queue size"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I", "tx queue size"); } static void cxgbe_sysctls(struct port_info *pi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children; struct adapter *sc = pi->adapter; ctx = device_get_sysctl_ctx(pi->dev); /* * dev.cxgbe.X. */ oid = device_get_sysctl_tree(pi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING | CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down"); if (pi->port_type == FW_PORT_TYPE_BT_XAUI) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I", "PHY temperature (in Celsius)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version", CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I", "PHY firmware version"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings", CTLTYPE_STRING | CTLFLAG_RW, pi, PAUSE_TX, sysctl_pause_settings, "A", "PAUSE settings (bit 0 = rx_pause, bit 1 = tx_pause)"); /* * dev.cxgbe.X.stats. */ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD, NULL, "port statistics"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD, &pi->tx_parse_error, 0, "# of tx packets with invalid length or # of segments"); #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \ SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \ CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \ sysctl_handle_t4_reg64, "QU", desc) SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_64", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L)); SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L)); SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err", "# of frames received with bad FCS", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_len_err", "# of frames received with length error", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_64", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L)); #undef SYSCTL_ADD_T4_REG64 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \ &pi->stats.name, desc) /* We get these from port_stats and they may be stale by upto 1s */ SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0, "# drops due to buffer-group 0 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1, "# drops due to buffer-group 1 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2, "# drops due to buffer-group 2 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3, "# drops due to buffer-group 3 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc0, "# of buffer-group 0 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc1, "# of buffer-group 1 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc2, "# of buffer-group 2 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc3, "# of buffer-group 3 truncated packets"); #undef SYSCTL_ADD_T4_PORTSTAT } static int sysctl_int_array(SYSCTL_HANDLER_ARGS) { int rc, *i, space = 0; struct sbuf sb; sbuf_new_for_sysctl(&sb, NULL, 64, req); for (i = arg1; arg2; arg2 -= sizeof(int), i++) { if (space) sbuf_printf(&sb, " "); sbuf_printf(&sb, "%d", *i); space = 1; } rc = sbuf_finish(&sb); sbuf_delete(&sb); return (rc); } static int sysctl_bitfield(SYSCTL_HANDLER_ARGS) { int rc; struct sbuf *sb; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", (int)arg2, (char *)arg1); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_btphy(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; int op = arg2; struct adapter *sc = pi->adapter; u_int v; int rc; rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt"); if (rc) return (rc); /* XXX: magic numbers */ rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820, &v); end_synchronized_op(sc, 0); if (rc) return (rc); if (op == 0) v /= 256; rc = sysctl_handle_int(oidp, &v, 0, req); return (rc); } static int sysctl_noflowq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; int rc, val; val = vi->rsrv_noflowq; rc = sysctl_handle_int(oidp, &val, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if ((val >= 1) && (vi->ntxq > 1)) vi->rsrv_noflowq = 1; else vi->rsrv_noflowq = 0; return (rc); } static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc, i; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif uint8_t v; idx = vi->tmr_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < 0 || idx >= SGE_NTIMERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4tmr"); if (rc) return (rc); v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1); for_each_rxq(vi, i, rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&rxq->iq.intr_params, v); #else rxq->iq.intr_params = v; #endif } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&ofld_rxq->iq.intr_params, v); #else ofld_rxq->iq.intr_params = v; #endif } #endif vi->tmr_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (0); } static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc; idx = vi->pktc_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < -1 || idx >= SGE_NCOUNTERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4pktc"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->pktc_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int qsize, rc; qsize = vi->qsize_rxq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || (qsize & 7)) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4rxqs"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->qsize_rxq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int qsize, rc; qsize = vi->qsize_txq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || qsize > 65536) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4txqs"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->qsize_txq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc; if (req->newptr == NULL) { struct sbuf *sb; static char *bits = "\20\1PAUSE_RX\2PAUSE_TX"; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", lc->fc & (PAUSE_TX | PAUSE_RX), bits); rc = sbuf_finish(sb); sbuf_delete(sb); } else { char s[2]; int n; s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX)); s[1] = 0; rc = sysctl_handle_string(oidp, s, sizeof(s), req); if (rc != 0) return(rc); if (s[1] != 0) return (EINVAL); if (s[0] < '0' || s[0] > '9') return (EINVAL); /* not a number */ n = s[0] - '0'; if (n & ~(PAUSE_TX | PAUSE_RX)) return (EINVAL); /* some other bit is set too */ rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4PAUSE"); if (rc) return (rc); if ((lc->requested_fc & (PAUSE_TX | PAUSE_RX)) != n) { int link_ok = lc->link_ok; lc->requested_fc &= ~(PAUSE_TX | PAUSE_RX); lc->requested_fc |= n; rc = -t4_link_start(sc, sc->mbox, pi->tx_chan, lc); lc->link_ok = link_ok; /* restore */ } end_synchronized_op(sc, 0); } return (rc); } static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int reg = arg2; uint64_t val; val = t4_read_reg64(sc, reg); return (sysctl_handle_64(oidp, &val, 0, req)); } static int sysctl_temperature(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int rc, t; uint32_t param, val; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp"); if (rc) return (rc); param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) | V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); end_synchronized_op(sc, 0); if (rc) return (rc); /* unknown is returned as 0 but we display -1 in that case */ t = val == 0 ? -1 : val; rc = sysctl_handle_int(oidp, &t, 0, req); return (rc); } #ifdef SBUF_DRAIN static int sysctl_cctrl(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint16_t incr[NMTUS][NCCTRL_WIN]; static const char *dec_fac[] = { "0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875", "0.9375" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); t4_read_cong_tbl(sc, incr); for (i = 0; i < NCCTRL_WIN; ++i) { sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i, incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i], incr[5][i], incr[6][i], incr[7][i]); sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n", incr[8][i], incr[9][i], incr[10][i], incr[11][i], incr[12][i], incr[13][i], incr[14][i], incr[15][i], sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = { "TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI", /* ibq's */ "ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI", /* obq's */ "SGE0-RX", "SGE1-RX" /* additional obq's (T5 onwards) */ }; static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n, qid = arg2; uint32_t *buf, *p; char *qtype; u_int cim_num_obq = is_t4(sc) ? CIM_NUM_OBQ : CIM_NUM_OBQ_T5; KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq, ("%s: bad qid %d\n", __func__, qid)); if (qid < CIM_NUM_IBQ) { /* inbound queue */ qtype = "IBQ"; n = 4 * CIM_IBQ_SIZE; buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = t4_read_cim_ibq(sc, qid, buf, n); } else { /* outbound queue */ qtype = "OBQ"; qid -= CIM_NUM_IBQ; n = 4 * cim_num_obq * CIM_OBQ_SIZE; buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = t4_read_cim_obq(sc, qid, buf, n); } if (rc < 0) { rc = -rc; goto done; } n = rc * sizeof(uint32_t); /* rc has # of words actually read */ rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) { rc = ENOMEM; goto done; } sbuf_printf(sb, "%s%d %s", qtype , qid, qname[arg2]); for (i = 0, p = buf; i < n; i += 16, p += 4) sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1], p[2], p[3]); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int cfg; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_cim_read_la(sc, buf, NULL); if (rc != 0) goto done; sbuf_printf(sb, "Status Data PC%s", cfg & F_UPDBGLACAPTPCONLY ? "" : " LS0Stat LS0Addr LS0Data"); KASSERT((sc->params.cim_la_size & 7) == 0, ("%s: p will walk off the end of buf", __func__)); for (p = buf; p < &buf[sc->params.cim_la_size]; p += 8) { if (cfg & F_UPDBGLACAPTPCONLY) { sbuf_printf(sb, "\n %02x %08x %08x", p[5] & 0xff, p[6], p[7]); sbuf_printf(sb, "\n %02x %02x%06x %02x%06x", (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8, p[4] & 0xff, p[5] >> 8); sbuf_printf(sb, "\n %02x %x%07x %x%07x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4); } else { sbuf_printf(sb, "\n %02x %x%07x %x%07x %08x %08x " "%08x%08x%08x%08x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5], p[6], p[7]); } } rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE); p = buf; for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCnt ID Tag UE Data RDY VLD"); for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%3u %2u %x %u %08x%08x %u %u", (p[2] >> 10) & 0xff, (p[2] >> 7) & 7, (p[2] >> 3) & 0xf, (p[2] >> 2) & 1, (p[1] >> 2) | ((p[2] & 3) << 30), (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1, p[0] & 1); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL); p = buf; sbuf_printf(sb, "Cntl ID DataBE Addr Data"); for (i = 0; i < CIM_MALA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %04x %08x %08x%08x%08x%08x", (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff, p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCntl ID Data"); for (i = 0; i < CIM_MALA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %08x%08x%08x%08x", (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t thres[CIM_NUM_IBQ]; uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr; uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat; u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq; if (is_t4(sc)) { cim_num_obq = CIM_NUM_OBQ; ibq_rdaddr = A_UP_IBQ_0_RDADDR; obq_rdaddr = A_UP_OBQ_0_REALADDR; } else { cim_num_obq = CIM_NUM_OBQ_T5; ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR; obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR; } nq = CIM_NUM_IBQ + cim_num_obq; rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat); if (rc == 0) rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr); if (rc != 0) return (rc); t4_read_cimq_cfg(sc, base, size, thres); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Queue Base Size Thres RdPtr WrPtr SOP EOP Avail"); for (i = 0; i < CIM_NUM_IBQ; i++, p += 4) sbuf_printf(sb, "\n%7s %5x %5u %5u %6x %4x %4u %4u %5u", qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]), G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); for ( ; i < nq; i++, p += 4, wr += 2) sbuf_printf(sb, "\n%7s %5x %5u %12x %4x %4u %4u %5u", qname[i], base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff, wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_cpl_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_tp_get_cpl_stats(sc, &stats); sbuf_printf(sb, " channel 0 channel 1 channel 2 " "channel 3\n"); sbuf_printf(sb, "CPL requests: %10u %10u %10u %10u\n", stats.req[0], stats.req[1], stats.req[2], stats.req[3]); sbuf_printf(sb, "CPL responses: %10u %10u %10u %10u", stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_usm_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_usm_stats(sc, &stats); sbuf_printf(sb, "Frames: %u\n", stats.frames); sbuf_printf(sb, "Octets: %ju\n", stats.octets); sbuf_printf(sb, "Drops: %u", stats.drops); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } const char *devlog_level_strings[] = { [FW_DEVLOG_LEVEL_EMERG] = "EMERG", [FW_DEVLOG_LEVEL_CRIT] = "CRIT", [FW_DEVLOG_LEVEL_ERR] = "ERR", [FW_DEVLOG_LEVEL_NOTICE] = "NOTICE", [FW_DEVLOG_LEVEL_INFO] = "INFO", [FW_DEVLOG_LEVEL_DEBUG] = "DEBUG" }; const char *devlog_facility_strings[] = { [FW_DEVLOG_FACILITY_CORE] = "CORE", [FW_DEVLOG_FACILITY_CF] = "CF", [FW_DEVLOG_FACILITY_SCHED] = "SCHED", [FW_DEVLOG_FACILITY_TIMER] = "TIMER", [FW_DEVLOG_FACILITY_RES] = "RES", [FW_DEVLOG_FACILITY_HW] = "HW", [FW_DEVLOG_FACILITY_FLR] = "FLR", [FW_DEVLOG_FACILITY_DMAQ] = "DMAQ", [FW_DEVLOG_FACILITY_PHY] = "PHY", [FW_DEVLOG_FACILITY_MAC] = "MAC", [FW_DEVLOG_FACILITY_PORT] = "PORT", [FW_DEVLOG_FACILITY_VI] = "VI", [FW_DEVLOG_FACILITY_FILTER] = "FILTER", [FW_DEVLOG_FACILITY_ACL] = "ACL", [FW_DEVLOG_FACILITY_TM] = "TM", [FW_DEVLOG_FACILITY_QFC] = "QFC", [FW_DEVLOG_FACILITY_DCB] = "DCB", [FW_DEVLOG_FACILITY_ETH] = "ETH", [FW_DEVLOG_FACILITY_OFLD] = "OFLD", [FW_DEVLOG_FACILITY_RI] = "RI", [FW_DEVLOG_FACILITY_ISCSI] = "ISCSI", [FW_DEVLOG_FACILITY_FCOE] = "FCOE", [FW_DEVLOG_FACILITY_FOISCSI] = "FOISCSI", [FW_DEVLOG_FACILITY_FOFCOE] = "FOFCOE" }; static int sysctl_devlog(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e *buf, *e; int i, j, rc, nentries, first = 0, m; struct sbuf *sb; uint64_t ftstamp = UINT64_MAX; if (dparams->start == 0) { dparams->memtype = FW_MEMTYPE_EDC0; dparams->start = 0x84000; dparams->size = 32768; } nentries = dparams->size / sizeof(struct fw_devlog_e); buf = malloc(dparams->size, M_CXGBE, M_NOWAIT); if (buf == NULL) return (ENOMEM); m = fwmtype_to_hwmtype(dparams->memtype); rc = -t4_mem_read(sc, m, dparams->start, dparams->size, (void *)buf); if (rc != 0) goto done; for (i = 0; i < nentries; i++) { e = &buf[i]; if (e->timestamp == 0) break; /* end */ e->timestamp = be64toh(e->timestamp); e->seqno = be32toh(e->seqno); for (j = 0; j < 8; j++) e->params[j] = be32toh(e->params[j]); if (e->timestamp < ftstamp) { ftstamp = e->timestamp; first = i; } } if (buf[first].timestamp == 0) goto done; /* nothing in the log */ rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) { rc = ENOMEM; goto done; } sbuf_printf(sb, "%10s %15s %8s %8s %s\n", "Seq#", "Tstamp", "Level", "Facility", "Message"); i = first; do { e = &buf[i]; if (e->timestamp == 0) break; /* end */ sbuf_printf(sb, "%10d %15ju %8s %8s ", e->seqno, e->timestamp, (e->level < nitems(devlog_level_strings) ? devlog_level_strings[e->level] : "UNKNOWN"), (e->facility < nitems(devlog_facility_strings) ? devlog_facility_strings[e->facility] : "UNKNOWN")); sbuf_printf(sb, e->fmt, e->params[0], e->params[1], e->params[2], e->params[3], e->params[4], e->params[5], e->params[6], e->params[7]); if (++i == nentries) i = 0; } while (i != first); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_fcoe_stats stats[4]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_fcoe_stats(sc, 0, &stats[0]); t4_get_fcoe_stats(sc, 1, &stats[1]); t4_get_fcoe_stats(sc, 2, &stats[2]); t4_get_fcoe_stats(sc, 3, &stats[3]); sbuf_printf(sb, " channel 0 channel 1 " "channel 2 channel 3\n"); sbuf_printf(sb, "octetsDDP: %16ju %16ju %16ju %16ju\n", stats[0].octetsDDP, stats[1].octetsDDP, stats[2].octetsDDP, stats[3].octetsDDP); sbuf_printf(sb, "framesDDP: %16u %16u %16u %16u\n", stats[0].framesDDP, stats[1].framesDDP, stats[2].framesDDP, stats[3].framesDDP); sbuf_printf(sb, "framesDrop: %16u %16u %16u %16u", stats[0].framesDrop, stats[1].framesDrop, stats[2].framesDrop, stats[3].framesDrop); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; unsigned int map, kbps, ipg, mode; unsigned int pace_tab[NTX_SCHED]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP); mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG)); t4_read_pace_tbl(sc, pace_tab); sbuf_printf(sb, "Scheduler Mode Channel Rate (Kbps) " "Class IPG (0.1 ns) Flow IPG (us)"); for (i = 0; i < NTX_SCHED; ++i, map >>= 2) { t4_get_tx_sched(sc, i, &kbps, &ipg); sbuf_printf(sb, "\n %u %-5s %u ", i, (mode & (1 << i)) ? "flow" : "class", map & 3); if (kbps) sbuf_printf(sb, "%9u ", kbps); else sbuf_printf(sb, " disabled "); if (ipg) sbuf_printf(sb, "%13u ", ipg); else sbuf_printf(sb, " disabled "); if (pace_tab[i]) sbuf_printf(sb, "%10u", pace_tab[i]); else sbuf_printf(sb, " disabled"); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, j; uint64_t *p0, *p1; struct lb_port_stats s[2]; static const char *stat_name[] = { "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:", "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:", "Frames128To255:", "Frames256To511:", "Frames512To1023:", "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:", "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:", "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:", "BG2FramesTrunc:", "BG3FramesTrunc:" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); memset(s, 0, sizeof(s)); for (i = 0; i < 4; i += 2) { t4_get_lb_stats(sc, i, &s[0]); t4_get_lb_stats(sc, i + 1, &s[1]); p0 = &s[0].octets; p1 = &s[1].octets; sbuf_printf(sb, "%s Loopback %u" " Loopback %u", i == 0 ? "" : "\n", i, i + 1); for (j = 0; j < nitems(stat_name); j++) sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j], *p0++, *p1++); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS) { int rc = 0; struct port_info *pi = arg1; struct sbuf *sb; static const char *linkdnreasons[] = { "non-specific", "remote fault", "autoneg failed", "reserved3", "PHY overheated", "unknown", "rx los", "reserved7" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 64, req); if (sb == NULL) return (ENOMEM); if (pi->linkdnrc < 0) sbuf_printf(sb, "n/a"); else if (pi->linkdnrc < nitems(linkdnreasons)) sbuf_printf(sb, "%s", linkdnreasons[pi->linkdnrc]); else sbuf_printf(sb, "%d", pi->linkdnrc); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } struct mem_desc { unsigned int base; unsigned int limit; unsigned int idx; }; static int mem_desc_cmp(const void *a, const void *b) { return ((const struct mem_desc *)a)->base - ((const struct mem_desc *)b)->base; } static void mem_region_show(struct sbuf *sb, const char *name, unsigned int from, unsigned int to) { unsigned int size; size = to - from + 1; if (size == 0) return; /* XXX: need humanize_number(3) in libkern for a more readable 'size' */ sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size); } static int sysctl_meminfo(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n; uint32_t lo, hi, used, alloc; static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"}; static const char *region[] = { "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:", "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:", "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:", "TDDP region:", "TPT region:", "STAG region:", "RQ region:", "RQUDP region:", "PBL region:", "TXPBL region:", "DBVFIFO region:", "ULPRX state:", "ULPTX state:", "On-chip queues:" }; struct mem_desc avail[4]; struct mem_desc mem[nitems(region) + 3]; /* up to 3 holes */ struct mem_desc *md = mem; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); for (i = 0; i < nitems(mem); i++) { mem[i].limit = 0; mem[i].idx = i; } /* Find and sort the populated memory ranges */ i = 0; lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); if (lo & F_EDRAM0_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM0_BAR); avail[i].base = G_EDRAM0_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20); avail[i].idx = 0; i++; } if (lo & F_EDRAM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM1_BAR); avail[i].base = G_EDRAM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20); avail[i].idx = 1; i++; } if (lo & F_EXT_MEM_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); avail[i].base = G_EXT_MEM_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM_SIZE(hi) << 20); avail[i].idx = is_t4(sc) ? 2 : 3; /* Call it MC for T4 */ i++; } if (!is_t4(sc) && lo & F_EXT_MEM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); avail[i].base = G_EXT_MEM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM1_SIZE(hi) << 20); avail[i].idx = 4; i++; } if (!i) /* no memory available */ return 0; qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp); (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR); (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE); /* the next few have explicit upper bounds */ md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) * G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE)); md++; md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) * G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE)); md++; if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { hi = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4; md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE); md->limit = (sc->tids.ntids - hi) * 16 + md->base - 1; } else { md->base = 0; md->idx = nitems(region); /* hide it */ } md++; #define ulp_region(reg) \ md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\ (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT) ulp_region(RX_ISCSI); ulp_region(RX_TDDP); ulp_region(TX_TPT); ulp_region(RX_STAG); ulp_region(RX_RQ); ulp_region(RX_RQUDP); ulp_region(RX_PBL); ulp_region(TX_PBL); #undef ulp_region md->base = 0; md->idx = nitems(region); if (!is_t4(sc) && t4_read_reg(sc, A_SGE_CONTROL2) & F_VFIFO_ENABLE) { md->base = G_BASEADDR(t4_read_reg(sc, A_SGE_DBVFIFO_BADDR)); md->limit = md->base + (G_DBVFIFO_SIZE((t4_read_reg(sc, A_SGE_DBVFIFO_SIZE))) << 2) - 1; } md++; md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE); md->limit = md->base + sc->tids.ntids - 1; md++; md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE); md->limit = md->base + sc->tids.ntids - 1; md++; md->base = sc->vres.ocq.start; if (sc->vres.ocq.size) md->limit = md->base + sc->vres.ocq.size - 1; else md->idx = nitems(region); /* hide it */ md++; /* add any address-space holes, there can be up to 3 */ for (n = 0; n < i - 1; n++) if (avail[n].limit < avail[n + 1].base) (md++)->base = avail[n].limit; if (avail[n].limit) (md++)->base = avail[n].limit; n = md - mem; qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp); for (lo = 0; lo < i; lo++) mem_region_show(sb, memory[avail[lo].idx], avail[lo].base, avail[lo].limit - 1); sbuf_printf(sb, "\n"); for (i = 0; i < n; i++) { if (mem[i].idx >= nitems(region)) continue; /* skip holes */ if (!mem[i].limit) mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0; mem_region_show(sb, region[mem[i].idx], mem[i].base, mem[i].limit); } sbuf_printf(sb, "\n"); lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP RAM:", lo, hi); lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP Extmem2:", lo, hi); lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE); sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n", G_PMRXMAXPAGE(lo), t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10, (lo & F_PMRXNUMCHN) ? 2 : 1); lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE); hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE); sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n", G_PMTXMAXPAGE(lo), hi >= (1 << 20) ? (hi >> 20) : (hi >> 10), hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo)); sbuf_printf(sb, "%u p-structs\n", t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT)); for (i = 0; i < 4; i++) { lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4); if (is_t4(sc)) { used = G_USED(lo); alloc = G_ALLOC(lo); } else { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated", i, used, alloc); } for (i = 0; i < 4; i++) { lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4); if (is_t4(sc)) { used = G_USED(lo); alloc = G_ALLOC(lo); } else { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } sbuf_printf(sb, "\nLoopback %d using %u pages out of %u allocated", i, used, alloc); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static inline void tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask) { *mask = x | y; y = htobe64(y); memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN); } static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Idx Ethernet address Mask Vld Ports PF" " VF Replication P0 P1 P2 P3 ML"); n = is_t4(sc) ? NUM_MPS_CLS_SRAM_L_INSTANCES : NUM_MPS_T5_CLS_SRAM_L_INSTANCES; for (i = 0; i < n; i++) { uint64_t tcamx, tcamy, mask; uint32_t cls_lo, cls_hi; uint8_t addr[ETHER_ADDR_LEN]; tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i)); tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i)); cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i)); cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i)); if (tcamx & tcamy) continue; tcamxy2valmask(tcamx, tcamy, addr, &mask); sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx" " %c %#x%4u%4d", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask, (cls_lo & F_SRAM_VLD) ? 'Y' : 'N', G_PORTMAP(cls_hi), G_PF(cls_lo), (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1); if (cls_lo & F_REPLICATE) { struct fw_ldst_cmd ldst_cmd; memset(&ldst_cmd, 0, sizeof(ldst_cmd)); ldst_cmd.op_to_addrspace = htobe32(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS)); ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd)); ldst_cmd.u.mps.rplc.fid_idx = htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) | V_FW_LDST_CMD_IDX(i)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mps"); if (rc) break; rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd, sizeof(ldst_cmd), &ldst_cmd); end_synchronized_op(sc, 0); if (rc != 0) { sbuf_printf(sb, " ------------ error %3u ------------", rc); rc = 0; } else { sbuf_printf(sb, " %08x %08x %08x %08x", be32toh(ldst_cmd.u.mps.rplc.rplc127_96), be32toh(ldst_cmd.u.mps.rplc.rplc95_64), be32toh(ldst_cmd.u.mps.rplc.rplc63_32), be32toh(ldst_cmd.u.mps.rplc.rplc31_0)); } } else sbuf_printf(sb, "%36s", ""); sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo), G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo), G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf); } if (rc) (void) sbuf_finish(sb); else rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; uint16_t mtus[NMTUS]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_read_mtu_tbl(sc, mtus, NULL); sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u", mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6], mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13], mtus[14], mtus[15]); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint32_t cnt[PM_NSTATS]; uint64_t cyc[PM_NSTATS]; static const char *rx_stats[] = { "Read:", "Write bypass:", "Write mem:", "Flush:" }; static const char *tx_stats[] = { "Read:", "Write bypass:", "Write mem:", "Bypass + mem:" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_pmtx_get_stats(sc, cnt, cyc); sbuf_printf(sb, " Tx pcmds Tx bytes"); for (i = 0; i < ARRAY_SIZE(tx_stats); i++) sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], cnt[i], cyc[i]); t4_pmrx_get_stats(sc, cnt, cyc); sbuf_printf(sb, "\n Rx pcmds Rx bytes"); for (i = 0; i < ARRAY_SIZE(rx_stats); i++) sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], cnt[i], cyc[i]); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_rdma_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_tp_get_rdma_stats(sc, &stats); sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod); sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_tcp_stats v4, v6; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_tp_get_tcp_stats(sc, &v4, &v6); sbuf_printf(sb, " IP IPv6\n"); sbuf_printf(sb, "OutRsts: %20u %20u\n", v4.tcpOutRsts, v6.tcpOutRsts); sbuf_printf(sb, "InSegs: %20ju %20ju\n", v4.tcpInSegs, v6.tcpInSegs); sbuf_printf(sb, "OutSegs: %20ju %20ju\n", v4.tcpOutSegs, v6.tcpOutSegs); sbuf_printf(sb, "RetransSegs: %20ju %20ju", v4.tcpRetransSegs, v6.tcpRetransSegs); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tids(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tid_info *t = &sc->tids; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); if (t->natids) { sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1, t->atids_in_use); } if (t->ntids) { if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { uint32_t b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4; if (b) { sbuf_printf(sb, "TID range: 0-%u, %u-%u", b - 1, t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4, t->ntids - 1); } else { sbuf_printf(sb, "TID range: %u-%u", t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4, t->ntids - 1); } } else sbuf_printf(sb, "TID range: 0-%u", t->ntids - 1); sbuf_printf(sb, ", in use: %u\n", atomic_load_acq_int(&t->tids_in_use)); } if (t->nstids) { sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base, t->stid_base + t->nstids - 1, t->stids_in_use); } if (t->nftids) { sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base, t->ftid_base + t->nftids - 1); } if (t->netids) { sbuf_printf(sb, "ETID range: %u-%u\n", t->etid_base, t->etid_base + t->netids - 1); } sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users", t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4), t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6)); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_err_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_tp_get_err_stats(sc, &stats); sbuf_printf(sb, " channel 0 channel 1 channel 2 " "channel 3\n"); sbuf_printf(sb, "macInErrs: %10u %10u %10u %10u\n", stats.macInErrs[0], stats.macInErrs[1], stats.macInErrs[2], stats.macInErrs[3]); sbuf_printf(sb, "hdrInErrs: %10u %10u %10u %10u\n", stats.hdrInErrs[0], stats.hdrInErrs[1], stats.hdrInErrs[2], stats.hdrInErrs[3]); sbuf_printf(sb, "tcpInErrs: %10u %10u %10u %10u\n", stats.tcpInErrs[0], stats.tcpInErrs[1], stats.tcpInErrs[2], stats.tcpInErrs[3]); sbuf_printf(sb, "tcp6InErrs: %10u %10u %10u %10u\n", stats.tcp6InErrs[0], stats.tcp6InErrs[1], stats.tcp6InErrs[2], stats.tcp6InErrs[3]); sbuf_printf(sb, "tnlCongDrops: %10u %10u %10u %10u\n", stats.tnlCongDrops[0], stats.tnlCongDrops[1], stats.tnlCongDrops[2], stats.tnlCongDrops[3]); sbuf_printf(sb, "tnlTxDrops: %10u %10u %10u %10u\n", stats.tnlTxDrops[0], stats.tnlTxDrops[1], stats.tnlTxDrops[2], stats.tnlTxDrops[3]); sbuf_printf(sb, "ofldVlanDrops: %10u %10u %10u %10u\n", stats.ofldVlanDrops[0], stats.ofldVlanDrops[1], stats.ofldVlanDrops[2], stats.ofldVlanDrops[3]); sbuf_printf(sb, "ofldChanDrops: %10u %10u %10u %10u\n\n", stats.ofldChanDrops[0], stats.ofldChanDrops[1], stats.ofldChanDrops[2], stats.ofldChanDrops[3]); sbuf_printf(sb, "ofldNoNeigh: %u\nofldCongDefer: %u", stats.ofldNoNeigh, stats.ofldCongDefer); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } struct field_desc { const char *name; u_int start; u_int width; }; static void field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f) { char buf[32]; int line_size = 0; while (f->name) { uint64_t mask = (1ULL << f->width) - 1; int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name, ((uintmax_t)v >> f->start) & mask); if (line_size + len >= 79) { line_size = 8; sbuf_printf(sb, "\n "); } sbuf_printf(sb, "%s ", buf); line_size += len + 1; f++; } sbuf_printf(sb, "\n"); } static struct field_desc tp_la0[] = { { "RcfOpCodeOut", 60, 4 }, { "State", 56, 4 }, { "WcfState", 52, 4 }, { "RcfOpcSrcOut", 50, 2 }, { "CRxError", 49, 1 }, { "ERxError", 48, 1 }, { "SanityFailed", 47, 1 }, { "SpuriousMsg", 46, 1 }, { "FlushInputMsg", 45, 1 }, { "FlushInputCpl", 44, 1 }, { "RssUpBit", 43, 1 }, { "RssFilterHit", 42, 1 }, { "Tid", 32, 10 }, { "InitTcb", 31, 1 }, { "LineNumber", 24, 7 }, { "Emsg", 23, 1 }, { "EdataOut", 22, 1 }, { "Cmsg", 21, 1 }, { "CdataOut", 20, 1 }, { "EreadPdu", 19, 1 }, { "CreadPdu", 18, 1 }, { "TunnelPkt", 17, 1 }, { "RcfPeerFin", 16, 1 }, { "RcfReasonOut", 12, 4 }, { "TxCchannel", 10, 2 }, { "RcfTxChannel", 8, 2 }, { "RxEchannel", 6, 2 }, { "RcfRxChannel", 5, 1 }, { "RcfDataOutSrdy", 4, 1 }, { "RxDvld", 3, 1 }, { "RxOoDvld", 2, 1 }, { "RxCongestion", 1, 1 }, { "TxCongestion", 0, 1 }, { NULL } }; static struct field_desc tp_la1[] = { { "CplCmdIn", 56, 8 }, { "CplCmdOut", 48, 8 }, { "ESynOut", 47, 1 }, { "EAckOut", 46, 1 }, { "EFinOut", 45, 1 }, { "ERstOut", 44, 1 }, { "SynIn", 43, 1 }, { "AckIn", 42, 1 }, { "FinIn", 41, 1 }, { "RstIn", 40, 1 }, { "DataIn", 39, 1 }, { "DataInVld", 38, 1 }, { "PadIn", 37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { "ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static struct field_desc tp_la2[] = { { "CplCmdIn", 56, 8 }, { "MpsVfVld", 55, 1 }, { "MpsPf", 52, 3 }, { "MpsVf", 44, 8 }, { "SynIn", 43, 1 }, { "AckIn", 42, 1 }, { "FinIn", 41, 1 }, { "RstIn", 40, 1 }, { "DataIn", 39, 1 }, { "DataInVld", 38, 1 }, { "PadIn", 37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { "ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static void tp_la_show(struct sbuf *sb, uint64_t *p, int idx) { field_desc_show(sb, *p, tp_la0); } static void tp_la_show2(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], tp_la0); } static void tp_la_show3(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? tp_la2 : tp_la1); } static int sysctl_tp_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint64_t *buf, *p; int rc; u_int i, inc; void (*show_func)(struct sbuf *, uint64_t *, int); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK); t4_tp_read_la(sc, buf, NULL); p = buf; switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) { case 2: inc = 2; show_func = tp_la_show2; break; case 3: inc = 2; show_func = tp_la_show3; break; default: inc = 1; show_func = tp_la_show; } for (i = 0; i < TPLA_SIZE / inc; i++, p += inc) (*show_func)(sb, p, i); rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; u64 nrate[NCHAN], orate[NCHAN]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_chan_txrate(sc, nrate, orate); sbuf_printf(sb, " channel 0 channel 1 channel 2 " "channel 3\n"); sbuf_printf(sb, "NIC B/s: %10ju %10ju %10ju %10ju\n", nrate[0], nrate[1], nrate[2], nrate[3]); sbuf_printf(sb, "Offload B/s: %10ju %10ju %10ju %10ju", orate[0], orate[1], orate[2], orate[3]); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint32_t *buf, *p; int rc, i; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_ulprx_read_la(sc, buf); p = buf; sbuf_printf(sb, " Pcmd Type Message" " Data"); for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) { sbuf_printf(sb, "\n%08x%08x %4x %08x %08x%08x%08x%08x", p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, v; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); v = t4_read_reg(sc, A_SGE_STAT_CFG); if (G_STATSOURCE_T5(v) == 7) { if (G_STATMODE(v) == 0) { sbuf_printf(sb, "total %d, incomplete %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } else if (G_STATMODE(v) == 1) { sbuf_printf(sb, "total %d, data overflow %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } #endif static uint32_t fconf_to_mode(uint32_t fconf) { uint32_t mode; mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR | T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT; if (fconf & F_FRAGMENTATION) mode |= T4_FILTER_IP_FRAGMENT; if (fconf & F_MPSHITTYPE) mode |= T4_FILTER_MPS_HIT_TYPE; if (fconf & F_MACMATCH) mode |= T4_FILTER_MAC_IDX; if (fconf & F_ETHERTYPE) mode |= T4_FILTER_ETH_TYPE; if (fconf & F_PROTOCOL) mode |= T4_FILTER_IP_PROTO; if (fconf & F_TOS) mode |= T4_FILTER_IP_TOS; if (fconf & F_VLAN) mode |= T4_FILTER_VLAN; if (fconf & F_VNIC_ID) mode |= T4_FILTER_VNIC; if (fconf & F_PORT) mode |= T4_FILTER_PORT; if (fconf & F_FCOE) mode |= T4_FILTER_FCoE; return (mode); } static uint32_t mode_to_fconf(uint32_t mode) { uint32_t fconf = 0; if (mode & T4_FILTER_IP_FRAGMENT) fconf |= F_FRAGMENTATION; if (mode & T4_FILTER_MPS_HIT_TYPE) fconf |= F_MPSHITTYPE; if (mode & T4_FILTER_MAC_IDX) fconf |= F_MACMATCH; if (mode & T4_FILTER_ETH_TYPE) fconf |= F_ETHERTYPE; if (mode & T4_FILTER_IP_PROTO) fconf |= F_PROTOCOL; if (mode & T4_FILTER_IP_TOS) fconf |= F_TOS; if (mode & T4_FILTER_VLAN) fconf |= F_VLAN; if (mode & T4_FILTER_VNIC) fconf |= F_VNIC_ID; if (mode & T4_FILTER_PORT) fconf |= F_PORT; if (mode & T4_FILTER_FCoE) fconf |= F_FCOE; return (fconf); } static uint32_t fspec_to_fconf(struct t4_filter_specification *fs) { uint32_t fconf = 0; if (fs->val.frag || fs->mask.frag) fconf |= F_FRAGMENTATION; if (fs->val.matchtype || fs->mask.matchtype) fconf |= F_MPSHITTYPE; if (fs->val.macidx || fs->mask.macidx) fconf |= F_MACMATCH; if (fs->val.ethtype || fs->mask.ethtype) fconf |= F_ETHERTYPE; if (fs->val.proto || fs->mask.proto) fconf |= F_PROTOCOL; if (fs->val.tos || fs->mask.tos) fconf |= F_TOS; if (fs->val.vlan_vld || fs->mask.vlan_vld) fconf |= F_VLAN; if (fs->val.vnic_vld || fs->mask.vnic_vld) fconf |= F_VNIC_ID; if (fs->val.iport || fs->mask.iport) fconf |= F_PORT; if (fs->val.fcoe || fs->mask.fcoe) fconf |= F_FCOE; return (fconf); } static int get_filter_mode(struct adapter *sc, uint32_t *mode) { int rc; uint32_t fconf; rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4getfm"); if (rc) return (rc); t4_read_indirect(sc, A_TP_PIO_ADDR, A_TP_PIO_DATA, &fconf, 1, A_TP_VLAN_PRI_MAP); if (sc->params.tp.vlan_pri_map != fconf) { log(LOG_WARNING, "%s: cached filter mode out of sync %x %x.\n", device_get_nameunit(sc->dev), sc->params.tp.vlan_pri_map, fconf); } *mode = fconf_to_mode(fconf); end_synchronized_op(sc, LOCK_HELD); return (0); } static int set_filter_mode(struct adapter *sc, uint32_t mode) { uint32_t fconf; int rc; fconf = mode_to_fconf(mode); rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4setfm"); if (rc) return (rc); if (sc->tids.ftids_in_use > 0) { rc = EBUSY; goto done; } #ifdef TCP_OFFLOAD if (uld_active(sc, ULD_TOM)) { rc = EBUSY; goto done; } #endif rc = -t4_set_filter_mode(sc, fconf); done: end_synchronized_op(sc, LOCK_HELD); return (rc); } static inline uint64_t get_filter_hits(struct adapter *sc, uint32_t fid) { uint32_t mw_base, off, tcb_base = t4_read_reg(sc, A_TP_CMM_TCB_BASE); uint64_t hits; memwin_info(sc, 0, &mw_base, NULL); off = position_memwin(sc, 0, tcb_base + (fid + sc->tids.ftid_base) * TCB_SIZE); if (is_t4(sc)) { hits = t4_read_reg64(sc, mw_base + off + 16); hits = be64toh(hits); } else { hits = t4_read_reg(sc, mw_base + off + 24); hits = be32toh(hits); } return (hits); } static int get_filter(struct adapter *sc, struct t4_filter *t) { int i, rc, nfilters = sc->tids.nftids; struct filter_entry *f; rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4getf"); if (rc) return (rc); if (sc->tids.ftids_in_use == 0 || sc->tids.ftid_tab == NULL || t->idx >= nfilters) { t->idx = 0xffffffff; goto done; } f = &sc->tids.ftid_tab[t->idx]; for (i = t->idx; i < nfilters; i++, f++) { if (f->valid) { t->idx = i; t->l2tidx = f->l2t ? f->l2t->idx : 0; t->smtidx = f->smtidx; if (f->fs.hitcnts) t->hits = get_filter_hits(sc, t->idx); else t->hits = UINT64_MAX; t->fs = f->fs; goto done; } } t->idx = 0xffffffff; done: end_synchronized_op(sc, LOCK_HELD); return (0); } static int set_filter(struct adapter *sc, struct t4_filter *t) { unsigned int nfilters, nports; struct filter_entry *f; int i, rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf"); if (rc) return (rc); nfilters = sc->tids.nftids; nports = sc->params.nports; if (nfilters == 0) { rc = ENOTSUP; goto done; } if (!(sc->flags & FULL_INIT_DONE)) { rc = EAGAIN; goto done; } if (t->idx >= nfilters) { rc = EINVAL; goto done; } /* Validate against the global filter mode */ if ((sc->params.tp.vlan_pri_map | fspec_to_fconf(&t->fs)) != sc->params.tp.vlan_pri_map) { rc = E2BIG; goto done; } if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) { rc = EINVAL; goto done; } if (t->fs.val.iport >= nports) { rc = EINVAL; goto done; } /* Can't specify an iq if not steering to it */ if (!t->fs.dirsteer && t->fs.iq) { rc = EINVAL; goto done; } /* IPv6 filter idx must be 4 aligned */ if (t->fs.type == 1 && ((t->idx & 0x3) || t->idx + 4 >= nfilters)) { rc = EINVAL; goto done; } if (sc->tids.ftid_tab == NULL) { KASSERT(sc->tids.ftids_in_use == 0, ("%s: no memory allocated but filters_in_use > 0", __func__)); sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) * nfilters, M_CXGBE, M_NOWAIT | M_ZERO); if (sc->tids.ftid_tab == NULL) { rc = ENOMEM; goto done; } mtx_init(&sc->tids.ftid_lock, "T4 filters", 0, MTX_DEF); } for (i = 0; i < 4; i++) { f = &sc->tids.ftid_tab[t->idx + i]; if (f->pending || f->valid) { rc = EBUSY; goto done; } if (f->locked) { rc = EPERM; goto done; } if (t->fs.type == 0) break; } f = &sc->tids.ftid_tab[t->idx]; f->fs = t->fs; rc = set_filter_wr(sc, t->idx); done: end_synchronized_op(sc, 0); if (rc == 0) { mtx_lock(&sc->tids.ftid_lock); for (;;) { if (f->pending == 0) { rc = f->valid ? 0 : EIO; break; } if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, PCATCH, "t4setfw", 0)) { rc = EINPROGRESS; break; } } mtx_unlock(&sc->tids.ftid_lock); } return (rc); } static int del_filter(struct adapter *sc, struct t4_filter *t) { unsigned int nfilters; struct filter_entry *f; int rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4delf"); if (rc) return (rc); nfilters = sc->tids.nftids; if (nfilters == 0) { rc = ENOTSUP; goto done; } if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 || t->idx >= nfilters) { rc = EINVAL; goto done; } if (!(sc->flags & FULL_INIT_DONE)) { rc = EAGAIN; goto done; } f = &sc->tids.ftid_tab[t->idx]; if (f->pending) { rc = EBUSY; goto done; } if (f->locked) { rc = EPERM; goto done; } if (f->valid) { t->fs = f->fs; /* extra info for the caller */ rc = del_filter_wr(sc, t->idx); } done: end_synchronized_op(sc, 0); if (rc == 0) { mtx_lock(&sc->tids.ftid_lock); for (;;) { if (f->pending == 0) { rc = f->valid ? EIO : 0; break; } if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, PCATCH, "t4delfw", 0)) { rc = EINPROGRESS; break; } } mtx_unlock(&sc->tids.ftid_lock); } return (rc); } static void clear_filter(struct filter_entry *f) { if (f->l2t) t4_l2t_release(f->l2t); bzero(f, sizeof (*f)); } static int set_filter_wr(struct adapter *sc, int fidx) { struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; unsigned int ftid; struct wrq_cookie cookie; ASSERT_SYNCHRONIZED_OP(sc); if (f->fs.newdmac || f->fs.newvlan) { /* This filter needs an L2T entry; allocate one. */ f->l2t = t4_l2t_alloc_switching(sc->l2t); if (f->l2t == NULL) return (EAGAIN); if (t4_l2t_set_switching(sc, f->l2t, f->fs.vlan, f->fs.eport, f->fs.dmac)) { t4_l2t_release(f->l2t); f->l2t = NULL; return (ENOMEM); } } ftid = sc->tids.ftid_base + fidx; fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie); if (fwr == NULL) return (ENOMEM); bzero(fwr, sizeof(*fwr)); fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR)); fwr->len16_pkd = htobe32(FW_LEN16(*fwr)); fwr->tid_to_iq = htobe32(V_FW_FILTER_WR_TID(ftid) | V_FW_FILTER_WR_RQTYPE(f->fs.type) | V_FW_FILTER_WR_NOREPLY(0) | V_FW_FILTER_WR_IQ(f->fs.iq)); fwr->del_filter_to_l2tix = htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) | V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) | V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) | V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) | V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) | V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) | V_FW_FILTER_WR_DMAC(f->fs.newdmac) | V_FW_FILTER_WR_SMAC(f->fs.newsmac) | V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) | V_FW_FILTER_WR_TXCHAN(f->fs.eport) | V_FW_FILTER_WR_PRIO(f->fs.prio) | V_FW_FILTER_WR_L2TIX(f->l2t ? f->l2t->idx : 0)); fwr->ethtype = htobe16(f->fs.val.ethtype); fwr->ethtypem = htobe16(f->fs.mask.ethtype); fwr->frag_to_ovlan_vldm = (V_FW_FILTER_WR_FRAG(f->fs.val.frag) | V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) | V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLD(f->fs.val.vnic_vld) | V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLDM(f->fs.mask.vnic_vld)); fwr->smac_sel = 0; fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) | V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id)); fwr->maci_to_matchtypem = htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) | V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) | V_FW_FILTER_WR_PORT(f->fs.val.iport) | V_FW_FILTER_WR_PORTM(f->fs.mask.iport) | V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) | V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype)); fwr->ptcl = f->fs.val.proto; fwr->ptclm = f->fs.mask.proto; fwr->ttyp = f->fs.val.tos; fwr->ttypm = f->fs.mask.tos; fwr->ivlan = htobe16(f->fs.val.vlan); fwr->ivlanm = htobe16(f->fs.mask.vlan); fwr->ovlan = htobe16(f->fs.val.vnic); fwr->ovlanm = htobe16(f->fs.mask.vnic); bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip)); bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm)); bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip)); bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm)); fwr->lp = htobe16(f->fs.val.dport); fwr->lpm = htobe16(f->fs.mask.dport); fwr->fp = htobe16(f->fs.val.sport); fwr->fpm = htobe16(f->fs.mask.sport); if (f->fs.newsmac) bcopy(f->fs.smac, fwr->sma, sizeof (fwr->sma)); f->pending = 1; sc->tids.ftids_in_use++; commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie); return (0); } static int del_filter_wr(struct adapter *sc, int fidx) { struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; unsigned int ftid; struct wrq_cookie cookie; ftid = sc->tids.ftid_base + fidx; fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie); if (fwr == NULL) return (ENOMEM); bzero(fwr, sizeof (*fwr)); t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id); f->pending = 1; commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie); return (0); } int t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1); unsigned int idx = GET_TID(rpl); unsigned int rc; struct filter_entry *f; KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, rss->opcode)); if (is_ftid(sc, idx)) { idx -= sc->tids.ftid_base; f = &sc->tids.ftid_tab[idx]; rc = G_COOKIE(rpl->cookie); mtx_lock(&sc->tids.ftid_lock); if (rc == FW_FILTER_WR_FLT_ADDED) { KASSERT(f->pending, ("%s: filter[%u] isn't pending.", __func__, idx)); f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff; f->pending = 0; /* asynchronous setup completed */ f->valid = 1; } else { if (rc != FW_FILTER_WR_FLT_DELETED) { /* Add or delete failed, display an error */ log(LOG_ERR, "filter %u setup failed with error %u\n", idx, rc); } clear_filter(f); sc->tids.ftids_in_use--; } wakeup(&sc->tids.ftid_tab); mtx_unlock(&sc->tids.ftid_lock); } return (0); } static int get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt) { int rc; if (cntxt->cid > M_CTXTQID) return (EINVAL); if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS && cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM) return (EINVAL); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt"); if (rc) return (rc); if (sc->flags & FW_OK) { rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); if (rc == 0) goto done; } /* * Read via firmware failed or wasn't even attempted. Read directly via * the backdoor. */ rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); done: end_synchronized_op(sc, 0); return (rc); } static int load_fw(struct adapter *sc, struct t4_data *fw) { int rc; uint8_t *fw_data; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw"); if (rc) return (rc); if (sc->flags & FULL_INIT_DONE) { rc = EBUSY; goto done; } fw_data = malloc(fw->len, M_CXGBE, M_WAITOK); if (fw_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(fw->data, fw_data, fw->len); if (rc == 0) rc = -t4_load_fw(sc, fw_data, fw->len); free(fw_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr) { uint32_t addr, off, remaining, i, n; uint32_t *buf, *b; uint32_t mw_base, mw_aperture; int rc; uint8_t *dst; rc = validate_mem_range(sc, mr->addr, mr->len); if (rc != 0) return (rc); memwin_info(sc, win, &mw_base, &mw_aperture); buf = b = malloc(min(mr->len, mw_aperture), M_CXGBE, M_WAITOK); addr = mr->addr; remaining = mr->len; dst = (void *)mr->data; while (remaining) { off = position_memwin(sc, win, addr); /* number of bytes that we'll copy in the inner loop */ n = min(remaining, mw_aperture - off); for (i = 0; i < n; i += 4) *b++ = t4_read_reg(sc, mw_base + off + i); rc = copyout(buf, dst, n); if (rc != 0) break; b = buf; dst += n; remaining -= n; addr += n; } free(buf, M_CXGBE); return (rc); } static int read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd) { int rc; if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports) return (EINVAL); if (i2cd->len > sizeof(i2cd->data)) return (EFBIG); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr, i2cd->offset, i2cd->len, &i2cd->data[0]); end_synchronized_op(sc, 0); return (rc); } static int in_range(int val, int lo, int hi) { return (val < 0 || (val <= hi && val >= lo)); } static int set_sched_class(struct adapter *sc, struct t4_sched_params *p) { int fw_subcmd, fw_type, rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsc"); if (rc) return (rc); if (!(sc->flags & FULL_INIT_DONE)) { rc = EAGAIN; goto done; } /* * Translate the cxgbetool parameters into T4 firmware parameters. (The * sub-command and type are in common locations.) */ if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG) fw_subcmd = FW_SCHED_SC_CONFIG; else if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS) fw_subcmd = FW_SCHED_SC_PARAMS; else { rc = EINVAL; goto done; } if (p->type == SCHED_CLASS_TYPE_PACKET) fw_type = FW_SCHED_TYPE_PKTSCHED; else { rc = EINVAL; goto done; } if (fw_subcmd == FW_SCHED_SC_CONFIG) { /* Vet our parameters ..*/ if (p->u.config.minmax < 0) { rc = EINVAL; goto done; } /* And pass the request to the firmware ...*/ rc = -t4_sched_config(sc, fw_type, p->u.config.minmax, 1); goto done; } if (fw_subcmd == FW_SCHED_SC_PARAMS) { int fw_level; int fw_mode; int fw_rateunit; int fw_ratemode; if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL) fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL; else if (p->u.params.level == SCHED_CLASS_LEVEL_CL_WRR) fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR; else if (p->u.params.level == SCHED_CLASS_LEVEL_CH_RL) fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL; else { rc = EINVAL; goto done; } if (p->u.params.mode == SCHED_CLASS_MODE_CLASS) fw_mode = FW_SCHED_PARAMS_MODE_CLASS; else if (p->u.params.mode == SCHED_CLASS_MODE_FLOW) fw_mode = FW_SCHED_PARAMS_MODE_FLOW; else { rc = EINVAL; goto done; } if (p->u.params.rateunit == SCHED_CLASS_RATEUNIT_BITS) fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE; else if (p->u.params.rateunit == SCHED_CLASS_RATEUNIT_PKTS) fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE; else { rc = EINVAL; goto done; } if (p->u.params.ratemode == SCHED_CLASS_RATEMODE_REL) fw_ratemode = FW_SCHED_PARAMS_RATE_REL; else if (p->u.params.ratemode == SCHED_CLASS_RATEMODE_ABS) fw_ratemode = FW_SCHED_PARAMS_RATE_ABS; else { rc = EINVAL; goto done; } /* Vet our parameters ... */ if (!in_range(p->u.params.channel, 0, 3) || !in_range(p->u.params.cl, 0, is_t4(sc) ? 15 : 16) || !in_range(p->u.params.minrate, 0, 10000000) || !in_range(p->u.params.maxrate, 0, 10000000) || !in_range(p->u.params.weight, 0, 100)) { rc = ERANGE; goto done; } /* * Translate any unset parameters into the firmware's * nomenclature and/or fail the call if the parameters * are required ... */ if (p->u.params.rateunit < 0 || p->u.params.ratemode < 0 || p->u.params.channel < 0 || p->u.params.cl < 0) { rc = EINVAL; goto done; } if (p->u.params.minrate < 0) p->u.params.minrate = 0; if (p->u.params.maxrate < 0) { if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL || p->u.params.level == SCHED_CLASS_LEVEL_CH_RL) { rc = EINVAL; goto done; } else p->u.params.maxrate = 0; } if (p->u.params.weight < 0) { if (p->u.params.level == SCHED_CLASS_LEVEL_CL_WRR) { rc = EINVAL; goto done; } else p->u.params.weight = 0; } if (p->u.params.pktsize < 0) { if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL || p->u.params.level == SCHED_CLASS_LEVEL_CH_RL) { rc = EINVAL; goto done; } else p->u.params.pktsize = 0; } /* See what the firmware thinks of the request ... */ rc = -t4_sched_params(sc, fw_type, fw_level, fw_mode, fw_rateunit, fw_ratemode, p->u.params.channel, p->u.params.cl, p->u.params.minrate, p->u.params.maxrate, p->u.params.weight, p->u.params.pktsize, 1); goto done; } rc = EINVAL; done: end_synchronized_op(sc, 0); return (rc); } static int set_sched_queue(struct adapter *sc, struct t4_sched_queue *p) { struct port_info *pi = NULL; struct vi_info *vi; struct sge_txq *txq; uint32_t fw_mnem, fw_queue, fw_class; int i, rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsq"); if (rc) return (rc); if (!(sc->flags & FULL_INIT_DONE)) { rc = EAGAIN; goto done; } if (p->port >= sc->params.nports) { rc = EINVAL; goto done; } /* XXX: Only supported for the main VI. */ pi = sc->port[p->port]; vi = &pi->vi[0]; if (!in_range(p->queue, 0, vi->ntxq - 1) || !in_range(p->cl, 0, 7)) { rc = EINVAL; goto done; } /* * Create a template for the FW_PARAMS_CMD mnemonic and value (TX * Scheduling Class in this case). */ fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH)); fw_class = p->cl < 0 ? 0xffffffff : p->cl; /* * If op.queue is non-negative, then we're only changing the scheduling * on a single specified TX queue. */ if (p->queue >= 0) { txq = &sc->sge.txq[vi->first_txq + p->queue]; fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id)); rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue, &fw_class); goto done; } /* * Change the scheduling on all the TX queues for the * interface. */ for_each_txq(vi, i, txq) { fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id)); rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue, &fw_class); if (rc) goto done; } rc = 0; done: end_synchronized_op(sc, 0); return (rc); } int t4_os_find_pci_capability(struct adapter *sc, int cap) { int i; return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0); } int t4_os_pci_save_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_save(dev, dinfo, 0); return (0); } int t4_os_pci_restore_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_restore(dev, dinfo); return (0); } void t4_os_portmod_changed(const struct adapter *sc, int idx) { struct port_info *pi = sc->port[idx]; struct vi_info *vi; struct ifnet *ifp; int v; static const char *mod_str[] = { NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM" }; for_each_vi(pi, v, vi) { build_medialist(pi, &vi->media); } ifp = pi->vi[0].ifp; if (pi->mod_type == FW_PORT_MOD_TYPE_NONE) if_printf(ifp, "transceiver unplugged.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN) if_printf(ifp, "unknown transceiver inserted.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED) if_printf(ifp, "unsupported transceiver inserted.\n"); else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) { if_printf(ifp, "%s transceiver inserted.\n", mod_str[pi->mod_type]); } else { if_printf(ifp, "transceiver (type %d) inserted.\n", pi->mod_type); } } void t4_os_link_changed(struct adapter *sc, int idx, int link_stat, int reason) { struct port_info *pi = sc->port[idx]; struct vi_info *vi; struct ifnet *ifp; int v; if (link_stat) pi->linkdnrc = -1; else { if (reason >= 0) pi->linkdnrc = reason; } for_each_vi(pi, v, vi) { ifp = vi->ifp; if (ifp == NULL) continue; if (link_stat) { ifp->if_baudrate = IF_Mbps(pi->link_cfg.speed); if_link_state_change(ifp, LINK_STATE_UP); } else { if_link_state_change(ifp, LINK_STATE_DOWN); } } } void t4_iterate(void (*func)(struct adapter *, void *), void *arg) { struct adapter *sc; sx_slock(&t4_list_lock); SLIST_FOREACH(sc, &t4_list, link) { /* * func should not make any assumptions about what state sc is * in - the only guarantee is that sc->sc_lock is a valid lock. */ func(sc, arg); } sx_sunlock(&t4_list_lock); } static int t4_open(struct cdev *dev, int flags, int type, struct thread *td) { return (0); } static int t4_close(struct cdev *dev, int flags, int type, struct thread *td) { return (0); } static int t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, struct thread *td) { int rc; struct adapter *sc = dev->si_drv1; rc = priv_check(td, PRIV_DRIVER); if (rc != 0) return (rc); switch (cmd) { case CHELSIO_T4_GETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) edata->val = t4_read_reg(sc, edata->addr); else if (edata->size == 8) edata->val = t4_read_reg64(sc, edata->addr); else return (EINVAL); break; } case CHELSIO_T4_SETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) { if (edata->val & 0xffffffff00000000) return (EINVAL); t4_write_reg(sc, edata->addr, (uint32_t) edata->val); } else if (edata->size == 8) t4_write_reg64(sc, edata->addr, edata->val); else return (EINVAL); break; } case CHELSIO_T4_REGDUMP: { struct t4_regdump *regs = (struct t4_regdump *)data; int reglen = is_t4(sc) ? T4_REGDUMP_SIZE : T5_REGDUMP_SIZE; uint8_t *buf; if (regs->len < reglen) { regs->len = reglen; /* hint to the caller */ return (ENOBUFS); } regs->len = reglen; buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO); t4_get_regs(sc, regs, buf); rc = copyout(buf, regs->data, reglen); free(buf, M_CXGBE); break; } case CHELSIO_T4_GET_FILTER_MODE: rc = get_filter_mode(sc, (uint32_t *)data); break; case CHELSIO_T4_SET_FILTER_MODE: rc = set_filter_mode(sc, *(uint32_t *)data); break; case CHELSIO_T4_GET_FILTER: rc = get_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_SET_FILTER: rc = set_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_DEL_FILTER: rc = del_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_GET_SGE_CONTEXT: rc = get_sge_context(sc, (struct t4_sge_context *)data); break; case CHELSIO_T4_LOAD_FW: rc = load_fw(sc, (struct t4_data *)data); break; case CHELSIO_T4_GET_MEM: rc = read_card_mem(sc, 2, (struct t4_mem_range *)data); break; case CHELSIO_T4_GET_I2C: rc = read_i2c(sc, (struct t4_i2c_data *)data); break; case CHELSIO_T4_CLEAR_STATS: { int i, v; u_int port_id = *(uint32_t *)data; struct port_info *pi; struct vi_info *vi; if (port_id >= sc->params.nports) return (EINVAL); pi = sc->port[port_id]; /* MAC stats */ t4_clr_port_stats(sc, pi->tx_chan); pi->tx_parse_error = 0; mtx_lock(&sc->regwin_lock); for_each_vi(pi, v, vi) { if (vi->flags & VI_INIT_DONE) t4_clr_vi_stats(sc, vi->viid); } mtx_unlock(&sc->regwin_lock); /* * Since this command accepts a port, clear stats for * all VIs on this port. */ for_each_vi(pi, v, vi) { if (vi->flags & VI_INIT_DONE) { struct sge_rxq *rxq; struct sge_txq *txq; struct sge_wrq *wrq; if (vi->flags & VI_NETMAP) continue; for_each_rxq(vi, i, rxq) { #if defined(INET) || defined(INET6) rxq->lro.lro_queued = 0; rxq->lro.lro_flushed = 0; #endif rxq->rxcsum = 0; rxq->vlan_extraction = 0; } for_each_txq(vi, i, txq) { txq->txcsum = 0; txq->tso_wrs = 0; txq->vlan_insertion = 0; txq->imm_wrs = 0; txq->sgl_wrs = 0; txq->txpkt_wrs = 0; txq->txpkts0_wrs = 0; txq->txpkts1_wrs = 0; txq->txpkts0_pkts = 0; txq->txpkts1_pkts = 0; mp_ring_reset_stats(txq->r); } #ifdef TCP_OFFLOAD /* nothing to clear for each ofld_rxq */ for_each_ofld_txq(vi, i, wrq) { wrq->tx_wrs_direct = 0; wrq->tx_wrs_copied = 0; } #endif if (IS_MAIN_VI(vi)) { wrq = &sc->sge.ctrlq[pi->port_id]; wrq->tx_wrs_direct = 0; wrq->tx_wrs_copied = 0; } } } break; } case CHELSIO_T4_SCHED_CLASS: rc = set_sched_class(sc, (struct t4_sched_params *)data); break; case CHELSIO_T4_SCHED_QUEUE: rc = set_sched_queue(sc, (struct t4_sched_queue *)data); break; case CHELSIO_T4_GET_TRACER: rc = t4_get_tracer(sc, (struct t4_tracer *)data); break; case CHELSIO_T4_SET_TRACER: rc = t4_set_tracer(sc, (struct t4_tracer *)data); break; default: rc = EINVAL; } return (rc); } #ifdef TCP_OFFLOAD void t4_iscsi_init(struct adapter *sc, u_int tag_mask, const u_int *pgsz_order) { t4_write_reg(sc, A_ULP_RX_ISCSI_TAGMASK, tag_mask); t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, V_HPZ0(pgsz_order[0]) | V_HPZ1(pgsz_order[1]) | V_HPZ2(pgsz_order[2]) | V_HPZ3(pgsz_order[3])); } static int toe_capability(struct vi_info *vi, int enable) { int rc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; ASSERT_SYNCHRONIZED_OP(sc); if (!is_offload(sc)) return (ENODEV); if (enable) { if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) { /* TOE is already enabled. */ return (0); } /* * We need the port's queues around so that we're able to send * and receive CPLs to/from the TOE even if the ifnet for this * port has never been UP'd administratively. */ if (!(vi->flags & VI_INIT_DONE)) { rc = cxgbe_init_synchronized(vi); if (rc) return (rc); } if (!(pi->vi[0].flags & VI_INIT_DONE)) { rc = cxgbe_init_synchronized(&pi->vi[0]); if (rc) return (rc); } if (isset(&sc->offload_map, pi->port_id)) { /* TOE is enabled on another VI of this port. */ pi->uld_vis++; return (0); } if (!uld_active(sc, ULD_TOM)) { rc = t4_activate_uld(sc, ULD_TOM); if (rc == EAGAIN) { log(LOG_WARNING, "You must kldload t4_tom.ko before trying " "to enable TOE on a cxgbe interface.\n"); } if (rc != 0) return (rc); KASSERT(sc->tom_softc != NULL, ("%s: TOM activated but softc NULL", __func__)); KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM activated but flag not set", __func__)); } /* Activate iWARP and iSCSI too, if the modules are loaded. */ if (!uld_active(sc, ULD_IWARP)) (void) t4_activate_uld(sc, ULD_IWARP); if (!uld_active(sc, ULD_ISCSI)) (void) t4_activate_uld(sc, ULD_ISCSI); pi->uld_vis++; setbit(&sc->offload_map, pi->port_id); } else { pi->uld_vis--; if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0) return (0); KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM never initialized?", __func__)); clrbit(&sc->offload_map, pi->port_id); } return (0); } /* * Add an upper layer driver to the global list. */ int t4_register_uld(struct uld_info *ui) { int rc = 0; struct uld_info *u; sx_xlock(&t4_uld_list_lock); SLIST_FOREACH(u, &t4_uld_list, link) { if (u->uld_id == ui->uld_id) { rc = EEXIST; goto done; } } SLIST_INSERT_HEAD(&t4_uld_list, ui, link); ui->refcount = 0; done: sx_xunlock(&t4_uld_list_lock); return (rc); } int t4_unregister_uld(struct uld_info *ui) { int rc = EINVAL; struct uld_info *u; sx_xlock(&t4_uld_list_lock); SLIST_FOREACH(u, &t4_uld_list, link) { if (u == ui) { if (ui->refcount > 0) { rc = EBUSY; goto done; } SLIST_REMOVE(&t4_uld_list, ui, uld_info, link); rc = 0; goto done; } } done: sx_xunlock(&t4_uld_list_lock); return (rc); } int t4_activate_uld(struct adapter *sc, int id) { int rc; struct uld_info *ui; ASSERT_SYNCHRONIZED_OP(sc); if (id < 0 || id > ULD_MAX) return (EINVAL); rc = EAGAIN; /* kldoad the module with this ULD and try again. */ sx_slock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { if (ui->uld_id == id) { if (!(sc->flags & FULL_INIT_DONE)) { rc = adapter_full_init(sc); if (rc != 0) break; } rc = ui->activate(sc); if (rc == 0) { setbit(&sc->active_ulds, id); ui->refcount++; } break; } } sx_sunlock(&t4_uld_list_lock); return (rc); } int t4_deactivate_uld(struct adapter *sc, int id) { int rc; struct uld_info *ui; ASSERT_SYNCHRONIZED_OP(sc); if (id < 0 || id > ULD_MAX) return (EINVAL); rc = ENXIO; sx_slock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { if (ui->uld_id == id) { rc = ui->deactivate(sc); if (rc == 0) { clrbit(&sc->active_ulds, id); ui->refcount--; } break; } } sx_sunlock(&t4_uld_list_lock); return (rc); } int uld_active(struct adapter *sc, int uld_id) { MPASS(uld_id >= 0 && uld_id <= ULD_MAX); return (isset(&sc->active_ulds, uld_id)); } #endif /* * Come up with reasonable defaults for some of the tunables, provided they're * not set by the user (in which case we'll use the values as is). */ static void tweak_tunables(void) { int nc = mp_ncpus; /* our snapshot of the number of CPUs */ if (t4_ntxq10g < 1) { #ifdef RSS t4_ntxq10g = rss_getnumbuckets(); #else t4_ntxq10g = min(nc, NTXQ_10G); #endif } if (t4_ntxq1g < 1) { #ifdef RSS /* XXX: way too many for 1GbE? */ t4_ntxq1g = rss_getnumbuckets(); #else t4_ntxq1g = min(nc, NTXQ_1G); #endif } if (t4_nrxq10g < 1) { #ifdef RSS t4_nrxq10g = rss_getnumbuckets(); #else t4_nrxq10g = min(nc, NRXQ_10G); #endif } if (t4_nrxq1g < 1) { #ifdef RSS /* XXX: way too many for 1GbE? */ t4_nrxq1g = rss_getnumbuckets(); #else t4_nrxq1g = min(nc, NRXQ_1G); #endif } #ifdef TCP_OFFLOAD if (t4_nofldtxq10g < 1) t4_nofldtxq10g = min(nc, NOFLDTXQ_10G); if (t4_nofldtxq1g < 1) t4_nofldtxq1g = min(nc, NOFLDTXQ_1G); if (t4_nofldrxq10g < 1) t4_nofldrxq10g = min(nc, NOFLDRXQ_10G); if (t4_nofldrxq1g < 1) t4_nofldrxq1g = min(nc, NOFLDRXQ_1G); if (t4_toecaps_allowed == -1) t4_toecaps_allowed = FW_CAPS_CONFIG_TOE; #else if (t4_toecaps_allowed == -1) t4_toecaps_allowed = 0; #endif #ifdef DEV_NETMAP if (t4_nnmtxq10g < 1) t4_nnmtxq10g = min(nc, NNMTXQ_10G); if (t4_nnmtxq1g < 1) t4_nnmtxq1g = min(nc, NNMTXQ_1G); if (t4_nnmrxq10g < 1) t4_nnmrxq10g = min(nc, NNMRXQ_10G); if (t4_nnmrxq1g < 1) t4_nnmrxq1g = min(nc, NNMRXQ_1G); #endif if (t4_tmr_idx_10g < 0 || t4_tmr_idx_10g >= SGE_NTIMERS) t4_tmr_idx_10g = TMR_IDX_10G; if (t4_pktc_idx_10g < -1 || t4_pktc_idx_10g >= SGE_NCOUNTERS) t4_pktc_idx_10g = PKTC_IDX_10G; if (t4_tmr_idx_1g < 0 || t4_tmr_idx_1g >= SGE_NTIMERS) t4_tmr_idx_1g = TMR_IDX_1G; if (t4_pktc_idx_1g < -1 || t4_pktc_idx_1g >= SGE_NCOUNTERS) t4_pktc_idx_1g = PKTC_IDX_1G; if (t4_qsize_txq < 128) t4_qsize_txq = 128; if (t4_qsize_rxq < 128) t4_qsize_rxq = 128; while (t4_qsize_rxq & 7) t4_qsize_rxq++; t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX; } static struct sx mlu; /* mod load unload */ SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload"); static int mod_event(module_t mod, int cmd, void *arg) { int rc = 0; static int loaded = 0; switch (cmd) { case MOD_LOAD: sx_xlock(&mlu); if (loaded++ == 0) { t4_sge_modload(); sx_init(&t4_list_lock, "T4/T5 adapters"); SLIST_INIT(&t4_list); #ifdef TCP_OFFLOAD sx_init(&t4_uld_list_lock, "T4/T5 ULDs"); SLIST_INIT(&t4_uld_list); #endif t4_tracer_modload(); tweak_tunables(); } sx_xunlock(&mlu); break; case MOD_UNLOAD: sx_xlock(&mlu); if (--loaded == 0) { int tries; sx_slock(&t4_list_lock); if (!SLIST_EMPTY(&t4_list)) { rc = EBUSY; sx_sunlock(&t4_list_lock); goto done_unload; } #ifdef TCP_OFFLOAD sx_slock(&t4_uld_list_lock); if (!SLIST_EMPTY(&t4_uld_list)) { rc = EBUSY; sx_sunlock(&t4_uld_list_lock); sx_sunlock(&t4_list_lock); goto done_unload; } #endif tries = 0; while (tries++ < 5 && t4_sge_extfree_refs() != 0) { uprintf("%ju clusters with custom free routine " "still is use.\n", t4_sge_extfree_refs()); pause("t4unload", 2 * hz); } #ifdef TCP_OFFLOAD sx_sunlock(&t4_uld_list_lock); #endif sx_sunlock(&t4_list_lock); if (t4_sge_extfree_refs() == 0) { t4_tracer_modunload(); #ifdef TCP_OFFLOAD sx_destroy(&t4_uld_list_lock); #endif sx_destroy(&t4_list_lock); t4_sge_modunload(); loaded = 0; } else { rc = EBUSY; loaded++; /* undo earlier decrement */ } } done_unload: sx_xunlock(&mlu); break; } return (rc); } static devclass_t t4_devclass, t5_devclass; static devclass_t cxgbe_devclass, cxl_devclass; static devclass_t vcxgbe_devclass, vcxl_devclass; DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0); MODULE_VERSION(t4nex, 1); MODULE_DEPEND(t4nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t4nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0); MODULE_VERSION(t5nex, 1); MODULE_DEPEND(t5nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t5nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0); MODULE_VERSION(cxgbe, 1); DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0); MODULE_VERSION(cxl, 1); DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0); MODULE_VERSION(vcxgbe, 1); DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0); MODULE_VERSION(vcxl, 1); Index: projects/clang380-import/sys/dev/ic/ns16550.h =================================================================== --- projects/clang380-import/sys/dev/ic/ns16550.h (revision 293686) +++ projects/clang380-import/sys/dev/ic/ns16550.h (revision 293687) @@ -1,240 +1,242 @@ /*- * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)ns16550.h 7.1 (Berkeley) 5/9/91 * $FreeBSD$ */ /* * NS8250... UART registers. */ /* 8250 registers #[0-6]. */ #define com_data 0 /* data register (R/W) */ #define REG_DATA com_data #define com_ier 1 /* interrupt enable register (W) */ #define REG_IER com_ier #define IER_ERXRDY 0x1 #define IER_ETXRDY 0x2 #define IER_ERLS 0x4 #define IER_EMSC 0x8 #define IER_BITS "\20\1ERXRDY\2ETXRDY\3ERLS\4EMSC" #define com_iir 2 /* interrupt identification register (R) */ #define REG_IIR com_iir #define IIR_IMASK 0xf #define IIR_RXTOUT 0xc #define IIR_BUSY 0x7 #define IIR_RLS 0x6 #define IIR_RXRDY 0x4 #define IIR_TXRDY 0x2 #define IIR_NOPEND 0x1 #define IIR_MLSC 0x0 #define IIR_FIFO_MASK 0xc0 /* set if FIFOs are enabled */ #define IIR_BITS "\20\1NOPEND\2TXRDY\3RXRDY" #define com_lcr 3 /* line control register (R/W) */ #define com_cfcr com_lcr /* character format control register (R/W) */ #define REG_LCR com_lcr #define LCR_DLAB 0x80 #define CFCR_DLAB LCR_DLAB #define LCR_EFR_ENABLE 0xbf /* magic to enable EFR on 16650 up */ #define CFCR_EFR_ENABLE LCR_EFR_ENABLE #define LCR_SBREAK 0x40 #define CFCR_SBREAK LCR_SBREAK #define LCR_PZERO 0x30 #define CFCR_PZERO LCR_PZERO #define LCR_PONE 0x20 #define CFCR_PONE LCR_PONE #define LCR_PEVEN 0x10 #define CFCR_PEVEN LCR_PEVEN #define LCR_PODD 0x00 #define CFCR_PODD LCR_PODD #define LCR_PENAB 0x08 #define CFCR_PENAB LCR_PENAB #define LCR_STOPB 0x04 #define CFCR_STOPB LCR_STOPB #define LCR_8BITS 0x03 #define CFCR_8BITS LCR_8BITS #define LCR_7BITS 0x02 #define CFCR_7BITS LCR_7BITS #define LCR_6BITS 0x01 #define CFCR_6BITS LCR_6BITS #define LCR_5BITS 0x00 #define CFCR_5BITS LCR_5BITS #define com_mcr 4 /* modem control register (R/W) */ #define REG_MCR com_mcr #define MCR_PRESCALE 0x80 /* only available on 16650 up */ #define MCR_LOOPBACK 0x10 #define MCR_IE 0x08 #define MCR_IENABLE MCR_IE #define MCR_DRS 0x04 #define MCR_RTS 0x02 #define MCR_DTR 0x01 #define MCR_BITS "\20\1DTR\2RTS\3DRS\4IE\5LOOPBACK\10PRESCALE" #define com_lsr 5 /* line status register (R/W) */ #define REG_LSR com_lsr #define LSR_RCV_FIFO 0x80 #define LSR_TEMT 0x40 #define LSR_TSRE LSR_TEMT #define LSR_THRE 0x20 #define LSR_TXRDY LSR_THRE #define LSR_BI 0x10 #define LSR_FE 0x08 #define LSR_PE 0x04 #define LSR_OE 0x02 #define LSR_RXRDY 0x01 #define LSR_RCV_MASK 0x1f #define LSR_BITS "\20\1RXRDY\2OE\3PE\4FE\5BI\6THRE\7TEMT\10RCV_FIFO" #define com_msr 6 /* modem status register (R/W) */ #define REG_MSR com_msr #define MSR_DCD 0x80 #define MSR_RI 0x40 #define MSR_DSR 0x20 #define MSR_CTS 0x10 #define MSR_DDCD 0x08 #define MSR_TERI 0x04 #define MSR_DDSR 0x02 #define MSR_DCTS 0x01 #define MSR_BITS "\20\1DCTS\2DDSR\3TERI\4DDCD\5CTS\6DSR\7RI\10DCD" /* 8250 multiplexed registers #[0-1]. Access enabled by LCR[7]. */ #define com_dll 0 /* divisor latch low (R/W) */ #define com_dlbl com_dll #define com_dlm 1 /* divisor latch high (R/W) */ #define com_dlbh com_dlm #define REG_DLL com_dll #define REG_DLH com_dlm /* 16450 register #7. Not multiplexed. */ #define com_scr 7 /* scratch register (R/W) */ /* 16550 register #2. Not multiplexed. */ #define com_fcr 2 /* FIFO control register (W) */ #define com_fifo com_fcr #define REG_FCR com_fcr #define FCR_ENABLE 0x01 #define FIFO_ENABLE FCR_ENABLE #define FCR_RCV_RST 0x02 #define FIFO_RCV_RST FCR_RCV_RST #define FCR_XMT_RST 0x04 #define FIFO_XMT_RST FCR_XMT_RST #define FCR_DMA 0x08 #define FIFO_DMA_MODE FCR_DMA #define FCR_RX_LOW 0x00 #define FIFO_RX_LOW FCR_RX_LOW #define FCR_RX_MEDL 0x40 #define FIFO_RX_MEDL FCR_RX_MEDL #define FCR_RX_MEDH 0x80 #define FIFO_RX_MEDH FCR_RX_MEDH #define FCR_RX_HIGH 0xc0 #define FIFO_RX_HIGH FCR_RX_HIGH #define FCR_BITS "\20\1ENABLE\2RCV_RST\3XMT_RST\4DMA" /* 16650 registers #2,[4-7]. Access enabled by LCR_EFR_ENABLE. */ #define com_efr 2 /* enhanced features register (R/W) */ #define REG_EFR com_efr #define EFR_CTS 0x80 #define EFR_AUTOCTS EFR_CTS #define EFR_RTS 0x40 #define EFR_AUTORTS EFR_RTS #define EFR_EFE 0x10 /* enhanced functions enable */ #define com_xon1 4 /* XON 1 character (R/W) */ #define com_xon2 5 /* XON 2 character (R/W) */ #define com_xoff1 6 /* XOFF 1 character (R/W) */ #define com_xoff2 7 /* XOFF 2 character (R/W) */ #define DW_REG_USR 31 /* DesignWare derived Uart Status Reg */ #define com_usr 39 /* Octeon 16750/16550 Uart Status Reg */ #define REG_USR com_usr #define USR_BUSY 1 /* Uart Busy. Serial transfer in progress */ #define USR_TXFIFO_NOTFULL 2 /* Uart TX FIFO Not full */ /* 16950 register #1. Access enabled by ACR[7]. Also requires !LCR[7]. */ #define com_asr 1 /* additional status register (R[0-7]/W[0-1]) */ /* 16950 register #3. R/W access enabled by ACR[7]. */ #define com_rfl 3 /* receiver fifo level (R) */ /* * 16950 register #4. Access enabled by ACR[7]. Also requires * !LCR_EFR_ENABLE. */ #define com_tfl 4 /* transmitter fifo level (R) */ /* * 16950 register #5. Accessible if !LCR_EFR_ENABLE. Read access also * requires ACR[6]. */ #define com_icr 5 /* index control register (R/W) */ +#define REG_ICR com_icr /* * 16950 register #7. It is the same as com_scr except it has a different * abbreviation in the manufacturer's data sheet and it also serves as an * index into the Indexed Control register set. */ #define com_spr com_scr /* scratch pad (and index) register (R/W) */ #define REG_SPR com_scr /* * 16950 indexed control registers #[0-0x13]. Access is via index in SPR, * data in ICR (if ICR is accessible). */ #define com_acr 0 /* additional control register (R/W) */ +#define REG_ACR com_acr #define ACR_ASE 0x80 /* ASR/RFL/TFL enable */ #define ACR_ICRE 0x40 /* ICR enable */ #define ACR_TLE 0x20 /* TTL/RTL enable */ #define com_cpr 1 /* clock prescaler register (R/W) */ #define com_tcr 2 /* times clock register (R/W) */ #define com_ttl 4 /* transmitter trigger level (R/W) */ #define com_rtl 5 /* receiver trigger level (R/W) */ /* ... */ /* Hardware extension mode register for RSB-2000/3000. */ #define com_emr com_msr #define EMR_EXBUFF 0x04 #define EMR_CTSFLW 0x08 #define EMR_DSRFLW 0x10 #define EMR_RTSFLW 0x20 #define EMR_DTRFLW 0x40 #define EMR_EFMODE 0x80 Index: projects/clang380-import/sys/dev/iscsi/iscsi.c =================================================================== --- projects/clang380-import/sys/dev/iscsi/iscsi.c (revision 293686) +++ projects/clang380-import/sys/dev/iscsi/iscsi.c (revision 293687) @@ -1,2454 +1,2497 @@ /*- * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ICL_KERNEL_PROXY #include #endif #ifdef ICL_KERNEL_PROXY FEATURE(iscsi_kernel_proxy, "iSCSI initiator built with ICL_KERNEL_PROXY"); #endif /* * XXX: This is global so the iscsi_unload() can access it. * Think about how to do this properly. */ static struct iscsi_softc *sc; SYSCTL_NODE(_kern, OID_AUTO, iscsi, CTLFLAG_RD, 0, "iSCSI initiator"); static int debug = 1; SYSCTL_INT(_kern_iscsi, OID_AUTO, debug, CTLFLAG_RWTUN, &debug, 0, "Enable debug messages"); static int ping_timeout = 5; SYSCTL_INT(_kern_iscsi, OID_AUTO, ping_timeout, CTLFLAG_RWTUN, &ping_timeout, 0, "Timeout for ping (NOP-Out) requests, in seconds"); static int iscsid_timeout = 60; SYSCTL_INT(_kern_iscsi, OID_AUTO, iscsid_timeout, CTLFLAG_RWTUN, &iscsid_timeout, 0, "Time to wait for iscsid(8) to handle reconnection, in seconds"); static int login_timeout = 60; SYSCTL_INT(_kern_iscsi, OID_AUTO, login_timeout, CTLFLAG_RWTUN, &login_timeout, 0, "Time to wait for iscsid(8) to finish Login Phase, in seconds"); static int maxtags = 255; SYSCTL_INT(_kern_iscsi, OID_AUTO, maxtags, CTLFLAG_RWTUN, &maxtags, 0, "Max number of IO requests queued"); static int fail_on_disconnection = 0; SYSCTL_INT(_kern_iscsi, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN, &fail_on_disconnection, 0, "Destroy CAM SIM on connection failure"); +static int fail_on_shutdown = 1; +SYSCTL_INT(_kern_iscsi, OID_AUTO, fail_on_shutdown, CTLFLAG_RWTUN, + &fail_on_shutdown, 0, "Fail disconnected sessions on shutdown"); static MALLOC_DEFINE(M_ISCSI, "iSCSI", "iSCSI initiator"); static uma_zone_t iscsi_outstanding_zone; #define CONN_SESSION(X) ((struct iscsi_session *)X->ic_prv0) #define PDU_SESSION(X) (CONN_SESSION(X->ip_conn)) #define ISCSI_DEBUG(X, ...) \ do { \ if (debug > 1) \ printf("%s: " X "\n", __func__, ## __VA_ARGS__);\ } while (0) #define ISCSI_WARN(X, ...) \ do { \ if (debug > 0) { \ printf("WARNING: %s: " X "\n", \ __func__, ## __VA_ARGS__); \ } \ } while (0) #define ISCSI_SESSION_DEBUG(S, X, ...) \ do { \ if (debug > 1) { \ printf("%s: %s (%s): " X "\n", \ __func__, S->is_conf.isc_target_addr, \ S->is_conf.isc_target, ## __VA_ARGS__); \ } \ } while (0) #define ISCSI_SESSION_WARN(S, X, ...) \ do { \ if (debug > 0) { \ printf("WARNING: %s (%s): " X "\n", \ S->is_conf.isc_target_addr, \ S->is_conf.isc_target, ## __VA_ARGS__); \ } \ } while (0) #define ISCSI_SESSION_LOCK(X) mtx_lock(&X->is_lock) #define ISCSI_SESSION_UNLOCK(X) mtx_unlock(&X->is_lock) #define ISCSI_SESSION_LOCK_ASSERT(X) mtx_assert(&X->is_lock, MA_OWNED) #define ISCSI_SESSION_LOCK_ASSERT_NOT(X) mtx_assert(&X->is_lock, MA_NOTOWNED) static int iscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int mode, struct thread *td); static struct cdevsw iscsi_cdevsw = { .d_version = D_VERSION, .d_ioctl = iscsi_ioctl, .d_name = "iscsi", }; static void iscsi_pdu_queue_locked(struct icl_pdu *request); static void iscsi_pdu_queue(struct icl_pdu *request); static void iscsi_pdu_update_statsn(const struct icl_pdu *response); static void iscsi_pdu_handle_nop_in(struct icl_pdu *response); static void iscsi_pdu_handle_scsi_response(struct icl_pdu *response); static void iscsi_pdu_handle_task_response(struct icl_pdu *response); static void iscsi_pdu_handle_data_in(struct icl_pdu *response); static void iscsi_pdu_handle_logout_response(struct icl_pdu *response); static void iscsi_pdu_handle_r2t(struct icl_pdu *response); static void iscsi_pdu_handle_async_message(struct icl_pdu *response); static void iscsi_pdu_handle_reject(struct icl_pdu *response); static void iscsi_session_reconnect(struct iscsi_session *is); static void iscsi_session_terminate(struct iscsi_session *is); static void iscsi_action(struct cam_sim *sim, union ccb *ccb); static void iscsi_poll(struct cam_sim *sim); static struct iscsi_outstanding *iscsi_outstanding_find(struct iscsi_session *is, uint32_t initiator_task_tag); static struct iscsi_outstanding *iscsi_outstanding_add(struct iscsi_session *is, union ccb *ccb, uint32_t *initiator_task_tagp); static void iscsi_outstanding_remove(struct iscsi_session *is, struct iscsi_outstanding *io); static bool iscsi_pdu_prepare(struct icl_pdu *request) { struct iscsi_session *is; struct iscsi_bhs_scsi_command *bhssc; is = PDU_SESSION(request); ISCSI_SESSION_LOCK_ASSERT(is); /* * We're only using fields common for all the request * (initiator -> target) PDUs. */ bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs; /* * Data-Out PDU does not contain CmdSN. */ if (bhssc->bhssc_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_OUT) { if (ISCSI_SNGT(is->is_cmdsn, is->is_maxcmdsn) && (bhssc->bhssc_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) == 0) { /* * Current MaxCmdSN prevents us from sending any more * SCSI Command PDUs to the target; postpone the PDU. * It will get resent by either iscsi_pdu_queue(), * or by maintenance thread. */ #if 0 ISCSI_SESSION_DEBUG(is, "postponing send, CmdSN %u, " "ExpCmdSN %u, MaxCmdSN %u, opcode 0x%x", is->is_cmdsn, is->is_expcmdsn, is->is_maxcmdsn, bhssc->bhssc_opcode); #endif return (true); } bhssc->bhssc_cmdsn = htonl(is->is_cmdsn); if ((bhssc->bhssc_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) == 0) is->is_cmdsn++; } bhssc->bhssc_expstatsn = htonl(is->is_statsn + 1); return (false); } static void iscsi_session_send_postponed(struct iscsi_session *is) { struct icl_pdu *request; bool postpone; ISCSI_SESSION_LOCK_ASSERT(is); while (!STAILQ_EMPTY(&is->is_postponed)) { request = STAILQ_FIRST(&is->is_postponed); postpone = iscsi_pdu_prepare(request); if (postpone) break; STAILQ_REMOVE_HEAD(&is->is_postponed, ip_next); icl_pdu_queue(request); } } static void iscsi_pdu_queue_locked(struct icl_pdu *request) { struct iscsi_session *is; bool postpone; is = PDU_SESSION(request); ISCSI_SESSION_LOCK_ASSERT(is); iscsi_session_send_postponed(is); postpone = iscsi_pdu_prepare(request); if (postpone) { STAILQ_INSERT_TAIL(&is->is_postponed, request, ip_next); return; } icl_pdu_queue(request); } static void iscsi_pdu_queue(struct icl_pdu *request) { struct iscsi_session *is; is = PDU_SESSION(request); ISCSI_SESSION_LOCK(is); iscsi_pdu_queue_locked(request); ISCSI_SESSION_UNLOCK(is); } static void iscsi_session_logout(struct iscsi_session *is) { struct icl_pdu *request; struct iscsi_bhs_logout_request *bhslr; request = icl_pdu_new(is->is_conn, M_NOWAIT); if (request == NULL) return; bhslr = (struct iscsi_bhs_logout_request *)request->ip_bhs; bhslr->bhslr_opcode = ISCSI_BHS_OPCODE_LOGOUT_REQUEST; bhslr->bhslr_reason = BHSLR_REASON_CLOSE_SESSION; iscsi_pdu_queue_locked(request); } static void iscsi_session_terminate_task(struct iscsi_session *is, struct iscsi_outstanding *io, bool requeue) { if (io->io_ccb != NULL) { io->io_ccb->ccb_h.status &= ~(CAM_SIM_QUEUED | CAM_STATUS_MASK); if (requeue) io->io_ccb->ccb_h.status |= CAM_REQUEUE_REQ; else io->io_ccb->ccb_h.status |= CAM_REQ_ABORTED; if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { io->io_ccb->ccb_h.status |= CAM_DEV_QFRZN; xpt_freeze_devq(io->io_ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } xpt_done(io->io_ccb); } iscsi_outstanding_remove(is, io); } static void iscsi_session_terminate_tasks(struct iscsi_session *is, bool requeue) { struct iscsi_outstanding *io, *tmp; ISCSI_SESSION_LOCK_ASSERT(is); TAILQ_FOREACH_SAFE(io, &is->is_outstanding, io_next, tmp) { iscsi_session_terminate_task(is, io, requeue); } } static void iscsi_session_cleanup(struct iscsi_session *is, bool destroy_sim) { struct icl_pdu *pdu; ISCSI_SESSION_LOCK_ASSERT(is); /* * Don't queue any new PDUs. */ if (is->is_sim != NULL && is->is_simq_frozen == false) { ISCSI_SESSION_DEBUG(is, "freezing"); xpt_freeze_simq(is->is_sim, 1); is->is_simq_frozen = true; } /* * Remove postponed PDUs. */ while (!STAILQ_EMPTY(&is->is_postponed)) { pdu = STAILQ_FIRST(&is->is_postponed); STAILQ_REMOVE_HEAD(&is->is_postponed, ip_next); icl_pdu_free(pdu); } if (destroy_sim == false) { /* * Terminate SCSI tasks, asking CAM to requeue them. */ iscsi_session_terminate_tasks(is, true); return; } iscsi_session_terminate_tasks(is, false); if (is->is_sim == NULL) return; ISCSI_SESSION_DEBUG(is, "deregistering SIM"); xpt_async(AC_LOST_DEVICE, is->is_path, NULL); if (is->is_simq_frozen) { xpt_release_simq(is->is_sim, 1); is->is_simq_frozen = false; } xpt_free_path(is->is_path); is->is_path = NULL; xpt_bus_deregister(cam_sim_path(is->is_sim)); cam_sim_free(is->is_sim, TRUE /*free_devq*/); is->is_sim = NULL; is->is_devq = NULL; } static void iscsi_maintenance_thread_reconnect(struct iscsi_session *is) { icl_conn_close(is->is_conn); ISCSI_SESSION_LOCK(is); is->is_connected = false; is->is_reconnecting = false; is->is_login_phase = false; #ifdef ICL_KERNEL_PROXY if (is->is_login_pdu != NULL) { icl_pdu_free(is->is_login_pdu); is->is_login_pdu = NULL; } cv_signal(&is->is_login_cv); #endif if (fail_on_disconnection) { ISCSI_SESSION_DEBUG(is, "connection failed, destroying devices"); iscsi_session_cleanup(is, true); } else { iscsi_session_cleanup(is, false); } KASSERT(TAILQ_EMPTY(&is->is_outstanding), ("destroying session with active tasks")); KASSERT(STAILQ_EMPTY(&is->is_postponed), ("destroying session with postponed PDUs")); /* * Request immediate reconnection from iscsid(8). */ //ISCSI_SESSION_DEBUG(is, "waking up iscsid(8)"); is->is_waiting_for_iscsid = true; strlcpy(is->is_reason, "Waiting for iscsid(8)", sizeof(is->is_reason)); is->is_timeout = 0; ISCSI_SESSION_UNLOCK(is); cv_signal(&is->is_softc->sc_cv); } static void iscsi_maintenance_thread_terminate(struct iscsi_session *is) { struct iscsi_softc *sc; sc = is->is_softc; sx_xlock(&sc->sc_lock); - TAILQ_REMOVE(&sc->sc_sessions, is, is_next); - sx_xunlock(&sc->sc_lock); icl_conn_close(is->is_conn); callout_drain(&is->is_callout); ISCSI_SESSION_LOCK(is); KASSERT(is->is_terminating, ("is_terminating == false")); #ifdef ICL_KERNEL_PROXY if (is->is_login_pdu != NULL) { icl_pdu_free(is->is_login_pdu); is->is_login_pdu = NULL; } cv_signal(&is->is_login_cv); #endif iscsi_session_cleanup(is, true); KASSERT(TAILQ_EMPTY(&is->is_outstanding), ("destroying session with active tasks")); KASSERT(STAILQ_EMPTY(&is->is_postponed), ("destroying session with postponed PDUs")); ISCSI_SESSION_UNLOCK(is); icl_conn_free(is->is_conn); mtx_destroy(&is->is_lock); cv_destroy(&is->is_maintenance_cv); #ifdef ICL_KERNEL_PROXY cv_destroy(&is->is_login_cv); #endif + TAILQ_REMOVE(&sc->sc_sessions, is, is_next); + sx_xunlock(&sc->sc_lock); + ISCSI_SESSION_DEBUG(is, "terminated"); free(is, M_ISCSI); /* * The iscsi_unload() routine might be waiting. */ cv_signal(&sc->sc_cv); } static void iscsi_maintenance_thread(void *arg) { struct iscsi_session *is; is = arg; for (;;) { ISCSI_SESSION_LOCK(is); if (is->is_reconnecting == false && is->is_terminating == false && STAILQ_EMPTY(&is->is_postponed)) cv_wait(&is->is_maintenance_cv, &is->is_lock); - if (is->is_reconnecting) { - ISCSI_SESSION_UNLOCK(is); - iscsi_maintenance_thread_reconnect(is); - continue; - } - + /* Terminate supersedes reconnect. */ if (is->is_terminating) { ISCSI_SESSION_UNLOCK(is); iscsi_maintenance_thread_terminate(is); kthread_exit(); return; } + if (is->is_reconnecting) { + ISCSI_SESSION_UNLOCK(is); + iscsi_maintenance_thread_reconnect(is); + continue; + } + iscsi_session_send_postponed(is); ISCSI_SESSION_UNLOCK(is); } } static void iscsi_session_reconnect(struct iscsi_session *is) { /* * XXX: We can't use locking here, because * it's being called from various contexts. * Hope it doesn't break anything. */ if (is->is_reconnecting) return; is->is_reconnecting = true; cv_signal(&is->is_maintenance_cv); } static void iscsi_session_terminate(struct iscsi_session *is) { if (is->is_terminating) return; is->is_terminating = true; #if 0 iscsi_session_logout(is); #endif cv_signal(&is->is_maintenance_cv); } static void iscsi_callout(void *context) { struct icl_pdu *request; struct iscsi_bhs_nop_out *bhsno; struct iscsi_session *is; bool reconnect_needed = false; is = context; ISCSI_SESSION_LOCK(is); if (is->is_terminating) { ISCSI_SESSION_UNLOCK(is); return; } callout_schedule(&is->is_callout, 1 * hz); is->is_timeout++; if (is->is_waiting_for_iscsid) { if (iscsid_timeout > 0 && is->is_timeout > iscsid_timeout) { ISCSI_SESSION_WARN(is, "timed out waiting for iscsid(8) " "for %d seconds; reconnecting", is->is_timeout); reconnect_needed = true; } goto out; } if (is->is_login_phase) { if (login_timeout > 0 && is->is_timeout > login_timeout) { ISCSI_SESSION_WARN(is, "login timed out after %d seconds; " "reconnecting", is->is_timeout); reconnect_needed = true; } goto out; } if (ping_timeout <= 0) { /* * Pings are disabled. Don't send NOP-Out in this case. * Reset the timeout, to avoid triggering reconnection, * should the user decide to reenable them. */ is->is_timeout = 0; goto out; } if (is->is_timeout >= ping_timeout) { ISCSI_SESSION_WARN(is, "no ping reply (NOP-In) after %d seconds; " "reconnecting", ping_timeout); reconnect_needed = true; goto out; } ISCSI_SESSION_UNLOCK(is); /* * If the ping was reset less than one second ago - which means * that we've received some PDU during the last second - assume * the traffic flows correctly and don't bother sending a NOP-Out. * * (It's 2 - one for one second, and one for incrementing is_timeout * earlier in this routine.) */ if (is->is_timeout < 2) return; request = icl_pdu_new(is->is_conn, M_NOWAIT); if (request == NULL) { ISCSI_SESSION_WARN(is, "failed to allocate PDU"); return; } bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs; bhsno->bhsno_opcode = ISCSI_BHS_OPCODE_NOP_OUT | ISCSI_BHS_OPCODE_IMMEDIATE; bhsno->bhsno_flags = 0x80; bhsno->bhsno_target_transfer_tag = 0xffffffff; iscsi_pdu_queue(request); return; out: + if (is->is_terminating) { + ISCSI_SESSION_UNLOCK(is); + return; + } + ISCSI_SESSION_UNLOCK(is); if (reconnect_needed) iscsi_session_reconnect(is); } static void iscsi_pdu_update_statsn(const struct icl_pdu *response) { const struct iscsi_bhs_data_in *bhsdi; struct iscsi_session *is; uint32_t expcmdsn, maxcmdsn, statsn; is = PDU_SESSION(response); ISCSI_SESSION_LOCK_ASSERT(is); /* * We're only using fields common for all the response * (target -> initiator) PDUs. */ bhsdi = (const struct iscsi_bhs_data_in *)response->ip_bhs; /* * Ok, I lied. In case of Data-In, "The fields StatSN, Status, * and Residual Count only have meaningful content if the S bit * is set to 1", so we also need to check the bit specific for * Data-In PDU. */ if (bhsdi->bhsdi_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_IN || (bhsdi->bhsdi_flags & BHSDI_FLAGS_S) != 0) { statsn = ntohl(bhsdi->bhsdi_statsn); if (statsn != is->is_statsn && statsn != (is->is_statsn + 1)) { /* XXX: This is normal situation for MCS */ ISCSI_SESSION_WARN(is, "PDU 0x%x StatSN %u != " "session ExpStatSN %u (or + 1); reconnecting", bhsdi->bhsdi_opcode, statsn, is->is_statsn); iscsi_session_reconnect(is); } if (ISCSI_SNGT(statsn, is->is_statsn)) is->is_statsn = statsn; } expcmdsn = ntohl(bhsdi->bhsdi_expcmdsn); maxcmdsn = ntohl(bhsdi->bhsdi_maxcmdsn); if (ISCSI_SNLT(maxcmdsn + 1, expcmdsn)) { ISCSI_SESSION_DEBUG(is, "PDU MaxCmdSN %u + 1 < PDU ExpCmdSN %u; ignoring", maxcmdsn, expcmdsn); } else { if (ISCSI_SNGT(maxcmdsn, is->is_maxcmdsn)) { is->is_maxcmdsn = maxcmdsn; /* * Command window increased; kick the maintanance thread * to send out postponed commands. */ if (!STAILQ_EMPTY(&is->is_postponed)) cv_signal(&is->is_maintenance_cv); } else if (ISCSI_SNLT(maxcmdsn, is->is_maxcmdsn)) { /* XXX: This is normal situation for MCS */ ISCSI_SESSION_DEBUG(is, "PDU MaxCmdSN %u < session MaxCmdSN %u; ignoring", maxcmdsn, is->is_maxcmdsn); } if (ISCSI_SNGT(expcmdsn, is->is_expcmdsn)) { is->is_expcmdsn = expcmdsn; } else if (ISCSI_SNLT(expcmdsn, is->is_expcmdsn)) { /* XXX: This is normal situation for MCS */ ISCSI_SESSION_DEBUG(is, "PDU ExpCmdSN %u < session ExpCmdSN %u; ignoring", expcmdsn, is->is_expcmdsn); } } /* * Every incoming PDU - not just NOP-In - resets the ping timer. * The purpose of the timeout is to reset the connection when it stalls; * we don't want this to happen when NOP-In or NOP-Out ends up delayed * in some queue. */ is->is_timeout = 0; } static void iscsi_receive_callback(struct icl_pdu *response) { struct iscsi_session *is; is = PDU_SESSION(response); ISCSI_SESSION_LOCK(is); #ifdef ICL_KERNEL_PROXY if (is->is_login_phase) { if (is->is_login_pdu == NULL) is->is_login_pdu = response; else icl_pdu_free(response); ISCSI_SESSION_UNLOCK(is); cv_signal(&is->is_login_cv); return; } #endif iscsi_pdu_update_statsn(response); /* * The handling routine is responsible for freeing the PDU * when it's no longer needed. */ switch (response->ip_bhs->bhs_opcode) { case ISCSI_BHS_OPCODE_NOP_IN: iscsi_pdu_handle_nop_in(response); ISCSI_SESSION_UNLOCK(is); break; case ISCSI_BHS_OPCODE_SCSI_RESPONSE: iscsi_pdu_handle_scsi_response(response); /* Session lock dropped inside. */ ISCSI_SESSION_LOCK_ASSERT_NOT(is); break; case ISCSI_BHS_OPCODE_TASK_RESPONSE: iscsi_pdu_handle_task_response(response); ISCSI_SESSION_UNLOCK(is); break; case ISCSI_BHS_OPCODE_SCSI_DATA_IN: iscsi_pdu_handle_data_in(response); /* Session lock dropped inside. */ ISCSI_SESSION_LOCK_ASSERT_NOT(is); break; case ISCSI_BHS_OPCODE_LOGOUT_RESPONSE: iscsi_pdu_handle_logout_response(response); ISCSI_SESSION_UNLOCK(is); break; case ISCSI_BHS_OPCODE_R2T: iscsi_pdu_handle_r2t(response); ISCSI_SESSION_UNLOCK(is); break; case ISCSI_BHS_OPCODE_ASYNC_MESSAGE: iscsi_pdu_handle_async_message(response); ISCSI_SESSION_UNLOCK(is); break; case ISCSI_BHS_OPCODE_REJECT: iscsi_pdu_handle_reject(response); ISCSI_SESSION_UNLOCK(is); break; default: ISCSI_SESSION_WARN(is, "received PDU with unsupported " "opcode 0x%x; reconnecting", response->ip_bhs->bhs_opcode); iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); icl_pdu_free(response); } } static void iscsi_error_callback(struct icl_conn *ic) { struct iscsi_session *is; is = CONN_SESSION(ic); ISCSI_SESSION_WARN(is, "connection error; reconnecting"); iscsi_session_reconnect(is); } static void iscsi_pdu_handle_nop_in(struct icl_pdu *response) { struct iscsi_session *is; struct iscsi_bhs_nop_out *bhsno; struct iscsi_bhs_nop_in *bhsni; struct icl_pdu *request; void *data = NULL; size_t datasize; int error; is = PDU_SESSION(response); bhsni = (struct iscsi_bhs_nop_in *)response->ip_bhs; if (bhsni->bhsni_target_transfer_tag == 0xffffffff) { /* * Nothing to do; iscsi_pdu_update_statsn() already * zeroed the timeout. */ icl_pdu_free(response); return; } datasize = icl_pdu_data_segment_length(response); if (datasize > 0) { data = malloc(datasize, M_ISCSI, M_NOWAIT | M_ZERO); if (data == NULL) { ISCSI_SESSION_WARN(is, "failed to allocate memory; " "reconnecting"); icl_pdu_free(response); iscsi_session_reconnect(is); return; } icl_pdu_get_data(response, 0, data, datasize); } request = icl_pdu_new(response->ip_conn, M_NOWAIT); if (request == NULL) { ISCSI_SESSION_WARN(is, "failed to allocate memory; " "reconnecting"); free(data, M_ISCSI); icl_pdu_free(response); iscsi_session_reconnect(is); return; } bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs; bhsno->bhsno_opcode = ISCSI_BHS_OPCODE_NOP_OUT | ISCSI_BHS_OPCODE_IMMEDIATE; bhsno->bhsno_flags = 0x80; bhsno->bhsno_initiator_task_tag = 0xffffffff; bhsno->bhsno_target_transfer_tag = bhsni->bhsni_target_transfer_tag; if (datasize > 0) { error = icl_pdu_append_data(request, data, datasize, M_NOWAIT); if (error != 0) { ISCSI_SESSION_WARN(is, "failed to allocate memory; " "reconnecting"); free(data, M_ISCSI); icl_pdu_free(request); icl_pdu_free(response); iscsi_session_reconnect(is); return; } free(data, M_ISCSI); } icl_pdu_free(response); iscsi_pdu_queue_locked(request); } static void iscsi_pdu_handle_scsi_response(struct icl_pdu *response) { struct iscsi_bhs_scsi_response *bhssr; struct iscsi_outstanding *io; struct iscsi_session *is; union ccb *ccb; struct ccb_scsiio *csio; size_t data_segment_len, received; uint16_t sense_len; is = PDU_SESSION(response); bhssr = (struct iscsi_bhs_scsi_response *)response->ip_bhs; io = iscsi_outstanding_find(is, bhssr->bhssr_initiator_task_tag); if (io == NULL || io->io_ccb == NULL) { ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhssr->bhssr_initiator_task_tag); icl_pdu_free(response); iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); return; } ccb = io->io_ccb; received = io->io_received; iscsi_outstanding_remove(is, io); ISCSI_SESSION_UNLOCK(is); if (bhssr->bhssr_response != BHSSR_RESPONSE_COMMAND_COMPLETED) { ISCSI_SESSION_WARN(is, "service response 0x%x", bhssr->bhssr_response); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN; } else if (bhssr->bhssr_status == 0) { ccb->ccb_h.status = CAM_REQ_CMP; } else { if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_DEV_QFRZN; ccb->csio.scsi_status = bhssr->bhssr_status; } csio = &ccb->csio; data_segment_len = icl_pdu_data_segment_length(response); if (data_segment_len > 0) { if (data_segment_len < sizeof(sense_len)) { ISCSI_SESSION_WARN(is, "truncated data segment (%zd bytes)", data_segment_len); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN; goto out; } icl_pdu_get_data(response, 0, &sense_len, sizeof(sense_len)); sense_len = ntohs(sense_len); #if 0 ISCSI_SESSION_DEBUG(is, "sense_len %d, data len %zd", sense_len, data_segment_len); #endif if (sizeof(sense_len) + sense_len > data_segment_len) { ISCSI_SESSION_WARN(is, "truncated data segment " "(%zd bytes, should be %zd)", data_segment_len, sizeof(sense_len) + sense_len); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN; goto out; } else if (sizeof(sense_len) + sense_len < data_segment_len) ISCSI_SESSION_WARN(is, "oversize data segment " "(%zd bytes, should be %zd)", data_segment_len, sizeof(sense_len) + sense_len); if (sense_len > csio->sense_len) { ISCSI_SESSION_DEBUG(is, "truncating sense from %d to %d", sense_len, csio->sense_len); sense_len = csio->sense_len; } icl_pdu_get_data(response, sizeof(sense_len), &csio->sense_data, sense_len); csio->sense_resid = csio->sense_len - sense_len; ccb->ccb_h.status |= CAM_AUTOSNS_VALID; } out: if (bhssr->bhssr_flags & BHSSR_FLAGS_RESIDUAL_UNDERFLOW) csio->resid = ntohl(bhssr->bhssr_residual_count); if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { KASSERT(received <= csio->dxfer_len, ("received > csio->dxfer_len")); if (received < csio->dxfer_len) { if (csio->resid != csio->dxfer_len - received) { ISCSI_SESSION_WARN(is, "underflow mismatch: " "target indicates %d, we calculated %zd", csio->resid, csio->dxfer_len - received); } csio->resid = csio->dxfer_len - received; } } xpt_done(ccb); icl_pdu_free(response); } static void iscsi_pdu_handle_task_response(struct icl_pdu *response) { struct iscsi_bhs_task_management_response *bhstmr; struct iscsi_outstanding *io, *aio; struct iscsi_session *is; is = PDU_SESSION(response); bhstmr = (struct iscsi_bhs_task_management_response *)response->ip_bhs; io = iscsi_outstanding_find(is, bhstmr->bhstmr_initiator_task_tag); if (io == NULL || io->io_ccb != NULL) { ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhstmr->bhstmr_initiator_task_tag); icl_pdu_free(response); iscsi_session_reconnect(is); return; } if (bhstmr->bhstmr_response != BHSTMR_RESPONSE_FUNCTION_COMPLETE) { ISCSI_SESSION_WARN(is, "task response 0x%x", bhstmr->bhstmr_response); } else { aio = iscsi_outstanding_find(is, io->io_datasn); if (aio != NULL && aio->io_ccb != NULL) iscsi_session_terminate_task(is, aio, false); } iscsi_outstanding_remove(is, io); icl_pdu_free(response); } static void iscsi_pdu_handle_data_in(struct icl_pdu *response) { struct iscsi_bhs_data_in *bhsdi; struct iscsi_outstanding *io; struct iscsi_session *is; union ccb *ccb; struct ccb_scsiio *csio; size_t data_segment_len, received, oreceived; is = PDU_SESSION(response); bhsdi = (struct iscsi_bhs_data_in *)response->ip_bhs; io = iscsi_outstanding_find(is, bhsdi->bhsdi_initiator_task_tag); if (io == NULL || io->io_ccb == NULL) { ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhsdi->bhsdi_initiator_task_tag); icl_pdu_free(response); iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); return; } data_segment_len = icl_pdu_data_segment_length(response); if (data_segment_len == 0) { /* * "The sending of 0 length data segments should be avoided, * but initiators and targets MUST be able to properly receive * 0 length data segments." */ ISCSI_SESSION_UNLOCK(is); icl_pdu_free(response); return; } /* * We need to track this for security reasons - without it, malicious target * could respond to SCSI READ without sending Data-In PDUs, which would result * in read operation on the initiator side returning random kernel data. */ if (ntohl(bhsdi->bhsdi_buffer_offset) != io->io_received) { ISCSI_SESSION_WARN(is, "data out of order; expected offset %zd, got %zd", io->io_received, (size_t)ntohl(bhsdi->bhsdi_buffer_offset)); icl_pdu_free(response); iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); return; } ccb = io->io_ccb; csio = &ccb->csio; if (io->io_received + data_segment_len > csio->dxfer_len) { ISCSI_SESSION_WARN(is, "oversize data segment (%zd bytes " "at offset %zd, buffer is %d)", data_segment_len, io->io_received, csio->dxfer_len); icl_pdu_free(response); iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); return; } oreceived = io->io_received; io->io_received += data_segment_len; received = io->io_received; if ((bhsdi->bhsdi_flags & BHSDI_FLAGS_S) != 0) iscsi_outstanding_remove(is, io); ISCSI_SESSION_UNLOCK(is); icl_pdu_get_data(response, 0, csio->data_ptr + oreceived, data_segment_len); /* * XXX: Check DataSN. * XXX: Check F. */ if ((bhsdi->bhsdi_flags & BHSDI_FLAGS_S) == 0) { /* * Nothing more to do. */ icl_pdu_free(response); return; } //ISCSI_SESSION_DEBUG(is, "got S flag; status 0x%x", bhsdi->bhsdi_status); if (bhsdi->bhsdi_status == 0) { ccb->ccb_h.status = CAM_REQ_CMP; } else { if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_DEV_QFRZN; csio->scsi_status = bhsdi->bhsdi_status; } if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { KASSERT(received <= csio->dxfer_len, ("received > csio->dxfer_len")); if (received < csio->dxfer_len) { csio->resid = ntohl(bhsdi->bhsdi_residual_count); if (csio->resid != csio->dxfer_len - received) { ISCSI_SESSION_WARN(is, "underflow mismatch: " "target indicates %d, we calculated %zd", csio->resid, csio->dxfer_len - received); } csio->resid = csio->dxfer_len - received; } } xpt_done(ccb); icl_pdu_free(response); } static void iscsi_pdu_handle_logout_response(struct icl_pdu *response) { ISCSI_SESSION_DEBUG(PDU_SESSION(response), "logout response"); icl_pdu_free(response); } static void iscsi_pdu_handle_r2t(struct icl_pdu *response) { struct icl_pdu *request; struct iscsi_session *is; struct iscsi_bhs_r2t *bhsr2t; struct iscsi_bhs_data_out *bhsdo; struct iscsi_outstanding *io; struct ccb_scsiio *csio; size_t off, len, total_len; int error; is = PDU_SESSION(response); bhsr2t = (struct iscsi_bhs_r2t *)response->ip_bhs; io = iscsi_outstanding_find(is, bhsr2t->bhsr2t_initiator_task_tag); if (io == NULL || io->io_ccb == NULL) { ISCSI_SESSION_WARN(is, "bad itt 0x%x; reconnecting", bhsr2t->bhsr2t_initiator_task_tag); icl_pdu_free(response); iscsi_session_reconnect(is); return; } csio = &io->io_ccb->csio; if ((csio->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_OUT) { ISCSI_SESSION_WARN(is, "received R2T for read command; reconnecting"); icl_pdu_free(response); iscsi_session_reconnect(is); return; } /* * XXX: Verify R2TSN. */ io->io_datasn = 0; off = ntohl(bhsr2t->bhsr2t_buffer_offset); if (off > csio->dxfer_len) { ISCSI_SESSION_WARN(is, "target requested invalid offset " "%zd, buffer is is %d; reconnecting", off, csio->dxfer_len); icl_pdu_free(response); iscsi_session_reconnect(is); return; } total_len = ntohl(bhsr2t->bhsr2t_desired_data_transfer_length); if (total_len == 0 || total_len > csio->dxfer_len) { ISCSI_SESSION_WARN(is, "target requested invalid length " "%zd, buffer is %d; reconnecting", total_len, csio->dxfer_len); icl_pdu_free(response); iscsi_session_reconnect(is); return; } //ISCSI_SESSION_DEBUG(is, "r2t; off %zd, len %zd", off, total_len); for (;;) { len = total_len; if (len > is->is_max_data_segment_length) len = is->is_max_data_segment_length; if (off + len > csio->dxfer_len) { ISCSI_SESSION_WARN(is, "target requested invalid " "length/offset %zd, buffer is %d; reconnecting", off + len, csio->dxfer_len); icl_pdu_free(response); iscsi_session_reconnect(is); return; } request = icl_pdu_new(response->ip_conn, M_NOWAIT); if (request == NULL) { icl_pdu_free(response); iscsi_session_reconnect(is); return; } bhsdo = (struct iscsi_bhs_data_out *)request->ip_bhs; bhsdo->bhsdo_opcode = ISCSI_BHS_OPCODE_SCSI_DATA_OUT; bhsdo->bhsdo_lun = bhsr2t->bhsr2t_lun; bhsdo->bhsdo_initiator_task_tag = bhsr2t->bhsr2t_initiator_task_tag; bhsdo->bhsdo_target_transfer_tag = bhsr2t->bhsr2t_target_transfer_tag; bhsdo->bhsdo_datasn = htonl(io->io_datasn++); bhsdo->bhsdo_buffer_offset = htonl(off); error = icl_pdu_append_data(request, csio->data_ptr + off, len, M_NOWAIT); if (error != 0) { ISCSI_SESSION_WARN(is, "failed to allocate memory; " "reconnecting"); icl_pdu_free(request); icl_pdu_free(response); iscsi_session_reconnect(is); return; } off += len; total_len -= len; if (total_len == 0) { bhsdo->bhsdo_flags |= BHSDO_FLAGS_F; //ISCSI_SESSION_DEBUG(is, "setting F, off %zd", off); } else { //ISCSI_SESSION_DEBUG(is, "not finished, off %zd", off); } iscsi_pdu_queue_locked(request); if (total_len == 0) break; } icl_pdu_free(response); } static void iscsi_pdu_handle_async_message(struct icl_pdu *response) { struct iscsi_bhs_asynchronous_message *bhsam; struct iscsi_session *is; is = PDU_SESSION(response); bhsam = (struct iscsi_bhs_asynchronous_message *)response->ip_bhs; switch (bhsam->bhsam_async_event) { case BHSAM_EVENT_TARGET_REQUESTS_LOGOUT: ISCSI_SESSION_WARN(is, "target requests logout; removing session"); iscsi_session_logout(is); iscsi_session_terminate(is); break; case BHSAM_EVENT_TARGET_TERMINATES_CONNECTION: ISCSI_SESSION_WARN(is, "target indicates it will drop drop the connection"); break; case BHSAM_EVENT_TARGET_TERMINATES_SESSION: ISCSI_SESSION_WARN(is, "target indicates it will drop drop the session"); break; default: /* * XXX: Technically, we're obligated to also handle * parameter renegotiation. */ ISCSI_SESSION_WARN(is, "ignoring AsyncEvent %d", bhsam->bhsam_async_event); break; } icl_pdu_free(response); } static void iscsi_pdu_handle_reject(struct icl_pdu *response) { struct iscsi_bhs_reject *bhsr; struct iscsi_session *is; is = PDU_SESSION(response); bhsr = (struct iscsi_bhs_reject *)response->ip_bhs; ISCSI_SESSION_WARN(is, "received Reject PDU, reason 0x%x; protocol error?", bhsr->bhsr_reason); icl_pdu_free(response); } static int iscsi_ioctl_daemon_wait(struct iscsi_softc *sc, struct iscsi_daemon_request *request) { struct iscsi_session *is; int error; sx_slock(&sc->sc_lock); for (;;) { TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { ISCSI_SESSION_LOCK(is); if (is->is_waiting_for_iscsid) break; ISCSI_SESSION_UNLOCK(is); } if (is == NULL) { /* * No session requires attention from iscsid(8); wait. */ error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock); if (error != 0) { sx_sunlock(&sc->sc_lock); return (error); } continue; } is->is_waiting_for_iscsid = false; is->is_login_phase = true; is->is_reason[0] = '\0'; ISCSI_SESSION_UNLOCK(is); request->idr_session_id = is->is_id; memcpy(&request->idr_isid, &is->is_isid, sizeof(request->idr_isid)); request->idr_tsih = 0; /* New or reinstated session. */ memcpy(&request->idr_conf, &is->is_conf, sizeof(request->idr_conf)); error = icl_limits(is->is_conf.isc_offload, &request->idr_limits.isl_max_data_segment_length); if (error != 0) { ISCSI_SESSION_WARN(is, "icl_limits for offload \"%s\" " "failed with error %d", is->is_conf.isc_offload, error); sx_sunlock(&sc->sc_lock); return (error); } sx_sunlock(&sc->sc_lock); return (0); } } static int iscsi_ioctl_daemon_handoff(struct iscsi_softc *sc, struct iscsi_daemon_handoff *handoff) { struct iscsi_session *is; struct icl_conn *ic; int error; sx_slock(&sc->sc_lock); /* * Find the session to hand off socket to. */ TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == handoff->idh_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } ISCSI_SESSION_LOCK(is); ic = is->is_conn; if (is->is_conf.isc_discovery || is->is_terminating) { ISCSI_SESSION_UNLOCK(is); sx_sunlock(&sc->sc_lock); return (EINVAL); } if (is->is_connected) { /* * This might have happened because another iscsid(8) * instance handed off the connection in the meantime. * Just return. */ ISCSI_SESSION_WARN(is, "handoff on already connected " "session"); ISCSI_SESSION_UNLOCK(is); sx_sunlock(&sc->sc_lock); return (EBUSY); } strlcpy(is->is_target_alias, handoff->idh_target_alias, sizeof(is->is_target_alias)); is->is_tsih = handoff->idh_tsih; is->is_statsn = handoff->idh_statsn; is->is_initial_r2t = handoff->idh_initial_r2t; is->is_immediate_data = handoff->idh_immediate_data; /* * Cap MaxRecvDataSegmentLength obtained from the target to the maximum * size supported by our ICL module. */ is->is_max_data_segment_length = min(ic->ic_max_data_segment_length, handoff->idh_max_data_segment_length); is->is_max_burst_length = handoff->idh_max_burst_length; is->is_first_burst_length = handoff->idh_first_burst_length; if (handoff->idh_header_digest == ISCSI_DIGEST_CRC32C) ic->ic_header_crc32c = true; else ic->ic_header_crc32c = false; if (handoff->idh_data_digest == ISCSI_DIGEST_CRC32C) ic->ic_data_crc32c = true; else ic->ic_data_crc32c = false; is->is_cmdsn = 0; is->is_expcmdsn = 0; is->is_maxcmdsn = 0; is->is_waiting_for_iscsid = false; is->is_login_phase = false; is->is_timeout = 0; is->is_connected = true; is->is_reason[0] = '\0'; ISCSI_SESSION_UNLOCK(is); #ifdef ICL_KERNEL_PROXY if (handoff->idh_socket != 0) { #endif /* * Handoff without using ICL proxy. */ error = icl_conn_handoff(ic, handoff->idh_socket); if (error != 0) { sx_sunlock(&sc->sc_lock); iscsi_session_terminate(is); return (error); } #ifdef ICL_KERNEL_PROXY } #endif sx_sunlock(&sc->sc_lock); if (is->is_sim != NULL) { /* * When reconnecting, there already is SIM allocated for the session. */ KASSERT(is->is_simq_frozen, ("reconnect without frozen simq")); ISCSI_SESSION_LOCK(is); ISCSI_SESSION_DEBUG(is, "releasing"); xpt_release_simq(is->is_sim, 1); is->is_simq_frozen = false; ISCSI_SESSION_UNLOCK(is); } else { ISCSI_SESSION_LOCK(is); is->is_devq = cam_simq_alloc(maxtags); if (is->is_devq == NULL) { ISCSI_SESSION_WARN(is, "failed to allocate simq"); iscsi_session_terminate(is); return (ENOMEM); } is->is_sim = cam_sim_alloc(iscsi_action, iscsi_poll, "iscsi", is, is->is_id /* unit */, &is->is_lock, 1, maxtags, is->is_devq); if (is->is_sim == NULL) { ISCSI_SESSION_UNLOCK(is); ISCSI_SESSION_WARN(is, "failed to allocate SIM"); cam_simq_free(is->is_devq); iscsi_session_terminate(is); return (ENOMEM); } error = xpt_bus_register(is->is_sim, NULL, 0); if (error != 0) { ISCSI_SESSION_UNLOCK(is); ISCSI_SESSION_WARN(is, "failed to register bus"); iscsi_session_terminate(is); return (ENOMEM); } error = xpt_create_path(&is->is_path, /*periph*/NULL, cam_sim_path(is->is_sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD); if (error != CAM_REQ_CMP) { ISCSI_SESSION_UNLOCK(is); ISCSI_SESSION_WARN(is, "failed to create path"); iscsi_session_terminate(is); return (ENOMEM); } ISCSI_SESSION_UNLOCK(is); } return (0); } static int iscsi_ioctl_daemon_fail(struct iscsi_softc *sc, struct iscsi_daemon_fail *fail) { struct iscsi_session *is; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == fail->idf_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } ISCSI_SESSION_LOCK(is); ISCSI_SESSION_DEBUG(is, "iscsid(8) failed: %s", fail->idf_reason); strlcpy(is->is_reason, fail->idf_reason, sizeof(is->is_reason)); //is->is_waiting_for_iscsid = false; //is->is_login_phase = true; //iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); sx_sunlock(&sc->sc_lock); return (0); } #ifdef ICL_KERNEL_PROXY static int iscsi_ioctl_daemon_connect(struct iscsi_softc *sc, struct iscsi_daemon_connect *idc) { struct iscsi_session *is; struct sockaddr *from_sa, *to_sa; int error; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == idc->idc_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } sx_sunlock(&sc->sc_lock); if (idc->idc_from_addrlen > 0) { error = getsockaddr(&from_sa, (void *)idc->idc_from_addr, idc->idc_from_addrlen); if (error != 0) { ISCSI_SESSION_WARN(is, "getsockaddr failed with error %d", error); return (error); } } else { from_sa = NULL; } error = getsockaddr(&to_sa, (void *)idc->idc_to_addr, idc->idc_to_addrlen); if (error != 0) { ISCSI_SESSION_WARN(is, "getsockaddr failed with error %d", error); free(from_sa, M_SONAME); return (error); } ISCSI_SESSION_LOCK(is); is->is_waiting_for_iscsid = false; is->is_login_phase = true; is->is_timeout = 0; ISCSI_SESSION_UNLOCK(is); error = icl_conn_connect(is->is_conn, idc->idc_iser, idc->idc_domain, idc->idc_socktype, idc->idc_protocol, from_sa, to_sa); free(from_sa, M_SONAME); free(to_sa, M_SONAME); /* * Digests are always disabled during login phase. */ is->is_conn->ic_header_crc32c = false; is->is_conn->ic_data_crc32c = false; return (error); } static int iscsi_ioctl_daemon_send(struct iscsi_softc *sc, struct iscsi_daemon_send *ids) { struct iscsi_session *is; struct icl_pdu *ip; size_t datalen; void *data; int error; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == ids->ids_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } sx_sunlock(&sc->sc_lock); if (is->is_login_phase == false) return (EBUSY); if (is->is_terminating || is->is_reconnecting) return (EIO); datalen = ids->ids_data_segment_len; if (datalen > ISCSI_MAX_DATA_SEGMENT_LENGTH) return (EINVAL); if (datalen > 0) { data = malloc(datalen, M_ISCSI, M_WAITOK); error = copyin(ids->ids_data_segment, data, datalen); if (error != 0) { free(data, M_ISCSI); return (error); } } ip = icl_pdu_new(is->is_conn, M_WAITOK); memcpy(ip->ip_bhs, ids->ids_bhs, sizeof(*ip->ip_bhs)); if (datalen > 0) { error = icl_pdu_append_data(ip, data, datalen, M_WAITOK); KASSERT(error == 0, ("icl_pdu_append_data(..., M_WAITOK) failed")); free(data, M_ISCSI); } icl_pdu_queue(ip); return (0); } static int iscsi_ioctl_daemon_receive(struct iscsi_softc *sc, struct iscsi_daemon_receive *idr) { struct iscsi_session *is; struct icl_pdu *ip; void *data; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == idr->idr_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } sx_sunlock(&sc->sc_lock); if (is->is_login_phase == false) return (EBUSY); ISCSI_SESSION_LOCK(is); while (is->is_login_pdu == NULL && is->is_terminating == false && is->is_reconnecting == false) cv_wait(&is->is_login_cv, &is->is_lock); if (is->is_terminating || is->is_reconnecting) { ISCSI_SESSION_UNLOCK(is); return (EIO); } ip = is->is_login_pdu; is->is_login_pdu = NULL; ISCSI_SESSION_UNLOCK(is); if (ip->ip_data_len > idr->idr_data_segment_len) { icl_pdu_free(ip); return (EMSGSIZE); } copyout(ip->ip_bhs, idr->idr_bhs, sizeof(*ip->ip_bhs)); if (ip->ip_data_len > 0) { data = malloc(ip->ip_data_len, M_ISCSI, M_WAITOK); icl_pdu_get_data(ip, 0, data, ip->ip_data_len); copyout(data, idr->idr_data_segment, ip->ip_data_len); free(data, M_ISCSI); } icl_pdu_free(ip); return (0); } #endif /* ICL_KERNEL_PROXY */ static void iscsi_sanitize_session_conf(struct iscsi_session_conf *isc) { /* * Just make sure all the fields are null-terminated. * * XXX: This is not particularly secure. We should * create our own conf and then copy in relevant * fields. */ isc->isc_initiator[ISCSI_NAME_LEN - 1] = '\0'; isc->isc_initiator_addr[ISCSI_ADDR_LEN - 1] = '\0'; isc->isc_initiator_alias[ISCSI_ALIAS_LEN - 1] = '\0'; isc->isc_target[ISCSI_NAME_LEN - 1] = '\0'; isc->isc_target_addr[ISCSI_ADDR_LEN - 1] = '\0'; isc->isc_user[ISCSI_NAME_LEN - 1] = '\0'; isc->isc_secret[ISCSI_SECRET_LEN - 1] = '\0'; isc->isc_mutual_user[ISCSI_NAME_LEN - 1] = '\0'; isc->isc_mutual_secret[ISCSI_SECRET_LEN - 1] = '\0'; } static bool iscsi_valid_session_conf(const struct iscsi_session_conf *isc) { if (isc->isc_initiator[0] == '\0') { ISCSI_DEBUG("empty isc_initiator"); return (false); } if (isc->isc_target_addr[0] == '\0') { ISCSI_DEBUG("empty isc_target_addr"); return (false); } if (isc->isc_discovery != 0 && isc->isc_target[0] != 0) { ISCSI_DEBUG("non-empty isc_target for discovery session"); return (false); } if (isc->isc_discovery == 0 && isc->isc_target[0] == 0) { ISCSI_DEBUG("empty isc_target for non-discovery session"); return (false); } return (true); } static int iscsi_ioctl_session_add(struct iscsi_softc *sc, struct iscsi_session_add *isa) { struct iscsi_session *is; const struct iscsi_session *is2; int error; iscsi_sanitize_session_conf(&isa->isa_conf); if (iscsi_valid_session_conf(&isa->isa_conf) == false) return (EINVAL); is = malloc(sizeof(*is), M_ISCSI, M_ZERO | M_WAITOK); memcpy(&is->is_conf, &isa->isa_conf, sizeof(is->is_conf)); sx_xlock(&sc->sc_lock); /* * Prevent duplicates. */ TAILQ_FOREACH(is2, &sc->sc_sessions, is_next) { if (!!is->is_conf.isc_discovery != !!is2->is_conf.isc_discovery) continue; if (strcmp(is->is_conf.isc_target_addr, is2->is_conf.isc_target_addr) != 0) continue; if (is->is_conf.isc_discovery == 0 && strcmp(is->is_conf.isc_target, is2->is_conf.isc_target) != 0) continue; sx_xunlock(&sc->sc_lock); free(is, M_ISCSI); return (EBUSY); } is->is_conn = icl_new_conn(is->is_conf.isc_offload, "iscsi", &is->is_lock); if (is->is_conn == NULL) { sx_xunlock(&sc->sc_lock); free(is, M_ISCSI); return (EINVAL); } is->is_conn->ic_receive = iscsi_receive_callback; is->is_conn->ic_error = iscsi_error_callback; is->is_conn->ic_prv0 = is; TAILQ_INIT(&is->is_outstanding); STAILQ_INIT(&is->is_postponed); mtx_init(&is->is_lock, "iscsi_lock", NULL, MTX_DEF); cv_init(&is->is_maintenance_cv, "iscsi_mt"); #ifdef ICL_KERNEL_PROXY cv_init(&is->is_login_cv, "iscsi_login"); #endif is->is_softc = sc; sc->sc_last_session_id++; is->is_id = sc->sc_last_session_id; is->is_isid[0] = 0x80; /* RFC 3720, 10.12.5: 10b, "Random" ISID. */ arc4rand(&is->is_isid[1], 5, 0); is->is_tsih = 0; callout_init(&is->is_callout, 1); error = kthread_add(iscsi_maintenance_thread, is, NULL, NULL, 0, 0, "iscsimt"); if (error != 0) { ISCSI_SESSION_WARN(is, "kthread_add(9) failed with error %d", error); sx_xunlock(&sc->sc_lock); return (error); } callout_reset(&is->is_callout, 1 * hz, iscsi_callout, is); TAILQ_INSERT_TAIL(&sc->sc_sessions, is, is_next); /* * Trigger immediate reconnection. */ ISCSI_SESSION_LOCK(is); is->is_waiting_for_iscsid = true; strlcpy(is->is_reason, "Waiting for iscsid(8)", sizeof(is->is_reason)); ISCSI_SESSION_UNLOCK(is); cv_signal(&sc->sc_cv); sx_xunlock(&sc->sc_lock); return (0); } static bool iscsi_session_conf_matches(unsigned int id1, const struct iscsi_session_conf *c1, unsigned int id2, const struct iscsi_session_conf *c2) { if (id2 != 0 && id2 != id1) return (false); if (c2->isc_target[0] != '\0' && strcmp(c1->isc_target, c2->isc_target) != 0) return (false); if (c2->isc_target_addr[0] != '\0' && strcmp(c1->isc_target_addr, c2->isc_target_addr) != 0) return (false); return (true); } static int iscsi_ioctl_session_remove(struct iscsi_softc *sc, struct iscsi_session_remove *isr) { struct iscsi_session *is, *tmp; bool found = false; iscsi_sanitize_session_conf(&isr->isr_conf); sx_xlock(&sc->sc_lock); TAILQ_FOREACH_SAFE(is, &sc->sc_sessions, is_next, tmp) { ISCSI_SESSION_LOCK(is); if (iscsi_session_conf_matches(is->is_id, &is->is_conf, isr->isr_session_id, &isr->isr_conf)) { found = true; iscsi_session_logout(is); iscsi_session_terminate(is); } ISCSI_SESSION_UNLOCK(is); } sx_xunlock(&sc->sc_lock); if (!found) return (ESRCH); return (0); } static int iscsi_ioctl_session_list(struct iscsi_softc *sc, struct iscsi_session_list *isl) { int error; unsigned int i = 0; struct iscsi_session *is; struct iscsi_session_state iss; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (i >= isl->isl_nentries) { sx_sunlock(&sc->sc_lock); return (EMSGSIZE); } memset(&iss, 0, sizeof(iss)); memcpy(&iss.iss_conf, &is->is_conf, sizeof(iss.iss_conf)); iss.iss_id = is->is_id; strlcpy(iss.iss_target_alias, is->is_target_alias, sizeof(iss.iss_target_alias)); strlcpy(iss.iss_reason, is->is_reason, sizeof(iss.iss_reason)); strlcpy(iss.iss_offload, is->is_conn->ic_offload, sizeof(iss.iss_offload)); if (is->is_conn->ic_header_crc32c) iss.iss_header_digest = ISCSI_DIGEST_CRC32C; else iss.iss_header_digest = ISCSI_DIGEST_NONE; if (is->is_conn->ic_data_crc32c) iss.iss_data_digest = ISCSI_DIGEST_CRC32C; else iss.iss_data_digest = ISCSI_DIGEST_NONE; iss.iss_max_data_segment_length = is->is_max_data_segment_length; iss.iss_immediate_data = is->is_immediate_data; iss.iss_connected = is->is_connected; error = copyout(&iss, isl->isl_pstates + i, sizeof(iss)); if (error != 0) { sx_sunlock(&sc->sc_lock); return (error); } i++; } sx_sunlock(&sc->sc_lock); isl->isl_nentries = i; return (0); } static int iscsi_ioctl_session_modify(struct iscsi_softc *sc, struct iscsi_session_modify *ism) { struct iscsi_session *is; iscsi_sanitize_session_conf(&ism->ism_conf); if (iscsi_valid_session_conf(&ism->ism_conf) == false) return (EINVAL); sx_xlock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { ISCSI_SESSION_LOCK(is); if (is->is_id == ism->ism_session_id) break; ISCSI_SESSION_UNLOCK(is); } if (is == NULL) { sx_xunlock(&sc->sc_lock); return (ESRCH); } sx_xunlock(&sc->sc_lock); memcpy(&is->is_conf, &ism->ism_conf, sizeof(is->is_conf)); ISCSI_SESSION_UNLOCK(is); iscsi_session_reconnect(is); return (0); } static int iscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int mode, struct thread *td) { struct iscsi_softc *sc; sc = dev->si_drv1; switch (cmd) { case ISCSIDWAIT: return (iscsi_ioctl_daemon_wait(sc, (struct iscsi_daemon_request *)arg)); case ISCSIDHANDOFF: return (iscsi_ioctl_daemon_handoff(sc, (struct iscsi_daemon_handoff *)arg)); case ISCSIDFAIL: return (iscsi_ioctl_daemon_fail(sc, (struct iscsi_daemon_fail *)arg)); #ifdef ICL_KERNEL_PROXY case ISCSIDCONNECT: return (iscsi_ioctl_daemon_connect(sc, (struct iscsi_daemon_connect *)arg)); case ISCSIDSEND: return (iscsi_ioctl_daemon_send(sc, (struct iscsi_daemon_send *)arg)); case ISCSIDRECEIVE: return (iscsi_ioctl_daemon_receive(sc, (struct iscsi_daemon_receive *)arg)); #endif /* ICL_KERNEL_PROXY */ case ISCSISADD: return (iscsi_ioctl_session_add(sc, (struct iscsi_session_add *)arg)); case ISCSISREMOVE: return (iscsi_ioctl_session_remove(sc, (struct iscsi_session_remove *)arg)); case ISCSISLIST: return (iscsi_ioctl_session_list(sc, (struct iscsi_session_list *)arg)); case ISCSISMODIFY: return (iscsi_ioctl_session_modify(sc, (struct iscsi_session_modify *)arg)); default: return (EINVAL); } } static struct iscsi_outstanding * iscsi_outstanding_find(struct iscsi_session *is, uint32_t initiator_task_tag) { struct iscsi_outstanding *io; ISCSI_SESSION_LOCK_ASSERT(is); TAILQ_FOREACH(io, &is->is_outstanding, io_next) { if (io->io_initiator_task_tag == initiator_task_tag) return (io); } return (NULL); } static struct iscsi_outstanding * iscsi_outstanding_find_ccb(struct iscsi_session *is, union ccb *ccb) { struct iscsi_outstanding *io; ISCSI_SESSION_LOCK_ASSERT(is); TAILQ_FOREACH(io, &is->is_outstanding, io_next) { if (io->io_ccb == ccb) return (io); } return (NULL); } static struct iscsi_outstanding * iscsi_outstanding_add(struct iscsi_session *is, union ccb *ccb, uint32_t *initiator_task_tagp) { struct iscsi_outstanding *io; int error; ISCSI_SESSION_LOCK_ASSERT(is); io = uma_zalloc(iscsi_outstanding_zone, M_NOWAIT | M_ZERO); if (io == NULL) { ISCSI_SESSION_WARN(is, "failed to allocate %zd bytes", sizeof(*io)); return (NULL); } error = icl_conn_task_setup(is->is_conn, &ccb->csio, initiator_task_tagp, &io->io_icl_prv); if (error != 0) { ISCSI_SESSION_WARN(is, "icl_conn_task_setup() failed with error %d", error); uma_zfree(iscsi_outstanding_zone, io); return (NULL); } KASSERT(iscsi_outstanding_find(is, *initiator_task_tagp) == NULL, ("initiator_task_tag 0x%x already added", *initiator_task_tagp)); io->io_initiator_task_tag = *initiator_task_tagp; io->io_ccb = ccb; TAILQ_INSERT_TAIL(&is->is_outstanding, io, io_next); return (io); } static void iscsi_outstanding_remove(struct iscsi_session *is, struct iscsi_outstanding *io) { ISCSI_SESSION_LOCK_ASSERT(is); icl_conn_task_done(is->is_conn, io->io_icl_prv); TAILQ_REMOVE(&is->is_outstanding, io, io_next); uma_zfree(iscsi_outstanding_zone, io); } static void iscsi_action_abort(struct iscsi_session *is, union ccb *ccb) { struct icl_pdu *request; struct iscsi_bhs_task_management_request *bhstmr; struct ccb_abort *cab = &ccb->cab; struct iscsi_outstanding *io, *aio; uint32_t initiator_task_tag; ISCSI_SESSION_LOCK_ASSERT(is); #if 0 KASSERT(is->is_login_phase == false, ("%s called during Login Phase", __func__)); #else if (is->is_login_phase) { ccb->ccb_h.status = CAM_REQ_ABORTED; xpt_done(ccb); return; } #endif aio = iscsi_outstanding_find_ccb(is, cab->abort_ccb); if (aio == NULL) { ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } request = icl_pdu_new(is->is_conn, M_NOWAIT); if (request == NULL) { ccb->ccb_h.status = CAM_RESRC_UNAVAIL; xpt_done(ccb); return; } initiator_task_tag = is->is_initiator_task_tag++; io = iscsi_outstanding_add(is, NULL, &initiator_task_tag); if (io == NULL) { icl_pdu_free(request); ccb->ccb_h.status = CAM_RESRC_UNAVAIL; xpt_done(ccb); return; } io->io_datasn = aio->io_initiator_task_tag; bhstmr = (struct iscsi_bhs_task_management_request *)request->ip_bhs; bhstmr->bhstmr_opcode = ISCSI_BHS_OPCODE_TASK_REQUEST; bhstmr->bhstmr_function = 0x80 | BHSTMR_FUNCTION_ABORT_TASK; bhstmr->bhstmr_lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun)); bhstmr->bhstmr_initiator_task_tag = initiator_task_tag; bhstmr->bhstmr_referenced_task_tag = aio->io_initiator_task_tag; iscsi_pdu_queue_locked(request); } static void iscsi_action_scsiio(struct iscsi_session *is, union ccb *ccb) { struct icl_pdu *request; struct iscsi_bhs_scsi_command *bhssc; struct ccb_scsiio *csio; struct iscsi_outstanding *io; size_t len; uint32_t initiator_task_tag; int error; ISCSI_SESSION_LOCK_ASSERT(is); #if 0 KASSERT(is->is_login_phase == false, ("%s called during Login Phase", __func__)); #else if (is->is_login_phase) { ISCSI_SESSION_DEBUG(is, "called during login phase"); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_REQ_ABORTED | CAM_DEV_QFRZN; xpt_done(ccb); return; } #endif request = icl_pdu_new(is->is_conn, M_NOWAIT); if (request == NULL) { if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN; xpt_done(ccb); return; } initiator_task_tag = is->is_initiator_task_tag++; io = iscsi_outstanding_add(is, ccb, &initiator_task_tag); if (io == NULL) { icl_pdu_free(request); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN; xpt_done(ccb); return; } csio = &ccb->csio; bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs; bhssc->bhssc_opcode = ISCSI_BHS_OPCODE_SCSI_COMMAND; bhssc->bhssc_flags |= BHSSC_FLAGS_F; switch (csio->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_IN: bhssc->bhssc_flags |= BHSSC_FLAGS_R; break; case CAM_DIR_OUT: bhssc->bhssc_flags |= BHSSC_FLAGS_W; break; } if ((ccb->ccb_h.flags & CAM_TAG_ACTION_VALID) != 0) { switch (csio->tag_action) { case MSG_HEAD_OF_Q_TAG: bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_HOQ; break; case MSG_ORDERED_Q_TAG: bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_ORDERED; break; case MSG_ACA_TASK: bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_ACA; break; case MSG_SIMPLE_Q_TAG: default: bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_SIMPLE; break; } } else bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_UNTAGGED; bhssc->bhssc_lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun)); bhssc->bhssc_initiator_task_tag = initiator_task_tag; bhssc->bhssc_expected_data_transfer_length = htonl(csio->dxfer_len); KASSERT(csio->cdb_len <= sizeof(bhssc->bhssc_cdb), ("unsupported CDB size %zd", (size_t)csio->cdb_len)); if (csio->ccb_h.flags & CAM_CDB_POINTER) memcpy(&bhssc->bhssc_cdb, csio->cdb_io.cdb_ptr, csio->cdb_len); else memcpy(&bhssc->bhssc_cdb, csio->cdb_io.cdb_bytes, csio->cdb_len); if (is->is_immediate_data && (csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_OUT) { len = csio->dxfer_len; //ISCSI_SESSION_DEBUG(is, "adding %zd of immediate data", len); if (len > is->is_first_burst_length) { ISCSI_SESSION_DEBUG(is, "len %zd -> %zd", len, is->is_first_burst_length); len = is->is_first_burst_length; } if (len > is->is_max_data_segment_length) { ISCSI_SESSION_DEBUG(is, "len %zd -> %zd", len, is->is_max_data_segment_length); len = is->is_max_data_segment_length; } error = icl_pdu_append_data(request, csio->data_ptr, len, M_NOWAIT); if (error != 0) { icl_pdu_free(request); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN; xpt_done(ccb); return; } } iscsi_pdu_queue_locked(request); } static void iscsi_action(struct cam_sim *sim, union ccb *ccb) { struct iscsi_session *is; is = cam_sim_softc(sim); ISCSI_SESSION_LOCK_ASSERT(is); if (is->is_terminating || (is->is_connected == false && fail_on_disconnection)) { ccb->ccb_h.status = CAM_DEV_NOT_THERE; xpt_done(ccb); return; } switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_TAG_ABLE; cpi->target_sprt = 0; cpi->hba_misc = PIM_EXTLUNS; cpi->hba_eng_cnt = 0; cpi->max_target = 0; /* * Note that the variable below is only relevant for targets * that don't claim compliance with anything above SPC2, which * means they don't support REPORT_LUNS. */ cpi->max_lun = 255; cpi->initiator_id = ~0; strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strlcpy(cpi->hba_vid, "iSCSI", HBA_IDLEN); strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 150000; /* XXX */ cpi->transport = XPORT_ISCSI; cpi->transport_version = 0; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_SPC3; cpi->maxio = MAXPHYS; cpi->ccb_h.status = CAM_REQ_CMP; break; } case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings *cts; struct ccb_trans_settings_scsi *scsi; cts = &ccb->cts; scsi = &cts->proto_specific.scsi; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_SPC3; cts->transport = XPORT_ISCSI; cts->transport_version = 0; scsi->valid = CTS_SCSI_VALID_TQ; scsi->flags = CTS_SCSI_FLAGS_TAG_ENB; cts->ccb_h.status = CAM_REQ_CMP; break; } case XPT_CALC_GEOMETRY: cam_calc_geometry(&ccb->ccg, /*extended*/1); ccb->ccb_h.status = CAM_REQ_CMP; break; #if 0 /* * XXX: What's the point? */ case XPT_RESET_BUS: case XPT_TERM_IO: ISCSI_SESSION_DEBUG(is, "faking success for reset, abort, or term_io"); ccb->ccb_h.status = CAM_REQ_CMP; break; #endif case XPT_ABORT: iscsi_action_abort(is, ccb); return; case XPT_SCSI_IO: iscsi_action_scsiio(is, ccb); return; default: #if 0 ISCSI_SESSION_DEBUG(is, "got unsupported code 0x%x", ccb->ccb_h.func_code); #endif ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; break; } xpt_done(ccb); } static void iscsi_poll(struct cam_sim *sim) { KASSERT(0, ("%s: you're not supposed to be here", __func__)); } static void -iscsi_shutdown(struct iscsi_softc *sc) +iscsi_terminate_sessions(struct iscsi_softc *sc) { struct iscsi_session *is; - /* - * Trying to reconnect during system shutdown would lead to hang. - */ - fail_on_disconnection = 1; + sx_slock(&sc->sc_lock); + TAILQ_FOREACH(is, &sc->sc_sessions, is_next) + iscsi_session_terminate(is); + while(!TAILQ_EMPTY(&sc->sc_sessions)) { + ISCSI_DEBUG("waiting for sessions to terminate"); + cv_wait(&sc->sc_cv, &sc->sc_lock); + } + ISCSI_DEBUG("all sessions terminated"); + sx_sunlock(&sc->sc_lock); +} +static void +iscsi_shutdown_pre(struct iscsi_softc *sc) +{ + struct iscsi_session *is; + + if (!fail_on_shutdown) + return; + /* * If we have any sessions waiting for reconnection, request * maintenance thread to fail them immediately instead of waiting * for reconnect timeout. + * + * This prevents LUNs with mounted filesystems that are supported + * by disconnected iSCSI sessions from hanging, however it will + * fail all queued BIOs. */ + ISCSI_DEBUG("forcing failing all disconnected sessions due to shutdown"); + + fail_on_disconnection = 1; + sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { ISCSI_SESSION_LOCK(is); - if (is->is_waiting_for_iscsid) + if (!is->is_connected) { + ISCSI_SESSION_DEBUG(is, "force failing disconnected session early"); iscsi_session_reconnect(is); + } ISCSI_SESSION_UNLOCK(is); } sx_sunlock(&sc->sc_lock); } +static void +iscsi_shutdown_post(struct iscsi_softc *sc) +{ + + ISCSI_DEBUG("removing all sessions due to shutdown"); + iscsi_terminate_sessions(sc); +} + static int iscsi_load(void) { int error; sc = malloc(sizeof(*sc), M_ISCSI, M_ZERO | M_WAITOK); sx_init(&sc->sc_lock, "iscsi"); TAILQ_INIT(&sc->sc_sessions); cv_init(&sc->sc_cv, "iscsi_cv"); iscsi_outstanding_zone = uma_zcreate("iscsi_outstanding", sizeof(struct iscsi_outstanding), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); error = make_dev_p(MAKEDEV_CHECKNAME, &sc->sc_cdev, &iscsi_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "iscsi"); if (error != 0) { ISCSI_WARN("failed to create device node, error %d", error); return (error); } sc->sc_cdev->si_drv1 = sc; - sc->sc_shutdown_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync, - iscsi_shutdown, sc, SHUTDOWN_PRI_DEFAULT-1); + sc->sc_shutdown_pre_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync, + iscsi_shutdown_pre, sc, SHUTDOWN_PRI_FIRST); + /* + * shutdown_post_sync needs to run after filesystem shutdown and before + * CAM shutdown - otherwise when rebooting with an iSCSI session that is + * disconnected but has outstanding requests, dashutdown() will hang on + * cam_periph_runccb(). + */ + sc->sc_shutdown_post_eh = EVENTHANDLER_REGISTER(shutdown_post_sync, + iscsi_shutdown_post, sc, SHUTDOWN_PRI_DEFAULT - 1); return (0); } static int iscsi_unload(void) { - struct iscsi_session *is, *tmp; if (sc->sc_cdev != NULL) { ISCSI_DEBUG("removing device node"); destroy_dev(sc->sc_cdev); ISCSI_DEBUG("device node removed"); } - if (sc->sc_shutdown_eh != NULL) - EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->sc_shutdown_eh); + if (sc->sc_shutdown_pre_eh != NULL) + EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->sc_shutdown_pre_eh); + if (sc->sc_shutdown_post_eh != NULL) + EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->sc_shutdown_post_eh); - sx_slock(&sc->sc_lock); - TAILQ_FOREACH_SAFE(is, &sc->sc_sessions, is_next, tmp) - iscsi_session_terminate(is); - while(!TAILQ_EMPTY(&sc->sc_sessions)) { - ISCSI_DEBUG("waiting for sessions to terminate"); - cv_wait(&sc->sc_cv, &sc->sc_lock); - } - ISCSI_DEBUG("all sessions terminated"); - sx_sunlock(&sc->sc_lock); + iscsi_terminate_sessions(sc); uma_zdestroy(iscsi_outstanding_zone); sx_destroy(&sc->sc_lock); cv_destroy(&sc->sc_cv); free(sc, M_ISCSI); return (0); } static int iscsi_quiesce(void) { sx_slock(&sc->sc_lock); if (!TAILQ_EMPTY(&sc->sc_sessions)) { sx_sunlock(&sc->sc_lock); return (EBUSY); } sx_sunlock(&sc->sc_lock); return (0); } static int iscsi_modevent(module_t mod, int what, void *arg) { int error; switch (what) { case MOD_LOAD: error = iscsi_load(); break; case MOD_UNLOAD: error = iscsi_unload(); break; case MOD_QUIESCE: error = iscsi_quiesce(); break; default: error = EINVAL; break; } return (error); } moduledata_t iscsi_data = { "iscsi", iscsi_modevent, 0 }; DECLARE_MODULE(iscsi, iscsi_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); MODULE_DEPEND(iscsi, cam, 1, 1, 1); MODULE_DEPEND(iscsi, icl, 1, 1, 1); Index: projects/clang380-import/sys/dev/iscsi/iscsi.h =================================================================== --- projects/clang380-import/sys/dev/iscsi/iscsi.h (revision 293686) +++ projects/clang380-import/sys/dev/iscsi/iscsi.h (revision 293687) @@ -1,137 +1,138 @@ /*- * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef ISCSI_H #define ISCSI_H struct iscsi_softc; struct icl_conn; #define ISCSI_NAME_LEN 224 /* 223 bytes, by RFC 3720, + '\0' */ #define ISCSI_ADDR_LEN 47 /* INET6_ADDRSTRLEN + '\0' */ #define ISCSI_SECRET_LEN 17 /* 16 + '\0' */ struct iscsi_outstanding { TAILQ_ENTRY(iscsi_outstanding) io_next; union ccb *io_ccb; size_t io_received; uint32_t io_initiator_task_tag; uint32_t io_datasn; void *io_icl_prv; }; struct iscsi_session { TAILQ_ENTRY(iscsi_session) is_next; struct icl_conn *is_conn; struct mtx is_lock; uint32_t is_statsn; uint32_t is_cmdsn; uint32_t is_expcmdsn; uint32_t is_maxcmdsn; uint32_t is_initiator_task_tag; int is_header_digest; int is_data_digest; int is_initial_r2t; size_t is_max_burst_length; size_t is_first_burst_length; uint8_t is_isid[6]; uint16_t is_tsih; bool is_immediate_data; size_t is_max_data_segment_length; char is_target_alias[ISCSI_ALIAS_LEN]; TAILQ_HEAD(, iscsi_outstanding) is_outstanding; STAILQ_HEAD(, icl_pdu) is_postponed; struct callout is_callout; unsigned int is_timeout; /* * XXX: This could be rewritten using a single variable, * but somehow it results in uglier code. */ /* * We're waiting for iscsid(8); after iscsid_timeout * expires, kernel will wake up an iscsid(8) to handle * the session. */ bool is_waiting_for_iscsid; /* * Some iscsid(8) instance is handling the session; * after login_timeout expires, kernel will wake up * another iscsid(8) to handle the session. */ bool is_login_phase; /* * We're in the process of removing the iSCSI session. */ bool is_terminating; /* * We're waiting for the maintenance thread to do some * reconnection tasks. */ bool is_reconnecting; bool is_connected; struct cam_devq *is_devq; struct cam_sim *is_sim; struct cam_path *is_path; struct cv is_maintenance_cv; struct iscsi_softc *is_softc; unsigned int is_id; struct iscsi_session_conf is_conf; bool is_simq_frozen; char is_reason[ISCSI_REASON_LEN]; #ifdef ICL_KERNEL_PROXY struct cv is_login_cv;; struct icl_pdu *is_login_pdu; #endif }; struct iscsi_softc { device_t sc_dev; struct sx sc_lock; struct cdev *sc_cdev; TAILQ_HEAD(, iscsi_session) sc_sessions; struct cv sc_cv; unsigned int sc_last_session_id; - eventhandler_tag sc_shutdown_eh; + eventhandler_tag sc_shutdown_pre_eh; + eventhandler_tag sc_shutdown_post_eh; }; #endif /* !ISCSI_H */ Index: projects/clang380-import/sys/dev/puc/pucdata.c =================================================================== --- projects/clang380-import/sys/dev/puc/pucdata.c (revision 293686) +++ projects/clang380-import/sys/dev/puc/pucdata.c (revision 293687) @@ -1,1764 +1,1862 @@ /*- * Copyright (c) 2006 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * PCI "universal" communications card driver configuration data (used to * match/attach the cards). */ #include #include #include #include #include #include #include #include +#include + +#include #include #include #include #include +static puc_config_f puc_config_advantech; static puc_config_f puc_config_amc; static puc_config_f puc_config_diva; static puc_config_f puc_config_exar; static puc_config_f puc_config_exar_pcie; static puc_config_f puc_config_icbook; static puc_config_f puc_config_moxa; static puc_config_f puc_config_oxford_pci954; static puc_config_f puc_config_oxford_pcie; static puc_config_f puc_config_quatech; static puc_config_f puc_config_syba; static puc_config_f puc_config_siig; static puc_config_f puc_config_sunix; static puc_config_f puc_config_timedia; static puc_config_f puc_config_titan; const struct puc_cfg puc_pci_devices[] = { { 0x0009, 0x7168, 0xffff, 0, "Sunix SUN1889", DEFAULT_RCLK * 8, PUC_PORT_2S, 0x10, 0, 8, }, { 0x103c, 0x1048, 0x103c, 0x1049, "HP Diva Serial [GSP] Multiport UART - Tosca Console", DEFAULT_RCLK, PUC_PORT_3S, 0x10, 0, -1, .config_function = puc_config_diva }, { 0x103c, 0x1048, 0x103c, 0x104a, "HP Diva Serial [GSP] Multiport UART - Tosca Secondary", DEFAULT_RCLK, PUC_PORT_2S, 0x10, 0, -1, .config_function = puc_config_diva }, { 0x103c, 0x1048, 0x103c, 0x104b, "HP Diva Serial [GSP] Multiport UART - Maestro SP2", DEFAULT_RCLK, PUC_PORT_4S, 0x10, 0, -1, .config_function = puc_config_diva }, { 0x103c, 0x1048, 0x103c, 0x1223, "HP Diva Serial [GSP] Multiport UART - Superdome Console", DEFAULT_RCLK, PUC_PORT_3S, 0x10, 0, -1, .config_function = puc_config_diva }, { 0x103c, 0x1048, 0x103c, 0x1226, "HP Diva Serial [GSP] Multiport UART - Keystone SP2", DEFAULT_RCLK, PUC_PORT_3S, 0x10, 0, -1, .config_function = puc_config_diva }, { 0x103c, 0x1048, 0x103c, 0x1282, "HP Diva Serial [GSP] Multiport UART - Everest SP2", DEFAULT_RCLK, PUC_PORT_3S, 0x10, 0, -1, .config_function = puc_config_diva }, { 0x10b5, 0x1076, 0x10b5, 0x1076, "VScom PCI-800", DEFAULT_RCLK * 8, PUC_PORT_8S, 0x18, 0, 8, }, { 0x10b5, 0x1077, 0x10b5, 0x1077, "VScom PCI-400", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x18, 0, 8, }, { 0x10b5, 0x1103, 0x10b5, 0x1103, "VScom PCI-200", DEFAULT_RCLK * 8, PUC_PORT_2S, 0x18, 4, 0, }, /* * Boca Research Turbo Serial 658 (8 serial port) card. * Appears to be the same as Chase Research PLC PCI-FAST8 * and Perle PCI-FAST8 Multi-Port serial cards. */ { 0x10b5, 0x9050, 0x12e0, 0x0021, "Boca Research Turbo Serial 658", DEFAULT_RCLK * 4, PUC_PORT_8S, 0x18, 0, 8, }, { 0x10b5, 0x9050, 0x12e0, 0x0031, "Boca Research Turbo Serial 654", DEFAULT_RCLK * 4, PUC_PORT_4S, 0x18, 0, 8, }, /* * Dolphin Peripherals 4035 (dual serial port) card. PLX 9050, with * a seemingly-lame EEPROM setup that puts the Dolphin IDs * into the subsystem fields, and claims that it's a * network/misc (0x02/0x80) device. */ { 0x10b5, 0x9050, 0xd84d, 0x6808, "Dolphin Peripherals 4035", DEFAULT_RCLK, PUC_PORT_2S, 0x18, 4, 0, }, /* * Dolphin Peripherals 4014 (dual parallel port) card. PLX 9050, with * a seemingly-lame EEPROM setup that puts the Dolphin IDs * into the subsystem fields, and claims that it's a * network/misc (0x02/0x80) device. */ { 0x10b5, 0x9050, 0xd84d, 0x6810, "Dolphin Peripherals 4014", 0, PUC_PORT_2P, 0x20, 4, 0, }, { 0x10e8, 0x818e, 0xffff, 0, "Applied Micro Circuits 8 Port UART", DEFAULT_RCLK, PUC_PORT_8S, 0x14, -1, -1, .config_function = puc_config_amc }, /* * The following members of the Digi International Neo series are * based on Exar PCI chips, f. e. the 8 port variants on XR17V258IV. * Accordingly, the PCIe versions of these cards incorporate a PLX * PCIe-PCI-bridge. */ { 0x114f, 0x00b0, 0xffff, 0, "Digi Neo PCI 4 Port", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x10, 0, -1, .config_function = puc_config_exar }, { 0x114f, 0x00b1, 0xffff, 0, "Digi Neo PCI 8 Port", DEFAULT_RCLK * 8, PUC_PORT_8S, 0x10, 0, -1, .config_function = puc_config_exar }, { 0x114f, 0x00f0, 0xffff, 0, "Digi Neo PCIe 8 Port", DEFAULT_RCLK * 8, PUC_PORT_8S, 0x10, 0, -1, .config_function = puc_config_exar }, { 0x114f, 0x00f1, 0xffff, 0, "Digi Neo PCIe 4 Port", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x10, 0, -1, .config_function = puc_config_exar }, { 0x114f, 0x00f2, 0xffff, 0, "Digi Neo PCIe 4 Port RJ45", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x10, 0, -1, .config_function = puc_config_exar }, { 0x114f, 0x00f3, 0xffff, 0, "Digi Neo PCIe 8 Port RJ45", DEFAULT_RCLK * 8, PUC_PORT_8S, 0x10, 0, -1, .config_function = puc_config_exar }, { 0x11fe, 0x8010, 0xffff, 0, "Comtrol RocketPort 550/8 RJ11 part A", DEFAULT_RCLK * 4, PUC_PORT_4S, 0x10, 0, 8, }, { 0x11fe, 0x8011, 0xffff, 0, "Comtrol RocketPort 550/8 RJ11 part B", DEFAULT_RCLK * 4, PUC_PORT_4S, 0x10, 0, 8, }, { 0x11fe, 0x8012, 0xffff, 0, "Comtrol RocketPort 550/8 Octa part A", DEFAULT_RCLK * 4, PUC_PORT_4S, 0x10, 0, 8, }, { 0x11fe, 0x8013, 0xffff, 0, "Comtrol RocketPort 550/8 Octa part B", DEFAULT_RCLK * 4, PUC_PORT_4S, 0x10, 0, 8, }, { 0x11fe, 0x8014, 0xffff, 0, "Comtrol RocketPort 550/4 RJ45", DEFAULT_RCLK * 4, PUC_PORT_4S, 0x10, 0, 8, }, { 0x11fe, 0x8015, 0xffff, 0, "Comtrol RocketPort 550/Quad", DEFAULT_RCLK * 4, PUC_PORT_4S, 0x10, 0, 8, }, { 0x11fe, 0x8016, 0xffff, 0, "Comtrol RocketPort 550/16 part A", DEFAULT_RCLK * 4, PUC_PORT_4S, 0x10, 0, 8, }, { 0x11fe, 0x8017, 0xffff, 0, "Comtrol RocketPort 550/16 part B", DEFAULT_RCLK * 4, PUC_PORT_12S, 0x10, 0, 8, }, { 0x11fe, 0x8018, 0xffff, 0, "Comtrol RocketPort 550/8 part A", DEFAULT_RCLK * 4, PUC_PORT_4S, 0x10, 0, 8, }, { 0x11fe, 0x8019, 0xffff, 0, "Comtrol RocketPort 550/8 part B", DEFAULT_RCLK * 4, PUC_PORT_4S, 0x10, 0, 8, }, /* * IBM SurePOS 300 Series (481033H) serial ports * Details can be found on the IBM RSS websites */ { 0x1014, 0x0297, 0xffff, 0, "IBM SurePOS 300 Series (481033H) serial ports", DEFAULT_RCLK, PUC_PORT_4S, 0x10, 4, 0 }, /* * SIIG Boards. * * SIIG provides documentation for their boards at: * */ { 0x131f, 0x1010, 0xffff, 0, "SIIG Cyber I/O PCI 16C550 (10x family)", DEFAULT_RCLK, PUC_PORT_1S1P, 0x18, 4, 0, }, { 0x131f, 0x1011, 0xffff, 0, "SIIG Cyber I/O PCI 16C650 (10x family)", DEFAULT_RCLK, PUC_PORT_1S1P, 0x18, 4, 0, }, { 0x131f, 0x1012, 0xffff, 0, "SIIG Cyber I/O PCI 16C850 (10x family)", DEFAULT_RCLK, PUC_PORT_1S1P, 0x18, 4, 0, }, { 0x131f, 0x1021, 0xffff, 0, "SIIG Cyber Parallel Dual PCI (10x family)", 0, PUC_PORT_2P, 0x18, 8, 0, }, { 0x131f, 0x1030, 0xffff, 0, "SIIG Cyber Serial Dual PCI 16C550 (10x family)", DEFAULT_RCLK, PUC_PORT_2S, 0x18, 4, 0, }, { 0x131f, 0x1031, 0xffff, 0, "SIIG Cyber Serial Dual PCI 16C650 (10x family)", DEFAULT_RCLK, PUC_PORT_2S, 0x18, 4, 0, }, { 0x131f, 0x1032, 0xffff, 0, "SIIG Cyber Serial Dual PCI 16C850 (10x family)", DEFAULT_RCLK, PUC_PORT_2S, 0x18, 4, 0, }, { 0x131f, 0x1034, 0xffff, 0, /* XXX really? */ "SIIG Cyber 2S1P PCI 16C550 (10x family)", DEFAULT_RCLK, PUC_PORT_2S1P, 0x18, 4, 0, }, { 0x131f, 0x1035, 0xffff, 0, /* XXX really? */ "SIIG Cyber 2S1P PCI 16C650 (10x family)", DEFAULT_RCLK, PUC_PORT_2S1P, 0x18, 4, 0, }, { 0x131f, 0x1036, 0xffff, 0, /* XXX really? */ "SIIG Cyber 2S1P PCI 16C850 (10x family)", DEFAULT_RCLK, PUC_PORT_2S1P, 0x18, 4, 0, }, { 0x131f, 0x1050, 0xffff, 0, "SIIG Cyber 4S PCI 16C550 (10x family)", DEFAULT_RCLK, PUC_PORT_4S, 0x18, 4, 0, }, { 0x131f, 0x1051, 0xffff, 0, "SIIG Cyber 4S PCI 16C650 (10x family)", DEFAULT_RCLK, PUC_PORT_4S, 0x18, 4, 0, }, { 0x131f, 0x1052, 0xffff, 0, "SIIG Cyber 4S PCI 16C850 (10x family)", DEFAULT_RCLK, PUC_PORT_4S, 0x18, 4, 0, }, { 0x131f, 0x2010, 0xffff, 0, "SIIG Cyber I/O PCI 16C550 (20x family)", DEFAULT_RCLK, PUC_PORT_1S1P, 0x10, 4, 0, }, { 0x131f, 0x2011, 0xffff, 0, "SIIG Cyber I/O PCI 16C650 (20x family)", DEFAULT_RCLK, PUC_PORT_1S1P, 0x10, 4, 0, }, { 0x131f, 0x2012, 0xffff, 0, "SIIG Cyber I/O PCI 16C850 (20x family)", DEFAULT_RCLK, PUC_PORT_1S1P, 0x10, 4, 0, }, { 0x131f, 0x2021, 0xffff, 0, "SIIG Cyber Parallel Dual PCI (20x family)", 0, PUC_PORT_2P, 0x10, 8, 0, }, { 0x131f, 0x2030, 0xffff, 0, "SIIG Cyber Serial Dual PCI 16C550 (20x family)", DEFAULT_RCLK, PUC_PORT_2S, 0x10, 4, 0, }, { 0x131f, 0x2031, 0xffff, 0, "SIIG Cyber Serial Dual PCI 16C650 (20x family)", DEFAULT_RCLK, PUC_PORT_2S, 0x10, 4, 0, }, { 0x131f, 0x2032, 0xffff, 0, "SIIG Cyber Serial Dual PCI 16C850 (20x family)", DEFAULT_RCLK, PUC_PORT_2S, 0x10, 4, 0, }, { 0x131f, 0x2040, 0xffff, 0, "SIIG Cyber 2P1S PCI 16C550 (20x family)", DEFAULT_RCLK, PUC_PORT_1S2P, 0x10, -1, 0, .config_function = puc_config_siig }, { 0x131f, 0x2041, 0xffff, 0, "SIIG Cyber 2P1S PCI 16C650 (20x family)", DEFAULT_RCLK, PUC_PORT_1S2P, 0x10, -1, 0, .config_function = puc_config_siig }, { 0x131f, 0x2042, 0xffff, 0, "SIIG Cyber 2P1S PCI 16C850 (20x family)", DEFAULT_RCLK, PUC_PORT_1S2P, 0x10, -1, 0, .config_function = puc_config_siig }, { 0x131f, 0x2050, 0xffff, 0, "SIIG Cyber 4S PCI 16C550 (20x family)", DEFAULT_RCLK, PUC_PORT_4S, 0x10, 4, 0, }, { 0x131f, 0x2051, 0xffff, 0, "SIIG Cyber 4S PCI 16C650 (20x family)", DEFAULT_RCLK, PUC_PORT_4S, 0x10, 4, 0, }, { 0x131f, 0x2052, 0xffff, 0, "SIIG Cyber 4S PCI 16C850 (20x family)", DEFAULT_RCLK, PUC_PORT_4S, 0x10, 4, 0, }, { 0x131f, 0x2060, 0xffff, 0, "SIIG Cyber 2S1P PCI 16C550 (20x family)", DEFAULT_RCLK, PUC_PORT_2S1P, 0x10, 4, 0, }, { 0x131f, 0x2061, 0xffff, 0, "SIIG Cyber 2S1P PCI 16C650 (20x family)", DEFAULT_RCLK, PUC_PORT_2S1P, 0x10, 4, 0, }, { 0x131f, 0x2062, 0xffff, 0, "SIIG Cyber 2S1P PCI 16C850 (20x family)", DEFAULT_RCLK, PUC_PORT_2S1P, 0x10, 4, 0, }, { 0x131f, 0x2081, 0xffff, 0, "SIIG PS8000 8S PCI 16C650 (20x family)", DEFAULT_RCLK, PUC_PORT_8S, 0x10, -1, -1, .config_function = puc_config_siig }, { 0x135c, 0x0010, 0xffff, 0, "Quatech QSC-100", -3, /* max 8x clock rate */ PUC_PORT_4S, 0x14, 0, 8, .config_function = puc_config_quatech }, { 0x135c, 0x0020, 0xffff, 0, "Quatech DSC-100", -1, /* max 2x clock rate */ PUC_PORT_2S, 0x14, 0, 8, .config_function = puc_config_quatech }, { 0x135c, 0x0030, 0xffff, 0, "Quatech DSC-200/300", -1, /* max 2x clock rate */ PUC_PORT_2S, 0x14, 0, 8, .config_function = puc_config_quatech }, { 0x135c, 0x0040, 0xffff, 0, "Quatech QSC-200/300", -3, /* max 8x clock rate */ PUC_PORT_4S, 0x14, 0, 8, .config_function = puc_config_quatech }, { 0x135c, 0x0050, 0xffff, 0, "Quatech ESC-100D", -3, /* max 8x clock rate */ PUC_PORT_8S, 0x14, 0, 8, .config_function = puc_config_quatech }, { 0x135c, 0x0060, 0xffff, 0, "Quatech ESC-100M", -3, /* max 8x clock rate */ PUC_PORT_8S, 0x14, 0, 8, .config_function = puc_config_quatech }, { 0x135c, 0x0170, 0xffff, 0, "Quatech QSCLP-100", -1, /* max 2x clock rate */ PUC_PORT_4S, 0x18, 0, 8, .config_function = puc_config_quatech }, { 0x135c, 0x0180, 0xffff, 0, "Quatech DSCLP-100", -1, /* max 3x clock rate */ PUC_PORT_2S, 0x18, 0, 8, .config_function = puc_config_quatech }, { 0x135c, 0x01b0, 0xffff, 0, "Quatech DSCLP-200/300", -1, /* max 2x clock rate */ PUC_PORT_2S, 0x18, 0, 8, .config_function = puc_config_quatech }, { 0x135c, 0x01e0, 0xffff, 0, "Quatech ESCLP-100", -3, /* max 8x clock rate */ PUC_PORT_8S, 0x10, 0, 8, .config_function = puc_config_quatech }, { 0x1393, 0x1024, 0xffff, 0, "Moxa Technologies, Smartio CP-102E/PCIe", DEFAULT_RCLK * 8, PUC_PORT_2S, 0x14, 0, -1, .config_function = puc_config_moxa }, { 0x1393, 0x1025, 0xffff, 0, "Moxa Technologies, Smartio CP-102EL/PCIe", DEFAULT_RCLK * 8, PUC_PORT_2S, 0x14, 0, -1, .config_function = puc_config_moxa }, { 0x1393, 0x1040, 0xffff, 0, "Moxa Technologies, Smartio C104H/PCI", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x18, 0, 8, }, { 0x1393, 0x1041, 0xffff, 0, "Moxa Technologies, Smartio CP-104UL/PCI", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x18, 0, 8, }, { 0x1393, 0x1042, 0xffff, 0, "Moxa Technologies, Smartio CP-104JU/PCI", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x18, 0, 8, }, { 0x1393, 0x1043, 0xffff, 0, "Moxa Technologies, Smartio CP-104EL/PCIe", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x18, 0, 8, }, { 0x1393, 0x1045, 0xffff, 0, "Moxa Technologies, Smartio CP-104EL-A/PCIe", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x14, 0, -1, .config_function = puc_config_moxa }, { 0x1393, 0x1120, 0xffff, 0, "Moxa Technologies, CP-112UL", DEFAULT_RCLK * 8, PUC_PORT_2S, 0x18, 0, 8, }, { 0x1393, 0x1141, 0xffff, 0, "Moxa Technologies, Industio CP-114", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x18, 0, 8, }, { 0x1393, 0x1144, 0xffff, 0, "Moxa Technologies, Smartio CP-114EL/PCIe", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x14, 0, -1, .config_function = puc_config_moxa }, { 0x1393, 0x1182, 0xffff, 0, "Moxa Technologies, Smartio CP-118EL-A/PCIe", DEFAULT_RCLK * 8, PUC_PORT_8S, 0x14, 0, -1, .config_function = puc_config_moxa }, { 0x1393, 0x1680, 0xffff, 0, "Moxa Technologies, C168H/PCI", DEFAULT_RCLK * 8, PUC_PORT_8S, 0x18, 0, 8, }, { 0x1393, 0x1681, 0xffff, 0, "Moxa Technologies, C168U/PCI", DEFAULT_RCLK * 8, PUC_PORT_8S, 0x18, 0, 8, }, { 0x1393, 0x1682, 0xffff, 0, "Moxa Technologies, CP-168EL/PCIe", DEFAULT_RCLK * 8, PUC_PORT_8S, 0x18, 0, 8, }, { 0x1393, 0x1683, 0xffff, 0, "Moxa Technologies, Smartio CP-168EL-A/PCIe", DEFAULT_RCLK * 8, PUC_PORT_8S, 0x14, 0, -1, .config_function = puc_config_moxa }, { 0x13a8, 0x0152, 0xffff, 0, "Exar XR17C/D152", DEFAULT_RCLK * 8, PUC_PORT_2S, 0x10, 0, -1, .config_function = puc_config_exar }, { 0x13a8, 0x0154, 0xffff, 0, "Exar XR17C154", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x10, 0, -1, .config_function = puc_config_exar }, { 0x13a8, 0x0158, 0xffff, 0, "Exar XR17C158", DEFAULT_RCLK * 8, PUC_PORT_8S, 0x10, 0, -1, .config_function = puc_config_exar }, { 0x13a8, 0x0258, 0xffff, 0, "Exar XR17V258IV", DEFAULT_RCLK * 8, PUC_PORT_8S, 0x10, 0, -1, .config_function = puc_config_exar }, /* The XR17V358 uses the 125MHz PCIe clock as its reference clock. */ { 0x13a8, 0x0358, 0xffff, 0, "Exar XR17V358", 125000000, PUC_PORT_8S, 0x10, 0, -1, .config_function = puc_config_exar_pcie }, + /* + * The Advantech PCI-1602 Rev. A use the first two ports of an Oxford + * Semiconductor OXuPCI954. Note these boards have a hardware bug in + * that they drive the RS-422/485 transmitters after power-on until a + * driver initalizes the UARTs. + */ { 0x13fe, 0x1600, 0x1602, 0x0002, - "Advantech PCI-1602", + "Advantech PCI-1602 Rev. A", DEFAULT_RCLK * 8, PUC_PORT_2S, 0x10, 0, 8, + .config_function = puc_config_advantech }, + /* Advantech PCI-1602 Rev. B1/PCI-1603 are also based on OXuPCI952. */ + { 0x13fe, 0xa102, 0x13fe, 0xa102, + "Advantech 2-port PCI (PCI-1602 Rev. B1/PCI-1603)", + DEFAULT_RCLK * 8, + PUC_PORT_2S, 0x10, 4, 0, + .config_function = puc_config_advantech + }, + { 0x1407, 0x0100, 0xffff, 0, "Lava Computers Dual Serial", DEFAULT_RCLK, PUC_PORT_2S, 0x10, 4, 0, }, { 0x1407, 0x0101, 0xffff, 0, "Lava Computers Quatro A", DEFAULT_RCLK, PUC_PORT_2S, 0x10, 4, 0, }, { 0x1407, 0x0102, 0xffff, 0, "Lava Computers Quatro B", DEFAULT_RCLK, PUC_PORT_2S, 0x10, 4, 0, }, { 0x1407, 0x0120, 0xffff, 0, "Lava Computers Quattro-PCI A", DEFAULT_RCLK, PUC_PORT_2S, 0x10, 4, 0, }, { 0x1407, 0x0121, 0xffff, 0, "Lava Computers Quattro-PCI B", DEFAULT_RCLK, PUC_PORT_2S, 0x10, 4, 0, }, { 0x1407, 0x0180, 0xffff, 0, "Lava Computers Octo A", DEFAULT_RCLK, PUC_PORT_4S, 0x10, 4, 0, }, { 0x1407, 0x0181, 0xffff, 0, "Lava Computers Octo B", DEFAULT_RCLK, PUC_PORT_4S, 0x10, 4, 0, }, { 0x1409, 0x7268, 0xffff, 0, "Sunix SUN1888", 0, PUC_PORT_2P, 0x10, 0, 8, }, { 0x1409, 0x7168, 0xffff, 0, NULL, DEFAULT_RCLK * 8, PUC_PORT_NONSTANDARD, 0x10, -1, -1, .config_function = puc_config_timedia }, /* * Boards with an Oxford Semiconductor chip. * * Oxford Semiconductor provides documentation for their chip at: * * * As sold by Kouwell . * I/O Flex PCI I/O Card Model-223 with 4 serial and 1 parallel ports. */ { 0x1415, 0x9501, 0x10fc, 0xc070, "I-O DATA RSA-PCI2/R", DEFAULT_RCLK * 8, PUC_PORT_2S, 0x10, 0, 8, }, { 0x1415, 0x9501, 0x131f, 0x2050, "SIIG Cyber 4 PCI 16550", DEFAULT_RCLK * 10, PUC_PORT_4S, 0x10, 0, 8, }, { 0x1415, 0x9501, 0x131f, 0x2051, "SIIG Cyber 4S PCI 16C650 (20x family)", DEFAULT_RCLK * 10, PUC_PORT_4S, 0x10, 0, 8, }, { 0x1415, 0x9501, 0x131f, 0x2052, "SIIG Quartet Serial 850", DEFAULT_RCLK * 10, PUC_PORT_4S, 0x10, 0, 8, }, { 0x1415, 0x9501, 0x14db, 0x2150, "Kuroutoshikou SERIAL4P-LPPCI2", DEFAULT_RCLK * 10, PUC_PORT_4S, 0x10, 0, 8, }, { 0x1415, 0x9501, 0xffff, 0, "Oxford Semiconductor OX16PCI954 UARTs", 0, PUC_PORT_4S, 0x10, 0, 8, .config_function = puc_config_oxford_pci954 }, { 0x1415, 0x950a, 0x131f, 0x2030, "SIIG Cyber 2S PCIe", DEFAULT_RCLK * 10, PUC_PORT_2S, 0x10, 0, 8, }, { 0x1415, 0x950a, 0x131f, 0x2032, "SIIG Cyber Serial Dual PCI 16C850", DEFAULT_RCLK * 10, PUC_PORT_4S, 0x10, 0, 8, }, { 0x1415, 0x950a, 0x131f, 0x2061, "SIIG Cyber 2SP1 PCIe", DEFAULT_RCLK * 10, PUC_PORT_2S, 0x10, 0, 8, }, { 0x1415, 0x950a, 0xffff, 0, "Oxford Semiconductor OX16PCI954 UARTs", DEFAULT_RCLK, PUC_PORT_4S, 0x10, 0, 8, }, { 0x1415, 0x9511, 0xffff, 0, "Oxford Semiconductor OX9160/OX16PCI954 UARTs (function 1)", DEFAULT_RCLK, PUC_PORT_4S, 0x10, 0, 8, }, { 0x1415, 0x9521, 0xffff, 0, "Oxford Semiconductor OX16PCI952 UARTs", DEFAULT_RCLK, PUC_PORT_2S, 0x10, 4, 0, }, { 0x1415, 0x9538, 0xffff, 0, "Oxford Semiconductor OX16PCI958 UARTs", DEFAULT_RCLK, PUC_PORT_8S, 0x18, 0, 8, }, /* * Perle boards use Oxford Semiconductor chips, but they store the * Oxford Semiconductor device ID as a subvendor device ID and use * their own device IDs. */ { 0x155f, 0x0331, 0xffff, 0, "Perle Ultraport4 Express", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x10, 0, 8, }, { 0x155f, 0xB012, 0xffff, 0, "Perle Speed2 LE", DEFAULT_RCLK * 8, PUC_PORT_2S, 0x10, 0, 8, }, { 0x155f, 0xB022, 0xffff, 0, "Perle Speed2 LE", DEFAULT_RCLK * 8, PUC_PORT_2S, 0x10, 0, 8, }, { 0x155f, 0xB004, 0xffff, 0, "Perle Speed4 LE", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x10, 0, 8, }, { 0x155f, 0xB008, 0xffff, 0, "Perle Speed8 LE", DEFAULT_RCLK * 8, PUC_PORT_8S, 0x10, 0, 8, }, /* * Oxford Semiconductor PCI Express Expresso family * * Found in many 'native' PCI Express serial boards such as: * * eMegatech MP954ER4 (4 port) and MP958ER8 (8 port) * * * Lindy 51189 (4 port) * * * StarTech.com PEX4S952 (4 port) and PEX8S952 (8 port) * */ { 0x1415, 0xc11b, 0xffff, 0, "Oxford Semiconductor OXPCIe952 1S1P", DEFAULT_RCLK * 0x22, PUC_PORT_NONSTANDARD, 0x10, 0, -1, .config_function = puc_config_oxford_pcie }, { 0x1415, 0xc138, 0xffff, 0, "Oxford Semiconductor OXPCIe952 UARTs", DEFAULT_RCLK * 0x22, PUC_PORT_NONSTANDARD, 0x10, 0, -1, .config_function = puc_config_oxford_pcie }, { 0x1415, 0xc158, 0xffff, 0, "Oxford Semiconductor OXPCIe952 UARTs", DEFAULT_RCLK * 0x22, PUC_PORT_NONSTANDARD, 0x10, 0, -1, .config_function = puc_config_oxford_pcie }, { 0x1415, 0xc15d, 0xffff, 0, "Oxford Semiconductor OXPCIe952 UARTs (function 1)", DEFAULT_RCLK * 0x22, PUC_PORT_NONSTANDARD, 0x10, 0, -1, .config_function = puc_config_oxford_pcie }, { 0x1415, 0xc208, 0xffff, 0, "Oxford Semiconductor OXPCIe954 UARTs", DEFAULT_RCLK * 0x22, PUC_PORT_NONSTANDARD, 0x10, 0, -1, .config_function = puc_config_oxford_pcie }, { 0x1415, 0xc20d, 0xffff, 0, "Oxford Semiconductor OXPCIe954 UARTs (function 1)", DEFAULT_RCLK * 0x22, PUC_PORT_NONSTANDARD, 0x10, 0, -1, .config_function = puc_config_oxford_pcie }, { 0x1415, 0xc308, 0xffff, 0, "Oxford Semiconductor OXPCIe958 UARTs", DEFAULT_RCLK * 0x22, PUC_PORT_NONSTANDARD, 0x10, 0, -1, .config_function = puc_config_oxford_pcie }, { 0x1415, 0xc30d, 0xffff, 0, "Oxford Semiconductor OXPCIe958 UARTs (function 1)", DEFAULT_RCLK * 0x22, PUC_PORT_NONSTANDARD, 0x10, 0, -1, .config_function = puc_config_oxford_pcie }, { 0x14d2, 0x8010, 0xffff, 0, "VScom PCI-100L", DEFAULT_RCLK * 8, PUC_PORT_1S, 0x14, 0, 0, }, { 0x14d2, 0x8020, 0xffff, 0, "VScom PCI-200L", DEFAULT_RCLK * 8, PUC_PORT_2S, 0x14, 4, 0, }, { 0x14d2, 0x8028, 0xffff, 0, "VScom 200Li", DEFAULT_RCLK, PUC_PORT_2S, 0x20, 0, 8, }, /* * VScom (Titan?) PCI-800L. More modern variant of the * PCI-800. Uses 6 discrete 16550 UARTs, plus another * two of them obviously implemented as macro cells in * the ASIC. This causes the weird port access pattern * below, where two of the IO port ranges each access * one of the ASIC UARTs, and a block of IO addresses * access the external UARTs. */ { 0x14d2, 0x8080, 0xffff, 0, "Titan VScom PCI-800L", DEFAULT_RCLK * 8, PUC_PORT_8S, 0x14, -1, -1, .config_function = puc_config_titan }, /* * VScom PCI-800H. Uses 8 16950 UART, behind a PCI chips that offers * 4 com port on PCI device 0 and 4 on PCI device 1. PCI device 0 has * device ID 3 and PCI device 1 device ID 4. */ { 0x14d2, 0xa003, 0xffff, 0, "Titan PCI-800H", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x10, 0, 8, }, { 0x14d2, 0xa004, 0xffff, 0, "Titan PCI-800H", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x10, 0, 8, }, { 0x14d2, 0xa005, 0xffff, 0, "Titan PCI-200H", DEFAULT_RCLK * 8, PUC_PORT_2S, 0x10, 0, 8, }, { 0x14d2, 0xe020, 0xffff, 0, "Titan VScom PCI-200HV2", DEFAULT_RCLK * 8, PUC_PORT_2S, 0x10, 4, 0, }, { 0x14d2, 0xa007, 0xffff, 0, "Titan VScom PCIex-800H", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x10, 0, 8, }, { 0x14d2, 0xa008, 0xffff, 0, "Titan VScom PCIex-800H", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x10, 0, 8, }, { 0x14db, 0x2130, 0xffff, 0, "Avlab Technology, PCI IO 2S", DEFAULT_RCLK, PUC_PORT_2S, 0x10, 4, 0, }, { 0x14db, 0x2150, 0xffff, 0, "Avlab Low Profile PCI 4 Serial", DEFAULT_RCLK, PUC_PORT_4S, 0x10, 4, 0, }, { 0x14db, 0x2152, 0xffff, 0, "Avlab Low Profile PCI 4 Serial", DEFAULT_RCLK, PUC_PORT_4S, 0x10, 4, 0, }, { 0x1592, 0x0781, 0xffff, 0, "Syba Tech Ltd. PCI-4S2P-550-ECP", DEFAULT_RCLK, PUC_PORT_4S1P, 0x10, 0, -1, .config_function = puc_config_syba }, { 0x1fd4, 0x1999, 0x1fd4, 0x0002, "Sunix SER5xxxx 2-port serial", DEFAULT_RCLK * 8, PUC_PORT_2S, 0x10, 0, 8, }, { 0x1fd4, 0x1999, 0x1fd4, 0x0004, "Sunix SER5xxxx 4-port serial", DEFAULT_RCLK * 8, PUC_PORT_4S, 0x10, 0, 8, }, { 0x1fd4, 0x1999, 0x1fd4, 0x0008, "Sunix SER5xxxx 8-port serial", DEFAULT_RCLK * 8, PUC_PORT_8S, -1, -1, -1, .config_function = puc_config_sunix }, { 0x1fd4, 0x1999, 0x1fd4, 0x0101, "Sunix MIO5xxxx 1-port serial and 1284 Printer port", DEFAULT_RCLK * 8, PUC_PORT_1S1P, -1, -1, -1, .config_function = puc_config_sunix }, { 0x1fd4, 0x1999, 0x1fd4, 0x0102, "Sunix MIO5xxxx 2-port serial and 1284 Printer port", DEFAULT_RCLK * 8, PUC_PORT_2S1P, -1, -1, -1, .config_function = puc_config_sunix }, { 0x1fd4, 0x1999, 0x1fd4, 0x0104, "Sunix MIO5xxxx 4-port serial and 1284 Printer port", DEFAULT_RCLK * 8, PUC_PORT_4S1P, -1, -1, -1, .config_function = puc_config_sunix }, { 0x5372, 0x6872, 0xffff, 0, "Feasso PCI FPP-02 2S1P", DEFAULT_RCLK, PUC_PORT_2S1P, 0x10, 4, 0, }, { 0x5372, 0x6873, 0xffff, 0, "Sun 1040 PCI Quad Serial", DEFAULT_RCLK, PUC_PORT_4S, 0x10, 4, 0, }, { 0x6666, 0x0001, 0xffff, 0, "Decision Computer Inc, PCCOM 4-port serial", DEFAULT_RCLK, PUC_PORT_4S, 0x1c, 0, 8, }, { 0x6666, 0x0002, 0xffff, 0, "Decision Computer Inc, PCCOM 8-port serial", DEFAULT_RCLK, PUC_PORT_8S, 0x1c, 0, 8, }, { 0x6666, 0x0004, 0xffff, 0, "PCCOM dual port RS232/422/485", DEFAULT_RCLK, PUC_PORT_2S, 0x1c, 0, 8, }, { 0x9710, 0x9815, 0xffff, 0, "NetMos NM9815 Dual 1284 Printer port", 0, PUC_PORT_2P, 0x10, 8, 0, }, /* * This is more specific than the generic NM9835 entry, and is placed * here to _prevent_ puc(4) from claiming this single port card. * * uart(4) will claim this device. */ { 0x9710, 0x9835, 0x1000, 1, "NetMos NM9835 based 1-port serial", DEFAULT_RCLK, PUC_PORT_1S, 0x10, 4, 0, }, { 0x9710, 0x9835, 0x1000, 2, "NetMos NM9835 based 2-port serial", DEFAULT_RCLK, PUC_PORT_2S, 0x10, 4, 0, }, { 0x9710, 0x9835, 0xffff, 0, "NetMos NM9835 Dual UART and 1284 Printer port", DEFAULT_RCLK, PUC_PORT_2S1P, 0x10, 4, 0, }, { 0x9710, 0x9845, 0x1000, 0x0006, "NetMos NM9845 6 Port UART", DEFAULT_RCLK, PUC_PORT_6S, 0x10, 4, 0, }, { 0x9710, 0x9845, 0xffff, 0, "NetMos NM9845 Quad UART and 1284 Printer port", DEFAULT_RCLK, PUC_PORT_4S1P, 0x10, 4, 0, }, { 0x9710, 0x9865, 0xa000, 0x3002, "NetMos NM9865 Dual UART", DEFAULT_RCLK, PUC_PORT_2S, 0x10, 4, 0, }, { 0x9710, 0x9865, 0xa000, 0x3003, "NetMos NM9865 Triple UART", DEFAULT_RCLK, PUC_PORT_3S, 0x10, 4, 0, }, { 0x9710, 0x9865, 0xa000, 0x3004, "NetMos NM9865 Quad UART", DEFAULT_RCLK, PUC_PORT_4S, 0x10, 4, 0, }, { 0x9710, 0x9865, 0xa000, 0x3011, "NetMos NM9865 Single UART and 1284 Printer port", DEFAULT_RCLK, PUC_PORT_1S1P, 0x10, 4, 0, }, { 0x9710, 0x9865, 0xa000, 0x3012, "NetMos NM9865 Dual UART and 1284 Printer port", DEFAULT_RCLK, PUC_PORT_2S1P, 0x10, 4, 0, }, { 0x9710, 0x9865, 0xa000, 0x3020, "NetMos NM9865 Dual 1284 Printer port", DEFAULT_RCLK, PUC_PORT_2P, 0x10, 4, 0, }, { 0xb00c, 0x021c, 0xffff, 0, "IC Book Labs Gunboat x4 Lite", DEFAULT_RCLK, PUC_PORT_4S, 0x10, 0, 8, .config_function = puc_config_icbook }, { 0xb00c, 0x031c, 0xffff, 0, "IC Book Labs Gunboat x4 Pro", DEFAULT_RCLK, PUC_PORT_4S, 0x10, 0, 8, .config_function = puc_config_icbook }, { 0xb00c, 0x041c, 0xffff, 0, "IC Book Labs Ironclad x8 Lite", DEFAULT_RCLK, PUC_PORT_8S, 0x10, 0, 8, .config_function = puc_config_icbook }, { 0xb00c, 0x051c, 0xffff, 0, "IC Book Labs Ironclad x8 Pro", DEFAULT_RCLK, PUC_PORT_8S, 0x10, 0, 8, .config_function = puc_config_icbook }, { 0xb00c, 0x081c, 0xffff, 0, "IC Book Labs Dreadnought x16 Pro", DEFAULT_RCLK * 8, PUC_PORT_16S, 0x10, 0, 8, .config_function = puc_config_icbook }, { 0xb00c, 0x091c, 0xffff, 0, "IC Book Labs Dreadnought x16 Lite", DEFAULT_RCLK, PUC_PORT_16S, 0x10, 0, 8, .config_function = puc_config_icbook }, { 0xb00c, 0x0a1c, 0xffff, 0, "IC Book Labs Gunboat x2 Low Profile", DEFAULT_RCLK, PUC_PORT_2S, 0x10, 0, 8, }, { 0xb00c, 0x0b1c, 0xffff, 0, "IC Book Labs Gunboat x4 Low Profile", DEFAULT_RCLK, PUC_PORT_4S, 0x10, 0, 8, .config_function = puc_config_icbook }, { 0xffff, 0, 0xffff, 0, NULL, 0 } }; static int +puc_config_advantech(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, + intptr_t *res __unused) +{ + const struct puc_cfg *cfg; + struct resource *cres; + struct puc_bar *bar; + device_t cdev, dev; + bus_size_t off; + int base, crtype, fixed, high, i, oxpcie; + uint8_t acr, func, mask; + + if (cmd != PUC_CFG_SETUP) + return (ENXIO); + + base = fixed = oxpcie = 0; + crtype = SYS_RES_IOPORT; + acr = mask = 0x0; + func = high = 1; + off = 0x60; + + cfg = sc->sc_cfg; + switch (cfg->subvendor) { + case 0x13fe: + switch (cfg->device) { + case 0xa102: + high = 0; + break; + default: + break; + } + default: + break; + } + if (fixed == 1) + goto setup; + + dev = sc->sc_dev; + cdev = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev), + pci_get_slot(dev), func); + if (cdev == NULL) { + device_printf(dev, "could not find config function\n"); + return (ENXIO); + } + + i = PCIR_BAR(0); + cres = bus_alloc_resource_any(cdev, crtype, &i, RF_ACTIVE); + if (cres == NULL) { + device_printf(dev, "could not allocate config resource\n"); + return (ENXIO); + } + + if (oxpcie == 0) { + mask = bus_read_1(cres, off); + if (pci_get_function(dev) == 1) + base = 4; + } + + setup: + for (i = 0; i < sc->sc_nports; ++i) { + device_printf(dev, "port %d: ", i); + bar = puc_get_bar(sc, cfg->rid + i * cfg->d_rid); + if (bar == NULL) { + printf("could not get BAR\n"); + continue; + } + + if (fixed == 0) { + if ((mask & (1 << (base + i))) == 0) { + acr = 0; + printf("RS-232\n"); + } else { + acr = (high == 1 ? 0x18 : 0x10); + printf("RS-422/RS-485, active-%s auto-DTR\n", + high == 1 ? "high" : "low"); + } + } + + bus_write_1(bar->b_res, REG_SPR, REG_ACR); + bus_write_1(bar->b_res, REG_ICR, acr); + } + + bus_release_resource(cdev, crtype, rman_get_rid(cres), cres); + return (0); +} + +static int puc_config_amc(struct puc_softc *sc __unused, enum puc_cfg_cmd cmd, int port, intptr_t *res) { switch (cmd) { case PUC_CFG_GET_OFS: *res = 8 * (port & 1); return (0); case PUC_CFG_GET_RID: *res = 0x14 + (port >> 1) * 4; return (0); default: break; } return (ENXIO); } static int puc_config_diva(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, intptr_t *res) { const struct puc_cfg *cfg = sc->sc_cfg; if (cmd == PUC_CFG_GET_OFS) { if (cfg->subdevice == 0x1282) /* Everest SP */ port <<= 1; else if (cfg->subdevice == 0x104b) /* Maestro SP2 */ port = (port == 3) ? 4 : port; *res = port * 8 + ((port > 2) ? 0x18 : 0); return (0); } return (ENXIO); } static int puc_config_exar(struct puc_softc *sc __unused, enum puc_cfg_cmd cmd, int port, intptr_t *res) { if (cmd == PUC_CFG_GET_OFS) { *res = port * 0x200; return (0); } return (ENXIO); } static int puc_config_exar_pcie(struct puc_softc *sc __unused, enum puc_cfg_cmd cmd, int port, intptr_t *res) { if (cmd == PUC_CFG_GET_OFS) { *res = port * 0x400; return (0); } return (ENXIO); } static int puc_config_icbook(struct puc_softc *sc __unused, enum puc_cfg_cmd cmd, int port __unused, intptr_t *res) { if (cmd == PUC_CFG_GET_ILR) { *res = PUC_ILR_DIGI; return (0); } return (ENXIO); } static int puc_config_moxa(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, intptr_t *res) { const struct puc_cfg *cfg = sc->sc_cfg; if (cmd == PUC_CFG_GET_OFS) { if (port == 3 && (cfg->device == 0x1045 || cfg->device == 0x1144)) port = 7; *res = port * 0x200; return 0; } return (ENXIO); } static int puc_config_quatech(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port __unused, intptr_t *res) { const struct puc_cfg *cfg = sc->sc_cfg; struct puc_bar *bar; uint8_t v0, v1; switch (cmd) { case PUC_CFG_SETUP: /* * Check if the scratchpad register is enabled or if the * interrupt status and options registers are active. */ bar = puc_get_bar(sc, cfg->rid); if (bar == NULL) return (ENXIO); - /* Set DLAB in the LCR register of UART 0. */ - bus_write_1(bar->b_res, 3, 0x80); - /* Write 0 to the SPR register of UART 0. */ - bus_write_1(bar->b_res, 7, 0); - /* Read back the contents of the SPR register of UART 0. */ - v0 = bus_read_1(bar->b_res, 7); - /* Write a specific value to the SPR register of UART 0. */ - bus_write_1(bar->b_res, 7, 0x80 + -cfg->clock); - /* Read back the contents of the SPR register of UART 0. */ - v1 = bus_read_1(bar->b_res, 7); - /* Clear DLAB in the LCR register of UART 0. */ - bus_write_1(bar->b_res, 3, 0); - /* Save the two values read-back from the SPR register. */ + bus_write_1(bar->b_res, REG_LCR, LCR_DLAB); + bus_write_1(bar->b_res, REG_SPR, 0); + v0 = bus_read_1(bar->b_res, REG_SPR); + bus_write_1(bar->b_res, REG_SPR, 0x80 + -cfg->clock); + v1 = bus_read_1(bar->b_res, REG_SPR); + bus_write_1(bar->b_res, REG_LCR, 0); sc->sc_cfg_data = (v0 << 8) | v1; if (v0 == 0 && v1 == 0x80 + -cfg->clock) { /* * The SPR register echoed the two values written - * by us. This means that the SPAD jumper is set. + * by us. This means that the SPAD jumper is set. */ device_printf(sc->sc_dev, "warning: extra features " "not usable -- SPAD compatibility enabled\n"); return (0); } if (v0 != 0) { /* - * The first value doesn't match. This can only mean + * The first value doesn't match. This can only mean * that the SPAD jumper is not set and that a non- * standard fixed clock multiplier jumper is set. */ if (bootverbose) device_printf(sc->sc_dev, "fixed clock rate " "multiplier of %d\n", 1 << v0); if (v0 < -cfg->clock) device_printf(sc->sc_dev, "warning: " "suboptimal fixed clock rate multiplier " "setting\n"); return (0); } /* - * The first value matched, but the second didn't. We know - * that the SPAD jumper is not set. We also know that the + * The first value matched, but the second didn't. We know + * that the SPAD jumper is not set. We also know that the * clock rate multiplier is software controlled *and* that * we just programmed it to the maximum allowed. */ if (bootverbose) device_printf(sc->sc_dev, "clock rate multiplier of " "%d selected\n", 1 << -cfg->clock); return (0); case PUC_CFG_GET_CLOCK: v0 = (sc->sc_cfg_data >> 8) & 0xff; v1 = sc->sc_cfg_data & 0xff; if (v0 == 0 && v1 == 0x80 + -cfg->clock) { /* * XXX With the SPAD jumper applied, there's no * easy way of knowing if there's also a clock - * rate multiplier jumper installed. Let's hope - * not... + * rate multiplier jumper installed. Let's hope + * not ... */ *res = DEFAULT_RCLK; } else if (v0 == 0) { /* * No clock rate multiplier jumper installed, * so we programmed the board with the maximum * multiplier allowed as given to us in the * clock field of the config record (negated). */ *res = DEFAULT_RCLK << -cfg->clock; } else *res = DEFAULT_RCLK << v0; return (0); case PUC_CFG_GET_ILR: v0 = (sc->sc_cfg_data >> 8) & 0xff; v1 = sc->sc_cfg_data & 0xff; *res = (v0 == 0 && v1 == 0x80 + -cfg->clock) ? PUC_ILR_NONE : PUC_ILR_QUATECH; return (0); default: break; } return (ENXIO); } static int puc_config_syba(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, intptr_t *res) { static int base[] = { 0x251, 0x3f0, 0 }; const struct puc_cfg *cfg = sc->sc_cfg; struct puc_bar *bar; int efir, idx, ofs; uint8_t v; switch (cmd) { case PUC_CFG_SETUP: bar = puc_get_bar(sc, cfg->rid); if (bar == NULL) return (ENXIO); /* configure both W83877TFs */ bus_write_1(bar->b_res, 0x250, 0x89); bus_write_1(bar->b_res, 0x3f0, 0x87); bus_write_1(bar->b_res, 0x3f0, 0x87); idx = 0; while (base[idx] != 0) { efir = base[idx]; bus_write_1(bar->b_res, efir, 0x09); v = bus_read_1(bar->b_res, efir + 1); if ((v & 0x0f) != 0x0c) return (ENXIO); bus_write_1(bar->b_res, efir, 0x16); v = bus_read_1(bar->b_res, efir + 1); bus_write_1(bar->b_res, efir, 0x16); bus_write_1(bar->b_res, efir + 1, v | 0x04); bus_write_1(bar->b_res, efir, 0x16); bus_write_1(bar->b_res, efir + 1, v & ~0x04); ofs = base[idx] & 0x300; bus_write_1(bar->b_res, efir, 0x23); bus_write_1(bar->b_res, efir + 1, (ofs + 0x78) >> 2); bus_write_1(bar->b_res, efir, 0x24); bus_write_1(bar->b_res, efir + 1, (ofs + 0xf8) >> 2); bus_write_1(bar->b_res, efir, 0x25); bus_write_1(bar->b_res, efir + 1, (ofs + 0xe8) >> 2); bus_write_1(bar->b_res, efir, 0x17); bus_write_1(bar->b_res, efir + 1, 0x03); bus_write_1(bar->b_res, efir, 0x28); bus_write_1(bar->b_res, efir + 1, 0x43); idx++; } bus_write_1(bar->b_res, 0x250, 0xaa); bus_write_1(bar->b_res, 0x3f0, 0xaa); return (0); case PUC_CFG_GET_OFS: switch (port) { case 0: *res = 0x2f8; return (0); case 1: *res = 0x2e8; return (0); case 2: *res = 0x3f8; return (0); case 3: *res = 0x3e8; return (0); case 4: *res = 0x278; return (0); } break; default: break; } return (ENXIO); } static int puc_config_siig(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, intptr_t *res) { const struct puc_cfg *cfg = sc->sc_cfg; switch (cmd) { case PUC_CFG_GET_OFS: if (cfg->ports == PUC_PORT_8S) { *res = (port > 4) ? 8 * (port - 4) : 0; return (0); } break; case PUC_CFG_GET_RID: if (cfg->ports == PUC_PORT_8S) { *res = 0x10 + ((port > 4) ? 0x10 : 4 * port); return (0); } if (cfg->ports == PUC_PORT_2S1P) { switch (port) { case 0: *res = 0x10; return (0); case 1: *res = 0x14; return (0); case 2: *res = 0x1c; return (0); } } break; default: break; } return (ENXIO); } static int puc_config_timedia(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, intptr_t *res) { static const uint16_t dual[] = { 0x0002, 0x4036, 0x4037, 0x4038, 0x4078, 0x4079, 0x4085, 0x4088, 0x4089, 0x5037, 0x5078, 0x5079, 0x5085, 0x6079, 0x7079, 0x8079, 0x8137, 0x8138, 0x8237, 0x8238, 0x9079, 0x9137, 0x9138, 0x9237, 0x9238, 0xA079, 0xB079, 0xC079, 0xD079, 0 }; static const uint16_t quad[] = { 0x4055, 0x4056, 0x4095, 0x4096, 0x5056, 0x8156, 0x8157, 0x8256, 0x8257, 0x9056, 0x9156, 0x9157, 0x9158, 0x9159, 0x9256, 0x9257, 0xA056, 0xA157, 0xA158, 0xA159, 0xB056, 0xB157, 0 }; static const uint16_t octa[] = { 0x4065, 0x4066, 0x5065, 0x5066, 0x8166, 0x9066, 0x9166, 0x9167, 0x9168, 0xA066, 0xA167, 0xA168, 0 }; static const struct { int ports; const uint16_t *ids; } subdevs[] = { { 2, dual }, { 4, quad }, { 8, octa }, { 0, NULL } }; static char desc[64]; int dev, id; uint16_t subdev; switch (cmd) { case PUC_CFG_GET_CLOCK: if (port < 2) *res = DEFAULT_RCLK * 8; else *res = DEFAULT_RCLK; return (0); case PUC_CFG_GET_DESC: snprintf(desc, sizeof(desc), "Timedia technology %d Port Serial", (int)sc->sc_cfg_data); *res = (intptr_t)desc; return (0); case PUC_CFG_GET_NPORTS: subdev = pci_get_subdevice(sc->sc_dev); dev = 0; while (subdevs[dev].ports != 0) { id = 0; while (subdevs[dev].ids[id] != 0) { if (subdev == subdevs[dev].ids[id]) { sc->sc_cfg_data = subdevs[dev].ports; *res = sc->sc_cfg_data; return (0); } id++; } dev++; } return (ENXIO); case PUC_CFG_GET_OFS: *res = (port == 1 || port == 3) ? 8 : 0; return (0); case PUC_CFG_GET_RID: *res = 0x10 + ((port > 3) ? port - 2 : port >> 1) * 4; return (0); case PUC_CFG_GET_TYPE: *res = PUC_TYPE_SERIAL; return (0); default: break; } return (ENXIO); } static int puc_config_oxford_pci954(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port __unused, intptr_t *res) { switch (cmd) { case PUC_CFG_GET_CLOCK: /* * OXu16PCI954 use a 14.7456 MHz clock by default while * OX16PCI954 and OXm16PCI954 employ a 1.8432 MHz one. */ if (pci_get_revid(sc->sc_dev) == 1) *res = DEFAULT_RCLK * 8; else *res = DEFAULT_RCLK; return (0); default: break; } return (ENXIO); } static int puc_config_oxford_pcie(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, intptr_t *res) { const struct puc_cfg *cfg = sc->sc_cfg; int idx; struct puc_bar *bar; uint8_t value; switch (cmd) { case PUC_CFG_SETUP: device_printf(sc->sc_dev, "%d UARTs detected\n", sc->sc_nports); /* Set UARTs to enhanced mode */ bar = puc_get_bar(sc, cfg->rid); if (bar == NULL) return (ENXIO); for (idx = 0; idx < sc->sc_nports; idx++) { value = bus_read_1(bar->b_res, 0x1000 + (idx << 9) + 0x92); bus_write_1(bar->b_res, 0x1000 + (idx << 9) + 0x92, value | 0x10); } return (0); case PUC_CFG_GET_LEN: *res = 0x200; return (0); case PUC_CFG_GET_NPORTS: /* * Check if we are being called from puc_bfe_attach() - * or puc_bfe_probe(). If puc_bfe_probe(), we cannot - * puc_get_bar(), so we return a value of 16. This has cosmetic - * side-effects at worst; in PUC_CFG_GET_DESC, - * (int)sc->sc_cfg_data will not contain the true number of - * ports in PUC_CFG_GET_DESC, but we are not implementing that - * call for this device family anyway. + * or puc_bfe_probe(). If puc_bfe_probe(), we cannot + * puc_get_bar(), so we return a value of 16. This has + * cosmetic side-effects at worst; in PUC_CFG_GET_DESC, + * sc->sc_cfg_data will not contain the true number of + * ports in PUC_CFG_GET_DESC, but we are not implementing + * that call for this device family anyway. * - * The check is for initialisation of sc->sc_bar[idx], which is - * only done in puc_bfe_attach(). + * The check is for initialization of sc->sc_bar[idx], + * which is only done in puc_bfe_attach(). */ idx = 0; do { if (sc->sc_bar[idx++].b_rid != -1) { sc->sc_cfg_data = 16; *res = sc->sc_cfg_data; return (0); } } while (idx < PUC_PCI_BARS); bar = puc_get_bar(sc, cfg->rid); if (bar == NULL) return (ENXIO); value = bus_read_1(bar->b_res, 0x04); if (value == 0) return (ENXIO); sc->sc_cfg_data = value; *res = sc->sc_cfg_data; return (0); case PUC_CFG_GET_OFS: *res = 0x1000 + (port << 9); return (0); case PUC_CFG_GET_TYPE: *res = PUC_TYPE_SERIAL; return (0); default: break; } return (ENXIO); } static int puc_config_sunix(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, intptr_t *res) { int error; switch (cmd) { case PUC_CFG_GET_OFS: error = puc_config(sc, PUC_CFG_GET_TYPE, port, res); if (error != 0) return (error); *res = (*res == PUC_TYPE_SERIAL) ? (port & 3) * 8 : 0; return (0); case PUC_CFG_GET_RID: error = puc_config(sc, PUC_CFG_GET_TYPE, port, res); if (error != 0) return (error); *res = (*res == PUC_TYPE_SERIAL && port <= 3) ? 0x10 : 0x14; return (0); default: break; } return (ENXIO); } static int puc_config_titan(struct puc_softc *sc __unused, enum puc_cfg_cmd cmd, int port, intptr_t *res) { switch (cmd) { case PUC_CFG_GET_OFS: *res = (port < 3) ? 0 : (port - 2) << 3; return (0); case PUC_CFG_GET_RID: *res = 0x14 + ((port >= 2) ? 0x0c : port << 2); return (0); default: break; } return (ENXIO); } Index: projects/clang380-import/sys/dev/rtwn/if_rtwn.c =================================================================== --- projects/clang380-import/sys/dev/rtwn/if_rtwn.c (revision 293686) +++ projects/clang380-import/sys/dev/rtwn/if_rtwn.c (revision 293687) @@ -1,3490 +1,3490 @@ /* $OpenBSD: if_rtwn.c,v 1.6 2015/08/28 00:03:53 deraadt Exp $ */ /*- * Copyright (c) 2010 Damien Bergamini * Copyright (c) 2015 Stefan Sperling * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include __FBSDID("$FreeBSD$"); /* * Driver for Realtek RTL8188CE */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define RTWN_DEBUG #ifdef RTWN_DEBUG #define DPRINTF(x) do { if (sc->sc_debug > 0) printf x; } while (0) #define DPRINTFN(n, x) do { if (sc->sc_debug >= (n)) printf x; } while (0) #else #define DPRINTF(x) #define DPRINTFN(n, x) #endif /* * PCI configuration space registers. */ #define RTWN_PCI_IOBA 0x10 /* i/o mapped base */ #define RTWN_PCI_MMBA 0x18 /* memory mapped base */ #define RTWN_INT_ENABLE (R92C_IMR_ROK | R92C_IMR_VODOK | R92C_IMR_VIDOK | \ R92C_IMR_BEDOK | R92C_IMR_BKDOK | R92C_IMR_MGNTDOK | \ R92C_IMR_HIGHDOK | R92C_IMR_BDOK | R92C_IMR_RDU | \ R92C_IMR_RXFOVW) struct rtwn_ident { uint16_t vendor; uint16_t device; const char *name; }; static const struct rtwn_ident rtwn_ident_table[] = { { 0x10ec, 0x8176, "Realtek RTL8188CE" }, { 0, 0, NULL } }; static void rtwn_dma_map_addr(void *, bus_dma_segment_t *, int, int); static void rtwn_setup_rx_desc(struct rtwn_softc *, struct r92c_rx_desc *, bus_addr_t, size_t, int); static int rtwn_alloc_rx_list(struct rtwn_softc *); static void rtwn_reset_rx_list(struct rtwn_softc *); static void rtwn_free_rx_list(struct rtwn_softc *); static int rtwn_alloc_tx_list(struct rtwn_softc *, int); static void rtwn_reset_tx_list(struct rtwn_softc *, int); static void rtwn_free_tx_list(struct rtwn_softc *, int); static struct ieee80211vap *rtwn_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void rtwn_vap_delete(struct ieee80211vap *); static void rtwn_write_1(struct rtwn_softc *, uint16_t, uint8_t); static void rtwn_write_2(struct rtwn_softc *, uint16_t, uint16_t); static void rtwn_write_4(struct rtwn_softc *, uint16_t, uint32_t); static uint8_t rtwn_read_1(struct rtwn_softc *, uint16_t); static uint16_t rtwn_read_2(struct rtwn_softc *, uint16_t); static uint32_t rtwn_read_4(struct rtwn_softc *, uint16_t); static int rtwn_fw_cmd(struct rtwn_softc *, uint8_t, const void *, int); static void rtwn_rf_write(struct rtwn_softc *, int, uint8_t, uint32_t); static uint32_t rtwn_rf_read(struct rtwn_softc *, int, uint8_t); static int rtwn_llt_write(struct rtwn_softc *, uint32_t, uint32_t); static uint8_t rtwn_efuse_read_1(struct rtwn_softc *, uint16_t); static void rtwn_efuse_read(struct rtwn_softc *); static int rtwn_read_chipid(struct rtwn_softc *); static void rtwn_read_rom(struct rtwn_softc *); static int rtwn_ra_init(struct rtwn_softc *); static void rtwn_tsf_sync_enable(struct rtwn_softc *); static void rtwn_set_led(struct rtwn_softc *, int, int); static void rtwn_calib_to(void *); static int rtwn_newstate(struct ieee80211vap *, enum ieee80211_state, int); static int rtwn_updateedca(struct ieee80211com *); static void rtwn_update_avgrssi(struct rtwn_softc *, int, int8_t); static int8_t rtwn_get_rssi(struct rtwn_softc *, int, void *); static void rtwn_rx_frame(struct rtwn_softc *, struct r92c_rx_desc *, struct rtwn_rx_data *, int); static int rtwn_tx(struct rtwn_softc *, struct mbuf *, struct ieee80211_node *); static void rtwn_tx_done(struct rtwn_softc *, int); static int rtwn_raw_xmit(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static int rtwn_transmit(struct ieee80211com *, struct mbuf *); static void rtwn_parent(struct ieee80211com *); static void rtwn_start(struct rtwn_softc *sc); static void rtwn_watchdog(void *); static int rtwn_power_on(struct rtwn_softc *); static int rtwn_llt_init(struct rtwn_softc *); static void rtwn_fw_reset(struct rtwn_softc *); static void rtwn_fw_loadpage(struct rtwn_softc *, int, const uint8_t *, int); static int rtwn_load_firmware(struct rtwn_softc *); static int rtwn_dma_init(struct rtwn_softc *); static void rtwn_mac_init(struct rtwn_softc *); static void rtwn_bb_init(struct rtwn_softc *); static void rtwn_rf_init(struct rtwn_softc *); static void rtwn_cam_init(struct rtwn_softc *); static void rtwn_pa_bias_init(struct rtwn_softc *); static void rtwn_rxfilter_init(struct rtwn_softc *); static void rtwn_edca_init(struct rtwn_softc *); static void rtwn_write_txpower(struct rtwn_softc *, int, uint16_t[]); static void rtwn_get_txpower(struct rtwn_softc *, int, struct ieee80211_channel *, struct ieee80211_channel *, uint16_t[]); static void rtwn_set_txpower(struct rtwn_softc *, struct ieee80211_channel *, struct ieee80211_channel *); static void rtwn_scan_start(struct ieee80211com *); static void rtwn_scan_end(struct ieee80211com *); static void rtwn_set_channel(struct ieee80211com *); static void rtwn_update_mcast(struct ieee80211com *); static void rtwn_set_chan(struct rtwn_softc *, struct ieee80211_channel *, struct ieee80211_channel *); static int rtwn_iq_calib_chain(struct rtwn_softc *, int, uint16_t[2], uint16_t[2]); static void rtwn_iq_calib_run(struct rtwn_softc *, int, uint16_t[2][2], uint16_t[2][2]); static int rtwn_iq_calib_compare_results(uint16_t[2][2], uint16_t[2][2], uint16_t[2][2], uint16_t[2][2], int); static void rtwn_iq_calib_write_results(struct rtwn_softc *, uint16_t[2], uint16_t[2], int); static void rtwn_iq_calib(struct rtwn_softc *); static void rtwn_lc_calib(struct rtwn_softc *); static void rtwn_temp_calib(struct rtwn_softc *); static void rtwn_init_locked(struct rtwn_softc *); static void rtwn_init(struct rtwn_softc *); static void rtwn_stop_locked(struct rtwn_softc *); static void rtwn_stop(struct rtwn_softc *); static void rtwn_intr(void *); static void rtwn_hw_reset(void *, int); /* Aliases. */ #define rtwn_bb_write rtwn_write_4 #define rtwn_bb_read rtwn_read_4 static int rtwn_probe(device_t); static int rtwn_attach(device_t); static int rtwn_detach(device_t); static int rtwn_shutdown(device_t); static int rtwn_suspend(device_t); static int rtwn_resume(device_t); static device_method_t rtwn_methods[] = { /* Device interface */ DEVMETHOD(device_probe, rtwn_probe), DEVMETHOD(device_attach, rtwn_attach), DEVMETHOD(device_detach, rtwn_detach), DEVMETHOD(device_shutdown, rtwn_shutdown), DEVMETHOD(device_suspend, rtwn_suspend), DEVMETHOD(device_resume, rtwn_resume), DEVMETHOD_END }; static driver_t rtwn_driver = { "rtwn", rtwn_methods, sizeof (struct rtwn_softc) }; static devclass_t rtwn_devclass; DRIVER_MODULE(rtwn, pci, rtwn_driver, rtwn_devclass, NULL, NULL); MODULE_VERSION(rtwn, 1); MODULE_DEPEND(rtwn, pci, 1, 1, 1); MODULE_DEPEND(rtwn, wlan, 1, 1, 1); MODULE_DEPEND(rtwn, firmware, 1, 1, 1); static int rtwn_probe(device_t dev) { const struct rtwn_ident *ident; for (ident = rtwn_ident_table; ident->name != NULL; ident++) { if (pci_get_vendor(dev) == ident->vendor && pci_get_device(dev) == ident->device) { device_set_desc(dev, ident->name); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int rtwn_attach(device_t dev) { struct rtwn_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; uint32_t lcsr; uint8_t bands[howmany(IEEE80211_MODE_MAX, 8)]; int i, count, error, rid; sc->sc_dev = dev; sc->sc_debug = 0; /* * Get the offset of the PCI Express Capability Structure in PCI * Configuration Space. */ error = pci_find_cap(dev, PCIY_EXPRESS, &sc->sc_cap_off); if (error != 0) { device_printf(dev, "PCIe capability structure not found!\n"); return (error); } /* Enable bus-mastering. */ pci_enable_busmaster(dev); rid = PCIR_BAR(2); sc->mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem == NULL) { device_printf(dev, "can't map mem space\n"); return (ENOMEM); } sc->sc_st = rman_get_bustag(sc->mem); sc->sc_sh = rman_get_bushandle(sc->mem); /* Install interrupt handler. */ count = 1; rid = 0; if (pci_alloc_msi(dev, &count) == 0) rid = 1; sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | (rid != 0 ? 0 : RF_SHAREABLE)); if (sc->irq == NULL) { device_printf(dev, "can't map interrupt\n"); return (ENXIO); } RTWN_LOCK_INIT(sc); callout_init_mtx(&sc->calib_to, &sc->sc_mtx, 0); callout_init_mtx(&sc->watchdog_to, &sc->sc_mtx, 0); TASK_INIT(&sc->sc_reinit_task, 0, rtwn_hw_reset, sc); mbufq_init(&sc->sc_snd, ifqmaxlen); error = rtwn_read_chipid(sc); if (error != 0) { device_printf(dev, "unsupported test chip\n"); goto fail; } /* Disable PCIe Active State Power Management (ASPM). */ lcsr = pci_read_config(sc->sc_dev, sc->sc_cap_off + PCIER_LINK_CTL, 4); lcsr &= ~PCIEM_LINK_CTL_ASPMC; pci_write_config(sc->sc_dev, sc->sc_cap_off + PCIER_LINK_CTL, lcsr, 4); /* Allocate Tx/Rx buffers. */ error = rtwn_alloc_rx_list(sc); if (error != 0) { device_printf(dev, "could not allocate Rx buffers\n"); goto fail; } for (i = 0; i < RTWN_NTXQUEUES; i++) { error = rtwn_alloc_tx_list(sc, i); if (error != 0) { device_printf(dev, "could not allocate Tx buffers\n"); goto fail; } } /* Determine number of Tx/Rx chains. */ if (sc->chip & RTWN_CHIP_92C) { sc->ntxchains = (sc->chip & RTWN_CHIP_92C_1T2R) ? 1 : 2; sc->nrxchains = 2; } else { sc->ntxchains = 1; sc->nrxchains = 1; } rtwn_read_rom(sc); device_printf(sc->sc_dev, "MAC/BB RTL%s, RF 6052 %dT%dR\n", (sc->chip & RTWN_CHIP_92C) ? "8192CE" : "8188CE", sc->ntxchains, sc->nrxchains); ic->ic_softc = sc; ic->ic_name = device_get_nameunit(dev); ic->ic_opmode = IEEE80211_M_STA; ic->ic_phytype = IEEE80211_T_OFDM; /* not only, but not used */ /* set device capabilities */ ic->ic_caps = IEEE80211_C_STA /* station mode */ | IEEE80211_C_MONITOR /* monitor mode */ | IEEE80211_C_SHPREAMBLE /* short preamble supported */ | IEEE80211_C_SHSLOT /* short slot time supported */ | IEEE80211_C_WPA /* capable of WPA1+WPA2 */ | IEEE80211_C_BGSCAN /* capable of bg scanning */ | IEEE80211_C_WME /* 802.11e */ ; memset(bands, 0, sizeof(bands)); setbit(bands, IEEE80211_MODE_11B); setbit(bands, IEEE80211_MODE_11G); ieee80211_init_channels(ic, NULL, bands); ieee80211_ifattach(ic); ic->ic_wme.wme_update = rtwn_updateedca; ic->ic_update_mcast = rtwn_update_mcast; ic->ic_scan_start =rtwn_scan_start; ic->ic_scan_end = rtwn_scan_end; ic->ic_set_channel = rtwn_set_channel; ic->ic_raw_xmit = rtwn_raw_xmit; ic->ic_transmit = rtwn_transmit; ic->ic_parent = rtwn_parent; ic->ic_vap_create = rtwn_vap_create; ic->ic_vap_delete = rtwn_vap_delete; ieee80211_radiotap_attach(ic, &sc->sc_txtap.wt_ihdr, sizeof(sc->sc_txtap), RTWN_TX_RADIOTAP_PRESENT, &sc->sc_rxtap.wr_ihdr, sizeof(sc->sc_rxtap), RTWN_RX_RADIOTAP_PRESENT); /* * Hook our interrupt after all initialization is complete. */ error = bus_setup_intr(dev, sc->irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, rtwn_intr, sc, &sc->sc_ih); if (error != 0) { device_printf(dev, "can't establish interrupt, error %d\n", error); goto fail; } if (bootverbose) ieee80211_announce(ic); return (0); fail: rtwn_detach(dev); return (error); } static int rtwn_detach(device_t dev) { struct rtwn_softc *sc = device_get_softc(dev); int i; if (sc->sc_ic.ic_softc != NULL) { ieee80211_draintask(&sc->sc_ic, &sc->sc_reinit_task); rtwn_stop(sc); callout_drain(&sc->calib_to); callout_drain(&sc->watchdog_to); ieee80211_ifdetach(&sc->sc_ic); mbufq_drain(&sc->sc_snd); } /* Uninstall interrupt handler. */ if (sc->irq != NULL) { bus_teardown_intr(dev, sc->irq, sc->sc_ih); bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq), sc->irq); pci_release_msi(dev); } /* Free Tx/Rx buffers. */ for (i = 0; i < RTWN_NTXQUEUES; i++) rtwn_free_tx_list(sc, i); rtwn_free_rx_list(sc); if (sc->mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->mem), sc->mem); RTWN_LOCK_DESTROY(sc); return (0); } static int rtwn_shutdown(device_t dev) { return (0); } static int rtwn_suspend(device_t dev) { return (0); } static int rtwn_resume(device_t dev) { return (0); } static void rtwn_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { if (error != 0) return; KASSERT(nsegs == 1, ("too many DMA segments, %d should be 1", nsegs)); *(bus_addr_t *)arg = segs[0].ds_addr; } static void rtwn_setup_rx_desc(struct rtwn_softc *sc, struct r92c_rx_desc *desc, bus_addr_t addr, size_t len, int idx) { memset(desc, 0, sizeof(*desc)); desc->rxdw0 = htole32(SM(R92C_RXDW0_PKTLEN, len) | ((idx == RTWN_RX_LIST_COUNT - 1) ? R92C_RXDW0_EOR : 0)); desc->rxbufaddr = htole32(addr); bus_space_barrier(sc->sc_st, sc->sc_sh, 0, sc->sc_mapsize, BUS_SPACE_BARRIER_WRITE); desc->rxdw0 |= htole32(R92C_RXDW0_OWN); } static int rtwn_alloc_rx_list(struct rtwn_softc *sc) { struct rtwn_rx_ring *rx_ring = &sc->rx_ring; struct rtwn_rx_data *rx_data; bus_size_t size; int i, error; /* Allocate Rx descriptors. */ size = sizeof(struct r92c_rx_desc) * RTWN_RX_LIST_COUNT; error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size, 0, NULL, NULL, &rx_ring->desc_dmat); if (error != 0) { device_printf(sc->sc_dev, "could not create rx desc DMA tag\n"); goto fail; } error = bus_dmamem_alloc(rx_ring->desc_dmat, (void **)&rx_ring->desc, BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, &rx_ring->desc_map); if (error != 0) { device_printf(sc->sc_dev, "could not allocate rx desc\n"); goto fail; } error = bus_dmamap_load(rx_ring->desc_dmat, rx_ring->desc_map, rx_ring->desc, size, rtwn_dma_map_addr, &rx_ring->paddr, 0); if (error != 0) { device_printf(sc->sc_dev, "could not load rx desc DMA map\n"); goto fail; } bus_dmamap_sync(rx_ring->desc_dmat, rx_ring->desc_map, BUS_DMASYNC_PREWRITE); /* Create RX buffer DMA tag. */ error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &rx_ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "could not create rx buf DMA tag\n"); goto fail; } /* Allocate Rx buffers. */ for (i = 0; i < RTWN_RX_LIST_COUNT; i++) { rx_data = &rx_ring->rx_data[i]; error = bus_dmamap_create(rx_ring->data_dmat, 0, &rx_data->map); if (error != 0) { device_printf(sc->sc_dev, "could not create rx buf DMA map\n"); goto fail; } rx_data->m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (rx_data->m == NULL) { device_printf(sc->sc_dev, "could not allocate rx mbuf\n"); error = ENOMEM; goto fail; } error = bus_dmamap_load(rx_ring->data_dmat, rx_data->map, mtod(rx_data->m, void *), MCLBYTES, rtwn_dma_map_addr, &rx_data->paddr, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "could not load rx buf DMA map"); goto fail; } rtwn_setup_rx_desc(sc, &rx_ring->desc[i], rx_data->paddr, MCLBYTES, i); } return (0); fail: rtwn_free_rx_list(sc); return (error); } static void rtwn_reset_rx_list(struct rtwn_softc *sc) { struct rtwn_rx_ring *rx_ring = &sc->rx_ring; struct rtwn_rx_data *rx_data; int i; for (i = 0; i < RTWN_RX_LIST_COUNT; i++) { rx_data = &rx_ring->rx_data[i]; rtwn_setup_rx_desc(sc, &rx_ring->desc[i], rx_data->paddr, MCLBYTES, i); } } static void rtwn_free_rx_list(struct rtwn_softc *sc) { struct rtwn_rx_ring *rx_ring = &sc->rx_ring; struct rtwn_rx_data *rx_data; int i; if (rx_ring->desc_dmat != NULL) { if (rx_ring->desc != NULL) { bus_dmamap_unload(rx_ring->desc_dmat, rx_ring->desc_map); bus_dmamem_free(rx_ring->desc_dmat, rx_ring->desc, rx_ring->desc_map); rx_ring->desc = NULL; } bus_dma_tag_destroy(rx_ring->desc_dmat); rx_ring->desc_dmat = NULL; } for (i = 0; i < RTWN_RX_LIST_COUNT; i++) { rx_data = &rx_ring->rx_data[i]; if (rx_data->m != NULL) { bus_dmamap_unload(rx_ring->data_dmat, rx_data->map); m_freem(rx_data->m); rx_data->m = NULL; } bus_dmamap_destroy(rx_ring->data_dmat, rx_data->map); rx_data->map = NULL; } if (rx_ring->data_dmat != NULL) { bus_dma_tag_destroy(rx_ring->data_dmat); rx_ring->data_dmat = NULL; } } static int rtwn_alloc_tx_list(struct rtwn_softc *sc, int qid) { struct rtwn_tx_ring *tx_ring = &sc->tx_ring[qid]; struct rtwn_tx_data *tx_data; bus_size_t size; int i, error; size = sizeof(struct r92c_tx_desc) * RTWN_TX_LIST_COUNT; error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), PAGE_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size, 0, NULL, NULL, &tx_ring->desc_dmat); if (error != 0) { device_printf(sc->sc_dev, "could not create tx ring DMA tag\n"); goto fail; } error = bus_dmamem_alloc(tx_ring->desc_dmat, (void **)&tx_ring->desc, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &tx_ring->desc_map); if (error != 0) { device_printf(sc->sc_dev, "can't map tx ring DMA memory\n"); goto fail; } error = bus_dmamap_load(tx_ring->desc_dmat, tx_ring->desc_map, tx_ring->desc, size, rtwn_dma_map_addr, &tx_ring->paddr, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "could not load desc DMA map\n"); goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &tx_ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "could not create tx buf DMA tag\n"); goto fail; } for (i = 0; i < RTWN_TX_LIST_COUNT; i++) { struct r92c_tx_desc *desc = &tx_ring->desc[i]; /* setup tx desc */ desc->nextdescaddr = htole32(tx_ring->paddr + + sizeof(struct r92c_tx_desc) * ((i + 1) % RTWN_TX_LIST_COUNT)); tx_data = &tx_ring->tx_data[i]; error = bus_dmamap_create(tx_ring->data_dmat, 0, &tx_data->map); if (error != 0) { device_printf(sc->sc_dev, "could not create tx buf DMA map\n"); goto fail; } tx_data->m = NULL; tx_data->ni = NULL; } return (0); fail: rtwn_free_tx_list(sc, qid); return (error); } static void rtwn_reset_tx_list(struct rtwn_softc *sc, int qid) { struct rtwn_tx_ring *tx_ring = &sc->tx_ring[qid]; int i; for (i = 0; i < RTWN_TX_LIST_COUNT; i++) { struct r92c_tx_desc *desc = &tx_ring->desc[i]; struct rtwn_tx_data *tx_data = &tx_ring->tx_data[i]; memset(desc, 0, sizeof(*desc) - (sizeof(desc->reserved) + sizeof(desc->nextdescaddr64) + sizeof(desc->nextdescaddr))); if (tx_data->m != NULL) { bus_dmamap_unload(tx_ring->data_dmat, tx_data->map); m_freem(tx_data->m); tx_data->m = NULL; } if (tx_data->ni != NULL) { ieee80211_free_node(tx_data->ni); tx_data->ni = NULL; } } bus_dmamap_sync(tx_ring->desc_dmat, tx_ring->desc_map, BUS_DMASYNC_POSTWRITE); sc->qfullmsk &= ~(1 << qid); tx_ring->queued = 0; tx_ring->cur = 0; } static void rtwn_free_tx_list(struct rtwn_softc *sc, int qid) { struct rtwn_tx_ring *tx_ring = &sc->tx_ring[qid]; struct rtwn_tx_data *tx_data; int i; if (tx_ring->desc_dmat != NULL) { if (tx_ring->desc != NULL) { bus_dmamap_unload(tx_ring->desc_dmat, tx_ring->desc_map); bus_dmamem_free(tx_ring->desc_dmat, tx_ring->desc, tx_ring->desc_map); } bus_dma_tag_destroy(tx_ring->desc_dmat); } for (i = 0; i < RTWN_TX_LIST_COUNT; i++) { tx_data = &tx_ring->tx_data[i]; if (tx_data->m != NULL) { bus_dmamap_unload(tx_ring->data_dmat, tx_data->map); m_freem(tx_data->m); tx_data->m = NULL; } } if (tx_ring->data_dmat != NULL) { bus_dma_tag_destroy(tx_ring->data_dmat); tx_ring->data_dmat = NULL; } sc->qfullmsk &= ~(1 << qid); tx_ring->queued = 0; tx_ring->cur = 0; } static struct ieee80211vap * rtwn_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct rtwn_vap *rvp; struct ieee80211vap *vap; if (!TAILQ_EMPTY(&ic->ic_vaps)) return (NULL); rvp = malloc(sizeof(struct rtwn_vap), M_80211_VAP, M_WAITOK | M_ZERO); vap = &rvp->vap; if (ieee80211_vap_setup(ic, vap, name, unit, opmode, flags | IEEE80211_CLONE_NOBEACONS, bssid) != 0) { /* out of memory */ free(rvp, M_80211_VAP); return (NULL); } /* Override state transition machine. */ rvp->newstate = vap->iv_newstate; vap->iv_newstate = rtwn_newstate; /* Complete setup. */ ieee80211_vap_attach(vap, ieee80211_media_change, ieee80211_media_status, mac); ic->ic_opmode = opmode; return (vap); } static void rtwn_vap_delete(struct ieee80211vap *vap) { struct rtwn_vap *rvp = RTWN_VAP(vap); ieee80211_vap_detach(vap); free(rvp, M_80211_VAP); } static void rtwn_write_1(struct rtwn_softc *sc, uint16_t addr, uint8_t val) { bus_space_write_1(sc->sc_st, sc->sc_sh, addr, val); } static void rtwn_write_2(struct rtwn_softc *sc, uint16_t addr, uint16_t val) { val = htole16(val); bus_space_write_2(sc->sc_st, sc->sc_sh, addr, val); } static void rtwn_write_4(struct rtwn_softc *sc, uint16_t addr, uint32_t val) { val = htole32(val); bus_space_write_4(sc->sc_st, sc->sc_sh, addr, val); } static uint8_t rtwn_read_1(struct rtwn_softc *sc, uint16_t addr) { return (bus_space_read_1(sc->sc_st, sc->sc_sh, addr)); } static uint16_t rtwn_read_2(struct rtwn_softc *sc, uint16_t addr) { return (bus_space_read_2(sc->sc_st, sc->sc_sh, addr)); } static uint32_t rtwn_read_4(struct rtwn_softc *sc, uint16_t addr) { return (bus_space_read_4(sc->sc_st, sc->sc_sh, addr)); } static int rtwn_fw_cmd(struct rtwn_softc *sc, uint8_t id, const void *buf, int len) { struct r92c_fw_cmd cmd; int ntries; /* Wait for current FW box to be empty. */ for (ntries = 0; ntries < 100; ntries++) { if (!(rtwn_read_1(sc, R92C_HMETFR) & (1 << sc->fwcur))) break; DELAY(1); } if (ntries == 100) { device_printf(sc->sc_dev, "could not send firmware command %d\n", id); return (ETIMEDOUT); } memset(&cmd, 0, sizeof(cmd)); cmd.id = id; if (len > 3) cmd.id |= R92C_CMD_FLAG_EXT; KASSERT(len <= sizeof(cmd.msg), ("rtwn_fw_cmd\n")); memcpy(cmd.msg, buf, len); /* Write the first word last since that will trigger the FW. */ rtwn_write_2(sc, R92C_HMEBOX_EXT(sc->fwcur), *((uint8_t *)&cmd + 4)); rtwn_write_4(sc, R92C_HMEBOX(sc->fwcur), *((uint8_t *)&cmd + 0)); sc->fwcur = (sc->fwcur + 1) % R92C_H2C_NBOX; /* Give firmware some time for processing. */ DELAY(2000); return (0); } static void rtwn_rf_write(struct rtwn_softc *sc, int chain, uint8_t addr, uint32_t val) { rtwn_bb_write(sc, R92C_LSSI_PARAM(chain), SM(R92C_LSSI_PARAM_ADDR, addr) | SM(R92C_LSSI_PARAM_DATA, val)); } static uint32_t rtwn_rf_read(struct rtwn_softc *sc, int chain, uint8_t addr) { uint32_t reg[R92C_MAX_CHAINS], val; reg[0] = rtwn_bb_read(sc, R92C_HSSI_PARAM2(0)); if (chain != 0) reg[chain] = rtwn_bb_read(sc, R92C_HSSI_PARAM2(chain)); rtwn_bb_write(sc, R92C_HSSI_PARAM2(0), reg[0] & ~R92C_HSSI_PARAM2_READ_EDGE); DELAY(1000); rtwn_bb_write(sc, R92C_HSSI_PARAM2(chain), RW(reg[chain], R92C_HSSI_PARAM2_READ_ADDR, addr) | R92C_HSSI_PARAM2_READ_EDGE); DELAY(1000); rtwn_bb_write(sc, R92C_HSSI_PARAM2(0), reg[0] | R92C_HSSI_PARAM2_READ_EDGE); DELAY(1000); if (rtwn_bb_read(sc, R92C_HSSI_PARAM1(chain)) & R92C_HSSI_PARAM1_PI) val = rtwn_bb_read(sc, R92C_HSPI_READBACK(chain)); else val = rtwn_bb_read(sc, R92C_LSSI_READBACK(chain)); return (MS(val, R92C_LSSI_READBACK_DATA)); } static int rtwn_llt_write(struct rtwn_softc *sc, uint32_t addr, uint32_t data) { int ntries; rtwn_write_4(sc, R92C_LLT_INIT, SM(R92C_LLT_INIT_OP, R92C_LLT_INIT_OP_WRITE) | SM(R92C_LLT_INIT_ADDR, addr) | SM(R92C_LLT_INIT_DATA, data)); /* Wait for write operation to complete. */ for (ntries = 0; ntries < 20; ntries++) { if (MS(rtwn_read_4(sc, R92C_LLT_INIT), R92C_LLT_INIT_OP) == R92C_LLT_INIT_OP_NO_ACTIVE) return (0); DELAY(5); } return (ETIMEDOUT); } static uint8_t rtwn_efuse_read_1(struct rtwn_softc *sc, uint16_t addr) { uint32_t reg; int ntries; reg = rtwn_read_4(sc, R92C_EFUSE_CTRL); reg = RW(reg, R92C_EFUSE_CTRL_ADDR, addr); reg &= ~R92C_EFUSE_CTRL_VALID; rtwn_write_4(sc, R92C_EFUSE_CTRL, reg); /* Wait for read operation to complete. */ for (ntries = 0; ntries < 100; ntries++) { reg = rtwn_read_4(sc, R92C_EFUSE_CTRL); if (reg & R92C_EFUSE_CTRL_VALID) return (MS(reg, R92C_EFUSE_CTRL_DATA)); DELAY(5); } device_printf(sc->sc_dev, "could not read efuse byte at address 0x%x\n", addr); return (0xff); } static void rtwn_efuse_read(struct rtwn_softc *sc) { uint8_t *rom = (uint8_t *)&sc->rom; uint16_t addr = 0; uint32_t reg; uint8_t off, msk; int i; reg = rtwn_read_2(sc, R92C_SYS_ISO_CTRL); if (!(reg & R92C_SYS_ISO_CTRL_PWC_EV12V)) { rtwn_write_2(sc, R92C_SYS_ISO_CTRL, reg | R92C_SYS_ISO_CTRL_PWC_EV12V); } reg = rtwn_read_2(sc, R92C_SYS_FUNC_EN); if (!(reg & R92C_SYS_FUNC_EN_ELDR)) { rtwn_write_2(sc, R92C_SYS_FUNC_EN, reg | R92C_SYS_FUNC_EN_ELDR); } reg = rtwn_read_2(sc, R92C_SYS_CLKR); if ((reg & (R92C_SYS_CLKR_LOADER_EN | R92C_SYS_CLKR_ANA8M)) != (R92C_SYS_CLKR_LOADER_EN | R92C_SYS_CLKR_ANA8M)) { rtwn_write_2(sc, R92C_SYS_CLKR, reg | R92C_SYS_CLKR_LOADER_EN | R92C_SYS_CLKR_ANA8M); } memset(&sc->rom, 0xff, sizeof(sc->rom)); while (addr < 512) { reg = rtwn_efuse_read_1(sc, addr); if (reg == 0xff) break; addr++; off = reg >> 4; msk = reg & 0xf; for (i = 0; i < 4; i++) { if (msk & (1 << i)) continue; rom[off * 8 + i * 2 + 0] = rtwn_efuse_read_1(sc, addr); addr++; rom[off * 8 + i * 2 + 1] = rtwn_efuse_read_1(sc, addr); addr++; } } #ifdef RTWN_DEBUG if (sc->sc_debug >= 2) { /* Dump ROM content. */ printf("\n"); for (i = 0; i < sizeof(sc->rom); i++) printf("%02x:", rom[i]); printf("\n"); } #endif } static int rtwn_read_chipid(struct rtwn_softc *sc) { uint32_t reg; reg = rtwn_read_4(sc, R92C_SYS_CFG); if (reg & R92C_SYS_CFG_TRP_VAUX_EN) /* Unsupported test chip. */ return (EIO); if (reg & R92C_SYS_CFG_TYPE_92C) { sc->chip |= RTWN_CHIP_92C; /* Check if it is a castrated 8192C. */ if (MS(rtwn_read_4(sc, R92C_HPON_FSM), R92C_HPON_FSM_CHIP_BONDING_ID) == R92C_HPON_FSM_CHIP_BONDING_ID_92C_1T2R) sc->chip |= RTWN_CHIP_92C_1T2R; } if (reg & R92C_SYS_CFG_VENDOR_UMC) { sc->chip |= RTWN_CHIP_UMC; if (MS(reg, R92C_SYS_CFG_CHIP_VER_RTL) == 0) sc->chip |= RTWN_CHIP_UMC_A_CUT; } return (0); } static void rtwn_read_rom(struct rtwn_softc *sc) { struct r92c_rom *rom = &sc->rom; /* Read full ROM image. */ rtwn_efuse_read(sc); if (rom->id != 0x8129) device_printf(sc->sc_dev, "invalid EEPROM ID 0x%x\n", rom->id); /* XXX Weird but this is what the vendor driver does. */ sc->pa_setting = rtwn_efuse_read_1(sc, 0x1fa); DPRINTF(("PA setting=0x%x\n", sc->pa_setting)); sc->board_type = MS(rom->rf_opt1, R92C_ROM_RF1_BOARD_TYPE); sc->regulatory = MS(rom->rf_opt1, R92C_ROM_RF1_REGULATORY); DPRINTF(("regulatory type=%d\n", sc->regulatory)); IEEE80211_ADDR_COPY(sc->sc_ic.ic_macaddr, rom->macaddr); } /* * Initialize rate adaptation in firmware. */ static int rtwn_ra_init(struct rtwn_softc *sc) { static const uint8_t map[] = { 2, 4, 11, 22, 12, 18, 24, 36, 48, 72, 96, 108 }; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct ieee80211_node *ni = ieee80211_ref_node(vap->iv_bss); struct ieee80211_rateset *rs = &ni->ni_rates; struct r92c_fw_cmd_macid_cfg cmd; uint32_t rates, basicrates; uint8_t mode; int maxrate, maxbasicrate, error, i, j; /* Get normal and basic rates mask. */ rates = basicrates = 0; maxrate = maxbasicrate = 0; for (i = 0; i < rs->rs_nrates; i++) { /* Convert 802.11 rate to HW rate index. */ for (j = 0; j < nitems(map); j++) if ((rs->rs_rates[i] & IEEE80211_RATE_VAL) == map[j]) break; if (j == nitems(map)) /* Unknown rate, skip. */ continue; rates |= 1 << j; if (j > maxrate) maxrate = j; if (rs->rs_rates[i] & IEEE80211_RATE_BASIC) { basicrates |= 1 << j; if (j > maxbasicrate) maxbasicrate = j; } } if (ic->ic_curmode == IEEE80211_MODE_11B) mode = R92C_RAID_11B; else mode = R92C_RAID_11BG; DPRINTF(("mode=0x%x rates=0x%08x, basicrates=0x%08x\n", mode, rates, basicrates)); /* Set rates mask for group addressed frames. */ cmd.macid = RTWN_MACID_BC | RTWN_MACID_VALID; cmd.mask = htole32(mode << 28 | basicrates); error = rtwn_fw_cmd(sc, R92C_CMD_MACID_CONFIG, &cmd, sizeof(cmd)); if (error != 0) { device_printf(sc->sc_dev, "could not add broadcast station\n"); return (error); } /* Set initial MRR rate. */ DPRINTF(("maxbasicrate=%d\n", maxbasicrate)); rtwn_write_1(sc, R92C_INIDATA_RATE_SEL(RTWN_MACID_BC), maxbasicrate); /* Set rates mask for unicast frames. */ cmd.macid = RTWN_MACID_BSS | RTWN_MACID_VALID; cmd.mask = htole32(mode << 28 | rates); error = rtwn_fw_cmd(sc, R92C_CMD_MACID_CONFIG, &cmd, sizeof(cmd)); if (error != 0) { device_printf(sc->sc_dev, "could not add BSS station\n"); return (error); } /* Set initial MRR rate. */ DPRINTF(("maxrate=%d\n", maxrate)); rtwn_write_1(sc, R92C_INIDATA_RATE_SEL(RTWN_MACID_BSS), maxrate); /* Configure Automatic Rate Fallback Register. */ if (ic->ic_curmode == IEEE80211_MODE_11B) { if (rates & 0x0c) rtwn_write_4(sc, R92C_ARFR(0), htole32(rates & 0x0d)); else rtwn_write_4(sc, R92C_ARFR(0), htole32(rates & 0x0f)); } else rtwn_write_4(sc, R92C_ARFR(0), htole32(rates & 0x0ff5)); /* Indicate highest supported rate. */ ni->ni_txrate = rs->rs_rates[rs->rs_nrates - 1]; return (0); } static void rtwn_tsf_sync_enable(struct rtwn_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct ieee80211_node *ni = vap->iv_bss; uint64_t tsf; /* Enable TSF synchronization. */ rtwn_write_1(sc, R92C_BCN_CTRL, rtwn_read_1(sc, R92C_BCN_CTRL) & ~R92C_BCN_CTRL_DIS_TSF_UDT0); rtwn_write_1(sc, R92C_BCN_CTRL, rtwn_read_1(sc, R92C_BCN_CTRL) & ~R92C_BCN_CTRL_EN_BCN); /* Set initial TSF. */ memcpy(&tsf, ni->ni_tstamp.data, 8); tsf = le64toh(tsf); tsf = tsf - (tsf % (vap->iv_bss->ni_intval * IEEE80211_DUR_TU)); tsf -= IEEE80211_DUR_TU; rtwn_write_4(sc, R92C_TSFTR + 0, tsf); rtwn_write_4(sc, R92C_TSFTR + 4, tsf >> 32); rtwn_write_1(sc, R92C_BCN_CTRL, rtwn_read_1(sc, R92C_BCN_CTRL) | R92C_BCN_CTRL_EN_BCN); } static void rtwn_set_led(struct rtwn_softc *sc, int led, int on) { uint8_t reg; if (led == RTWN_LED_LINK) { reg = rtwn_read_1(sc, R92C_LEDCFG2) & 0xf0; if (!on) reg |= R92C_LEDCFG2_DIS; else reg |= R92C_LEDCFG2_EN; rtwn_write_1(sc, R92C_LEDCFG2, reg); sc->ledlink = on; /* Save LED state. */ } } static void rtwn_calib_to(void *arg) { struct rtwn_softc *sc = arg; struct r92c_fw_cmd_rssi cmd; if (sc->avg_pwdb != -1) { /* Indicate Rx signal strength to FW for rate adaptation. */ memset(&cmd, 0, sizeof(cmd)); cmd.macid = 0; /* BSS. */ cmd.pwdb = sc->avg_pwdb; DPRINTFN(3, ("sending RSSI command avg=%d\n", sc->avg_pwdb)); rtwn_fw_cmd(sc, R92C_CMD_RSSI_SETTING, &cmd, sizeof(cmd)); } /* Do temperature compensation. */ rtwn_temp_calib(sc); callout_reset(&sc->calib_to, hz * 2, rtwn_calib_to, sc); } static int rtwn_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct rtwn_vap *rvp = RTWN_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct ieee80211_node *ni = vap->iv_bss; struct rtwn_softc *sc = ic->ic_softc; uint32_t reg; IEEE80211_UNLOCK(ic); RTWN_LOCK(sc); if (vap->iv_state == IEEE80211_S_RUN) { /* Stop calibration. */ callout_stop(&sc->calib_to); /* Turn link LED off. */ rtwn_set_led(sc, RTWN_LED_LINK, 0); /* Set media status to 'No Link'. */ reg = rtwn_read_4(sc, R92C_CR); reg = RW(reg, R92C_CR_NETTYPE, R92C_CR_NETTYPE_NOLINK); rtwn_write_4(sc, R92C_CR, reg); /* Stop Rx of data frames. */ rtwn_write_2(sc, R92C_RXFLTMAP2, 0); /* Rest TSF. */ rtwn_write_1(sc, R92C_DUAL_TSF_RST, 0x03); /* Disable TSF synchronization. */ rtwn_write_1(sc, R92C_BCN_CTRL, rtwn_read_1(sc, R92C_BCN_CTRL) | R92C_BCN_CTRL_DIS_TSF_UDT0); /* Reset EDCA parameters. */ rtwn_write_4(sc, R92C_EDCA_VO_PARAM, 0x002f3217); rtwn_write_4(sc, R92C_EDCA_VI_PARAM, 0x005e4317); rtwn_write_4(sc, R92C_EDCA_BE_PARAM, 0x00105320); rtwn_write_4(sc, R92C_EDCA_BK_PARAM, 0x0000a444); } switch (nstate) { case IEEE80211_S_INIT: /* Turn link LED off. */ rtwn_set_led(sc, RTWN_LED_LINK, 0); break; case IEEE80211_S_SCAN: if (vap->iv_state != IEEE80211_S_SCAN) { /* Allow Rx from any BSSID. */ rtwn_write_4(sc, R92C_RCR, rtwn_read_4(sc, R92C_RCR) & ~(R92C_RCR_CBSSID_DATA | R92C_RCR_CBSSID_BCN)); /* Set gain for scanning. */ reg = rtwn_bb_read(sc, R92C_OFDM0_AGCCORE1(0)); reg = RW(reg, R92C_OFDM0_AGCCORE1_GAIN, 0x20); rtwn_bb_write(sc, R92C_OFDM0_AGCCORE1(0), reg); reg = rtwn_bb_read(sc, R92C_OFDM0_AGCCORE1(1)); reg = RW(reg, R92C_OFDM0_AGCCORE1_GAIN, 0x20); rtwn_bb_write(sc, R92C_OFDM0_AGCCORE1(1), reg); } /* Make link LED blink during scan. */ rtwn_set_led(sc, RTWN_LED_LINK, !sc->ledlink); /* Pause AC Tx queues. */ rtwn_write_1(sc, R92C_TXPAUSE, rtwn_read_1(sc, R92C_TXPAUSE) | 0x0f); break; case IEEE80211_S_AUTH: /* Set initial gain under link. */ reg = rtwn_bb_read(sc, R92C_OFDM0_AGCCORE1(0)); reg = RW(reg, R92C_OFDM0_AGCCORE1_GAIN, 0x32); rtwn_bb_write(sc, R92C_OFDM0_AGCCORE1(0), reg); reg = rtwn_bb_read(sc, R92C_OFDM0_AGCCORE1(1)); reg = RW(reg, R92C_OFDM0_AGCCORE1_GAIN, 0x32); rtwn_bb_write(sc, R92C_OFDM0_AGCCORE1(1), reg); rtwn_set_chan(sc, ic->ic_curchan, NULL); break; case IEEE80211_S_RUN: if (ic->ic_opmode == IEEE80211_M_MONITOR) { /* Enable Rx of data frames. */ rtwn_write_2(sc, R92C_RXFLTMAP2, 0xffff); /* Turn link LED on. */ rtwn_set_led(sc, RTWN_LED_LINK, 1); break; } /* Set media status to 'Associated'. */ reg = rtwn_read_4(sc, R92C_CR); reg = RW(reg, R92C_CR_NETTYPE, R92C_CR_NETTYPE_INFRA); rtwn_write_4(sc, R92C_CR, reg); /* Set BSSID. */ rtwn_write_4(sc, R92C_BSSID + 0, LE_READ_4(&ni->ni_bssid[0])); rtwn_write_4(sc, R92C_BSSID + 4, LE_READ_2(&ni->ni_bssid[4])); if (ic->ic_curmode == IEEE80211_MODE_11B) rtwn_write_1(sc, R92C_INIRTS_RATE_SEL, 0); else /* 802.11b/g */ rtwn_write_1(sc, R92C_INIRTS_RATE_SEL, 3); /* Enable Rx of data frames. */ rtwn_write_2(sc, R92C_RXFLTMAP2, 0xffff); /* Flush all AC queues. */ rtwn_write_1(sc, R92C_TXPAUSE, 0); /* Set beacon interval. */ rtwn_write_2(sc, R92C_BCN_INTERVAL, ni->ni_intval); /* Allow Rx from our BSSID only. */ rtwn_write_4(sc, R92C_RCR, rtwn_read_4(sc, R92C_RCR) | R92C_RCR_CBSSID_DATA | R92C_RCR_CBSSID_BCN); /* Enable TSF synchronization. */ rtwn_tsf_sync_enable(sc); rtwn_write_1(sc, R92C_SIFS_CCK + 1, 10); rtwn_write_1(sc, R92C_SIFS_OFDM + 1, 10); rtwn_write_1(sc, R92C_SPEC_SIFS + 1, 10); rtwn_write_1(sc, R92C_MAC_SPEC_SIFS + 1, 10); rtwn_write_1(sc, R92C_R2T_SIFS + 1, 10); rtwn_write_1(sc, R92C_T2T_SIFS + 1, 10); /* Intialize rate adaptation. */ rtwn_ra_init(sc); /* Turn link LED on. */ rtwn_set_led(sc, RTWN_LED_LINK, 1); sc->avg_pwdb = -1; /* Reset average RSSI. */ /* Reset temperature calibration state machine. */ sc->thcal_state = 0; sc->thcal_lctemp = 0; /* Start periodic calibration. */ callout_reset(&sc->calib_to, hz * 2, rtwn_calib_to, sc); break; default: break; } RTWN_UNLOCK(sc); IEEE80211_LOCK(ic); return (rvp->newstate(vap, nstate, arg)); } static int rtwn_updateedca(struct ieee80211com *ic) { struct rtwn_softc *sc = ic->ic_softc; const uint16_t aci2reg[WME_NUM_AC] = { R92C_EDCA_BE_PARAM, R92C_EDCA_BK_PARAM, R92C_EDCA_VI_PARAM, R92C_EDCA_VO_PARAM }; int aci, aifs, slottime; IEEE80211_LOCK(ic); slottime = (ic->ic_flags & IEEE80211_F_SHSLOT) ? 9 : 20; for (aci = 0; aci < WME_NUM_AC; aci++) { const struct wmeParams *ac = &ic->ic_wme.wme_chanParams.cap_wmeParams[aci]; /* AIFS[AC] = AIFSN[AC] * aSlotTime + aSIFSTime. */ aifs = ac->wmep_aifsn * slottime + 10; rtwn_write_4(sc, aci2reg[aci], SM(R92C_EDCA_PARAM_TXOP, ac->wmep_txopLimit) | SM(R92C_EDCA_PARAM_ECWMIN, ac->wmep_logcwmin) | SM(R92C_EDCA_PARAM_ECWMAX, ac->wmep_logcwmax) | SM(R92C_EDCA_PARAM_AIFS, aifs)); } IEEE80211_UNLOCK(ic); return (0); } static void rtwn_update_avgrssi(struct rtwn_softc *sc, int rate, int8_t rssi) { int pwdb; /* Convert antenna signal to percentage. */ if (rssi <= -100 || rssi >= 20) pwdb = 0; else if (rssi >= 0) pwdb = 100; else pwdb = 100 + rssi; if (rate <= 3) { /* CCK gain is smaller than OFDM/MCS gain. */ pwdb += 6; if (pwdb > 100) pwdb = 100; if (pwdb <= 14) pwdb -= 4; else if (pwdb <= 26) pwdb -= 8; else if (pwdb <= 34) pwdb -= 6; else if (pwdb <= 42) pwdb -= 2; } if (sc->avg_pwdb == -1) /* Init. */ sc->avg_pwdb = pwdb; else if (sc->avg_pwdb < pwdb) sc->avg_pwdb = ((sc->avg_pwdb * 19 + pwdb) / 20) + 1; else sc->avg_pwdb = ((sc->avg_pwdb * 19 + pwdb) / 20); DPRINTFN(4, ("PWDB=%d EMA=%d\n", pwdb, sc->avg_pwdb)); } static int8_t rtwn_get_rssi(struct rtwn_softc *sc, int rate, void *physt) { static const int8_t cckoff[] = { 16, -12, -26, -46 }; struct r92c_rx_phystat *phy; struct r92c_rx_cck *cck; uint8_t rpt; int8_t rssi; if (rate <= 3) { cck = (struct r92c_rx_cck *)physt; if (sc->sc_flags & RTWN_FLAG_CCK_HIPWR) { rpt = (cck->agc_rpt >> 5) & 0x3; rssi = (cck->agc_rpt & 0x1f) << 1; } else { rpt = (cck->agc_rpt >> 6) & 0x3; rssi = cck->agc_rpt & 0x3e; } rssi = cckoff[rpt] - rssi; } else { /* OFDM/HT. */ phy = (struct r92c_rx_phystat *)physt; rssi = ((le32toh(phy->phydw1) >> 1) & 0x7f) - 110; } return (rssi); } static void rtwn_rx_frame(struct rtwn_softc *sc, struct r92c_rx_desc *rx_desc, struct rtwn_rx_data *rx_data, int desc_idx) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_frame *wh; struct ieee80211_node *ni; struct r92c_rx_phystat *phy = NULL; uint32_t rxdw0, rxdw3; struct mbuf *m, *m1; bus_dma_segment_t segs[1]; bus_addr_t physaddr; uint8_t rate; int8_t rssi = 0, nf; int infosz, nsegs, pktlen, shift, error; rxdw0 = le32toh(rx_desc->rxdw0); rxdw3 = le32toh(rx_desc->rxdw3); if (__predict_false(rxdw0 & (R92C_RXDW0_CRCERR | R92C_RXDW0_ICVERR))) { /* * This should not happen since we setup our Rx filter * to not receive these frames. */ counter_u64_add(ic->ic_ierrors, 1); return; } pktlen = MS(rxdw0, R92C_RXDW0_PKTLEN); if (__predict_false(pktlen < sizeof(*wh) || pktlen > MCLBYTES)) { counter_u64_add(ic->ic_ierrors, 1); return; } rate = MS(rxdw3, R92C_RXDW3_RATE); infosz = MS(rxdw0, R92C_RXDW0_INFOSZ) * 8; if (infosz > sizeof(struct r92c_rx_phystat)) infosz = sizeof(struct r92c_rx_phystat); shift = MS(rxdw0, R92C_RXDW0_SHIFT); /* Get RSSI from PHY status descriptor if present. */ if (infosz != 0 && (rxdw0 & R92C_RXDW0_PHYST)) { phy = mtod(rx_data->m, struct r92c_rx_phystat *); rssi = rtwn_get_rssi(sc, rate, phy); /* Update our average RSSI. */ rtwn_update_avgrssi(sc, rate, rssi); } DPRINTFN(5, ("Rx frame len=%d rate=%d infosz=%d shift=%d rssi=%d\n", pktlen, rate, infosz, shift, rssi)); m1 = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m1 == NULL) { counter_u64_add(ic->ic_ierrors, 1); return; } bus_dmamap_unload(sc->rx_ring.data_dmat, rx_data->map); error = bus_dmamap_load(sc->rx_ring.data_dmat, rx_data->map, mtod(m1, void *), MCLBYTES, rtwn_dma_map_addr, &physaddr, 0); if (error != 0) { m_freem(m1); if (bus_dmamap_load_mbuf_sg(sc->rx_ring.data_dmat, rx_data->map, rx_data->m, segs, &nsegs, 0)) panic("%s: could not load old RX mbuf", device_get_name(sc->sc_dev)); /* Physical address may have changed. */ rtwn_setup_rx_desc(sc, rx_desc, physaddr, MCLBYTES, desc_idx); counter_u64_add(ic->ic_ierrors, 1); return; } /* Finalize mbuf. */ m = rx_data->m; rx_data->m = m1; m->m_pkthdr.len = m->m_len = pktlen + infosz + shift; /* Update RX descriptor. */ rtwn_setup_rx_desc(sc, rx_desc, physaddr, MCLBYTES, desc_idx); /* Get ieee80211 frame header. */ if (rxdw0 & R92C_RXDW0_PHYST) m_adj(m, infosz + shift); else m_adj(m, shift); nf = -95; if (ieee80211_radiotap_active(ic)) { struct rtwn_rx_radiotap_header *tap = &sc->sc_rxtap; tap->wr_flags = 0; if (!(rxdw3 & R92C_RXDW3_HT)) { switch (rate) { /* CCK. */ case 0: tap->wr_rate = 2; break; case 1: tap->wr_rate = 4; break; case 2: tap->wr_rate = 11; break; case 3: tap->wr_rate = 22; break; /* OFDM. */ case 4: tap->wr_rate = 12; break; case 5: tap->wr_rate = 18; break; case 6: tap->wr_rate = 24; break; case 7: tap->wr_rate = 36; break; case 8: tap->wr_rate = 48; break; case 9: tap->wr_rate = 72; break; case 10: tap->wr_rate = 96; break; case 11: tap->wr_rate = 108; break; } } else if (rate >= 12) { /* MCS0~15. */ /* Bit 7 set means HT MCS instead of rate. */ tap->wr_rate = 0x80 | (rate - 12); } tap->wr_dbm_antsignal = rssi; tap->wr_chan_freq = htole16(ic->ic_curchan->ic_freq); tap->wr_chan_flags = htole16(ic->ic_curchan->ic_flags); } RTWN_UNLOCK(sc); wh = mtod(m, struct ieee80211_frame *); /* Send the frame to the 802.11 layer. */ ni = ieee80211_find_rxnode(ic, (struct ieee80211_frame_min *)wh); if (ni != NULL) { (void)ieee80211_input(ni, m, rssi - nf, nf); /* Node is no longer needed. */ ieee80211_free_node(ni); } else (void)ieee80211_input_all(ic, m, rssi - nf, nf); RTWN_LOCK(sc); } static int rtwn_tx(struct rtwn_softc *sc, struct mbuf *m, struct ieee80211_node *ni) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_frame *wh; struct ieee80211_key *k = NULL; struct rtwn_tx_ring *tx_ring; struct rtwn_tx_data *data; struct r92c_tx_desc *txd; bus_dma_segment_t segs[1]; uint16_t qos; uint8_t raid, type, tid, qid; int nsegs, error; wh = mtod(m, struct ieee80211_frame *); type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; /* Encrypt the frame if need be. */ if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { k = ieee80211_crypto_encap(ni, m); if (k == NULL) { m_freem(m); return (ENOBUFS); } /* 802.11 header may have moved. */ wh = mtod(m, struct ieee80211_frame *); } if (IEEE80211_QOS_HAS_SEQ(wh)) { qos = ((const struct ieee80211_qosframe *)wh)->i_qos[0]; tid = qos & IEEE80211_QOS_TID; } else { qos = 0; tid = 0; } switch (type) { case IEEE80211_FC0_TYPE_CTL: case IEEE80211_FC0_TYPE_MGT: qid = RTWN_VO_QUEUE; break; default: qid = M_WME_GETAC(m); break; } /* Grab a Tx buffer from the ring. */ tx_ring = &sc->tx_ring[qid]; data = &tx_ring->tx_data[tx_ring->cur]; if (data->m != NULL) { m_freem(m); return (ENOBUFS); } /* Fill Tx descriptor. */ txd = &tx_ring->desc[tx_ring->cur]; if (htole32(txd->txdw0) & R92C_RXDW0_OWN) { m_freem(m); return (ENOBUFS); } txd->txdw0 = htole32( SM(R92C_TXDW0_PKTLEN, m->m_pkthdr.len) | SM(R92C_TXDW0_OFFSET, sizeof(*txd)) | R92C_TXDW0_FSG | R92C_TXDW0_LSG); if (IEEE80211_IS_MULTICAST(wh->i_addr1)) txd->txdw0 |= htole32(R92C_TXDW0_BMCAST); txd->txdw1 = 0; txd->txdw4 = 0; txd->txdw5 = 0; /* XXX TODO: rate control; implement low-rate for EAPOL */ if (!IEEE80211_IS_MULTICAST(wh->i_addr1) && type == IEEE80211_FC0_TYPE_DATA) { if (ic->ic_curmode == IEEE80211_MODE_11B) raid = R92C_RAID_11B; else raid = R92C_RAID_11BG; txd->txdw1 |= htole32( SM(R92C_TXDW1_MACID, RTWN_MACID_BSS) | SM(R92C_TXDW1_QSEL, R92C_TXDW1_QSEL_BE) | SM(R92C_TXDW1_RAID, raid) | R92C_TXDW1_AGGBK); if (ic->ic_flags & IEEE80211_F_USEPROT) { if (ic->ic_protmode == IEEE80211_PROT_CTSONLY) { txd->txdw4 |= htole32(R92C_TXDW4_CTS2SELF | R92C_TXDW4_HWRTSEN); } else if (ic->ic_protmode == IEEE80211_PROT_RTSCTS) { txd->txdw4 |= htole32(R92C_TXDW4_RTSEN | R92C_TXDW4_HWRTSEN); } } /* XXX TODO: implement rate control */ /* Send RTS at OFDM24. */ txd->txdw4 |= htole32(SM(R92C_TXDW4_RTSRATE, 8)); txd->txdw5 |= htole32(SM(R92C_TXDW5_RTSRATE_FBLIMIT, 0xf)); /* Send data at OFDM54. */ txd->txdw5 |= htole32(SM(R92C_TXDW5_DATARATE, 11)); txd->txdw5 |= htole32(SM(R92C_TXDW5_DATARATE_FBLIMIT, 0x1f)); } else { txd->txdw1 |= htole32( SM(R92C_TXDW1_MACID, 0) | SM(R92C_TXDW1_QSEL, R92C_TXDW1_QSEL_MGNT) | SM(R92C_TXDW1_RAID, R92C_RAID_11B)); /* Force CCK1. */ txd->txdw4 |= htole32(R92C_TXDW4_DRVRATE); txd->txdw5 |= htole32(SM(R92C_TXDW5_DATARATE, 0)); } /* Set sequence number (already little endian). */ - txd->txdseq = *(uint16_t *)wh->i_seq; + txd->txdseq = htole16(M_SEQNO_GET(m) % IEEE80211_SEQ_RANGE); if (!qos) { /* Use HW sequence numbering for non-QoS frames. */ txd->txdw4 |= htole32(R92C_TXDW4_HWSEQ); txd->txdseq |= htole16(0x8000); } else txd->txdw4 |= htole32(R92C_TXDW4_QOS); error = bus_dmamap_load_mbuf_sg(tx_ring->data_dmat, data->map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0 && error != EFBIG) { device_printf(sc->sc_dev, "can't map mbuf (error %d)\n", error); m_freem(m); return (error); } if (error != 0) { struct mbuf *mnew; mnew = m_defrag(m, M_NOWAIT); if (mnew == NULL) { device_printf(sc->sc_dev, "can't defragment mbuf\n"); m_freem(m); return (ENOBUFS); } m = mnew; error = bus_dmamap_load_mbuf_sg(tx_ring->data_dmat, data->map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "can't map mbuf (error %d)\n", error); m_freem(m); return (error); } } txd->txbufaddr = htole32(segs[0].ds_addr); txd->txbufsize = htole16(m->m_pkthdr.len); bus_space_barrier(sc->sc_st, sc->sc_sh, 0, sc->sc_mapsize, BUS_SPACE_BARRIER_WRITE); txd->txdw0 |= htole32(R92C_TXDW0_OWN); bus_dmamap_sync(tx_ring->desc_dmat, tx_ring->desc_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_sync(tx_ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); data->m = m; data->ni = ni; if (ieee80211_radiotap_active_vap(vap)) { struct rtwn_tx_radiotap_header *tap = &sc->sc_txtap; tap->wt_flags = 0; tap->wt_chan_freq = htole16(ic->ic_curchan->ic_freq); tap->wt_chan_flags = htole16(ic->ic_curchan->ic_flags); ieee80211_radiotap_tx(vap, m); } tx_ring->cur = (tx_ring->cur + 1) % RTWN_TX_LIST_COUNT; tx_ring->queued++; if (tx_ring->queued >= (RTWN_TX_LIST_COUNT - 1)) sc->qfullmsk |= (1 << qid); /* Kick TX. */ rtwn_write_2(sc, R92C_PCIE_CTRL_REG, (1 << qid)); return (0); } static void rtwn_tx_done(struct rtwn_softc *sc, int qid) { struct rtwn_tx_ring *tx_ring = &sc->tx_ring[qid]; struct rtwn_tx_data *tx_data; struct r92c_tx_desc *tx_desc; int i; bus_dmamap_sync(tx_ring->desc_dmat, tx_ring->desc_map, BUS_DMASYNC_POSTREAD); for (i = 0; i < RTWN_TX_LIST_COUNT; i++) { tx_data = &tx_ring->tx_data[i]; if (tx_data->m == NULL) continue; tx_desc = &tx_ring->desc[i]; if (le32toh(tx_desc->txdw0) & R92C_TXDW0_OWN) continue; bus_dmamap_unload(tx_ring->desc_dmat, tx_ring->desc_map); /* * XXX TODO: figure out whether the transmit succeeded or not. * .. and then notify rate control. */ ieee80211_tx_complete(tx_data->ni, tx_data->m, 0); tx_data->ni = NULL; tx_data->m = NULL; sc->sc_tx_timer = 0; tx_ring->queued--; } if (tx_ring->queued < (RTWN_TX_LIST_COUNT - 1)) sc->qfullmsk &= ~(1 << qid); rtwn_start(sc); } static int rtwn_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { struct ieee80211com *ic = ni->ni_ic; struct rtwn_softc *sc = ic->ic_softc; RTWN_LOCK(sc); /* Prevent management frames from being sent if we're not ready. */ if (!(sc->sc_flags & RTWN_RUNNING)) { RTWN_UNLOCK(sc); m_freem(m); return (ENETDOWN); } if (rtwn_tx(sc, m, ni) != 0) { m_freem(m); RTWN_UNLOCK(sc); return (EIO); } sc->sc_tx_timer = 5; RTWN_UNLOCK(sc); return (0); } static int rtwn_transmit(struct ieee80211com *ic, struct mbuf *m) { struct rtwn_softc *sc = ic->ic_softc; int error; RTWN_LOCK(sc); if ((sc->sc_flags & RTWN_RUNNING) == 0) { RTWN_UNLOCK(sc); return (ENXIO); } error = mbufq_enqueue(&sc->sc_snd, m); if (error) { RTWN_UNLOCK(sc); return (error); } rtwn_start(sc); RTWN_UNLOCK(sc); return (0); } static void rtwn_parent(struct ieee80211com *ic) { struct rtwn_softc *sc = ic->ic_softc; int startall = 0; RTWN_LOCK(sc); if (ic->ic_nrunning> 0) { if (!(sc->sc_flags & RTWN_RUNNING)) { rtwn_init_locked(sc); startall = 1; } } else if (sc->sc_flags & RTWN_RUNNING) rtwn_stop_locked(sc); RTWN_UNLOCK(sc); if (startall) ieee80211_start_all(ic); } static void rtwn_start(struct rtwn_softc *sc) { struct ieee80211_node *ni; struct mbuf *m; RTWN_LOCK_ASSERT(sc); if ((sc->sc_flags & RTWN_RUNNING) == 0) return; while (sc->qfullmsk == 0 && (m = mbufq_dequeue(&sc->sc_snd)) != NULL) { ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; if (rtwn_tx(sc, m, ni) != 0) { if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); ieee80211_free_node(ni); continue; } sc->sc_tx_timer = 5; } } static void rtwn_watchdog(void *arg) { struct rtwn_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; RTWN_LOCK_ASSERT(sc); KASSERT(sc->sc_flags & RTWN_RUNNING, ("not running")); if (sc->sc_tx_timer != 0 && --sc->sc_tx_timer == 0) { ic_printf(ic, "device timeout\n"); ieee80211_runtask(ic, &sc->sc_reinit_task); return; } callout_reset(&sc->watchdog_to, hz, rtwn_watchdog, sc); } static int rtwn_power_on(struct rtwn_softc *sc) { uint32_t reg; int ntries; /* Wait for autoload done bit. */ for (ntries = 0; ntries < 1000; ntries++) { if (rtwn_read_1(sc, R92C_APS_FSMCO) & R92C_APS_FSMCO_PFM_ALDN) break; DELAY(5); } if (ntries == 1000) { device_printf(sc->sc_dev, "timeout waiting for chip autoload\n"); return (ETIMEDOUT); } /* Unlock ISO/CLK/Power control register. */ rtwn_write_1(sc, R92C_RSV_CTRL, 0); /* TODO: check if we need this for 8188CE */ if (sc->board_type != R92C_BOARD_TYPE_DONGLE) { /* bt coex */ reg = rtwn_read_4(sc, R92C_APS_FSMCO); reg |= (R92C_APS_FSMCO_SOP_ABG | R92C_APS_FSMCO_SOP_AMB | R92C_APS_FSMCO_XOP_BTCK); rtwn_write_4(sc, R92C_APS_FSMCO, reg); } /* Move SPS into PWM mode. */ rtwn_write_1(sc, R92C_SPS0_CTRL, 0x2b); /* Set low byte to 0x0f, leave others unchanged. */ rtwn_write_4(sc, R92C_AFE_XTAL_CTRL, (rtwn_read_4(sc, R92C_AFE_XTAL_CTRL) & 0xffffff00) | 0x0f); /* TODO: check if we need this for 8188CE */ if (sc->board_type != R92C_BOARD_TYPE_DONGLE) { /* bt coex */ reg = rtwn_read_4(sc, R92C_AFE_XTAL_CTRL); reg &= (~0x00024800); /* XXX magic from linux */ rtwn_write_4(sc, R92C_AFE_XTAL_CTRL, reg); } rtwn_write_2(sc, R92C_SYS_ISO_CTRL, (rtwn_read_2(sc, R92C_SYS_ISO_CTRL) & 0xff) | R92C_SYS_ISO_CTRL_PWC_EV12V | R92C_SYS_ISO_CTRL_DIOR); DELAY(200); /* TODO: linux does additional btcoex stuff here */ /* Auto enable WLAN. */ rtwn_write_2(sc, R92C_APS_FSMCO, rtwn_read_2(sc, R92C_APS_FSMCO) | R92C_APS_FSMCO_APFM_ONMAC); for (ntries = 0; ntries < 1000; ntries++) { if (!(rtwn_read_2(sc, R92C_APS_FSMCO) & R92C_APS_FSMCO_APFM_ONMAC)) break; DELAY(5); } if (ntries == 1000) { device_printf(sc->sc_dev, "timeout waiting for MAC auto ON\n"); return (ETIMEDOUT); } /* Enable radio, GPIO and LED functions. */ rtwn_write_2(sc, R92C_APS_FSMCO, R92C_APS_FSMCO_AFSM_PCIE | R92C_APS_FSMCO_PDN_EN | R92C_APS_FSMCO_PFM_ALDN); /* Release RF digital isolation. */ rtwn_write_2(sc, R92C_SYS_ISO_CTRL, rtwn_read_2(sc, R92C_SYS_ISO_CTRL) & ~R92C_SYS_ISO_CTRL_DIOR); if (sc->chip & RTWN_CHIP_92C) rtwn_write_1(sc, R92C_PCIE_CTRL_REG + 3, 0x77); else rtwn_write_1(sc, R92C_PCIE_CTRL_REG + 3, 0x22); rtwn_write_4(sc, R92C_INT_MIG, 0); if (sc->board_type != R92C_BOARD_TYPE_DONGLE) { /* bt coex */ reg = rtwn_read_4(sc, R92C_AFE_XTAL_CTRL + 2); reg &= 0xfd; /* XXX magic from linux */ rtwn_write_4(sc, R92C_AFE_XTAL_CTRL + 2, reg); } rtwn_write_1(sc, R92C_GPIO_MUXCFG, rtwn_read_1(sc, R92C_GPIO_MUXCFG) & ~R92C_GPIO_MUXCFG_RFKILL); reg = rtwn_read_1(sc, R92C_GPIO_IO_SEL); if (!(reg & R92C_GPIO_IO_SEL_RFKILL)) { device_printf(sc->sc_dev, "radio is disabled by hardware switch\n"); return (EPERM); } /* Initialize MAC. */ reg = rtwn_read_1(sc, R92C_APSD_CTRL); rtwn_write_1(sc, R92C_APSD_CTRL, rtwn_read_1(sc, R92C_APSD_CTRL) & ~R92C_APSD_CTRL_OFF); for (ntries = 0; ntries < 200; ntries++) { if (!(rtwn_read_1(sc, R92C_APSD_CTRL) & R92C_APSD_CTRL_OFF_STATUS)) break; DELAY(500); } if (ntries == 200) { device_printf(sc->sc_dev, "timeout waiting for MAC initialization\n"); return (ETIMEDOUT); } /* Enable MAC DMA/WMAC/SCHEDULE/SEC blocks. */ reg = rtwn_read_2(sc, R92C_CR); reg |= R92C_CR_HCI_TXDMA_EN | R92C_CR_HCI_RXDMA_EN | R92C_CR_TXDMA_EN | R92C_CR_RXDMA_EN | R92C_CR_PROTOCOL_EN | R92C_CR_SCHEDULE_EN | R92C_CR_MACTXEN | R92C_CR_MACRXEN | R92C_CR_ENSEC; rtwn_write_2(sc, R92C_CR, reg); rtwn_write_1(sc, 0xfe10, 0x19); return (0); } static int rtwn_llt_init(struct rtwn_softc *sc) { int i, error; /* Reserve pages [0; R92C_TX_PAGE_COUNT]. */ for (i = 0; i < R92C_TX_PAGE_COUNT; i++) { if ((error = rtwn_llt_write(sc, i, i + 1)) != 0) return (error); } /* NB: 0xff indicates end-of-list. */ if ((error = rtwn_llt_write(sc, i, 0xff)) != 0) return (error); /* * Use pages [R92C_TX_PAGE_COUNT + 1; R92C_TXPKTBUF_COUNT - 1] * as ring buffer. */ for (++i; i < R92C_TXPKTBUF_COUNT - 1; i++) { if ((error = rtwn_llt_write(sc, i, i + 1)) != 0) return (error); } /* Make the last page point to the beginning of the ring buffer. */ error = rtwn_llt_write(sc, i, R92C_TX_PAGE_COUNT + 1); return (error); } static void rtwn_fw_reset(struct rtwn_softc *sc) { uint16_t reg; int ntries; /* Tell 8051 to reset itself. */ rtwn_write_1(sc, R92C_HMETFR + 3, 0x20); /* Wait until 8051 resets by itself. */ for (ntries = 0; ntries < 100; ntries++) { reg = rtwn_read_2(sc, R92C_SYS_FUNC_EN); if (!(reg & R92C_SYS_FUNC_EN_CPUEN)) goto sleep; DELAY(50); } /* Force 8051 reset. */ rtwn_write_2(sc, R92C_SYS_FUNC_EN, reg & ~R92C_SYS_FUNC_EN_CPUEN); sleep: /* * We must sleep for one second to let the firmware settle. * Accessing registers too early will hang the whole system. */ if (msleep(®, &sc->sc_mtx, 0, "rtwnrst", hz)) { device_printf(sc->sc_dev, "timeout waiting for firmware " "initialization to complete\n"); } } static void rtwn_fw_loadpage(struct rtwn_softc *sc, int page, const uint8_t *buf, int len) { uint32_t reg; int off, mlen, i; reg = rtwn_read_4(sc, R92C_MCUFWDL); reg = RW(reg, R92C_MCUFWDL_PAGE, page); rtwn_write_4(sc, R92C_MCUFWDL, reg); DELAY(5); off = R92C_FW_START_ADDR; while (len > 0) { if (len > 196) mlen = 196; else if (len > 4) mlen = 4; else mlen = 1; for (i = 0; i < mlen; i++) rtwn_write_1(sc, off++, buf[i]); buf += mlen; len -= mlen; } } static int rtwn_load_firmware(struct rtwn_softc *sc) { const struct firmware *fw; const struct r92c_fw_hdr *hdr; const char *name; const u_char *ptr; size_t len; uint32_t reg; int mlen, ntries, page, error = 0; /* Read firmware image from the filesystem. */ if ((sc->chip & (RTWN_CHIP_UMC_A_CUT | RTWN_CHIP_92C)) == RTWN_CHIP_UMC_A_CUT) name = "rtwn-rtl8192cfwU"; else name = "rtwn-rtl8192cfwU_B"; RTWN_UNLOCK(sc); fw = firmware_get(name); RTWN_LOCK(sc); if (fw == NULL) { device_printf(sc->sc_dev, "could not read firmware %s\n", name); return (ENOENT); } len = fw->datasize; if (len < sizeof(*hdr)) { device_printf(sc->sc_dev, "firmware too short\n"); error = EINVAL; goto fail; } ptr = fw->data; hdr = (const struct r92c_fw_hdr *)ptr; /* Check if there is a valid FW header and skip it. */ if ((le16toh(hdr->signature) >> 4) == 0x88c || (le16toh(hdr->signature) >> 4) == 0x92c) { DPRINTF(("FW V%d.%d %02d-%02d %02d:%02d\n", le16toh(hdr->version), le16toh(hdr->subversion), hdr->month, hdr->date, hdr->hour, hdr->minute)); ptr += sizeof(*hdr); len -= sizeof(*hdr); } if (rtwn_read_1(sc, R92C_MCUFWDL) & R92C_MCUFWDL_RAM_DL_SEL) rtwn_fw_reset(sc); /* Enable FW download. */ rtwn_write_2(sc, R92C_SYS_FUNC_EN, rtwn_read_2(sc, R92C_SYS_FUNC_EN) | R92C_SYS_FUNC_EN_CPUEN); rtwn_write_1(sc, R92C_MCUFWDL, rtwn_read_1(sc, R92C_MCUFWDL) | R92C_MCUFWDL_EN); rtwn_write_1(sc, R92C_MCUFWDL + 2, rtwn_read_1(sc, R92C_MCUFWDL + 2) & ~0x08); /* Reset the FWDL checksum. */ rtwn_write_1(sc, R92C_MCUFWDL, rtwn_read_1(sc, R92C_MCUFWDL) | R92C_MCUFWDL_CHKSUM_RPT); for (page = 0; len > 0; page++) { mlen = MIN(len, R92C_FW_PAGE_SIZE); rtwn_fw_loadpage(sc, page, ptr, mlen); ptr += mlen; len -= mlen; } /* Disable FW download. */ rtwn_write_1(sc, R92C_MCUFWDL, rtwn_read_1(sc, R92C_MCUFWDL) & ~R92C_MCUFWDL_EN); rtwn_write_1(sc, R92C_MCUFWDL + 1, 0); /* Wait for checksum report. */ for (ntries = 0; ntries < 1000; ntries++) { if (rtwn_read_4(sc, R92C_MCUFWDL) & R92C_MCUFWDL_CHKSUM_RPT) break; DELAY(5); } if (ntries == 1000) { device_printf(sc->sc_dev, "timeout waiting for checksum report\n"); error = ETIMEDOUT; goto fail; } reg = rtwn_read_4(sc, R92C_MCUFWDL); reg = (reg & ~R92C_MCUFWDL_WINTINI_RDY) | R92C_MCUFWDL_RDY; rtwn_write_4(sc, R92C_MCUFWDL, reg); /* Wait for firmware readiness. */ for (ntries = 0; ntries < 2000; ntries++) { if (rtwn_read_4(sc, R92C_MCUFWDL) & R92C_MCUFWDL_WINTINI_RDY) break; DELAY(50); } if (ntries == 1000) { device_printf(sc->sc_dev, "timeout waiting for firmware readiness\n"); error = ETIMEDOUT; goto fail; } fail: firmware_put(fw, FIRMWARE_UNLOAD); return (error); } static int rtwn_dma_init(struct rtwn_softc *sc) { uint32_t reg; int error; /* Initialize LLT table. */ error = rtwn_llt_init(sc); if (error != 0) return error; /* Set number of pages for normal priority queue. */ rtwn_write_2(sc, R92C_RQPN_NPQ, 0); rtwn_write_4(sc, R92C_RQPN, /* Set number of pages for public queue. */ SM(R92C_RQPN_PUBQ, R92C_PUBQ_NPAGES) | /* Set number of pages for high priority queue. */ SM(R92C_RQPN_HPQ, R92C_HPQ_NPAGES) | /* Set number of pages for low priority queue. */ SM(R92C_RQPN_LPQ, R92C_LPQ_NPAGES) | /* Load values. */ R92C_RQPN_LD); rtwn_write_1(sc, R92C_TXPKTBUF_BCNQ_BDNY, R92C_TX_PAGE_BOUNDARY); rtwn_write_1(sc, R92C_TXPKTBUF_MGQ_BDNY, R92C_TX_PAGE_BOUNDARY); rtwn_write_1(sc, R92C_TXPKTBUF_WMAC_LBK_BF_HD, R92C_TX_PAGE_BOUNDARY); rtwn_write_1(sc, R92C_TRXFF_BNDY, R92C_TX_PAGE_BOUNDARY); rtwn_write_1(sc, R92C_TDECTRL + 1, R92C_TX_PAGE_BOUNDARY); reg = rtwn_read_2(sc, R92C_TRXDMA_CTRL); reg &= ~R92C_TRXDMA_CTRL_QMAP_M; reg |= 0xF771; rtwn_write_2(sc, R92C_TRXDMA_CTRL, reg); rtwn_write_4(sc, R92C_TCR, R92C_TCR_CFENDFORM | (1 << 12) | (1 << 13)); /* Configure Tx DMA. */ rtwn_write_4(sc, R92C_BKQ_DESA, sc->tx_ring[RTWN_BK_QUEUE].paddr); rtwn_write_4(sc, R92C_BEQ_DESA, sc->tx_ring[RTWN_BE_QUEUE].paddr); rtwn_write_4(sc, R92C_VIQ_DESA, sc->tx_ring[RTWN_VI_QUEUE].paddr); rtwn_write_4(sc, R92C_VOQ_DESA, sc->tx_ring[RTWN_VO_QUEUE].paddr); rtwn_write_4(sc, R92C_BCNQ_DESA, sc->tx_ring[RTWN_BEACON_QUEUE].paddr); rtwn_write_4(sc, R92C_MGQ_DESA, sc->tx_ring[RTWN_MGNT_QUEUE].paddr); rtwn_write_4(sc, R92C_HQ_DESA, sc->tx_ring[RTWN_HIGH_QUEUE].paddr); /* Configure Rx DMA. */ rtwn_write_4(sc, R92C_RX_DESA, sc->rx_ring.paddr); /* Set Tx/Rx transfer page boundary. */ rtwn_write_2(sc, R92C_TRXFF_BNDY + 2, 0x27ff); /* Set Tx/Rx transfer page size. */ rtwn_write_1(sc, R92C_PBP, SM(R92C_PBP_PSRX, R92C_PBP_128) | SM(R92C_PBP_PSTX, R92C_PBP_128)); return (0); } static void rtwn_mac_init(struct rtwn_softc *sc) { int i; /* Write MAC initialization values. */ for (i = 0; i < nitems(rtl8192ce_mac); i++) rtwn_write_1(sc, rtl8192ce_mac[i].reg, rtl8192ce_mac[i].val); } static void rtwn_bb_init(struct rtwn_softc *sc) { const struct rtwn_bb_prog *prog; uint32_t reg; int i; /* Enable BB and RF. */ rtwn_write_2(sc, R92C_SYS_FUNC_EN, rtwn_read_2(sc, R92C_SYS_FUNC_EN) | R92C_SYS_FUNC_EN_BBRSTB | R92C_SYS_FUNC_EN_BB_GLB_RST | R92C_SYS_FUNC_EN_DIO_RF); rtwn_write_2(sc, R92C_AFE_PLL_CTRL, 0xdb83); rtwn_write_1(sc, R92C_RF_CTRL, R92C_RF_CTRL_EN | R92C_RF_CTRL_RSTB | R92C_RF_CTRL_SDMRSTB); rtwn_write_1(sc, R92C_SYS_FUNC_EN, R92C_SYS_FUNC_EN_DIO_PCIE | R92C_SYS_FUNC_EN_PCIEA | R92C_SYS_FUNC_EN_PPLL | R92C_SYS_FUNC_EN_BB_GLB_RST | R92C_SYS_FUNC_EN_BBRSTB); rtwn_write_1(sc, R92C_AFE_XTAL_CTRL + 1, 0x80); rtwn_write_4(sc, R92C_LEDCFG0, rtwn_read_4(sc, R92C_LEDCFG0) | 0x00800000); /* Select BB programming. */ prog = (sc->chip & RTWN_CHIP_92C) ? &rtl8192ce_bb_prog_2t : &rtl8192ce_bb_prog_1t; /* Write BB initialization values. */ for (i = 0; i < prog->count; i++) { rtwn_bb_write(sc, prog->regs[i], prog->vals[i]); DELAY(1); } if (sc->chip & RTWN_CHIP_92C_1T2R) { /* 8192C 1T only configuration. */ reg = rtwn_bb_read(sc, R92C_FPGA0_TXINFO); reg = (reg & ~0x00000003) | 0x2; rtwn_bb_write(sc, R92C_FPGA0_TXINFO, reg); reg = rtwn_bb_read(sc, R92C_FPGA1_TXINFO); reg = (reg & ~0x00300033) | 0x00200022; rtwn_bb_write(sc, R92C_FPGA1_TXINFO, reg); reg = rtwn_bb_read(sc, R92C_CCK0_AFESETTING); reg = (reg & ~0xff000000) | 0x45 << 24; rtwn_bb_write(sc, R92C_CCK0_AFESETTING, reg); reg = rtwn_bb_read(sc, R92C_OFDM0_TRXPATHENA); reg = (reg & ~0x000000ff) | 0x23; rtwn_bb_write(sc, R92C_OFDM0_TRXPATHENA, reg); reg = rtwn_bb_read(sc, R92C_OFDM0_AGCPARAM1); reg = (reg & ~0x00000030) | 1 << 4; rtwn_bb_write(sc, R92C_OFDM0_AGCPARAM1, reg); reg = rtwn_bb_read(sc, 0xe74); reg = (reg & ~0x0c000000) | 2 << 26; rtwn_bb_write(sc, 0xe74, reg); reg = rtwn_bb_read(sc, 0xe78); reg = (reg & ~0x0c000000) | 2 << 26; rtwn_bb_write(sc, 0xe78, reg); reg = rtwn_bb_read(sc, 0xe7c); reg = (reg & ~0x0c000000) | 2 << 26; rtwn_bb_write(sc, 0xe7c, reg); reg = rtwn_bb_read(sc, 0xe80); reg = (reg & ~0x0c000000) | 2 << 26; rtwn_bb_write(sc, 0xe80, reg); reg = rtwn_bb_read(sc, 0xe88); reg = (reg & ~0x0c000000) | 2 << 26; rtwn_bb_write(sc, 0xe88, reg); } /* Write AGC values. */ for (i = 0; i < prog->agccount; i++) { rtwn_bb_write(sc, R92C_OFDM0_AGCRSSITABLE, prog->agcvals[i]); DELAY(1); } if (rtwn_bb_read(sc, R92C_HSSI_PARAM2(0)) & R92C_HSSI_PARAM2_CCK_HIPWR) sc->sc_flags |= RTWN_FLAG_CCK_HIPWR; } static void rtwn_rf_init(struct rtwn_softc *sc) { const struct rtwn_rf_prog *prog; uint32_t reg, type; int i, j, idx, off; /* Select RF programming based on board type. */ if (!(sc->chip & RTWN_CHIP_92C)) { if (sc->board_type == R92C_BOARD_TYPE_MINICARD) prog = rtl8188ce_rf_prog; else if (sc->board_type == R92C_BOARD_TYPE_HIGHPA) prog = rtl8188ru_rf_prog; else prog = rtl8188cu_rf_prog; } else prog = rtl8192ce_rf_prog; for (i = 0; i < sc->nrxchains; i++) { /* Save RF_ENV control type. */ idx = i / 2; off = (i % 2) * 16; reg = rtwn_bb_read(sc, R92C_FPGA0_RFIFACESW(idx)); type = (reg >> off) & 0x10; /* Set RF_ENV enable. */ reg = rtwn_bb_read(sc, R92C_FPGA0_RFIFACEOE(i)); reg |= 0x100000; rtwn_bb_write(sc, R92C_FPGA0_RFIFACEOE(i), reg); DELAY(1); /* Set RF_ENV output high. */ reg = rtwn_bb_read(sc, R92C_FPGA0_RFIFACEOE(i)); reg |= 0x10; rtwn_bb_write(sc, R92C_FPGA0_RFIFACEOE(i), reg); DELAY(1); /* Set address and data lengths of RF registers. */ reg = rtwn_bb_read(sc, R92C_HSSI_PARAM2(i)); reg &= ~R92C_HSSI_PARAM2_ADDR_LENGTH; rtwn_bb_write(sc, R92C_HSSI_PARAM2(i), reg); DELAY(1); reg = rtwn_bb_read(sc, R92C_HSSI_PARAM2(i)); reg &= ~R92C_HSSI_PARAM2_DATA_LENGTH; rtwn_bb_write(sc, R92C_HSSI_PARAM2(i), reg); DELAY(1); /* Write RF initialization values for this chain. */ for (j = 0; j < prog[i].count; j++) { if (prog[i].regs[j] >= 0xf9 && prog[i].regs[j] <= 0xfe) { /* * These are fake RF registers offsets that * indicate a delay is required. */ DELAY(50); continue; } rtwn_rf_write(sc, i, prog[i].regs[j], prog[i].vals[j]); DELAY(1); } /* Restore RF_ENV control type. */ reg = rtwn_bb_read(sc, R92C_FPGA0_RFIFACESW(idx)); reg &= ~(0x10 << off) | (type << off); rtwn_bb_write(sc, R92C_FPGA0_RFIFACESW(idx), reg); /* Cache RF register CHNLBW. */ sc->rf_chnlbw[i] = rtwn_rf_read(sc, i, R92C_RF_CHNLBW); } if ((sc->chip & (RTWN_CHIP_UMC_A_CUT | RTWN_CHIP_92C)) == RTWN_CHIP_UMC_A_CUT) { rtwn_rf_write(sc, 0, R92C_RF_RX_G1, 0x30255); rtwn_rf_write(sc, 0, R92C_RF_RX_G2, 0x50a00); } } static void rtwn_cam_init(struct rtwn_softc *sc) { /* Invalidate all CAM entries. */ rtwn_write_4(sc, R92C_CAMCMD, R92C_CAMCMD_POLLING | R92C_CAMCMD_CLR); } static void rtwn_pa_bias_init(struct rtwn_softc *sc) { uint8_t reg; int i; for (i = 0; i < sc->nrxchains; i++) { if (sc->pa_setting & (1 << i)) continue; rtwn_rf_write(sc, i, R92C_RF_IPA, 0x0f406); rtwn_rf_write(sc, i, R92C_RF_IPA, 0x4f406); rtwn_rf_write(sc, i, R92C_RF_IPA, 0x8f406); rtwn_rf_write(sc, i, R92C_RF_IPA, 0xcf406); } if (!(sc->pa_setting & 0x10)) { reg = rtwn_read_1(sc, 0x16); reg = (reg & ~0xf0) | 0x90; rtwn_write_1(sc, 0x16, reg); } } static void rtwn_rxfilter_init(struct rtwn_softc *sc) { /* Initialize Rx filter. */ /* TODO: use better filter for monitor mode. */ rtwn_write_4(sc, R92C_RCR, R92C_RCR_AAP | R92C_RCR_APM | R92C_RCR_AM | R92C_RCR_AB | R92C_RCR_APP_ICV | R92C_RCR_AMF | R92C_RCR_HTC_LOC_CTRL | R92C_RCR_APP_MIC | R92C_RCR_APP_PHYSTS); /* Accept all multicast frames. */ rtwn_write_4(sc, R92C_MAR + 0, 0xffffffff); rtwn_write_4(sc, R92C_MAR + 4, 0xffffffff); /* Accept all management frames. */ rtwn_write_2(sc, R92C_RXFLTMAP0, 0xffff); /* Reject all control frames. */ rtwn_write_2(sc, R92C_RXFLTMAP1, 0x0000); /* Accept all data frames. */ rtwn_write_2(sc, R92C_RXFLTMAP2, 0xffff); } static void rtwn_edca_init(struct rtwn_softc *sc) { rtwn_write_2(sc, R92C_SPEC_SIFS, 0x1010); rtwn_write_2(sc, R92C_MAC_SPEC_SIFS, 0x1010); rtwn_write_2(sc, R92C_SIFS_CCK, 0x1010); rtwn_write_2(sc, R92C_SIFS_OFDM, 0x0e0e); rtwn_write_4(sc, R92C_EDCA_BE_PARAM, 0x005ea42b); rtwn_write_4(sc, R92C_EDCA_BK_PARAM, 0x0000a44f); rtwn_write_4(sc, R92C_EDCA_VI_PARAM, 0x005e4322); rtwn_write_4(sc, R92C_EDCA_VO_PARAM, 0x002f3222); } static void rtwn_write_txpower(struct rtwn_softc *sc, int chain, uint16_t power[RTWN_RIDX_COUNT]) { uint32_t reg; /* Write per-CCK rate Tx power. */ if (chain == 0) { reg = rtwn_bb_read(sc, R92C_TXAGC_A_CCK1_MCS32); reg = RW(reg, R92C_TXAGC_A_CCK1, power[0]); rtwn_bb_write(sc, R92C_TXAGC_A_CCK1_MCS32, reg); reg = rtwn_bb_read(sc, R92C_TXAGC_B_CCK11_A_CCK2_11); reg = RW(reg, R92C_TXAGC_A_CCK2, power[1]); reg = RW(reg, R92C_TXAGC_A_CCK55, power[2]); reg = RW(reg, R92C_TXAGC_A_CCK11, power[3]); rtwn_bb_write(sc, R92C_TXAGC_B_CCK11_A_CCK2_11, reg); } else { reg = rtwn_bb_read(sc, R92C_TXAGC_B_CCK1_55_MCS32); reg = RW(reg, R92C_TXAGC_B_CCK1, power[0]); reg = RW(reg, R92C_TXAGC_B_CCK2, power[1]); reg = RW(reg, R92C_TXAGC_B_CCK55, power[2]); rtwn_bb_write(sc, R92C_TXAGC_B_CCK1_55_MCS32, reg); reg = rtwn_bb_read(sc, R92C_TXAGC_B_CCK11_A_CCK2_11); reg = RW(reg, R92C_TXAGC_B_CCK11, power[3]); rtwn_bb_write(sc, R92C_TXAGC_B_CCK11_A_CCK2_11, reg); } /* Write per-OFDM rate Tx power. */ rtwn_bb_write(sc, R92C_TXAGC_RATE18_06(chain), SM(R92C_TXAGC_RATE06, power[ 4]) | SM(R92C_TXAGC_RATE09, power[ 5]) | SM(R92C_TXAGC_RATE12, power[ 6]) | SM(R92C_TXAGC_RATE18, power[ 7])); rtwn_bb_write(sc, R92C_TXAGC_RATE54_24(chain), SM(R92C_TXAGC_RATE24, power[ 8]) | SM(R92C_TXAGC_RATE36, power[ 9]) | SM(R92C_TXAGC_RATE48, power[10]) | SM(R92C_TXAGC_RATE54, power[11])); /* Write per-MCS Tx power. */ rtwn_bb_write(sc, R92C_TXAGC_MCS03_MCS00(chain), SM(R92C_TXAGC_MCS00, power[12]) | SM(R92C_TXAGC_MCS01, power[13]) | SM(R92C_TXAGC_MCS02, power[14]) | SM(R92C_TXAGC_MCS03, power[15])); rtwn_bb_write(sc, R92C_TXAGC_MCS07_MCS04(chain), SM(R92C_TXAGC_MCS04, power[16]) | SM(R92C_TXAGC_MCS05, power[17]) | SM(R92C_TXAGC_MCS06, power[18]) | SM(R92C_TXAGC_MCS07, power[19])); rtwn_bb_write(sc, R92C_TXAGC_MCS11_MCS08(chain), SM(R92C_TXAGC_MCS08, power[20]) | SM(R92C_TXAGC_MCS09, power[21]) | SM(R92C_TXAGC_MCS10, power[22]) | SM(R92C_TXAGC_MCS11, power[23])); rtwn_bb_write(sc, R92C_TXAGC_MCS15_MCS12(chain), SM(R92C_TXAGC_MCS12, power[24]) | SM(R92C_TXAGC_MCS13, power[25]) | SM(R92C_TXAGC_MCS14, power[26]) | SM(R92C_TXAGC_MCS15, power[27])); } static void rtwn_get_txpower(struct rtwn_softc *sc, int chain, struct ieee80211_channel *c, struct ieee80211_channel *extc, uint16_t power[RTWN_RIDX_COUNT]) { struct ieee80211com *ic = &sc->sc_ic; struct r92c_rom *rom = &sc->rom; uint16_t cckpow, ofdmpow, htpow, diff, max; const struct rtwn_txpwr *base; int ridx, chan, group; /* Determine channel group. */ chan = ieee80211_chan2ieee(ic, c); /* XXX center freq! */ if (chan <= 3) group = 0; else if (chan <= 9) group = 1; else group = 2; /* Get original Tx power based on board type and RF chain. */ if (!(sc->chip & RTWN_CHIP_92C)) { if (sc->board_type == R92C_BOARD_TYPE_HIGHPA) base = &rtl8188ru_txagc[chain]; else base = &rtl8192cu_txagc[chain]; } else base = &rtl8192cu_txagc[chain]; memset(power, 0, RTWN_RIDX_COUNT * sizeof(power[0])); if (sc->regulatory == 0) { for (ridx = 0; ridx <= 3; ridx++) power[ridx] = base->pwr[0][ridx]; } for (ridx = 4; ridx < RTWN_RIDX_COUNT; ridx++) { if (sc->regulatory == 3) { power[ridx] = base->pwr[0][ridx]; /* Apply vendor limits. */ if (extc != NULL) max = rom->ht40_max_pwr[group]; else max = rom->ht20_max_pwr[group]; max = (max >> (chain * 4)) & 0xf; if (power[ridx] > max) power[ridx] = max; } else if (sc->regulatory == 1) { if (extc == NULL) power[ridx] = base->pwr[group][ridx]; } else if (sc->regulatory != 2) power[ridx] = base->pwr[0][ridx]; } /* Compute per-CCK rate Tx power. */ cckpow = rom->cck_tx_pwr[chain][group]; for (ridx = 0; ridx <= 3; ridx++) { power[ridx] += cckpow; if (power[ridx] > R92C_MAX_TX_PWR) power[ridx] = R92C_MAX_TX_PWR; } htpow = rom->ht40_1s_tx_pwr[chain][group]; if (sc->ntxchains > 1) { /* Apply reduction for 2 spatial streams. */ diff = rom->ht40_2s_tx_pwr_diff[group]; diff = (diff >> (chain * 4)) & 0xf; htpow = (htpow > diff) ? htpow - diff : 0; } /* Compute per-OFDM rate Tx power. */ diff = rom->ofdm_tx_pwr_diff[group]; diff = (diff >> (chain * 4)) & 0xf; ofdmpow = htpow + diff; /* HT->OFDM correction. */ for (ridx = 4; ridx <= 11; ridx++) { power[ridx] += ofdmpow; if (power[ridx] > R92C_MAX_TX_PWR) power[ridx] = R92C_MAX_TX_PWR; } /* Compute per-MCS Tx power. */ if (extc == NULL) { diff = rom->ht20_tx_pwr_diff[group]; diff = (diff >> (chain * 4)) & 0xf; htpow += diff; /* HT40->HT20 correction. */ } for (ridx = 12; ridx <= 27; ridx++) { power[ridx] += htpow; if (power[ridx] > R92C_MAX_TX_PWR) power[ridx] = R92C_MAX_TX_PWR; } #ifdef RTWN_DEBUG if (sc->sc_debug >= 4) { /* Dump per-rate Tx power values. */ printf("Tx power for chain %d:\n", chain); for (ridx = 0; ridx < RTWN_RIDX_COUNT; ridx++) printf("Rate %d = %u\n", ridx, power[ridx]); } #endif } static void rtwn_set_txpower(struct rtwn_softc *sc, struct ieee80211_channel *c, struct ieee80211_channel *extc) { uint16_t power[RTWN_RIDX_COUNT]; int i; for (i = 0; i < sc->ntxchains; i++) { /* Compute per-rate Tx power values. */ rtwn_get_txpower(sc, i, c, extc, power); /* Write per-rate Tx power values to hardware. */ rtwn_write_txpower(sc, i, power); } } static void rtwn_scan_start(struct ieee80211com *ic) { /* XXX do nothing? */ } static void rtwn_scan_end(struct ieee80211com *ic) { /* XXX do nothing? */ } static void rtwn_set_channel(struct ieee80211com *ic) { struct rtwn_softc *sc = ic->ic_softc; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); RTWN_LOCK(sc); if (vap->iv_state == IEEE80211_S_SCAN) { /* Make link LED blink during scan. */ rtwn_set_led(sc, RTWN_LED_LINK, !sc->ledlink); } rtwn_set_chan(sc, ic->ic_curchan, NULL); RTWN_UNLOCK(sc); } static void rtwn_update_mcast(struct ieee80211com *ic) { /* XXX do nothing? */ } static void rtwn_set_chan(struct rtwn_softc *sc, struct ieee80211_channel *c, struct ieee80211_channel *extc) { struct ieee80211com *ic = &sc->sc_ic; u_int chan; int i; chan = ieee80211_chan2ieee(ic, c); /* XXX center freq! */ if (chan == 0 || chan == IEEE80211_CHAN_ANY) { device_printf(sc->sc_dev, "%s: invalid channel %x\n", __func__, chan); return; } /* Set Tx power for this new channel. */ rtwn_set_txpower(sc, c, extc); for (i = 0; i < sc->nrxchains; i++) { rtwn_rf_write(sc, i, R92C_RF_CHNLBW, RW(sc->rf_chnlbw[i], R92C_RF_CHNLBW_CHNL, chan)); } #ifndef IEEE80211_NO_HT if (extc != NULL) { uint32_t reg; /* Is secondary channel below or above primary? */ int prichlo = c->ic_freq < extc->ic_freq; rtwn_write_1(sc, R92C_BWOPMODE, rtwn_read_1(sc, R92C_BWOPMODE) & ~R92C_BWOPMODE_20MHZ); reg = rtwn_read_1(sc, R92C_RRSR + 2); reg = (reg & ~0x6f) | (prichlo ? 1 : 2) << 5; rtwn_write_1(sc, R92C_RRSR + 2, reg); rtwn_bb_write(sc, R92C_FPGA0_RFMOD, rtwn_bb_read(sc, R92C_FPGA0_RFMOD) | R92C_RFMOD_40MHZ); rtwn_bb_write(sc, R92C_FPGA1_RFMOD, rtwn_bb_read(sc, R92C_FPGA1_RFMOD) | R92C_RFMOD_40MHZ); /* Set CCK side band. */ reg = rtwn_bb_read(sc, R92C_CCK0_SYSTEM); reg = (reg & ~0x00000010) | (prichlo ? 0 : 1) << 4; rtwn_bb_write(sc, R92C_CCK0_SYSTEM, reg); reg = rtwn_bb_read(sc, R92C_OFDM1_LSTF); reg = (reg & ~0x00000c00) | (prichlo ? 1 : 2) << 10; rtwn_bb_write(sc, R92C_OFDM1_LSTF, reg); rtwn_bb_write(sc, R92C_FPGA0_ANAPARAM2, rtwn_bb_read(sc, R92C_FPGA0_ANAPARAM2) & ~R92C_FPGA0_ANAPARAM2_CBW20); reg = rtwn_bb_read(sc, 0x818); reg = (reg & ~0x0c000000) | (prichlo ? 2 : 1) << 26; rtwn_bb_write(sc, 0x818, reg); /* Select 40MHz bandwidth. */ rtwn_rf_write(sc, 0, R92C_RF_CHNLBW, (sc->rf_chnlbw[0] & ~0xfff) | chan); } else #endif { rtwn_write_1(sc, R92C_BWOPMODE, rtwn_read_1(sc, R92C_BWOPMODE) | R92C_BWOPMODE_20MHZ); rtwn_bb_write(sc, R92C_FPGA0_RFMOD, rtwn_bb_read(sc, R92C_FPGA0_RFMOD) & ~R92C_RFMOD_40MHZ); rtwn_bb_write(sc, R92C_FPGA1_RFMOD, rtwn_bb_read(sc, R92C_FPGA1_RFMOD) & ~R92C_RFMOD_40MHZ); rtwn_bb_write(sc, R92C_FPGA0_ANAPARAM2, rtwn_bb_read(sc, R92C_FPGA0_ANAPARAM2) | R92C_FPGA0_ANAPARAM2_CBW20); /* Select 20MHz bandwidth. */ rtwn_rf_write(sc, 0, R92C_RF_CHNLBW, (sc->rf_chnlbw[0] & ~0xfff) | R92C_RF_CHNLBW_BW20 | chan); } } static int rtwn_iq_calib_chain(struct rtwn_softc *sc, int chain, uint16_t tx[2], uint16_t rx[2]) { uint32_t status; int offset = chain * 0x20; if (chain == 0) { /* IQ calibration for chain 0. */ /* IQ calibration settings for chain 0. */ rtwn_bb_write(sc, 0xe30, 0x10008c1f); rtwn_bb_write(sc, 0xe34, 0x10008c1f); rtwn_bb_write(sc, 0xe38, 0x82140102); if (sc->ntxchains > 1) { rtwn_bb_write(sc, 0xe3c, 0x28160202); /* 2T */ /* IQ calibration settings for chain 1. */ rtwn_bb_write(sc, 0xe50, 0x10008c22); rtwn_bb_write(sc, 0xe54, 0x10008c22); rtwn_bb_write(sc, 0xe58, 0x82140102); rtwn_bb_write(sc, 0xe5c, 0x28160202); } else rtwn_bb_write(sc, 0xe3c, 0x28160502); /* 1T */ /* LO calibration settings. */ rtwn_bb_write(sc, 0xe4c, 0x001028d1); /* We're doing LO and IQ calibration in one shot. */ rtwn_bb_write(sc, 0xe48, 0xf9000000); rtwn_bb_write(sc, 0xe48, 0xf8000000); } else { /* IQ calibration for chain 1. */ /* We're doing LO and IQ calibration in one shot. */ rtwn_bb_write(sc, 0xe60, 0x00000002); rtwn_bb_write(sc, 0xe60, 0x00000000); } /* Give LO and IQ calibrations the time to complete. */ DELAY(1000); /* Read IQ calibration status. */ status = rtwn_bb_read(sc, 0xeac); if (status & (1 << (28 + chain * 3))) return (0); /* Tx failed. */ /* Read Tx IQ calibration results. */ tx[0] = (rtwn_bb_read(sc, 0xe94 + offset) >> 16) & 0x3ff; tx[1] = (rtwn_bb_read(sc, 0xe9c + offset) >> 16) & 0x3ff; if (tx[0] == 0x142 || tx[1] == 0x042) return (0); /* Tx failed. */ if (status & (1 << (27 + chain * 3))) return (1); /* Rx failed. */ /* Read Rx IQ calibration results. */ rx[0] = (rtwn_bb_read(sc, 0xea4 + offset) >> 16) & 0x3ff; rx[1] = (rtwn_bb_read(sc, 0xeac + offset) >> 16) & 0x3ff; if (rx[0] == 0x132 || rx[1] == 0x036) return (1); /* Rx failed. */ return (3); /* Both Tx and Rx succeeded. */ } static void rtwn_iq_calib_run(struct rtwn_softc *sc, int n, uint16_t tx[2][2], uint16_t rx[2][2]) { /* Registers to save and restore during IQ calibration. */ struct iq_cal_regs { uint32_t adda[16]; uint8_t txpause; uint8_t bcn_ctrl; uint8_t ustime_tsf; uint32_t gpio_muxcfg; uint32_t ofdm0_trxpathena; uint32_t ofdm0_trmuxpar; uint32_t fpga0_rfifacesw1; } iq_cal_regs; static const uint16_t reg_adda[16] = { 0x85c, 0xe6c, 0xe70, 0xe74, 0xe78, 0xe7c, 0xe80, 0xe84, 0xe88, 0xe8c, 0xed0, 0xed4, 0xed8, 0xedc, 0xee0, 0xeec }; int i, chain; uint32_t hssi_param1; if (n == 0) { for (i = 0; i < nitems(reg_adda); i++) iq_cal_regs.adda[i] = rtwn_bb_read(sc, reg_adda[i]); iq_cal_regs.txpause = rtwn_read_1(sc, R92C_TXPAUSE); iq_cal_regs.bcn_ctrl = rtwn_read_1(sc, R92C_BCN_CTRL); iq_cal_regs.ustime_tsf = rtwn_read_1(sc, R92C_USTIME_TSF); iq_cal_regs.gpio_muxcfg = rtwn_read_4(sc, R92C_GPIO_MUXCFG); } if (sc->ntxchains == 1) { rtwn_bb_write(sc, reg_adda[0], 0x0b1b25a0); for (i = 1; i < nitems(reg_adda); i++) rtwn_bb_write(sc, reg_adda[i], 0x0bdb25a0); } else { for (i = 0; i < nitems(reg_adda); i++) rtwn_bb_write(sc, reg_adda[i], 0x04db25a4); } hssi_param1 = rtwn_bb_read(sc, R92C_HSSI_PARAM1(0)); if (!(hssi_param1 & R92C_HSSI_PARAM1_PI)) { rtwn_bb_write(sc, R92C_HSSI_PARAM1(0), hssi_param1 | R92C_HSSI_PARAM1_PI); rtwn_bb_write(sc, R92C_HSSI_PARAM1(1), hssi_param1 | R92C_HSSI_PARAM1_PI); } if (n == 0) { iq_cal_regs.ofdm0_trxpathena = rtwn_bb_read(sc, R92C_OFDM0_TRXPATHENA); iq_cal_regs.ofdm0_trmuxpar = rtwn_bb_read(sc, R92C_OFDM0_TRMUXPAR); iq_cal_regs.fpga0_rfifacesw1 = rtwn_bb_read(sc, R92C_FPGA0_RFIFACESW(1)); } rtwn_bb_write(sc, R92C_OFDM0_TRXPATHENA, 0x03a05600); rtwn_bb_write(sc, R92C_OFDM0_TRMUXPAR, 0x000800e4); rtwn_bb_write(sc, R92C_FPGA0_RFIFACESW(1), 0x22204000); if (sc->ntxchains > 1) { rtwn_bb_write(sc, R92C_LSSI_PARAM(0), 0x00010000); rtwn_bb_write(sc, R92C_LSSI_PARAM(1), 0x00010000); } rtwn_write_1(sc, R92C_TXPAUSE, 0x3f); rtwn_write_1(sc, R92C_BCN_CTRL, iq_cal_regs.bcn_ctrl & ~(0x08)); rtwn_write_1(sc, R92C_USTIME_TSF, iq_cal_regs.ustime_tsf & ~(0x08)); rtwn_write_1(sc, R92C_GPIO_MUXCFG, iq_cal_regs.gpio_muxcfg & ~(0x20)); rtwn_bb_write(sc, 0x0b68, 0x00080000); if (sc->ntxchains > 1) rtwn_bb_write(sc, 0x0b6c, 0x00080000); rtwn_bb_write(sc, 0x0e28, 0x80800000); rtwn_bb_write(sc, 0x0e40, 0x01007c00); rtwn_bb_write(sc, 0x0e44, 0x01004800); rtwn_bb_write(sc, 0x0b68, 0x00080000); for (chain = 0; chain < sc->ntxchains; chain++) { if (chain > 0) { /* Put chain 0 on standby. */ rtwn_bb_write(sc, 0x0e28, 0x00); rtwn_bb_write(sc, R92C_LSSI_PARAM(0), 0x00010000); rtwn_bb_write(sc, 0x0e28, 0x80800000); /* Enable chain 1. */ for (i = 0; i < nitems(reg_adda); i++) rtwn_bb_write(sc, reg_adda[i], 0x0b1b25a4); } /* Run IQ calibration twice. */ for (i = 0; i < 2; i++) { int ret; ret = rtwn_iq_calib_chain(sc, chain, tx[chain], rx[chain]); if (ret == 0) { DPRINTF(("%s: chain %d: Tx failed.\n", __func__, chain)); tx[chain][0] = 0xff; tx[chain][1] = 0xff; rx[chain][0] = 0xff; rx[chain][1] = 0xff; } else if (ret == 1) { DPRINTF(("%s: chain %d: Rx failed.\n", __func__, chain)); rx[chain][0] = 0xff; rx[chain][1] = 0xff; } else if (ret == 3) { DPRINTF(("%s: chain %d: Both Tx and Rx " "succeeded.\n", __func__, chain)); } } DPRINTF(("%s: results for run %d chain %d: tx[0]=0x%x, " "tx[1]=0x%x rx[0]=0x%x rx[1]=0x%x\n", __func__, n, chain, tx[chain][0], tx[chain][1], rx[chain][0], rx[chain][1])); } rtwn_bb_write(sc, R92C_OFDM0_TRXPATHENA, iq_cal_regs.ofdm0_trxpathena); rtwn_bb_write(sc, R92C_FPGA0_RFIFACESW(1), iq_cal_regs.fpga0_rfifacesw1); rtwn_bb_write(sc, R92C_OFDM0_TRMUXPAR, iq_cal_regs.ofdm0_trmuxpar); rtwn_bb_write(sc, 0x0e28, 0x00); rtwn_bb_write(sc, R92C_LSSI_PARAM(0), 0x00032ed3); if (sc->ntxchains > 1) rtwn_bb_write(sc, R92C_LSSI_PARAM(1), 0x00032ed3); if (n != 0) { if (!(hssi_param1 & R92C_HSSI_PARAM1_PI)) { rtwn_bb_write(sc, R92C_HSSI_PARAM1(0), hssi_param1); rtwn_bb_write(sc, R92C_HSSI_PARAM1(1), hssi_param1); } for (i = 0; i < nitems(reg_adda); i++) rtwn_bb_write(sc, reg_adda[i], iq_cal_regs.adda[i]); rtwn_write_1(sc, R92C_TXPAUSE, iq_cal_regs.txpause); rtwn_write_1(sc, R92C_BCN_CTRL, iq_cal_regs.bcn_ctrl); rtwn_write_1(sc, R92C_USTIME_TSF, iq_cal_regs.ustime_tsf); rtwn_write_4(sc, R92C_GPIO_MUXCFG, iq_cal_regs.gpio_muxcfg); } } #define RTWN_IQ_CAL_MAX_TOLERANCE 5 static int rtwn_iq_calib_compare_results(uint16_t tx1[2][2], uint16_t rx1[2][2], uint16_t tx2[2][2], uint16_t rx2[2][2], int ntxchains) { int chain, i, tx_ok[2], rx_ok[2]; tx_ok[0] = tx_ok[1] = rx_ok[0] = rx_ok[1] = 0; for (chain = 0; chain < ntxchains; chain++) { for (i = 0; i < 2; i++) { if (tx1[chain][i] == 0xff || tx2[chain][i] == 0xff || rx1[chain][i] == 0xff || rx2[chain][i] == 0xff) continue; tx_ok[chain] = (abs(tx1[chain][i] - tx2[chain][i]) <= RTWN_IQ_CAL_MAX_TOLERANCE); rx_ok[chain] = (abs(rx1[chain][i] - rx2[chain][i]) <= RTWN_IQ_CAL_MAX_TOLERANCE); } } if (ntxchains > 1) return (tx_ok[0] && tx_ok[1] && rx_ok[0] && rx_ok[1]); else return (tx_ok[0] && rx_ok[0]); } #undef RTWN_IQ_CAL_MAX_TOLERANCE static void rtwn_iq_calib_write_results(struct rtwn_softc *sc, uint16_t tx[2], uint16_t rx[2], int chain) { uint32_t reg, val, x; long y, tx_c; if (tx[0] == 0xff || tx[1] == 0xff) return; reg = rtwn_bb_read(sc, R92C_OFDM0_TXIQIMBALANCE(chain)); val = ((reg >> 22) & 0x3ff); x = tx[0]; if (x & 0x0200) x |= 0xfc00; reg = (((x * val) >> 8) & 0x3ff); rtwn_bb_write(sc, R92C_OFDM0_TXIQIMBALANCE(chain), reg); reg = rtwn_bb_read(sc, R92C_OFDM0_ECCATHRESHOLD); if (((x * val) >> 7) & 0x01) reg |= 0x80000000; else reg &= ~0x80000000; rtwn_bb_write(sc, R92C_OFDM0_ECCATHRESHOLD, reg); y = tx[1]; if (y & 0x00000200) y |= 0xfffffc00; tx_c = (y * val) >> 8; reg = rtwn_bb_read(sc, R92C_OFDM0_TXAFE(chain)); reg |= ((((tx_c & 0x3c0) >> 6) << 24) & 0xf0000000); rtwn_bb_write(sc, R92C_OFDM0_TXAFE(chain), reg); reg = rtwn_bb_read(sc, R92C_OFDM0_TXIQIMBALANCE(chain)); reg |= (((tx_c & 0x3f) << 16) & 0x003F0000); rtwn_bb_write(sc, R92C_OFDM0_TXIQIMBALANCE(chain), reg); reg = rtwn_bb_read(sc, R92C_OFDM0_ECCATHRESHOLD); if (((y * val) >> 7) & 0x01) reg |= 0x20000000; else reg &= ~0x20000000; rtwn_bb_write(sc, R92C_OFDM0_ECCATHRESHOLD, reg); if (rx[0] == 0xff || rx[1] == 0xff) return; reg = rtwn_bb_read(sc, R92C_OFDM0_RXIQIMBALANCE(chain)); reg |= (rx[0] & 0x3ff); rtwn_bb_write(sc, R92C_OFDM0_RXIQIMBALANCE(chain), reg); reg |= (((rx[1] & 0x03f) << 8) & 0xFC00); rtwn_bb_write(sc, R92C_OFDM0_RXIQIMBALANCE(chain), reg); if (chain == 0) { reg = rtwn_bb_read(sc, R92C_OFDM0_RXIQEXTANTA); reg |= (((rx[1] & 0xf) >> 6) & 0x000f); rtwn_bb_write(sc, R92C_OFDM0_RXIQEXTANTA, reg); } else { reg = rtwn_bb_read(sc, R92C_OFDM0_AGCRSSITABLE); reg |= ((((rx[1] & 0xf) >> 6) << 12) & 0xf000); rtwn_bb_write(sc, R92C_OFDM0_AGCRSSITABLE, reg); } } #define RTWN_IQ_CAL_NRUN 3 static void rtwn_iq_calib(struct rtwn_softc *sc) { uint16_t tx[RTWN_IQ_CAL_NRUN][2][2], rx[RTWN_IQ_CAL_NRUN][2][2]; int n, valid; valid = 0; for (n = 0; n < RTWN_IQ_CAL_NRUN; n++) { rtwn_iq_calib_run(sc, n, tx[n], rx[n]); if (n == 0) continue; /* Valid results remain stable after consecutive runs. */ valid = rtwn_iq_calib_compare_results(tx[n - 1], rx[n - 1], tx[n], rx[n], sc->ntxchains); if (valid) break; } if (valid) { rtwn_iq_calib_write_results(sc, tx[n][0], rx[n][0], 0); if (sc->ntxchains > 1) rtwn_iq_calib_write_results(sc, tx[n][1], rx[n][1], 1); } } #undef RTWN_IQ_CAL_NRUN static void rtwn_lc_calib(struct rtwn_softc *sc) { uint32_t rf_ac[2]; uint8_t txmode; int i; txmode = rtwn_read_1(sc, R92C_OFDM1_LSTF + 3); if ((txmode & 0x70) != 0) { /* Disable all continuous Tx. */ rtwn_write_1(sc, R92C_OFDM1_LSTF + 3, txmode & ~0x70); /* Set RF mode to standby mode. */ for (i = 0; i < sc->nrxchains; i++) { rf_ac[i] = rtwn_rf_read(sc, i, R92C_RF_AC); rtwn_rf_write(sc, i, R92C_RF_AC, RW(rf_ac[i], R92C_RF_AC_MODE, R92C_RF_AC_MODE_STANDBY)); } } else { /* Block all Tx queues. */ rtwn_write_1(sc, R92C_TXPAUSE, 0xff); } /* Start calibration. */ rtwn_rf_write(sc, 0, R92C_RF_CHNLBW, rtwn_rf_read(sc, 0, R92C_RF_CHNLBW) | R92C_RF_CHNLBW_LCSTART); /* Give calibration the time to complete. */ DELAY(100); /* Restore configuration. */ if ((txmode & 0x70) != 0) { /* Restore Tx mode. */ rtwn_write_1(sc, R92C_OFDM1_LSTF + 3, txmode); /* Restore RF mode. */ for (i = 0; i < sc->nrxchains; i++) rtwn_rf_write(sc, i, R92C_RF_AC, rf_ac[i]); } else { /* Unblock all Tx queues. */ rtwn_write_1(sc, R92C_TXPAUSE, 0x00); } } static void rtwn_temp_calib(struct rtwn_softc *sc) { int temp; if (sc->thcal_state == 0) { /* Start measuring temperature. */ rtwn_rf_write(sc, 0, R92C_RF_T_METER, 0x60); sc->thcal_state = 1; return; } sc->thcal_state = 0; /* Read measured temperature. */ temp = rtwn_rf_read(sc, 0, R92C_RF_T_METER) & 0x1f; if (temp == 0) /* Read failed, skip. */ return; DPRINTFN(2, ("temperature=%d\n", temp)); /* * Redo IQ and LC calibration if temperature changed significantly * since last calibration. */ if (sc->thcal_lctemp == 0) { /* First calibration is performed in rtwn_init(). */ sc->thcal_lctemp = temp; } else if (abs(temp - sc->thcal_lctemp) > 1) { DPRINTF(("IQ/LC calib triggered by temp: %d -> %d\n", sc->thcal_lctemp, temp)); rtwn_iq_calib(sc); rtwn_lc_calib(sc); /* Record temperature of last calibration. */ sc->thcal_lctemp = temp; } } static void rtwn_init_locked(struct rtwn_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); uint32_t reg; uint8_t macaddr[IEEE80211_ADDR_LEN]; int i, error; RTWN_LOCK_ASSERT(sc); /* Init firmware commands ring. */ sc->fwcur = 0; /* Power on adapter. */ error = rtwn_power_on(sc); if (error != 0) { device_printf(sc->sc_dev, "could not power on adapter\n"); goto fail; } /* Initialize DMA. */ error = rtwn_dma_init(sc); if (error != 0) { device_printf(sc->sc_dev, "could not initialize DMA\n"); goto fail; } /* Set info size in Rx descriptors (in 64-bit words). */ rtwn_write_1(sc, R92C_RX_DRVINFO_SZ, 4); /* Disable interrupts. */ rtwn_write_4(sc, R92C_HISR, 0x00000000); rtwn_write_4(sc, R92C_HIMR, 0x00000000); /* Set MAC address. */ IEEE80211_ADDR_COPY(macaddr, vap ? vap->iv_myaddr : ic->ic_macaddr); for (i = 0; i < IEEE80211_ADDR_LEN; i++) rtwn_write_1(sc, R92C_MACID + i, macaddr[i]); /* Set initial network type. */ reg = rtwn_read_4(sc, R92C_CR); reg = RW(reg, R92C_CR_NETTYPE, R92C_CR_NETTYPE_INFRA); rtwn_write_4(sc, R92C_CR, reg); rtwn_rxfilter_init(sc); reg = rtwn_read_4(sc, R92C_RRSR); reg = RW(reg, R92C_RRSR_RATE_BITMAP, R92C_RRSR_RATE_ALL); rtwn_write_4(sc, R92C_RRSR, reg); /* Set short/long retry limits. */ rtwn_write_2(sc, R92C_RL, SM(R92C_RL_SRL, 0x07) | SM(R92C_RL_LRL, 0x07)); /* Initialize EDCA parameters. */ rtwn_edca_init(sc); /* Set data and response automatic rate fallback retry counts. */ rtwn_write_4(sc, R92C_DARFRC + 0, 0x01000000); rtwn_write_4(sc, R92C_DARFRC + 4, 0x07060504); rtwn_write_4(sc, R92C_RARFRC + 0, 0x01000000); rtwn_write_4(sc, R92C_RARFRC + 4, 0x07060504); rtwn_write_2(sc, R92C_FWHW_TXQ_CTRL, 0x1f80); /* Set ACK timeout. */ rtwn_write_1(sc, R92C_ACKTO, 0x40); /* Initialize beacon parameters. */ rtwn_write_2(sc, R92C_TBTT_PROHIBIT, 0x6404); rtwn_write_1(sc, R92C_DRVERLYINT, 0x05); rtwn_write_1(sc, R92C_BCNDMATIM, 0x02); rtwn_write_2(sc, R92C_BCNTCFG, 0x660f); /* Setup AMPDU aggregation. */ rtwn_write_4(sc, R92C_AGGLEN_LMT, 0x99997631); /* MCS7~0 */ rtwn_write_1(sc, R92C_AGGR_BREAK_TIME, 0x16); rtwn_write_1(sc, R92C_BCN_MAX_ERR, 0xff); rtwn_write_1(sc, R92C_BCN_CTRL, R92C_BCN_CTRL_DIS_TSF_UDT0); rtwn_write_4(sc, R92C_PIFS, 0x1c); rtwn_write_4(sc, R92C_MCUTST_1, 0x0); /* Load 8051 microcode. */ error = rtwn_load_firmware(sc); if (error != 0) goto fail; /* Initialize MAC/BB/RF blocks. */ rtwn_mac_init(sc); rtwn_bb_init(sc); rtwn_rf_init(sc); /* Turn CCK and OFDM blocks on. */ reg = rtwn_bb_read(sc, R92C_FPGA0_RFMOD); reg |= R92C_RFMOD_CCK_EN; rtwn_bb_write(sc, R92C_FPGA0_RFMOD, reg); reg = rtwn_bb_read(sc, R92C_FPGA0_RFMOD); reg |= R92C_RFMOD_OFDM_EN; rtwn_bb_write(sc, R92C_FPGA0_RFMOD, reg); /* Clear per-station keys table. */ rtwn_cam_init(sc); /* Enable hardware sequence numbering. */ rtwn_write_1(sc, R92C_HWSEQ_CTRL, 0xff); /* Perform LO and IQ calibrations. */ rtwn_iq_calib(sc); /* Perform LC calibration. */ rtwn_lc_calib(sc); rtwn_pa_bias_init(sc); /* Initialize GPIO setting. */ rtwn_write_1(sc, R92C_GPIO_MUXCFG, rtwn_read_1(sc, R92C_GPIO_MUXCFG) & ~R92C_GPIO_MUXCFG_ENBT); /* Fix for lower temperature. */ rtwn_write_1(sc, 0x15, 0xe9); /* CLear pending interrupts. */ rtwn_write_4(sc, R92C_HISR, 0xffffffff); /* Enable interrupts. */ rtwn_write_4(sc, R92C_HIMR, RTWN_INT_ENABLE); sc->sc_flags |= RTWN_RUNNING; callout_reset(&sc->watchdog_to, hz, rtwn_watchdog, sc); return; fail: rtwn_stop_locked(sc); } static void rtwn_init(struct rtwn_softc *sc) { RTWN_LOCK(sc); rtwn_init_locked(sc); RTWN_UNLOCK(sc); if (sc->sc_flags & RTWN_RUNNING) ieee80211_start_all(&sc->sc_ic); } static void rtwn_stop_locked(struct rtwn_softc *sc) { uint16_t reg; int i; RTWN_LOCK_ASSERT(sc); sc->sc_tx_timer = 0; callout_stop(&sc->watchdog_to); callout_stop(&sc->calib_to); sc->sc_flags &= ~RTWN_RUNNING; /* Disable interrupts. */ rtwn_write_4(sc, R92C_HISR, 0x00000000); rtwn_write_4(sc, R92C_HIMR, 0x00000000); /* Stop hardware. */ rtwn_write_1(sc, R92C_TXPAUSE, 0xff); rtwn_write_1(sc, R92C_RF_CTRL, 0x00); reg = rtwn_read_1(sc, R92C_SYS_FUNC_EN); reg |= R92C_SYS_FUNC_EN_BB_GLB_RST; rtwn_write_1(sc, R92C_SYS_FUNC_EN, reg); reg &= ~R92C_SYS_FUNC_EN_BB_GLB_RST; rtwn_write_1(sc, R92C_SYS_FUNC_EN, reg); reg = rtwn_read_2(sc, R92C_CR); reg &= ~(R92C_CR_HCI_TXDMA_EN | R92C_CR_HCI_RXDMA_EN | R92C_CR_TXDMA_EN | R92C_CR_RXDMA_EN | R92C_CR_PROTOCOL_EN | R92C_CR_SCHEDULE_EN | R92C_CR_MACTXEN | R92C_CR_MACRXEN | R92C_CR_ENSEC); rtwn_write_2(sc, R92C_CR, reg); if (rtwn_read_1(sc, R92C_MCUFWDL) & R92C_MCUFWDL_RAM_DL_SEL) rtwn_fw_reset(sc); /* TODO: linux does additional btcoex stuff here */ rtwn_write_2(sc, R92C_AFE_PLL_CTRL, 0x80); /* linux magic number */ rtwn_write_1(sc, R92C_SPS0_CTRL, 0x23); /* ditto */ rtwn_write_1(sc, R92C_AFE_XTAL_CTRL, 0x0e); /* different with btcoex */ rtwn_write_1(sc, R92C_RSV_CTRL, 0x0e); rtwn_write_1(sc, R92C_APS_FSMCO, R92C_APS_FSMCO_PDN_EN); for (i = 0; i < RTWN_NTXQUEUES; i++) rtwn_reset_tx_list(sc, i); rtwn_reset_rx_list(sc); } static void rtwn_stop(struct rtwn_softc *sc) { RTWN_LOCK(sc); rtwn_stop_locked(sc); RTWN_UNLOCK(sc); } static void rtwn_intr(void *arg) { struct rtwn_softc *sc = arg; uint32_t status; int i; RTWN_LOCK(sc); status = rtwn_read_4(sc, R92C_HISR); if (status == 0 || status == 0xffffffff) { RTWN_UNLOCK(sc); return; } /* Disable interrupts. */ rtwn_write_4(sc, R92C_HIMR, 0x00000000); /* Ack interrupts. */ rtwn_write_4(sc, R92C_HISR, status); /* Vendor driver treats RX errors like ROK... */ if (status & (R92C_IMR_ROK | R92C_IMR_RXFOVW | R92C_IMR_RDU)) { bus_dmamap_sync(sc->rx_ring.desc_dmat, sc->rx_ring.desc_map, BUS_DMASYNC_POSTREAD); for (i = 0; i < RTWN_RX_LIST_COUNT; i++) { struct r92c_rx_desc *rx_desc = &sc->rx_ring.desc[i]; struct rtwn_rx_data *rx_data = &sc->rx_ring.rx_data[i]; if (le32toh(rx_desc->rxdw0) & R92C_RXDW0_OWN) continue; rtwn_rx_frame(sc, rx_desc, rx_data, i); } } if (status & R92C_IMR_BDOK) rtwn_tx_done(sc, RTWN_BEACON_QUEUE); if (status & R92C_IMR_HIGHDOK) rtwn_tx_done(sc, RTWN_HIGH_QUEUE); if (status & R92C_IMR_MGNTDOK) rtwn_tx_done(sc, RTWN_MGNT_QUEUE); if (status & R92C_IMR_BKDOK) rtwn_tx_done(sc, RTWN_BK_QUEUE); if (status & R92C_IMR_BEDOK) rtwn_tx_done(sc, RTWN_BE_QUEUE); if (status & R92C_IMR_VIDOK) rtwn_tx_done(sc, RTWN_VI_QUEUE); if (status & R92C_IMR_VODOK) rtwn_tx_done(sc, RTWN_VO_QUEUE); /* Enable interrupts. */ rtwn_write_4(sc, R92C_HIMR, RTWN_INT_ENABLE); RTWN_UNLOCK(sc); } static void rtwn_hw_reset(void *arg0, int pending) { struct rtwn_softc *sc = arg0; struct ieee80211com *ic = &sc->sc_ic; rtwn_stop(sc); rtwn_init(sc); ieee80211_notify_radio(ic, 1); } Index: projects/clang380-import/sys/dev/sfxge/common/efsys.h =================================================================== --- projects/clang380-import/sys/dev/sfxge/common/efsys.h (revision 293686) +++ projects/clang380-import/sys/dev/sfxge/common/efsys.h (revision 293687) @@ -1,1230 +1,1231 @@ /*- * Copyright (c) 2010-2015 Solarflare Communications Inc. * All rights reserved. * * This software was developed in part by Philip Paeps under contract for * Solarflare Communications, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * The views and conclusions contained in the software and documentation are * those of the authors and should not be interpreted as representing official * policies, either expressed or implied, of the FreeBSD Project. * * $FreeBSD$ */ #ifndef _SYS_EFSYS_H #define _SYS_EFSYS_H #ifdef __cplusplus extern "C" { #endif #include #include #include #include #include #include #include #include #include #include #include #include #define EFSYS_HAS_UINT64 1 #if defined(__x86_64__) #define EFSYS_USE_UINT64 1 #else #define EFSYS_USE_UINT64 0 #endif #define EFSYS_HAS_SSE2_M128 0 #if _BYTE_ORDER == _BIG_ENDIAN #define EFSYS_IS_BIG_ENDIAN 1 #define EFSYS_IS_LITTLE_ENDIAN 0 #elif _BYTE_ORDER == _LITTLE_ENDIAN #define EFSYS_IS_BIG_ENDIAN 0 #define EFSYS_IS_LITTLE_ENDIAN 1 #endif #include "efx_types.h" /* Common code requires this */ #if __FreeBSD_version < 800068 #define memmove(d, s, l) bcopy(s, d, l) #endif /* FreeBSD equivalents of Solaris things */ #ifndef _NOTE #define _NOTE(s) #endif #ifndef B_FALSE #define B_FALSE FALSE #endif #ifndef B_TRUE #define B_TRUE TRUE #endif #ifndef IS_P2ALIGNED #define IS_P2ALIGNED(v, a) ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0) #endif #ifndef P2ROUNDUP #define P2ROUNDUP(x, align) (-(-(x) & -(align))) #endif #ifndef P2ALIGN #define P2ALIGN(_x, _a) ((_x) & -(_a)) #endif #ifndef IS2P #define ISP2(x) (((x) & ((x) - 1)) == 0) #endif #if defined(__x86_64__) && __FreeBSD_version >= 1000000 #define SFXGE_USE_BUS_SPACE_8 1 #if !defined(bus_space_read_stream_8) #define bus_space_read_stream_8(t, h, o) \ bus_space_read_8((t), (h), (o)) #define bus_space_write_stream_8(t, h, o, v) \ bus_space_write_8((t), (h), (o), (v)) #endif #endif #define ENOTACTIVE EINVAL /* Memory type to use on FreeBSD */ MALLOC_DECLARE(M_SFXGE); /* Machine dependend prefetch wrappers */ #if defined(__i386__) || defined(__amd64__) static __inline void prefetch_read_many(void *addr) { __asm__( "prefetcht0 (%0)" : : "r" (addr)); } static __inline void prefetch_read_once(void *addr) { __asm__( "prefetchnta (%0)" : : "r" (addr)); } #elif defined(__sparc64__) static __inline void prefetch_read_many(void *addr) { __asm__( "prefetch [%0], 0" : : "r" (addr)); } static __inline void prefetch_read_once(void *addr) { __asm__( "prefetch [%0], 1" : : "r" (addr)); } #else static __inline void prefetch_read_many(void *addr) { } static __inline void prefetch_read_once(void *addr) { } #endif #if defined(__i386__) || defined(__amd64__) #include #include #endif static __inline void sfxge_map_mbuf_fast(bus_dma_tag_t tag, bus_dmamap_t map, struct mbuf *m, bus_dma_segment_t *seg) { #if defined(__i386__) || defined(__amd64__) seg->ds_addr = pmap_kextract(mtod(m, vm_offset_t)); seg->ds_len = m->m_len; #else int nsegstmp; bus_dmamap_load_mbuf_sg(tag, map, m, seg, &nsegstmp, 0); #endif } /* Modifiers used for Windows builds */ #define __in #define __in_opt #define __in_ecount(_n) #define __in_ecount_opt(_n) #define __in_bcount(_n) #define __in_bcount_opt(_n) #define __out #define __out_opt #define __out_ecount(_n) #define __out_ecount_opt(_n) #define __out_bcount(_n) #define __out_bcount_opt(_n) #define __deref_out #define __inout #define __inout_opt #define __inout_ecount(_n) #define __inout_ecount_opt(_n) #define __inout_bcount(_n) #define __inout_bcount_opt(_n) #define __inout_bcount_full_opt(_n) #define __deref_out_bcount_opt(n) #define __checkReturn #define __success(_x) #define __drv_when(_p, _c) /* Code inclusion options */ #define EFSYS_OPT_NAMES 1 #define EFSYS_OPT_FALCON 0 #define EFSYS_OPT_FALCON_NIC_CFG_OVERRIDE 0 #define EFSYS_OPT_SIENA 1 #define EFSYS_OPT_HUNTINGTON 1 +#define EFSYS_OPT_MEDFORD 0 #ifdef DEBUG #define EFSYS_OPT_CHECK_REG 1 #else #define EFSYS_OPT_CHECK_REG 0 #endif #define EFSYS_OPT_MCDI 1 #define EFSYS_OPT_MCDI_LOGGING 0 #define EFSYS_OPT_MCDI_PROXY_AUTH 0 #define EFSYS_OPT_MAC_FALCON_GMAC 0 #define EFSYS_OPT_MAC_FALCON_XMAC 0 #define EFSYS_OPT_MAC_STATS 1 #define EFSYS_OPT_LOOPBACK 0 #define EFSYS_OPT_MON_NULL 0 #define EFSYS_OPT_MON_LM87 0 #define EFSYS_OPT_MON_MAX6647 0 #define EFSYS_OPT_MON_MCDI 0 #define EFSYS_OPT_MON_STATS 0 #define EFSYS_OPT_PHY_NULL 0 #define EFSYS_OPT_PHY_QT2022C2 0 #define EFSYS_OPT_PHY_SFX7101 0 #define EFSYS_OPT_PHY_TXC43128 0 #define EFSYS_OPT_PHY_SFT9001 0 #define EFSYS_OPT_PHY_QT2025C 0 #define EFSYS_OPT_PHY_STATS 1 #define EFSYS_OPT_PHY_PROPS 0 #define EFSYS_OPT_PHY_BIST 0 #define EFSYS_OPT_BIST 1 #define EFSYS_OPT_PHY_LED_CONTROL 1 #define EFSYS_OPT_PHY_FLAGS 0 #define EFSYS_OPT_VPD 1 #define EFSYS_OPT_NVRAM 1 #define EFSYS_OPT_NVRAM_FALCON_BOOTROM 0 #define EFSYS_OPT_NVRAM_SFT9001 0 #define EFSYS_OPT_NVRAM_SFX7101 0 #define EFSYS_OPT_BOOTCFG 0 #define EFSYS_OPT_PCIE_TUNE 0 #define EFSYS_OPT_DIAG 0 #define EFSYS_OPT_WOL 1 #define EFSYS_OPT_RX_SCALE 1 #define EFSYS_OPT_QSTATS 1 #define EFSYS_OPT_FILTER 1 #define EFSYS_OPT_MCAST_FILTER_LIST 1 #define EFSYS_OPT_RX_SCATTER 0 #define EFSYS_OPT_RX_HDR_SPLIT 0 #define EFSYS_OPT_EV_PREFETCH 0 #define EFSYS_OPT_DECODE_INTR_FATAL 1 /* ID */ typedef struct __efsys_identifier_s efsys_identifier_t; /* PROBE */ #ifndef DTRACE_PROBE #define EFSYS_PROBE(_name) #define EFSYS_PROBE1(_name, _type1, _arg1) #define EFSYS_PROBE2(_name, _type1, _arg1, _type2, _arg2) #define EFSYS_PROBE3(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3) #define EFSYS_PROBE4(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4) #define EFSYS_PROBE5(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4, _type5, _arg5) #define EFSYS_PROBE6(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4, _type5, _arg5, \ _type6, _arg6) #define EFSYS_PROBE7(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4, _type5, _arg5, \ _type6, _arg6, _type7, _arg7) #else /* DTRACE_PROBE */ #define EFSYS_PROBE(_name) \ DTRACE_PROBE(_name) #define EFSYS_PROBE1(_name, _type1, _arg1) \ DTRACE_PROBE1(_name, _type1, _arg1) #define EFSYS_PROBE2(_name, _type1, _arg1, _type2, _arg2) \ DTRACE_PROBE2(_name, _type1, _arg1, _type2, _arg2) #define EFSYS_PROBE3(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3) \ DTRACE_PROBE3(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3) #define EFSYS_PROBE4(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4) \ DTRACE_PROBE4(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4) #ifdef DTRACE_PROBE5 #define EFSYS_PROBE5(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4, _type5, _arg5) \ DTRACE_PROBE5(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4, _type5, _arg5) #else #define EFSYS_PROBE5(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4, _type5, _arg5) \ DTRACE_PROBE4(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4) #endif #ifdef DTRACE_PROBE6 #define EFSYS_PROBE6(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4, _type5, _arg5, \ _type6, _arg6) \ DTRACE_PROBE6(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4, _type5, _arg5, \ _type6, _arg6) #else #define EFSYS_PROBE6(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4, _type5, _arg5, \ _type6, _arg6) \ EFSYS_PROBE5(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4, _type5, _arg5) #endif #ifdef DTRACE_PROBE7 #define EFSYS_PROBE7(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4, _type5, _arg5, \ _type6, _arg6, _type7, _arg7) \ DTRACE_PROBE7(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4, _type5, _arg5, \ _type6, _arg6, _type7, _arg7) #else #define EFSYS_PROBE7(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4, _type5, _arg5, \ _type6, _arg6, _type7, _arg7) \ EFSYS_PROBE6(_name, _type1, _arg1, _type2, _arg2, \ _type3, _arg3, _type4, _arg4, _type5, _arg5, \ _type6, _arg6) #endif #endif /* DTRACE_PROBE */ /* DMA */ typedef uint64_t efsys_dma_addr_t; typedef struct efsys_mem_s { bus_dma_tag_t esm_tag; bus_dmamap_t esm_map; caddr_t esm_base; efsys_dma_addr_t esm_addr; } efsys_mem_t; #define EFSYS_MEM_ZERO(_esmp, _size) \ do { \ (void) memset((_esmp)->esm_base, 0, (_size)); \ \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #define EFSYS_MEM_READD(_esmp, _offset, _edp) \ do { \ uint32_t *addr; \ \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_dword_t)), \ ("not power of 2 aligned")); \ \ addr = (void *)((_esmp)->esm_base + (_offset)); \ \ (_edp)->ed_u32[0] = *addr; \ \ EFSYS_PROBE2(mem_readd, unsigned int, (_offset), \ uint32_t, (_edp)->ed_u32[0]); \ \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #if defined(__x86_64__) #define EFSYS_MEM_READQ(_esmp, _offset, _eqp) \ do { \ uint64_t *addr; \ \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)), \ ("not power of 2 aligned")); \ \ addr = (void *)((_esmp)->esm_base + (_offset)); \ \ (_eqp)->eq_u64[0] = *addr; \ \ EFSYS_PROBE3(mem_readq, unsigned int, (_offset), \ uint32_t, (_eqp)->eq_u32[1], \ uint32_t, (_eqp)->eq_u32[0]); \ \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #else #define EFSYS_MEM_READQ(_esmp, _offset, _eqp) \ do { \ uint32_t *addr; \ \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)), \ ("not power of 2 aligned")); \ \ addr = (void *)((_esmp)->esm_base + (_offset)); \ \ (_eqp)->eq_u32[0] = *addr++; \ (_eqp)->eq_u32[1] = *addr; \ \ EFSYS_PROBE3(mem_readq, unsigned int, (_offset), \ uint32_t, (_eqp)->eq_u32[1], \ uint32_t, (_eqp)->eq_u32[0]); \ \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #endif #if defined(__x86_64__) #define EFSYS_MEM_READO(_esmp, _offset, _eop) \ do { \ uint64_t *addr; \ \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_oword_t)), \ ("not power of 2 aligned")); \ \ addr = (void *)((_esmp)->esm_base + (_offset)); \ \ (_eop)->eo_u64[0] = *addr++; \ (_eop)->eo_u64[1] = *addr; \ \ EFSYS_PROBE5(mem_reado, unsigned int, (_offset), \ uint32_t, (_eop)->eo_u32[3], \ uint32_t, (_eop)->eo_u32[2], \ uint32_t, (_eop)->eo_u32[1], \ uint32_t, (_eop)->eo_u32[0]); \ \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #else #define EFSYS_MEM_READO(_esmp, _offset, _eop) \ do { \ uint32_t *addr; \ \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_oword_t)), \ ("not power of 2 aligned")); \ \ addr = (void *)((_esmp)->esm_base + (_offset)); \ \ (_eop)->eo_u32[0] = *addr++; \ (_eop)->eo_u32[1] = *addr++; \ (_eop)->eo_u32[2] = *addr++; \ (_eop)->eo_u32[3] = *addr; \ \ EFSYS_PROBE5(mem_reado, unsigned int, (_offset), \ uint32_t, (_eop)->eo_u32[3], \ uint32_t, (_eop)->eo_u32[2], \ uint32_t, (_eop)->eo_u32[1], \ uint32_t, (_eop)->eo_u32[0]); \ \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #endif #define EFSYS_MEM_WRITED(_esmp, _offset, _edp) \ do { \ uint32_t *addr; \ \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_dword_t)), \ ("not power of 2 aligned")); \ \ EFSYS_PROBE2(mem_writed, unsigned int, (_offset), \ uint32_t, (_edp)->ed_u32[0]); \ \ addr = (void *)((_esmp)->esm_base + (_offset)); \ \ *addr = (_edp)->ed_u32[0]; \ \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #if defined(__x86_64__) #define EFSYS_MEM_WRITEQ(_esmp, _offset, _eqp) \ do { \ uint64_t *addr; \ \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)), \ ("not power of 2 aligned")); \ \ EFSYS_PROBE3(mem_writeq, unsigned int, (_offset), \ uint32_t, (_eqp)->eq_u32[1], \ uint32_t, (_eqp)->eq_u32[0]); \ \ addr = (void *)((_esmp)->esm_base + (_offset)); \ \ *addr = (_eqp)->eq_u64[0]; \ \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #else #define EFSYS_MEM_WRITEQ(_esmp, _offset, _eqp) \ do { \ uint32_t *addr; \ \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)), \ ("not power of 2 aligned")); \ \ EFSYS_PROBE3(mem_writeq, unsigned int, (_offset), \ uint32_t, (_eqp)->eq_u32[1], \ uint32_t, (_eqp)->eq_u32[0]); \ \ addr = (void *)((_esmp)->esm_base + (_offset)); \ \ *addr++ = (_eqp)->eq_u32[0]; \ *addr = (_eqp)->eq_u32[1]; \ \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #endif #if defined(__x86_64__) #define EFSYS_MEM_WRITEO(_esmp, _offset, _eop) \ do { \ uint64_t *addr; \ \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_oword_t)), \ ("not power of 2 aligned")); \ \ EFSYS_PROBE5(mem_writeo, unsigned int, (_offset), \ uint32_t, (_eop)->eo_u32[3], \ uint32_t, (_eop)->eo_u32[2], \ uint32_t, (_eop)->eo_u32[1], \ uint32_t, (_eop)->eo_u32[0]); \ \ addr = (void *)((_esmp)->esm_base + (_offset)); \ \ *addr++ = (_eop)->eo_u64[0]; \ *addr = (_eop)->eo_u64[1]; \ \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #else #define EFSYS_MEM_WRITEO(_esmp, _offset, _eop) \ do { \ uint32_t *addr; \ \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_oword_t)), \ ("not power of 2 aligned")); \ \ EFSYS_PROBE5(mem_writeo, unsigned int, (_offset), \ uint32_t, (_eop)->eo_u32[3], \ uint32_t, (_eop)->eo_u32[2], \ uint32_t, (_eop)->eo_u32[1], \ uint32_t, (_eop)->eo_u32[0]); \ \ addr = (void *)((_esmp)->esm_base + (_offset)); \ \ *addr++ = (_eop)->eo_u32[0]; \ *addr++ = (_eop)->eo_u32[1]; \ *addr++ = (_eop)->eo_u32[2]; \ *addr = (_eop)->eo_u32[3]; \ \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #endif #define EFSYS_MEM_ADDR(_esmp) \ ((_esmp)->esm_addr) #define EFSYS_MEM_IS_NULL(_esmp) \ ((_esmp)->esm_base == NULL) /* BAR */ #define SFXGE_LOCK_NAME_MAX 16 typedef struct efsys_bar_s { struct mtx esb_lock; char esb_lock_name[SFXGE_LOCK_NAME_MAX]; bus_space_tag_t esb_tag; bus_space_handle_t esb_handle; int esb_rid; struct resource *esb_res; } efsys_bar_t; #define SFXGE_BAR_LOCK_INIT(_esbp, _ifname) \ do { \ snprintf((_esbp)->esb_lock_name, \ sizeof((_esbp)->esb_lock_name), \ "%s:bar", (_ifname)); \ mtx_init(&(_esbp)->esb_lock, (_esbp)->esb_lock_name, \ NULL, MTX_DEF); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #define SFXGE_BAR_LOCK_DESTROY(_esbp) \ mtx_destroy(&(_esbp)->esb_lock) #define SFXGE_BAR_LOCK(_esbp) \ mtx_lock(&(_esbp)->esb_lock) #define SFXGE_BAR_UNLOCK(_esbp) \ mtx_unlock(&(_esbp)->esb_lock) #define EFSYS_BAR_READD(_esbp, _offset, _edp, _lock) \ do { \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_dword_t)), \ ("not power of 2 aligned")); \ \ _NOTE(CONSTANTCONDITION) \ if (_lock) \ SFXGE_BAR_LOCK(_esbp); \ \ (_edp)->ed_u32[0] = bus_space_read_stream_4( \ (_esbp)->esb_tag, (_esbp)->esb_handle, \ (_offset)); \ \ EFSYS_PROBE2(bar_readd, unsigned int, (_offset), \ uint32_t, (_edp)->ed_u32[0]); \ \ _NOTE(CONSTANTCONDITION) \ if (_lock) \ SFXGE_BAR_UNLOCK(_esbp); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #if defined(SFXGE_USE_BUS_SPACE_8) #define EFSYS_BAR_READQ(_esbp, _offset, _eqp) \ do { \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)), \ ("not power of 2 aligned")); \ \ SFXGE_BAR_LOCK(_esbp); \ \ (_eqp)->eq_u64[0] = bus_space_read_stream_8( \ (_esbp)->esb_tag, (_esbp)->esb_handle, \ (_offset)); \ \ EFSYS_PROBE3(bar_readq, unsigned int, (_offset), \ uint32_t, (_eqp)->eq_u32[1], \ uint32_t, (_eqp)->eq_u32[0]); \ \ SFXGE_BAR_UNLOCK(_esbp); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #define EFSYS_BAR_READO(_esbp, _offset, _eop, _lock) \ do { \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_oword_t)), \ ("not power of 2 aligned")); \ \ _NOTE(CONSTANTCONDITION) \ if (_lock) \ SFXGE_BAR_LOCK(_esbp); \ \ (_eop)->eo_u64[0] = bus_space_read_stream_8( \ (_esbp)->esb_tag, (_esbp)->esb_handle, \ (_offset)); \ (_eop)->eo_u64[1] = bus_space_read_stream_8( \ (_esbp)->esb_tag, (_esbp)->esb_handle, \ (_offset) + 8); \ \ EFSYS_PROBE5(bar_reado, unsigned int, (_offset), \ uint32_t, (_eop)->eo_u32[3], \ uint32_t, (_eop)->eo_u32[2], \ uint32_t, (_eop)->eo_u32[1], \ uint32_t, (_eop)->eo_u32[0]); \ \ _NOTE(CONSTANTCONDITION) \ if (_lock) \ SFXGE_BAR_UNLOCK(_esbp); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #else #define EFSYS_BAR_READQ(_esbp, _offset, _eqp) \ do { \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)), \ ("not power of 2 aligned")); \ \ SFXGE_BAR_LOCK(_esbp); \ \ (_eqp)->eq_u32[0] = bus_space_read_stream_4( \ (_esbp)->esb_tag, (_esbp)->esb_handle, \ (_offset)); \ (_eqp)->eq_u32[1] = bus_space_read_stream_4( \ (_esbp)->esb_tag, (_esbp)->esb_handle, \ (_offset) + 4); \ \ EFSYS_PROBE3(bar_readq, unsigned int, (_offset), \ uint32_t, (_eqp)->eq_u32[1], \ uint32_t, (_eqp)->eq_u32[0]); \ \ SFXGE_BAR_UNLOCK(_esbp); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #define EFSYS_BAR_READO(_esbp, _offset, _eop, _lock) \ do { \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_oword_t)), \ ("not power of 2 aligned")); \ \ _NOTE(CONSTANTCONDITION) \ if (_lock) \ SFXGE_BAR_LOCK(_esbp); \ \ (_eop)->eo_u32[0] = bus_space_read_stream_4( \ (_esbp)->esb_tag, (_esbp)->esb_handle, \ (_offset)); \ (_eop)->eo_u32[1] = bus_space_read_stream_4( \ (_esbp)->esb_tag, (_esbp)->esb_handle, \ (_offset) + 4); \ (_eop)->eo_u32[2] = bus_space_read_stream_4( \ (_esbp)->esb_tag, (_esbp)->esb_handle, \ (_offset) + 8); \ (_eop)->eo_u32[3] = bus_space_read_stream_4( \ (_esbp)->esb_tag, (_esbp)->esb_handle, \ (_offset) + 12); \ \ EFSYS_PROBE5(bar_reado, unsigned int, (_offset), \ uint32_t, (_eop)->eo_u32[3], \ uint32_t, (_eop)->eo_u32[2], \ uint32_t, (_eop)->eo_u32[1], \ uint32_t, (_eop)->eo_u32[0]); \ \ _NOTE(CONSTANTCONDITION) \ if (_lock) \ SFXGE_BAR_UNLOCK(_esbp); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #endif #define EFSYS_BAR_WRITED(_esbp, _offset, _edp, _lock) \ do { \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_dword_t)), \ ("not power of 2 aligned")); \ \ _NOTE(CONSTANTCONDITION) \ if (_lock) \ SFXGE_BAR_LOCK(_esbp); \ \ EFSYS_PROBE2(bar_writed, unsigned int, (_offset), \ uint32_t, (_edp)->ed_u32[0]); \ \ /* \ * Make sure that previous writes to the dword have \ * been done. It should be cheaper than barrier just \ * after the write below. \ */ \ bus_space_barrier((_esbp)->esb_tag, (_esbp)->esb_handle,\ (_offset), sizeof (efx_dword_t), \ BUS_SPACE_BARRIER_WRITE); \ bus_space_write_stream_4((_esbp)->esb_tag, \ (_esbp)->esb_handle, \ (_offset), (_edp)->ed_u32[0]); \ \ _NOTE(CONSTANTCONDITION) \ if (_lock) \ SFXGE_BAR_UNLOCK(_esbp); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #if defined(SFXGE_USE_BUS_SPACE_8) #define EFSYS_BAR_WRITEQ(_esbp, _offset, _eqp) \ do { \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)), \ ("not power of 2 aligned")); \ \ SFXGE_BAR_LOCK(_esbp); \ \ EFSYS_PROBE3(bar_writeq, unsigned int, (_offset), \ uint32_t, (_eqp)->eq_u32[1], \ uint32_t, (_eqp)->eq_u32[0]); \ \ /* \ * Make sure that previous writes to the qword have \ * been done. It should be cheaper than barrier just \ * after the write below. \ */ \ bus_space_barrier((_esbp)->esb_tag, (_esbp)->esb_handle,\ (_offset), sizeof (efx_qword_t), \ BUS_SPACE_BARRIER_WRITE); \ bus_space_write_stream_8((_esbp)->esb_tag, \ (_esbp)->esb_handle, \ (_offset), (_eqp)->eq_u64[0]); \ \ SFXGE_BAR_UNLOCK(_esbp); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #else #define EFSYS_BAR_WRITEQ(_esbp, _offset, _eqp) \ do { \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)), \ ("not power of 2 aligned")); \ \ SFXGE_BAR_LOCK(_esbp); \ \ EFSYS_PROBE3(bar_writeq, unsigned int, (_offset), \ uint32_t, (_eqp)->eq_u32[1], \ uint32_t, (_eqp)->eq_u32[0]); \ \ /* \ * Make sure that previous writes to the qword have \ * been done. It should be cheaper than barrier just \ * after the last write below. \ */ \ bus_space_barrier((_esbp)->esb_tag, (_esbp)->esb_handle,\ (_offset), sizeof (efx_qword_t), \ BUS_SPACE_BARRIER_WRITE); \ bus_space_write_stream_4((_esbp)->esb_tag, \ (_esbp)->esb_handle, \ (_offset), (_eqp)->eq_u32[0]); \ /* \ * It should be guaranteed that the last dword comes \ * the last, so barrier entire qword to be sure that \ * neither above nor below writes are reordered. \ */ \ bus_space_barrier((_esbp)->esb_tag, (_esbp)->esb_handle,\ (_offset), sizeof (efx_qword_t), \ BUS_SPACE_BARRIER_WRITE); \ bus_space_write_stream_4((_esbp)->esb_tag, \ (_esbp)->esb_handle, \ (_offset) + 4, (_eqp)->eq_u32[1]); \ \ SFXGE_BAR_UNLOCK(_esbp); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #endif /* * Guarantees 64bit aligned 64bit writes to write combined BAR mapping * (required by PIO hardware) */ #define EFSYS_BAR_WC_WRITEQ(_esbp, _offset, _eqp) \ do { \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)), \ ("not power of 2 aligned")); \ \ (void) (_esbp); \ \ /* FIXME: Perform a 64-bit write */ \ KASSERT(0, ("not implemented")); \ \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #if defined(SFXGE_USE_BUS_SPACE_8) #define EFSYS_BAR_WRITEO(_esbp, _offset, _eop, _lock) \ do { \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_oword_t)), \ ("not power of 2 aligned")); \ \ _NOTE(CONSTANTCONDITION) \ if (_lock) \ SFXGE_BAR_LOCK(_esbp); \ \ EFSYS_PROBE5(bar_writeo, unsigned int, (_offset), \ uint32_t, (_eop)->eo_u32[3], \ uint32_t, (_eop)->eo_u32[2], \ uint32_t, (_eop)->eo_u32[1], \ uint32_t, (_eop)->eo_u32[0]); \ \ /* \ * Make sure that previous writes to the oword have \ * been done. It should be cheaper than barrier just \ * after the last write below. \ */ \ bus_space_barrier((_esbp)->esb_tag, (_esbp)->esb_handle,\ (_offset), sizeof (efx_oword_t), \ BUS_SPACE_BARRIER_WRITE); \ bus_space_write_stream_8((_esbp)->esb_tag, \ (_esbp)->esb_handle, \ (_offset), (_eop)->eo_u64[0]); \ /* \ * It should be guaranteed that the last qword comes \ * the last, so barrier entire oword to be sure that \ * neither above nor below writes are reordered. \ */ \ bus_space_barrier((_esbp)->esb_tag, (_esbp)->esb_handle,\ (_offset), sizeof (efx_oword_t), \ BUS_SPACE_BARRIER_WRITE); \ bus_space_write_stream_8((_esbp)->esb_tag, \ (_esbp)->esb_handle, \ (_offset) + 8, (_eop)->eo_u64[1]); \ \ _NOTE(CONSTANTCONDITION) \ if (_lock) \ SFXGE_BAR_UNLOCK(_esbp); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #else #define EFSYS_BAR_WRITEO(_esbp, _offset, _eop, _lock) \ do { \ _NOTE(CONSTANTCONDITION) \ KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_oword_t)), \ ("not power of 2 aligned")); \ \ _NOTE(CONSTANTCONDITION) \ if (_lock) \ SFXGE_BAR_LOCK(_esbp); \ \ EFSYS_PROBE5(bar_writeo, unsigned int, (_offset), \ uint32_t, (_eop)->eo_u32[3], \ uint32_t, (_eop)->eo_u32[2], \ uint32_t, (_eop)->eo_u32[1], \ uint32_t, (_eop)->eo_u32[0]); \ \ /* \ * Make sure that previous writes to the oword have \ * been done. It should be cheaper than barrier just \ * after the last write below. \ */ \ bus_space_barrier((_esbp)->esb_tag, (_esbp)->esb_handle,\ (_offset), sizeof (efx_oword_t), \ BUS_SPACE_BARRIER_WRITE); \ bus_space_write_stream_4((_esbp)->esb_tag, \ (_esbp)->esb_handle, \ (_offset), (_eop)->eo_u32[0]); \ bus_space_write_stream_4((_esbp)->esb_tag, \ (_esbp)->esb_handle, \ (_offset) + 4, (_eop)->eo_u32[1]); \ bus_space_write_stream_4((_esbp)->esb_tag, \ (_esbp)->esb_handle, \ (_offset) + 8, (_eop)->eo_u32[2]); \ /* \ * It should be guaranteed that the last dword comes \ * the last, so barrier entire oword to be sure that \ * neither above nor below writes are reordered. \ */ \ bus_space_barrier((_esbp)->esb_tag, (_esbp)->esb_handle,\ (_offset), sizeof (efx_oword_t), \ BUS_SPACE_BARRIER_WRITE); \ bus_space_write_stream_4((_esbp)->esb_tag, \ (_esbp)->esb_handle, \ (_offset) + 12, (_eop)->eo_u32[3]); \ \ _NOTE(CONSTANTCONDITION) \ if (_lock) \ SFXGE_BAR_UNLOCK(_esbp); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #endif /* Use the standard octo-word write for doorbell writes */ #define EFSYS_BAR_DOORBELL_WRITEO(_esbp, _offset, _eop) \ do { \ EFSYS_BAR_WRITEO((_esbp), (_offset), (_eop), B_FALSE); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) /* SPIN */ #define EFSYS_SPIN(_us) \ do { \ DELAY(_us); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #define EFSYS_SLEEP EFSYS_SPIN /* BARRIERS */ #define EFSYS_MEM_READ_BARRIER() rmb() #define EFSYS_PIO_WRITE_BARRIER() /* DMA SYNC */ #define EFSYS_DMA_SYNC_FOR_KERNEL(_esmp, _offset, _size) \ do { \ bus_dmamap_sync((_esmp)->esm_tag, \ (_esmp)->esm_map, \ BUS_DMASYNC_POSTREAD); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #define EFSYS_DMA_SYNC_FOR_DEVICE(_esmp, _offset, _size) \ do { \ bus_dmamap_sync((_esmp)->esm_tag, \ (_esmp)->esm_map, \ BUS_DMASYNC_PREWRITE); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) /* TIMESTAMP */ typedef clock_t efsys_timestamp_t; #define EFSYS_TIMESTAMP(_usp) \ do { \ clock_t now; \ \ now = ticks; \ *(_usp) = now * hz / 1000000; \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) /* KMEM */ #define EFSYS_KMEM_ALLOC(_esip, _size, _p) \ do { \ (_esip) = (_esip); \ /* \ * The macro is used in non-sleepable contexts, for \ * example, holding a mutex. \ */ \ (_p) = malloc((_size), M_SFXGE, M_NOWAIT|M_ZERO); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #define EFSYS_KMEM_FREE(_esip, _size, _p) \ do { \ (void) (_esip); \ (void) (_size); \ free((_p), M_SFXGE); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) /* LOCK */ typedef struct efsys_lock_s { struct mtx lock; char lock_name[SFXGE_LOCK_NAME_MAX]; } efsys_lock_t; #define SFXGE_EFSYS_LOCK_INIT(_eslp, _ifname, _label) \ do { \ efsys_lock_t *__eslp = (_eslp); \ \ snprintf((__eslp)->lock_name, \ sizeof((__eslp)->lock_name), \ "%s:%s", (_ifname), (_label)); \ mtx_init(&(__eslp)->lock, (__eslp)->lock_name, \ NULL, MTX_DEF); \ } while (B_FALSE) #define SFXGE_EFSYS_LOCK_DESTROY(_eslp) \ mtx_destroy(&(_eslp)->lock) #define SFXGE_EFSYS_LOCK(_eslp) \ mtx_lock(&(_eslp)->lock) #define SFXGE_EFSYS_UNLOCK(_eslp) \ mtx_unlock(&(_eslp)->lock) #define SFXGE_EFSYS_LOCK_ASSERT_OWNED(_eslp) \ mtx_assert(&(_eslp)->lock, MA_OWNED) #define EFSYS_LOCK_MAGIC 0x000010c4 #define EFSYS_LOCK(_lockp, _state) \ do { \ SFXGE_EFSYS_LOCK(_lockp); \ (_state) = EFSYS_LOCK_MAGIC; \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #define EFSYS_UNLOCK(_lockp, _state) \ do { \ if ((_state) != EFSYS_LOCK_MAGIC) \ KASSERT(B_FALSE, ("not locked")); \ SFXGE_EFSYS_UNLOCK(_lockp); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) /* PREEMPT */ #define EFSYS_PREEMPT_DISABLE(_state) \ do { \ (_state) = (_state); \ critical_enter(); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #define EFSYS_PREEMPT_ENABLE(_state) \ do { \ (_state) = (_state); \ critical_exit(_state); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) /* STAT */ typedef uint64_t efsys_stat_t; #define EFSYS_STAT_INCR(_knp, _delta) \ do { \ *(_knp) += (_delta); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #define EFSYS_STAT_DECR(_knp, _delta) \ do { \ *(_knp) -= (_delta); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #define EFSYS_STAT_SET(_knp, _val) \ do { \ *(_knp) = (_val); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #define EFSYS_STAT_SET_QWORD(_knp, _valp) \ do { \ *(_knp) = le64toh((_valp)->eq_u64[0]); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #define EFSYS_STAT_SET_DWORD(_knp, _valp) \ do { \ *(_knp) = le32toh((_valp)->ed_u32[0]); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #define EFSYS_STAT_INCR_QWORD(_knp, _valp) \ do { \ *(_knp) += le64toh((_valp)->eq_u64[0]); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #define EFSYS_STAT_SUBR_QWORD(_knp, _valp) \ do { \ *(_knp) -= le64toh((_valp)->eq_u64[0]); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) /* ERR */ extern void sfxge_err(efsys_identifier_t *, unsigned int, uint32_t, uint32_t); #if EFSYS_OPT_DECODE_INTR_FATAL #define EFSYS_ERR(_esip, _code, _dword0, _dword1) \ do { \ sfxge_err((_esip), (_code), (_dword0), (_dword1)); \ _NOTE(CONSTANTCONDITION) \ } while (B_FALSE) #endif /* ASSERT */ #define EFSYS_ASSERT(_exp) do { \ if (!(_exp)) \ panic("%s", #_exp); \ } while (0) #define EFSYS_ASSERT3(_x, _op, _y, _t) do { \ const _t __x = (_t)(_x); \ const _t __y = (_t)(_y); \ if (!(__x _op __y)) \ panic("assertion failed at %s:%u", __FILE__, __LINE__); \ } while(0) #define EFSYS_ASSERT3U(_x, _op, _y) EFSYS_ASSERT3(_x, _op, _y, uint64_t) #define EFSYS_ASSERT3S(_x, _op, _y) EFSYS_ASSERT3(_x, _op, _y, int64_t) #define EFSYS_ASSERT3P(_x, _op, _y) EFSYS_ASSERT3(_x, _op, _y, uintptr_t) /* ROTATE */ #define EFSYS_HAS_ROTL_DWORD 0 #ifdef __cplusplus } #endif #endif /* _SYS_EFSYS_H */ Index: projects/clang380-import/sys/dev/ti/if_ti.c =================================================================== --- projects/clang380-import/sys/dev/ti/if_ti.c (revision 293686) +++ projects/clang380-import/sys/dev/ti/if_ti.c (revision 293687) @@ -1,4057 +1,4057 @@ /*- * Copyright (c) 1997, 1998, 1999 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* * Alteon Networks Tigon PCI gigabit ethernet driver for FreeBSD. * Manuals, sample driver and firmware source kits are available * from http://www.alteon.com/support/openkits. * * Written by Bill Paul * Electrical Engineering Department * Columbia University, New York City */ /* * The Alteon Networks Tigon chip contains an embedded R4000 CPU, * gigabit MAC, dual DMA channels and a PCI interface unit. NICs * using the Tigon may have anywhere from 512K to 2MB of SRAM. The * Tigon supports hardware IP, TCP and UCP checksumming, multicast * filtering and jumbo (9014 byte) frames. The hardware is largely * controlled by firmware, which must be loaded into the NIC during * initialization. * * The Tigon 2 contains 2 R4000 CPUs and requires a newer firmware * revision, which supports new features such as extended commands, * extended jumbo receive ring desciptors and a mini receive ring. * * Alteon Networks is to be commended for releasing such a vast amount * of development material for the Tigon NIC without requiring an NDA * (although they really should have done it a long time ago). With * any luck, the other vendors will finally wise up and follow Alteon's * stellar example. * * The firmware for the Tigon 1 and 2 NICs is compiled directly into * this driver by #including it as a C header file. This bloats the * driver somewhat, but it's the easiest method considering that the * driver code and firmware code need to be kept in sync. The source * for the firmware is not provided with the FreeBSD distribution since * compiling it requires a GNU toolchain targeted for mips-sgi-irix5.3. * * The following people deserve special thanks: * - Terry Murphy of 3Com, for providing a 3c985 Tigon 1 board * for testing * - Raymond Lee of Netgear, for providing a pair of Netgear * GA620 Tigon 2 boards for testing * - Ulf Zimmermann, for bringing the GA260 to my attention and * convincing me to write this driver. * - Andrew Gallatin for providing FreeBSD/Alpha support. */ #include __FBSDID("$FreeBSD$"); #include "opt_ti.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef TI_SF_BUF_JUMBO #include #include #endif #include #include #include #include #include #include #include #define TI_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP) /* * We can only turn on header splitting if we're using extended receive * BDs. */ #if defined(TI_JUMBO_HDRSPLIT) && !defined(TI_SF_BUF_JUMBO) #error "options TI_JUMBO_HDRSPLIT requires TI_SF_BUF_JUMBO" #endif /* TI_JUMBO_HDRSPLIT && !TI_SF_BUF_JUMBO */ typedef enum { TI_SWAP_HTON, TI_SWAP_NTOH } ti_swap_type; /* * Various supported device vendors/types and their names. */ static const struct ti_type ti_devs[] = { { ALT_VENDORID, ALT_DEVICEID_ACENIC, "Alteon AceNIC 1000baseSX Gigabit Ethernet" }, { ALT_VENDORID, ALT_DEVICEID_ACENIC_COPPER, "Alteon AceNIC 1000baseT Gigabit Ethernet" }, { TC_VENDORID, TC_DEVICEID_3C985, "3Com 3c985-SX Gigabit Ethernet" }, { NG_VENDORID, NG_DEVICEID_GA620, "Netgear GA620 1000baseSX Gigabit Ethernet" }, { NG_VENDORID, NG_DEVICEID_GA620T, "Netgear GA620 1000baseT Gigabit Ethernet" }, { SGI_VENDORID, SGI_DEVICEID_TIGON, "Silicon Graphics Gigabit Ethernet" }, { DEC_VENDORID, DEC_DEVICEID_FARALLON_PN9000SX, "Farallon PN9000SX Gigabit Ethernet" }, { 0, 0, NULL } }; static d_open_t ti_open; static d_close_t ti_close; static d_ioctl_t ti_ioctl2; static struct cdevsw ti_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = ti_open, .d_close = ti_close, .d_ioctl = ti_ioctl2, .d_name = "ti", }; static int ti_probe(device_t); static int ti_attach(device_t); static int ti_detach(device_t); static void ti_txeof(struct ti_softc *); static void ti_rxeof(struct ti_softc *); static int ti_encap(struct ti_softc *, struct mbuf **); static void ti_intr(void *); static void ti_start(struct ifnet *); static void ti_start_locked(struct ifnet *); static int ti_ioctl(struct ifnet *, u_long, caddr_t); static uint64_t ti_get_counter(struct ifnet *, ift_counter); static void ti_init(void *); static void ti_init_locked(void *); static void ti_init2(struct ti_softc *); static void ti_stop(struct ti_softc *); static void ti_watchdog(void *); static int ti_shutdown(device_t); static int ti_ifmedia_upd(struct ifnet *); static int ti_ifmedia_upd_locked(struct ti_softc *); static void ti_ifmedia_sts(struct ifnet *, struct ifmediareq *); static uint32_t ti_eeprom_putbyte(struct ti_softc *, int); static uint8_t ti_eeprom_getbyte(struct ti_softc *, int, uint8_t *); static int ti_read_eeprom(struct ti_softc *, caddr_t, int, int); static void ti_add_mcast(struct ti_softc *, struct ether_addr *); static void ti_del_mcast(struct ti_softc *, struct ether_addr *); static void ti_setmulti(struct ti_softc *); static void ti_mem_read(struct ti_softc *, uint32_t, uint32_t, void *); static void ti_mem_write(struct ti_softc *, uint32_t, uint32_t, void *); static void ti_mem_zero(struct ti_softc *, uint32_t, uint32_t); static int ti_copy_mem(struct ti_softc *, uint32_t, uint32_t, caddr_t, int, int); static int ti_copy_scratch(struct ti_softc *, uint32_t, uint32_t, caddr_t, int, int, int); static int ti_bcopy_swap(const void *, void *, size_t, ti_swap_type); static void ti_loadfw(struct ti_softc *); static void ti_cmd(struct ti_softc *, struct ti_cmd_desc *); static void ti_cmd_ext(struct ti_softc *, struct ti_cmd_desc *, caddr_t, int); static void ti_handle_events(struct ti_softc *); static void ti_dma_map_addr(void *, bus_dma_segment_t *, int, int); static int ti_dma_alloc(struct ti_softc *); static void ti_dma_free(struct ti_softc *); static int ti_dma_ring_alloc(struct ti_softc *, bus_size_t, bus_size_t, bus_dma_tag_t *, uint8_t **, bus_dmamap_t *, bus_addr_t *, const char *); static void ti_dma_ring_free(struct ti_softc *, bus_dma_tag_t *, uint8_t **, bus_dmamap_t, bus_addr_t *); static int ti_newbuf_std(struct ti_softc *, int); static int ti_newbuf_mini(struct ti_softc *, int); static int ti_newbuf_jumbo(struct ti_softc *, int, struct mbuf *); static int ti_init_rx_ring_std(struct ti_softc *); static void ti_free_rx_ring_std(struct ti_softc *); static int ti_init_rx_ring_jumbo(struct ti_softc *); static void ti_free_rx_ring_jumbo(struct ti_softc *); static int ti_init_rx_ring_mini(struct ti_softc *); static void ti_free_rx_ring_mini(struct ti_softc *); static void ti_free_tx_ring(struct ti_softc *); static int ti_init_tx_ring(struct ti_softc *); static void ti_discard_std(struct ti_softc *, int); #ifndef TI_SF_BUF_JUMBO static void ti_discard_jumbo(struct ti_softc *, int); #endif static void ti_discard_mini(struct ti_softc *, int); static int ti_64bitslot_war(struct ti_softc *); static int ti_chipinit(struct ti_softc *); static int ti_gibinit(struct ti_softc *); #ifdef TI_JUMBO_HDRSPLIT static __inline void ti_hdr_split(struct mbuf *top, int hdr_len, int pkt_len, int idx); #endif /* TI_JUMBO_HDRSPLIT */ static void ti_sysctl_node(struct ti_softc *); static device_method_t ti_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ti_probe), DEVMETHOD(device_attach, ti_attach), DEVMETHOD(device_detach, ti_detach), DEVMETHOD(device_shutdown, ti_shutdown), { 0, 0 } }; static driver_t ti_driver = { "ti", ti_methods, sizeof(struct ti_softc) }; static devclass_t ti_devclass; DRIVER_MODULE(ti, pci, ti_driver, ti_devclass, 0, 0); MODULE_DEPEND(ti, pci, 1, 1, 1); MODULE_DEPEND(ti, ether, 1, 1, 1); /* * Send an instruction or address to the EEPROM, check for ACK. */ static uint32_t ti_eeprom_putbyte(struct ti_softc *sc, int byte) { int i, ack = 0; /* * Make sure we're in TX mode. */ TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_TXEN); /* * Feed in each bit and stobe the clock. */ for (i = 0x80; i; i >>= 1) { if (byte & i) { TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_DOUT); } else { TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_DOUT); } DELAY(1); TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK); DELAY(1); TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK); } /* * Turn off TX mode. */ TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_TXEN); /* * Check for ack. */ TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK); ack = CSR_READ_4(sc, TI_MISC_LOCAL_CTL) & TI_MLC_EE_DIN; TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK); return (ack); } /* * Read a byte of data stored in the EEPROM at address 'addr.' * We have to send two address bytes since the EEPROM can hold * more than 256 bytes of data. */ static uint8_t ti_eeprom_getbyte(struct ti_softc *sc, int addr, uint8_t *dest) { int i; uint8_t byte = 0; EEPROM_START; /* * Send write control code to EEPROM. */ if (ti_eeprom_putbyte(sc, EEPROM_CTL_WRITE)) { device_printf(sc->ti_dev, "failed to send write command, status: %x\n", CSR_READ_4(sc, TI_MISC_LOCAL_CTL)); return (1); } /* * Send first byte of address of byte we want to read. */ if (ti_eeprom_putbyte(sc, (addr >> 8) & 0xFF)) { device_printf(sc->ti_dev, "failed to send address, status: %x\n", CSR_READ_4(sc, TI_MISC_LOCAL_CTL)); return (1); } /* * Send second byte address of byte we want to read. */ if (ti_eeprom_putbyte(sc, addr & 0xFF)) { device_printf(sc->ti_dev, "failed to send address, status: %x\n", CSR_READ_4(sc, TI_MISC_LOCAL_CTL)); return (1); } EEPROM_STOP; EEPROM_START; /* * Send read control code to EEPROM. */ if (ti_eeprom_putbyte(sc, EEPROM_CTL_READ)) { device_printf(sc->ti_dev, "failed to send read command, status: %x\n", CSR_READ_4(sc, TI_MISC_LOCAL_CTL)); return (1); } /* * Start reading bits from EEPROM. */ TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_TXEN); for (i = 0x80; i; i >>= 1) { TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK); DELAY(1); if (CSR_READ_4(sc, TI_MISC_LOCAL_CTL) & TI_MLC_EE_DIN) byte |= i; TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK); DELAY(1); } EEPROM_STOP; /* * No ACK generated for read, so just return byte. */ *dest = byte; return (0); } /* * Read a sequence of bytes from the EEPROM. */ static int ti_read_eeprom(struct ti_softc *sc, caddr_t dest, int off, int cnt) { int err = 0, i; uint8_t byte = 0; for (i = 0; i < cnt; i++) { err = ti_eeprom_getbyte(sc, off + i, &byte); if (err) break; *(dest + i) = byte; } return (err ? 1 : 0); } /* * NIC memory read function. * Can be used to copy data from NIC local memory. */ static void ti_mem_read(struct ti_softc *sc, uint32_t addr, uint32_t len, void *buf) { int segptr, segsize, cnt; char *ptr; segptr = addr; cnt = len; ptr = buf; while (cnt) { if (cnt < TI_WINLEN) segsize = cnt; else segsize = TI_WINLEN - (segptr % TI_WINLEN); CSR_WRITE_4(sc, TI_WINBASE, (segptr & ~(TI_WINLEN - 1))); bus_space_read_region_4(sc->ti_btag, sc->ti_bhandle, TI_WINDOW + (segptr & (TI_WINLEN - 1)), (uint32_t *)ptr, segsize / 4); ptr += segsize; segptr += segsize; cnt -= segsize; } } /* * NIC memory write function. * Can be used to copy data into NIC local memory. */ static void ti_mem_write(struct ti_softc *sc, uint32_t addr, uint32_t len, void *buf) { int segptr, segsize, cnt; char *ptr; segptr = addr; cnt = len; ptr = buf; while (cnt) { if (cnt < TI_WINLEN) segsize = cnt; else segsize = TI_WINLEN - (segptr % TI_WINLEN); CSR_WRITE_4(sc, TI_WINBASE, (segptr & ~(TI_WINLEN - 1))); bus_space_write_region_4(sc->ti_btag, sc->ti_bhandle, TI_WINDOW + (segptr & (TI_WINLEN - 1)), (uint32_t *)ptr, segsize / 4); ptr += segsize; segptr += segsize; cnt -= segsize; } } /* * NIC memory read function. * Can be used to clear a section of NIC local memory. */ static void ti_mem_zero(struct ti_softc *sc, uint32_t addr, uint32_t len) { int segptr, segsize, cnt; segptr = addr; cnt = len; while (cnt) { if (cnt < TI_WINLEN) segsize = cnt; else segsize = TI_WINLEN - (segptr % TI_WINLEN); CSR_WRITE_4(sc, TI_WINBASE, (segptr & ~(TI_WINLEN - 1))); bus_space_set_region_4(sc->ti_btag, sc->ti_bhandle, TI_WINDOW + (segptr & (TI_WINLEN - 1)), 0, segsize / 4); segptr += segsize; cnt -= segsize; } } static int ti_copy_mem(struct ti_softc *sc, uint32_t tigon_addr, uint32_t len, caddr_t buf, int useraddr, int readdata) { int segptr, segsize, cnt; caddr_t ptr; uint32_t origwin; int resid, segresid; int first_pass; TI_LOCK_ASSERT(sc); /* * At the moment, we don't handle non-aligned cases, we just bail. * If this proves to be a problem, it will be fixed. */ if (readdata == 0 && (tigon_addr & 0x3) != 0) { device_printf(sc->ti_dev, "%s: tigon address %#x isn't " "word-aligned\n", __func__, tigon_addr); device_printf(sc->ti_dev, "%s: unaligned writes aren't " "yet supported\n", __func__); return (EINVAL); } segptr = tigon_addr & ~0x3; segresid = tigon_addr - segptr; /* * This is the non-aligned amount left over that we'll need to * copy. */ resid = len & 0x3; /* Add in the left over amount at the front of the buffer */ resid += segresid; cnt = len & ~0x3; /* * If resid + segresid is >= 4, add multiples of 4 to the count and * decrease the residual by that much. */ cnt += resid & ~0x3; resid -= resid & ~0x3; ptr = buf; first_pass = 1; /* * Save the old window base value. */ origwin = CSR_READ_4(sc, TI_WINBASE); while (cnt) { bus_size_t ti_offset; if (cnt < TI_WINLEN) segsize = cnt; else segsize = TI_WINLEN - (segptr % TI_WINLEN); CSR_WRITE_4(sc, TI_WINBASE, (segptr & ~(TI_WINLEN - 1))); ti_offset = TI_WINDOW + (segptr & (TI_WINLEN -1)); if (readdata) { bus_space_read_region_4(sc->ti_btag, sc->ti_bhandle, ti_offset, (uint32_t *)sc->ti_membuf, segsize >> 2); if (useraddr) { /* * Yeah, this is a little on the kludgy * side, but at least this code is only * used for debugging. */ ti_bcopy_swap(sc->ti_membuf, sc->ti_membuf2, segsize, TI_SWAP_NTOH); TI_UNLOCK(sc); if (first_pass) { copyout(&sc->ti_membuf2[segresid], ptr, segsize - segresid); first_pass = 0; } else copyout(sc->ti_membuf2, ptr, segsize); TI_LOCK(sc); } else { if (first_pass) { ti_bcopy_swap(sc->ti_membuf, sc->ti_membuf2, segsize, TI_SWAP_NTOH); TI_UNLOCK(sc); bcopy(&sc->ti_membuf2[segresid], ptr, segsize - segresid); TI_LOCK(sc); first_pass = 0; } else ti_bcopy_swap(sc->ti_membuf, ptr, segsize, TI_SWAP_NTOH); } } else { if (useraddr) { TI_UNLOCK(sc); copyin(ptr, sc->ti_membuf2, segsize); TI_LOCK(sc); ti_bcopy_swap(sc->ti_membuf2, sc->ti_membuf, segsize, TI_SWAP_HTON); } else ti_bcopy_swap(ptr, sc->ti_membuf, segsize, TI_SWAP_HTON); bus_space_write_region_4(sc->ti_btag, sc->ti_bhandle, ti_offset, (uint32_t *)sc->ti_membuf, segsize >> 2); } segptr += segsize; ptr += segsize; cnt -= segsize; } /* * Handle leftover, non-word-aligned bytes. */ if (resid != 0) { uint32_t tmpval, tmpval2; bus_size_t ti_offset; /* * Set the segment pointer. */ CSR_WRITE_4(sc, TI_WINBASE, (segptr & ~(TI_WINLEN - 1))); ti_offset = TI_WINDOW + (segptr & (TI_WINLEN - 1)); /* * First, grab whatever is in our source/destination. * We'll obviously need this for reads, but also for * writes, since we'll be doing read/modify/write. */ bus_space_read_region_4(sc->ti_btag, sc->ti_bhandle, ti_offset, &tmpval, 1); /* * Next, translate this from little-endian to big-endian * (at least on i386 boxes). */ tmpval2 = ntohl(tmpval); if (readdata) { /* * If we're reading, just copy the leftover number * of bytes from the host byte order buffer to * the user's buffer. */ if (useraddr) { TI_UNLOCK(sc); copyout(&tmpval2, ptr, resid); TI_LOCK(sc); } else bcopy(&tmpval2, ptr, resid); } else { /* * If we're writing, first copy the bytes to be * written into the network byte order buffer, * leaving the rest of the buffer with whatever was * originally in there. Then, swap the bytes * around into host order and write them out. * * XXX KDM the read side of this has been verified * to work, but the write side of it has not been * verified. So user beware. */ if (useraddr) { TI_UNLOCK(sc); copyin(ptr, &tmpval2, resid); TI_LOCK(sc); } else bcopy(ptr, &tmpval2, resid); tmpval = htonl(tmpval2); bus_space_write_region_4(sc->ti_btag, sc->ti_bhandle, ti_offset, &tmpval, 1); } } CSR_WRITE_4(sc, TI_WINBASE, origwin); return (0); } static int ti_copy_scratch(struct ti_softc *sc, uint32_t tigon_addr, uint32_t len, caddr_t buf, int useraddr, int readdata, int cpu) { uint32_t segptr; int cnt; uint32_t tmpval, tmpval2; caddr_t ptr; TI_LOCK_ASSERT(sc); /* * At the moment, we don't handle non-aligned cases, we just bail. * If this proves to be a problem, it will be fixed. */ if (tigon_addr & 0x3) { device_printf(sc->ti_dev, "%s: tigon address %#x " "isn't word-aligned\n", __func__, tigon_addr); return (EINVAL); } if (len & 0x3) { device_printf(sc->ti_dev, "%s: transfer length %d " "isn't word-aligned\n", __func__, len); return (EINVAL); } segptr = tigon_addr; cnt = len; ptr = buf; while (cnt) { CSR_WRITE_4(sc, CPU_REG(TI_SRAM_ADDR, cpu), segptr); if (readdata) { tmpval2 = CSR_READ_4(sc, CPU_REG(TI_SRAM_DATA, cpu)); tmpval = ntohl(tmpval2); /* * Note: I've used this debugging interface * extensively with Alteon's 12.3.15 firmware, * compiled with GCC 2.7.2.1 and binutils 2.9.1. * * When you compile the firmware without * optimization, which is necessary sometimes in * order to properly step through it, you sometimes * read out a bogus value of 0xc0017c instead of * whatever was supposed to be in that scratchpad * location. That value is on the stack somewhere, * but I've never been able to figure out what was * causing the problem. * * The address seems to pop up in random places, * often not in the same place on two subsequent * reads. * * In any case, the underlying data doesn't seem * to be affected, just the value read out. * * KDM, 3/7/2000 */ if (tmpval2 == 0xc0017c) device_printf(sc->ti_dev, "found 0xc0017c at " "%#x (tmpval2)\n", segptr); if (tmpval == 0xc0017c) device_printf(sc->ti_dev, "found 0xc0017c at " "%#x (tmpval)\n", segptr); if (useraddr) copyout(&tmpval, ptr, 4); else bcopy(&tmpval, ptr, 4); } else { if (useraddr) copyin(ptr, &tmpval2, 4); else bcopy(ptr, &tmpval2, 4); tmpval = htonl(tmpval2); CSR_WRITE_4(sc, CPU_REG(TI_SRAM_DATA, cpu), tmpval); } cnt -= 4; segptr += 4; ptr += 4; } return (0); } static int ti_bcopy_swap(const void *src, void *dst, size_t len, ti_swap_type swap_type) { const uint8_t *tmpsrc; uint8_t *tmpdst; size_t tmplen; if (len & 0x3) { printf("ti_bcopy_swap: length %zd isn't 32-bit aligned\n", len); return (-1); } tmpsrc = src; tmpdst = dst; tmplen = len; while (tmplen) { if (swap_type == TI_SWAP_NTOH) *(uint32_t *)tmpdst = ntohl(*(const uint32_t *)tmpsrc); else *(uint32_t *)tmpdst = htonl(*(const uint32_t *)tmpsrc); tmpsrc += 4; tmpdst += 4; tmplen -= 4; } return (0); } /* * Load firmware image into the NIC. Check that the firmware revision * is acceptable and see if we want the firmware for the Tigon 1 or * Tigon 2. */ static void ti_loadfw(struct ti_softc *sc) { TI_LOCK_ASSERT(sc); switch (sc->ti_hwrev) { case TI_HWREV_TIGON: if (tigonFwReleaseMajor != TI_FIRMWARE_MAJOR || tigonFwReleaseMinor != TI_FIRMWARE_MINOR || tigonFwReleaseFix != TI_FIRMWARE_FIX) { device_printf(sc->ti_dev, "firmware revision mismatch; " "want %d.%d.%d, got %d.%d.%d\n", TI_FIRMWARE_MAJOR, TI_FIRMWARE_MINOR, TI_FIRMWARE_FIX, tigonFwReleaseMajor, tigonFwReleaseMinor, tigonFwReleaseFix); return; } ti_mem_write(sc, tigonFwTextAddr, tigonFwTextLen, tigonFwText); ti_mem_write(sc, tigonFwDataAddr, tigonFwDataLen, tigonFwData); ti_mem_write(sc, tigonFwRodataAddr, tigonFwRodataLen, tigonFwRodata); ti_mem_zero(sc, tigonFwBssAddr, tigonFwBssLen); ti_mem_zero(sc, tigonFwSbssAddr, tigonFwSbssLen); CSR_WRITE_4(sc, TI_CPU_PROGRAM_COUNTER, tigonFwStartAddr); break; case TI_HWREV_TIGON_II: if (tigon2FwReleaseMajor != TI_FIRMWARE_MAJOR || tigon2FwReleaseMinor != TI_FIRMWARE_MINOR || tigon2FwReleaseFix != TI_FIRMWARE_FIX) { device_printf(sc->ti_dev, "firmware revision mismatch; " "want %d.%d.%d, got %d.%d.%d\n", TI_FIRMWARE_MAJOR, TI_FIRMWARE_MINOR, TI_FIRMWARE_FIX, tigon2FwReleaseMajor, tigon2FwReleaseMinor, tigon2FwReleaseFix); return; } ti_mem_write(sc, tigon2FwTextAddr, tigon2FwTextLen, tigon2FwText); ti_mem_write(sc, tigon2FwDataAddr, tigon2FwDataLen, tigon2FwData); ti_mem_write(sc, tigon2FwRodataAddr, tigon2FwRodataLen, tigon2FwRodata); ti_mem_zero(sc, tigon2FwBssAddr, tigon2FwBssLen); ti_mem_zero(sc, tigon2FwSbssAddr, tigon2FwSbssLen); CSR_WRITE_4(sc, TI_CPU_PROGRAM_COUNTER, tigon2FwStartAddr); break; default: device_printf(sc->ti_dev, "can't load firmware: unknown hardware rev\n"); break; } } /* * Send the NIC a command via the command ring. */ static void ti_cmd(struct ti_softc *sc, struct ti_cmd_desc *cmd) { int index; index = sc->ti_cmd_saved_prodidx; CSR_WRITE_4(sc, TI_GCR_CMDRING + (index * 4), *(uint32_t *)(cmd)); TI_INC(index, TI_CMD_RING_CNT); CSR_WRITE_4(sc, TI_MB_CMDPROD_IDX, index); sc->ti_cmd_saved_prodidx = index; } /* * Send the NIC an extended command. The 'len' parameter specifies the * number of command slots to include after the initial command. */ static void ti_cmd_ext(struct ti_softc *sc, struct ti_cmd_desc *cmd, caddr_t arg, int len) { int index; int i; index = sc->ti_cmd_saved_prodidx; CSR_WRITE_4(sc, TI_GCR_CMDRING + (index * 4), *(uint32_t *)(cmd)); TI_INC(index, TI_CMD_RING_CNT); for (i = 0; i < len; i++) { CSR_WRITE_4(sc, TI_GCR_CMDRING + (index * 4), *(uint32_t *)(&arg[i * 4])); TI_INC(index, TI_CMD_RING_CNT); } CSR_WRITE_4(sc, TI_MB_CMDPROD_IDX, index); sc->ti_cmd_saved_prodidx = index; } /* * Handle events that have triggered interrupts. */ static void ti_handle_events(struct ti_softc *sc) { struct ti_event_desc *e; if (sc->ti_rdata.ti_event_ring == NULL) return; bus_dmamap_sync(sc->ti_cdata.ti_event_ring_tag, sc->ti_cdata.ti_event_ring_map, BUS_DMASYNC_POSTREAD); while (sc->ti_ev_saved_considx != sc->ti_ev_prodidx.ti_idx) { e = &sc->ti_rdata.ti_event_ring[sc->ti_ev_saved_considx]; switch (TI_EVENT_EVENT(e)) { case TI_EV_LINKSTAT_CHANGED: sc->ti_linkstat = TI_EVENT_CODE(e); if (sc->ti_linkstat == TI_EV_CODE_LINK_UP) { if_link_state_change(sc->ti_ifp, LINK_STATE_UP); sc->ti_ifp->if_baudrate = IF_Mbps(100); if (bootverbose) device_printf(sc->ti_dev, "10/100 link up\n"); } else if (sc->ti_linkstat == TI_EV_CODE_GIG_LINK_UP) { if_link_state_change(sc->ti_ifp, LINK_STATE_UP); sc->ti_ifp->if_baudrate = IF_Gbps(1UL); if (bootverbose) device_printf(sc->ti_dev, "gigabit link up\n"); } else if (sc->ti_linkstat == TI_EV_CODE_LINK_DOWN) { if_link_state_change(sc->ti_ifp, LINK_STATE_DOWN); sc->ti_ifp->if_baudrate = 0; if (bootverbose) device_printf(sc->ti_dev, "link down\n"); } break; case TI_EV_ERROR: if (TI_EVENT_CODE(e) == TI_EV_CODE_ERR_INVAL_CMD) device_printf(sc->ti_dev, "invalid command\n"); else if (TI_EVENT_CODE(e) == TI_EV_CODE_ERR_UNIMP_CMD) device_printf(sc->ti_dev, "unknown command\n"); else if (TI_EVENT_CODE(e) == TI_EV_CODE_ERR_BADCFG) device_printf(sc->ti_dev, "bad config data\n"); break; case TI_EV_FIRMWARE_UP: ti_init2(sc); break; case TI_EV_STATS_UPDATED: case TI_EV_RESET_JUMBO_RING: case TI_EV_MCAST_UPDATED: /* Who cares. */ break; default: device_printf(sc->ti_dev, "unknown event: %d\n", TI_EVENT_EVENT(e)); break; } /* Advance the consumer index. */ TI_INC(sc->ti_ev_saved_considx, TI_EVENT_RING_CNT); CSR_WRITE_4(sc, TI_GCR_EVENTCONS_IDX, sc->ti_ev_saved_considx); } bus_dmamap_sync(sc->ti_cdata.ti_event_ring_tag, sc->ti_cdata.ti_event_ring_map, BUS_DMASYNC_PREREAD); } struct ti_dmamap_arg { bus_addr_t ti_busaddr; }; static void ti_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct ti_dmamap_arg *ctx; if (error) return; KASSERT(nseg == 1, ("%s: %d segments returned!", __func__, nseg)); ctx = arg; ctx->ti_busaddr = segs->ds_addr; } static int ti_dma_ring_alloc(struct ti_softc *sc, bus_size_t alignment, bus_size_t maxsize, bus_dma_tag_t *tag, uint8_t **ring, bus_dmamap_t *map, bus_addr_t *paddr, const char *msg) { struct ti_dmamap_arg ctx; int error; error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, alignment, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, maxsize, 1, maxsize, 0, NULL, NULL, tag); if (error != 0) { device_printf(sc->ti_dev, "could not create %s dma tag\n", msg); return (error); } /* Allocate DMA'able memory for ring. */ error = bus_dmamem_alloc(*tag, (void **)ring, BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, map); if (error != 0) { device_printf(sc->ti_dev, "could not allocate DMA'able memory for %s\n", msg); return (error); } /* Load the address of the ring. */ ctx.ti_busaddr = 0; error = bus_dmamap_load(*tag, *map, *ring, maxsize, ti_dma_map_addr, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->ti_dev, "could not load DMA'able memory for %s\n", msg); return (error); } *paddr = ctx.ti_busaddr; return (0); } static void ti_dma_ring_free(struct ti_softc *sc, bus_dma_tag_t *tag, uint8_t **ring, bus_dmamap_t map, bus_addr_t *paddr) { if (*paddr != 0) { bus_dmamap_unload(*tag, map); *paddr = 0; } if (*ring != NULL) { bus_dmamem_free(*tag, *ring, map); *ring = NULL; } if (*tag) { bus_dma_tag_destroy(*tag); *tag = NULL; } } static int ti_dma_alloc(struct ti_softc *sc) { bus_addr_t lowaddr; int i, error; lowaddr = BUS_SPACE_MAXADDR; if (sc->ti_dac == 0) lowaddr = BUS_SPACE_MAXADDR_32BIT; error = bus_dma_tag_create(bus_get_dma_tag(sc->ti_dev), 1, 0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->ti_cdata.ti_parent_tag); if (error != 0) { device_printf(sc->ti_dev, "could not allocate parent dma tag\n"); return (ENOMEM); } error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, sizeof(struct ti_gib), &sc->ti_cdata.ti_gib_tag, (uint8_t **)&sc->ti_rdata.ti_info, &sc->ti_cdata.ti_gib_map, &sc->ti_rdata.ti_info_paddr, "GIB"); if (error) return (error); /* Producer/consumer status */ error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, sizeof(struct ti_status), &sc->ti_cdata.ti_status_tag, (uint8_t **)&sc->ti_rdata.ti_status, &sc->ti_cdata.ti_status_map, &sc->ti_rdata.ti_status_paddr, "event ring"); if (error) return (error); /* Event ring */ error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, TI_EVENT_RING_SZ, &sc->ti_cdata.ti_event_ring_tag, (uint8_t **)&sc->ti_rdata.ti_event_ring, &sc->ti_cdata.ti_event_ring_map, &sc->ti_rdata.ti_event_ring_paddr, "event ring"); if (error) return (error); /* Command ring lives in shared memory so no need to create DMA area. */ /* Standard RX ring */ error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, TI_STD_RX_RING_SZ, &sc->ti_cdata.ti_rx_std_ring_tag, (uint8_t **)&sc->ti_rdata.ti_rx_std_ring, &sc->ti_cdata.ti_rx_std_ring_map, &sc->ti_rdata.ti_rx_std_ring_paddr, "RX ring"); if (error) return (error); /* Jumbo RX ring */ error = ti_dma_ring_alloc(sc, TI_JUMBO_RING_ALIGN, TI_JUMBO_RX_RING_SZ, &sc->ti_cdata.ti_rx_jumbo_ring_tag, (uint8_t **)&sc->ti_rdata.ti_rx_jumbo_ring, &sc->ti_cdata.ti_rx_jumbo_ring_map, &sc->ti_rdata.ti_rx_jumbo_ring_paddr, "jumbo RX ring"); if (error) return (error); /* RX return ring */ error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, TI_RX_RETURN_RING_SZ, &sc->ti_cdata.ti_rx_return_ring_tag, (uint8_t **)&sc->ti_rdata.ti_rx_return_ring, &sc->ti_cdata.ti_rx_return_ring_map, &sc->ti_rdata.ti_rx_return_ring_paddr, "RX return ring"); if (error) return (error); /* Create DMA tag for standard RX mbufs. */ error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->ti_cdata.ti_rx_std_tag); if (error) { device_printf(sc->ti_dev, "could not allocate RX dma tag\n"); return (error); } /* Create DMA tag for jumbo RX mbufs. */ #ifdef TI_SF_BUF_JUMBO /* * The VM system will take care of providing aligned pages. Alignment * is set to 1 here so that busdma resources won't be wasted. */ error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, PAGE_SIZE * 4, 4, PAGE_SIZE, 0, NULL, NULL, &sc->ti_cdata.ti_rx_jumbo_tag); #else error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MJUM9BYTES, 1, MJUM9BYTES, 0, NULL, NULL, &sc->ti_cdata.ti_rx_jumbo_tag); #endif if (error) { device_printf(sc->ti_dev, "could not allocate jumbo RX dma tag\n"); return (error); } /* Create DMA tag for TX mbufs. */ error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES * TI_MAXTXSEGS, TI_MAXTXSEGS, MCLBYTES, 0, NULL, NULL, &sc->ti_cdata.ti_tx_tag); if (error) { device_printf(sc->ti_dev, "could not allocate TX dma tag\n"); return (ENOMEM); } /* Create DMA maps for RX buffers. */ for (i = 0; i < TI_STD_RX_RING_CNT; i++) { error = bus_dmamap_create(sc->ti_cdata.ti_rx_std_tag, 0, &sc->ti_cdata.ti_rx_std_maps[i]); if (error) { device_printf(sc->ti_dev, "could not create DMA map for RX\n"); return (error); } } error = bus_dmamap_create(sc->ti_cdata.ti_rx_std_tag, 0, &sc->ti_cdata.ti_rx_std_sparemap); if (error) { device_printf(sc->ti_dev, "could not create spare DMA map for RX\n"); return (error); } /* Create DMA maps for jumbo RX buffers. */ for (i = 0; i < TI_JUMBO_RX_RING_CNT; i++) { error = bus_dmamap_create(sc->ti_cdata.ti_rx_jumbo_tag, 0, &sc->ti_cdata.ti_rx_jumbo_maps[i]); if (error) { device_printf(sc->ti_dev, "could not create DMA map for jumbo RX\n"); return (error); } } error = bus_dmamap_create(sc->ti_cdata.ti_rx_jumbo_tag, 0, &sc->ti_cdata.ti_rx_jumbo_sparemap); if (error) { device_printf(sc->ti_dev, "could not create spare DMA map for jumbo RX\n"); return (error); } /* Create DMA maps for TX buffers. */ for (i = 0; i < TI_TX_RING_CNT; i++) { error = bus_dmamap_create(sc->ti_cdata.ti_tx_tag, 0, &sc->ti_cdata.ti_txdesc[i].tx_dmamap); if (error) { device_printf(sc->ti_dev, "could not create DMA map for TX\n"); return (ENOMEM); } } /* Mini ring and TX ring is not available on Tigon 1. */ if (sc->ti_hwrev == TI_HWREV_TIGON) return (0); /* TX ring */ error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, TI_TX_RING_SZ, &sc->ti_cdata.ti_tx_ring_tag, (uint8_t **)&sc->ti_rdata.ti_tx_ring, &sc->ti_cdata.ti_tx_ring_map, &sc->ti_rdata.ti_tx_ring_paddr, "TX ring"); if (error) return (error); /* Mini RX ring */ error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, TI_MINI_RX_RING_SZ, &sc->ti_cdata.ti_rx_mini_ring_tag, (uint8_t **)&sc->ti_rdata.ti_rx_mini_ring, &sc->ti_cdata.ti_rx_mini_ring_map, &sc->ti_rdata.ti_rx_mini_ring_paddr, "mini RX ring"); if (error) return (error); /* Create DMA tag for mini RX mbufs. */ error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MHLEN, 1, MHLEN, 0, NULL, NULL, &sc->ti_cdata.ti_rx_mini_tag); if (error) { device_printf(sc->ti_dev, "could not allocate mini RX dma tag\n"); return (error); } /* Create DMA maps for mini RX buffers. */ for (i = 0; i < TI_MINI_RX_RING_CNT; i++) { error = bus_dmamap_create(sc->ti_cdata.ti_rx_mini_tag, 0, &sc->ti_cdata.ti_rx_mini_maps[i]); if (error) { device_printf(sc->ti_dev, "could not create DMA map for mini RX\n"); return (error); } } error = bus_dmamap_create(sc->ti_cdata.ti_rx_mini_tag, 0, &sc->ti_cdata.ti_rx_mini_sparemap); if (error) { device_printf(sc->ti_dev, "could not create spare DMA map for mini RX\n"); return (error); } return (0); } static void ti_dma_free(struct ti_softc *sc) { int i; /* Destroy DMA maps for RX buffers. */ for (i = 0; i < TI_STD_RX_RING_CNT; i++) { if (sc->ti_cdata.ti_rx_std_maps[i]) { bus_dmamap_destroy(sc->ti_cdata.ti_rx_std_tag, sc->ti_cdata.ti_rx_std_maps[i]); sc->ti_cdata.ti_rx_std_maps[i] = NULL; } } if (sc->ti_cdata.ti_rx_std_sparemap) { bus_dmamap_destroy(sc->ti_cdata.ti_rx_std_tag, sc->ti_cdata.ti_rx_std_sparemap); sc->ti_cdata.ti_rx_std_sparemap = NULL; } if (sc->ti_cdata.ti_rx_std_tag) { bus_dma_tag_destroy(sc->ti_cdata.ti_rx_std_tag); sc->ti_cdata.ti_rx_std_tag = NULL; } /* Destroy DMA maps for jumbo RX buffers. */ for (i = 0; i < TI_JUMBO_RX_RING_CNT; i++) { if (sc->ti_cdata.ti_rx_jumbo_maps[i]) { bus_dmamap_destroy(sc->ti_cdata.ti_rx_jumbo_tag, sc->ti_cdata.ti_rx_jumbo_maps[i]); sc->ti_cdata.ti_rx_jumbo_maps[i] = NULL; } } if (sc->ti_cdata.ti_rx_jumbo_sparemap) { bus_dmamap_destroy(sc->ti_cdata.ti_rx_jumbo_tag, sc->ti_cdata.ti_rx_jumbo_sparemap); sc->ti_cdata.ti_rx_jumbo_sparemap = NULL; } if (sc->ti_cdata.ti_rx_jumbo_tag) { bus_dma_tag_destroy(sc->ti_cdata.ti_rx_jumbo_tag); sc->ti_cdata.ti_rx_jumbo_tag = NULL; } /* Destroy DMA maps for mini RX buffers. */ for (i = 0; i < TI_MINI_RX_RING_CNT; i++) { if (sc->ti_cdata.ti_rx_mini_maps[i]) { bus_dmamap_destroy(sc->ti_cdata.ti_rx_mini_tag, sc->ti_cdata.ti_rx_mini_maps[i]); sc->ti_cdata.ti_rx_mini_maps[i] = NULL; } } if (sc->ti_cdata.ti_rx_mini_sparemap) { bus_dmamap_destroy(sc->ti_cdata.ti_rx_mini_tag, sc->ti_cdata.ti_rx_mini_sparemap); sc->ti_cdata.ti_rx_mini_sparemap = NULL; } if (sc->ti_cdata.ti_rx_mini_tag) { bus_dma_tag_destroy(sc->ti_cdata.ti_rx_mini_tag); sc->ti_cdata.ti_rx_mini_tag = NULL; } /* Destroy DMA maps for TX buffers. */ for (i = 0; i < TI_TX_RING_CNT; i++) { if (sc->ti_cdata.ti_txdesc[i].tx_dmamap) { bus_dmamap_destroy(sc->ti_cdata.ti_tx_tag, sc->ti_cdata.ti_txdesc[i].tx_dmamap); sc->ti_cdata.ti_txdesc[i].tx_dmamap = NULL; } } if (sc->ti_cdata.ti_tx_tag) { bus_dma_tag_destroy(sc->ti_cdata.ti_tx_tag); sc->ti_cdata.ti_tx_tag = NULL; } /* Destroy standard RX ring. */ ti_dma_ring_free(sc, &sc->ti_cdata.ti_rx_std_ring_tag, (void *)&sc->ti_rdata.ti_rx_std_ring, sc->ti_cdata.ti_rx_std_ring_map, &sc->ti_rdata.ti_rx_std_ring_paddr); /* Destroy jumbo RX ring. */ ti_dma_ring_free(sc, &sc->ti_cdata.ti_rx_jumbo_ring_tag, (void *)&sc->ti_rdata.ti_rx_jumbo_ring, sc->ti_cdata.ti_rx_jumbo_ring_map, &sc->ti_rdata.ti_rx_jumbo_ring_paddr); /* Destroy mini RX ring. */ ti_dma_ring_free(sc, &sc->ti_cdata.ti_rx_mini_ring_tag, (void *)&sc->ti_rdata.ti_rx_mini_ring, sc->ti_cdata.ti_rx_mini_ring_map, &sc->ti_rdata.ti_rx_mini_ring_paddr); /* Destroy RX return ring. */ ti_dma_ring_free(sc, &sc->ti_cdata.ti_rx_return_ring_tag, (void *)&sc->ti_rdata.ti_rx_return_ring, sc->ti_cdata.ti_rx_return_ring_map, &sc->ti_rdata.ti_rx_return_ring_paddr); /* Destroy TX ring. */ ti_dma_ring_free(sc, &sc->ti_cdata.ti_tx_ring_tag, (void *)&sc->ti_rdata.ti_tx_ring, sc->ti_cdata.ti_tx_ring_map, &sc->ti_rdata.ti_tx_ring_paddr); /* Destroy status block. */ ti_dma_ring_free(sc, &sc->ti_cdata.ti_status_tag, (void *)&sc->ti_rdata.ti_status, sc->ti_cdata.ti_status_map, &sc->ti_rdata.ti_status_paddr); /* Destroy event ring. */ ti_dma_ring_free(sc, &sc->ti_cdata.ti_event_ring_tag, (void *)&sc->ti_rdata.ti_event_ring, sc->ti_cdata.ti_event_ring_map, &sc->ti_rdata.ti_event_ring_paddr); /* Destroy GIB */ ti_dma_ring_free(sc, &sc->ti_cdata.ti_gib_tag, (void *)&sc->ti_rdata.ti_info, sc->ti_cdata.ti_gib_map, &sc->ti_rdata.ti_info_paddr); /* Destroy the parent tag. */ if (sc->ti_cdata.ti_parent_tag) { bus_dma_tag_destroy(sc->ti_cdata.ti_parent_tag); sc->ti_cdata.ti_parent_tag = NULL; } } /* * Intialize a standard receive ring descriptor. */ static int ti_newbuf_std(struct ti_softc *sc, int i) { bus_dmamap_t map; bus_dma_segment_t segs[1]; struct mbuf *m; struct ti_rx_desc *r; int error, nsegs; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MCLBYTES; m_adj(m, ETHER_ALIGN); error = bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_rx_std_tag, sc->ti_cdata.ti_rx_std_sparemap, m, segs, &nsegs, 0); if (error != 0) { m_freem(m); return (error); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); if (sc->ti_cdata.ti_rx_std_chain[i] != NULL) { bus_dmamap_sync(sc->ti_cdata.ti_rx_std_tag, sc->ti_cdata.ti_rx_std_maps[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->ti_cdata.ti_rx_std_tag, sc->ti_cdata.ti_rx_std_maps[i]); } map = sc->ti_cdata.ti_rx_std_maps[i]; sc->ti_cdata.ti_rx_std_maps[i] = sc->ti_cdata.ti_rx_std_sparemap; sc->ti_cdata.ti_rx_std_sparemap = map; sc->ti_cdata.ti_rx_std_chain[i] = m; r = &sc->ti_rdata.ti_rx_std_ring[i]; ti_hostaddr64(&r->ti_addr, segs[0].ds_addr); r->ti_len = segs[0].ds_len; r->ti_type = TI_BDTYPE_RECV_BD; r->ti_flags = 0; r->ti_vlan_tag = 0; r->ti_tcp_udp_cksum = 0; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM; r->ti_idx = i; bus_dmamap_sync(sc->ti_cdata.ti_rx_std_tag, sc->ti_cdata.ti_rx_std_maps[i], BUS_DMASYNC_PREREAD); return (0); } /* * Intialize a mini receive ring descriptor. This only applies to * the Tigon 2. */ static int ti_newbuf_mini(struct ti_softc *sc, int i) { bus_dmamap_t map; bus_dma_segment_t segs[1]; struct mbuf *m; struct ti_rx_desc *r; int error, nsegs; MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MHLEN; m_adj(m, ETHER_ALIGN); error = bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_rx_mini_tag, sc->ti_cdata.ti_rx_mini_sparemap, m, segs, &nsegs, 0); if (error != 0) { m_freem(m); return (error); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); if (sc->ti_cdata.ti_rx_mini_chain[i] != NULL) { bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_tag, sc->ti_cdata.ti_rx_mini_maps[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->ti_cdata.ti_rx_mini_tag, sc->ti_cdata.ti_rx_mini_maps[i]); } map = sc->ti_cdata.ti_rx_mini_maps[i]; sc->ti_cdata.ti_rx_mini_maps[i] = sc->ti_cdata.ti_rx_mini_sparemap; sc->ti_cdata.ti_rx_mini_sparemap = map; sc->ti_cdata.ti_rx_mini_chain[i] = m; r = &sc->ti_rdata.ti_rx_mini_ring[i]; ti_hostaddr64(&r->ti_addr, segs[0].ds_addr); r->ti_len = segs[0].ds_len; r->ti_type = TI_BDTYPE_RECV_BD; r->ti_flags = TI_BDFLAG_MINI_RING; r->ti_vlan_tag = 0; r->ti_tcp_udp_cksum = 0; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM; r->ti_idx = i; bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_tag, sc->ti_cdata.ti_rx_mini_maps[i], BUS_DMASYNC_PREREAD); return (0); } #ifndef TI_SF_BUF_JUMBO /* * Initialize a jumbo receive ring descriptor. This allocates * a jumbo buffer from the pool managed internally by the driver. */ static int ti_newbuf_jumbo(struct ti_softc *sc, int i, struct mbuf *dummy) { bus_dmamap_t map; bus_dma_segment_t segs[1]; struct mbuf *m; struct ti_rx_desc *r; int error, nsegs; (void)dummy; m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MJUM9BYTES; m_adj(m, ETHER_ALIGN); error = bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_rx_jumbo_tag, sc->ti_cdata.ti_rx_jumbo_sparemap, m, segs, &nsegs, 0); if (error != 0) { m_freem(m); return (error); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); if (sc->ti_cdata.ti_rx_jumbo_chain[i] != NULL) { bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_tag, sc->ti_cdata.ti_rx_jumbo_maps[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->ti_cdata.ti_rx_jumbo_tag, sc->ti_cdata.ti_rx_jumbo_maps[i]); } map = sc->ti_cdata.ti_rx_jumbo_maps[i]; sc->ti_cdata.ti_rx_jumbo_maps[i] = sc->ti_cdata.ti_rx_jumbo_sparemap; sc->ti_cdata.ti_rx_jumbo_sparemap = map; sc->ti_cdata.ti_rx_jumbo_chain[i] = m; r = &sc->ti_rdata.ti_rx_jumbo_ring[i]; ti_hostaddr64(&r->ti_addr, segs[0].ds_addr); r->ti_len = segs[0].ds_len; r->ti_type = TI_BDTYPE_RECV_JUMBO_BD; r->ti_flags = TI_BDFLAG_JUMBO_RING; r->ti_vlan_tag = 0; r->ti_tcp_udp_cksum = 0; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM; r->ti_idx = i; bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_tag, sc->ti_cdata.ti_rx_jumbo_maps[i], BUS_DMASYNC_PREREAD); return (0); } #else #if (PAGE_SIZE == 4096) #define NPAYLOAD 2 #else #define NPAYLOAD 1 #endif #define TCP_HDR_LEN (52 + sizeof(struct ether_header)) #define UDP_HDR_LEN (28 + sizeof(struct ether_header)) #define NFS_HDR_LEN (UDP_HDR_LEN) static int HDR_LEN = TCP_HDR_LEN; /* * Initialize a jumbo receive ring descriptor. This allocates * a jumbo buffer from the pool managed internally by the driver. */ static int ti_newbuf_jumbo(struct ti_softc *sc, int idx, struct mbuf *m_old) { bus_dmamap_t map; struct mbuf *cur, *m_new = NULL; struct mbuf *m[3] = {NULL, NULL, NULL}; struct ti_rx_desc_ext *r; vm_page_t frame; /* 1 extra buf to make nobufs easy*/ struct sf_buf *sf[3] = {NULL, NULL, NULL}; int i; bus_dma_segment_t segs[4]; int nsegs; if (m_old != NULL) { m_new = m_old; cur = m_old->m_next; for (i = 0; i <= NPAYLOAD; i++){ m[i] = cur; cur = cur->m_next; } } else { /* Allocate the mbufs. */ MGETHDR(m_new, M_NOWAIT, MT_DATA); if (m_new == NULL) { device_printf(sc->ti_dev, "mbuf allocation failed " "-- packet dropped!\n"); goto nobufs; } MGET(m[NPAYLOAD], M_NOWAIT, MT_DATA); if (m[NPAYLOAD] == NULL) { device_printf(sc->ti_dev, "cluster mbuf allocation " "failed -- packet dropped!\n"); goto nobufs; } if (!(MCLGET(m[NPAYLOAD], M_NOWAIT))) { device_printf(sc->ti_dev, "mbuf allocation failed " "-- packet dropped!\n"); goto nobufs; } m[NPAYLOAD]->m_len = MCLBYTES; for (i = 0; i < NPAYLOAD; i++){ MGET(m[i], M_NOWAIT, MT_DATA); if (m[i] == NULL) { device_printf(sc->ti_dev, "mbuf allocation " "failed -- packet dropped!\n"); goto nobufs; } frame = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (frame == NULL) { device_printf(sc->ti_dev, "buffer allocation " "failed -- packet dropped!\n"); printf(" index %d page %d\n", idx, i); goto nobufs; } sf[i] = sf_buf_alloc(frame, SFB_NOWAIT); if (sf[i] == NULL) { vm_page_unwire(frame, PQ_INACTIVE); vm_page_free(frame); device_printf(sc->ti_dev, "buffer allocation " "failed -- packet dropped!\n"); printf(" index %d page %d\n", idx, i); goto nobufs; } } for (i = 0; i < NPAYLOAD; i++){ /* Attach the buffer to the mbuf. */ m[i]->m_data = (void *)sf_buf_kva(sf[i]); m[i]->m_len = PAGE_SIZE; MEXTADD(m[i], sf_buf_kva(sf[i]), PAGE_SIZE, - sf_buf_mext, (void*)sf_buf_kva(sf[i]), sf[i], + sf_mext_free, (void*)sf_buf_kva(sf[i]), sf[i], 0, EXT_DISPOSABLE); m[i]->m_next = m[i+1]; } /* link the buffers to the header */ m_new->m_next = m[0]; m_new->m_data += ETHER_ALIGN; if (sc->ti_hdrsplit) m_new->m_len = MHLEN - ETHER_ALIGN; else m_new->m_len = HDR_LEN; m_new->m_pkthdr.len = NPAYLOAD * PAGE_SIZE + m_new->m_len; } /* Set up the descriptor. */ r = &sc->ti_rdata.ti_rx_jumbo_ring[idx]; sc->ti_cdata.ti_rx_jumbo_chain[idx] = m_new; map = sc->ti_cdata.ti_rx_jumbo_maps[i]; if (bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_rx_jumbo_tag, map, m_new, segs, &nsegs, 0)) return (ENOBUFS); if ((nsegs < 1) || (nsegs > 4)) return (ENOBUFS); ti_hostaddr64(&r->ti_addr0, segs[0].ds_addr); r->ti_len0 = m_new->m_len; ti_hostaddr64(&r->ti_addr1, segs[1].ds_addr); r->ti_len1 = PAGE_SIZE; ti_hostaddr64(&r->ti_addr2, segs[2].ds_addr); r->ti_len2 = m[1]->m_ext.ext_size; /* could be PAGE_SIZE or MCLBYTES */ if (PAGE_SIZE == 4096) { ti_hostaddr64(&r->ti_addr3, segs[3].ds_addr); r->ti_len3 = MCLBYTES; } else { r->ti_len3 = 0; } r->ti_type = TI_BDTYPE_RECV_JUMBO_BD; r->ti_flags = TI_BDFLAG_JUMBO_RING|TI_RCB_FLAG_USE_EXT_RX_BD; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM|TI_BDFLAG_IP_CKSUM; r->ti_idx = idx; bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_tag, map, BUS_DMASYNC_PREREAD); return (0); nobufs: /* * Warning! : * This can only be called before the mbufs are strung together. * If the mbufs are strung together, m_freem() will free the chain, * so that the later mbufs will be freed multiple times. */ if (m_new) m_freem(m_new); for (i = 0; i < 3; i++) { if (m[i]) m_freem(m[i]); if (sf[i]) - sf_buf_mext((void *)sf_buf_kva(sf[i]), sf[i]); + sf_mext_free((void *)sf_buf_kva(sf[i]), sf[i]); } return (ENOBUFS); } #endif /* * The standard receive ring has 512 entries in it. At 2K per mbuf cluster, * that's 1MB or memory, which is a lot. For now, we fill only the first * 256 ring entries and hope that our CPU is fast enough to keep up with * the NIC. */ static int ti_init_rx_ring_std(struct ti_softc *sc) { int i; struct ti_cmd_desc cmd; for (i = 0; i < TI_STD_RX_RING_CNT; i++) { if (ti_newbuf_std(sc, i) != 0) return (ENOBUFS); }; sc->ti_std = TI_STD_RX_RING_CNT - 1; TI_UPDATE_STDPROD(sc, TI_STD_RX_RING_CNT - 1); return (0); } static void ti_free_rx_ring_std(struct ti_softc *sc) { bus_dmamap_t map; int i; for (i = 0; i < TI_STD_RX_RING_CNT; i++) { if (sc->ti_cdata.ti_rx_std_chain[i] != NULL) { map = sc->ti_cdata.ti_rx_std_maps[i]; bus_dmamap_sync(sc->ti_cdata.ti_rx_std_tag, map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->ti_cdata.ti_rx_std_tag, map); m_freem(sc->ti_cdata.ti_rx_std_chain[i]); sc->ti_cdata.ti_rx_std_chain[i] = NULL; } } bzero(sc->ti_rdata.ti_rx_std_ring, TI_STD_RX_RING_SZ); bus_dmamap_sync(sc->ti_cdata.ti_rx_std_ring_tag, sc->ti_cdata.ti_rx_std_ring_map, BUS_DMASYNC_PREWRITE); } static int ti_init_rx_ring_jumbo(struct ti_softc *sc) { struct ti_cmd_desc cmd; int i; for (i = 0; i < TI_JUMBO_RX_RING_CNT; i++) { if (ti_newbuf_jumbo(sc, i, NULL) != 0) return (ENOBUFS); }; sc->ti_jumbo = TI_JUMBO_RX_RING_CNT - 1; TI_UPDATE_JUMBOPROD(sc, TI_JUMBO_RX_RING_CNT - 1); return (0); } static void ti_free_rx_ring_jumbo(struct ti_softc *sc) { bus_dmamap_t map; int i; for (i = 0; i < TI_JUMBO_RX_RING_CNT; i++) { if (sc->ti_cdata.ti_rx_jumbo_chain[i] != NULL) { map = sc->ti_cdata.ti_rx_jumbo_maps[i]; bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_tag, map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->ti_cdata.ti_rx_jumbo_tag, map); m_freem(sc->ti_cdata.ti_rx_jumbo_chain[i]); sc->ti_cdata.ti_rx_jumbo_chain[i] = NULL; } } bzero(sc->ti_rdata.ti_rx_jumbo_ring, TI_JUMBO_RX_RING_SZ); bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_ring_tag, sc->ti_cdata.ti_rx_jumbo_ring_map, BUS_DMASYNC_PREWRITE); } static int ti_init_rx_ring_mini(struct ti_softc *sc) { int i; for (i = 0; i < TI_MINI_RX_RING_CNT; i++) { if (ti_newbuf_mini(sc, i) != 0) return (ENOBUFS); }; sc->ti_mini = TI_MINI_RX_RING_CNT - 1; TI_UPDATE_MINIPROD(sc, TI_MINI_RX_RING_CNT - 1); return (0); } static void ti_free_rx_ring_mini(struct ti_softc *sc) { bus_dmamap_t map; int i; if (sc->ti_rdata.ti_rx_mini_ring == NULL) return; for (i = 0; i < TI_MINI_RX_RING_CNT; i++) { if (sc->ti_cdata.ti_rx_mini_chain[i] != NULL) { map = sc->ti_cdata.ti_rx_mini_maps[i]; bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_tag, map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->ti_cdata.ti_rx_mini_tag, map); m_freem(sc->ti_cdata.ti_rx_mini_chain[i]); sc->ti_cdata.ti_rx_mini_chain[i] = NULL; } } bzero(sc->ti_rdata.ti_rx_mini_ring, TI_MINI_RX_RING_SZ); bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_ring_tag, sc->ti_cdata.ti_rx_mini_ring_map, BUS_DMASYNC_PREWRITE); } static void ti_free_tx_ring(struct ti_softc *sc) { struct ti_txdesc *txd; int i; if (sc->ti_rdata.ti_tx_ring == NULL) return; for (i = 0; i < TI_TX_RING_CNT; i++) { txd = &sc->ti_cdata.ti_txdesc[i]; if (txd->tx_m != NULL) { bus_dmamap_sync(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap); m_freem(txd->tx_m); txd->tx_m = NULL; } } bzero(sc->ti_rdata.ti_tx_ring, TI_TX_RING_SZ); bus_dmamap_sync(sc->ti_cdata.ti_tx_ring_tag, sc->ti_cdata.ti_tx_ring_map, BUS_DMASYNC_PREWRITE); } static int ti_init_tx_ring(struct ti_softc *sc) { struct ti_txdesc *txd; int i; STAILQ_INIT(&sc->ti_cdata.ti_txfreeq); STAILQ_INIT(&sc->ti_cdata.ti_txbusyq); for (i = 0; i < TI_TX_RING_CNT; i++) { txd = &sc->ti_cdata.ti_txdesc[i]; STAILQ_INSERT_TAIL(&sc->ti_cdata.ti_txfreeq, txd, tx_q); } sc->ti_txcnt = 0; sc->ti_tx_saved_considx = 0; sc->ti_tx_saved_prodidx = 0; CSR_WRITE_4(sc, TI_MB_SENDPROD_IDX, 0); return (0); } /* * The Tigon 2 firmware has a new way to add/delete multicast addresses, * but we have to support the old way too so that Tigon 1 cards will * work. */ static void ti_add_mcast(struct ti_softc *sc, struct ether_addr *addr) { struct ti_cmd_desc cmd; uint16_t *m; uint32_t ext[2] = {0, 0}; m = (uint16_t *)&addr->octet[0]; switch (sc->ti_hwrev) { case TI_HWREV_TIGON: CSR_WRITE_4(sc, TI_GCR_MAR0, htons(m[0])); CSR_WRITE_4(sc, TI_GCR_MAR1, (htons(m[1]) << 16) | htons(m[2])); TI_DO_CMD(TI_CMD_ADD_MCAST_ADDR, 0, 0); break; case TI_HWREV_TIGON_II: ext[0] = htons(m[0]); ext[1] = (htons(m[1]) << 16) | htons(m[2]); TI_DO_CMD_EXT(TI_CMD_EXT_ADD_MCAST, 0, 0, (caddr_t)&ext, 2); break; default: device_printf(sc->ti_dev, "unknown hwrev\n"); break; } } static void ti_del_mcast(struct ti_softc *sc, struct ether_addr *addr) { struct ti_cmd_desc cmd; uint16_t *m; uint32_t ext[2] = {0, 0}; m = (uint16_t *)&addr->octet[0]; switch (sc->ti_hwrev) { case TI_HWREV_TIGON: CSR_WRITE_4(sc, TI_GCR_MAR0, htons(m[0])); CSR_WRITE_4(sc, TI_GCR_MAR1, (htons(m[1]) << 16) | htons(m[2])); TI_DO_CMD(TI_CMD_DEL_MCAST_ADDR, 0, 0); break; case TI_HWREV_TIGON_II: ext[0] = htons(m[0]); ext[1] = (htons(m[1]) << 16) | htons(m[2]); TI_DO_CMD_EXT(TI_CMD_EXT_DEL_MCAST, 0, 0, (caddr_t)&ext, 2); break; default: device_printf(sc->ti_dev, "unknown hwrev\n"); break; } } /* * Configure the Tigon's multicast address filter. * * The actual multicast table management is a bit of a pain, thanks to * slight brain damage on the part of both Alteon and us. With our * multicast code, we are only alerted when the multicast address table * changes and at that point we only have the current list of addresses: * we only know the current state, not the previous state, so we don't * actually know what addresses were removed or added. The firmware has * state, but we can't get our grubby mits on it, and there is no 'delete * all multicast addresses' command. Hence, we have to maintain our own * state so we know what addresses have been programmed into the NIC at * any given time. */ static void ti_setmulti(struct ti_softc *sc) { struct ifnet *ifp; struct ifmultiaddr *ifma; struct ti_cmd_desc cmd; struct ti_mc_entry *mc; uint32_t intrs; TI_LOCK_ASSERT(sc); ifp = sc->ti_ifp; if (ifp->if_flags & IFF_ALLMULTI) { TI_DO_CMD(TI_CMD_SET_ALLMULTI, TI_CMD_CODE_ALLMULTI_ENB, 0); return; } else { TI_DO_CMD(TI_CMD_SET_ALLMULTI, TI_CMD_CODE_ALLMULTI_DIS, 0); } /* Disable interrupts. */ intrs = CSR_READ_4(sc, TI_MB_HOSTINTR); CSR_WRITE_4(sc, TI_MB_HOSTINTR, 1); /* First, zot all the existing filters. */ while (SLIST_FIRST(&sc->ti_mc_listhead) != NULL) { mc = SLIST_FIRST(&sc->ti_mc_listhead); ti_del_mcast(sc, &mc->mc_addr); SLIST_REMOVE_HEAD(&sc->ti_mc_listhead, mc_entries); free(mc, M_DEVBUF); } /* Now program new ones. */ if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mc = malloc(sizeof(struct ti_mc_entry), M_DEVBUF, M_NOWAIT); if (mc == NULL) { device_printf(sc->ti_dev, "no memory for mcast filter entry\n"); continue; } bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), (char *)&mc->mc_addr, ETHER_ADDR_LEN); SLIST_INSERT_HEAD(&sc->ti_mc_listhead, mc, mc_entries); ti_add_mcast(sc, &mc->mc_addr); } if_maddr_runlock(ifp); /* Re-enable interrupts. */ CSR_WRITE_4(sc, TI_MB_HOSTINTR, intrs); } /* * Check to see if the BIOS has configured us for a 64 bit slot when * we aren't actually in one. If we detect this condition, we can work * around it on the Tigon 2 by setting a bit in the PCI state register, * but for the Tigon 1 we must give up and abort the interface attach. */ static int ti_64bitslot_war(struct ti_softc *sc) { if (!(CSR_READ_4(sc, TI_PCI_STATE) & TI_PCISTATE_32BIT_BUS)) { CSR_WRITE_4(sc, 0x600, 0); CSR_WRITE_4(sc, 0x604, 0); CSR_WRITE_4(sc, 0x600, 0x5555AAAA); if (CSR_READ_4(sc, 0x604) == 0x5555AAAA) { if (sc->ti_hwrev == TI_HWREV_TIGON) return (EINVAL); else { TI_SETBIT(sc, TI_PCI_STATE, TI_PCISTATE_32BIT_BUS); return (0); } } } return (0); } /* * Do endian, PCI and DMA initialization. Also check the on-board ROM * self-test results. */ static int ti_chipinit(struct ti_softc *sc) { uint32_t cacheline; uint32_t pci_writemax = 0; uint32_t hdrsplit; /* Initialize link to down state. */ sc->ti_linkstat = TI_EV_CODE_LINK_DOWN; /* Set endianness before we access any non-PCI registers. */ #if 0 && BYTE_ORDER == BIG_ENDIAN CSR_WRITE_4(sc, TI_MISC_HOST_CTL, TI_MHC_BIGENDIAN_INIT | (TI_MHC_BIGENDIAN_INIT << 24)); #else CSR_WRITE_4(sc, TI_MISC_HOST_CTL, TI_MHC_LITTLEENDIAN_INIT | (TI_MHC_LITTLEENDIAN_INIT << 24)); #endif /* Check the ROM failed bit to see if self-tests passed. */ if (CSR_READ_4(sc, TI_CPU_STATE) & TI_CPUSTATE_ROMFAIL) { device_printf(sc->ti_dev, "board self-diagnostics failed!\n"); return (ENODEV); } /* Halt the CPU. */ TI_SETBIT(sc, TI_CPU_STATE, TI_CPUSTATE_HALT); /* Figure out the hardware revision. */ switch (CSR_READ_4(sc, TI_MISC_HOST_CTL) & TI_MHC_CHIP_REV_MASK) { case TI_REV_TIGON_I: sc->ti_hwrev = TI_HWREV_TIGON; break; case TI_REV_TIGON_II: sc->ti_hwrev = TI_HWREV_TIGON_II; break; default: device_printf(sc->ti_dev, "unsupported chip revision\n"); return (ENODEV); } /* Do special setup for Tigon 2. */ if (sc->ti_hwrev == TI_HWREV_TIGON_II) { TI_SETBIT(sc, TI_CPU_CTL_B, TI_CPUSTATE_HALT); TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_SRAM_BANK_512K); TI_SETBIT(sc, TI_MISC_CONF, TI_MCR_SRAM_SYNCHRONOUS); } /* * We don't have firmware source for the Tigon 1, so Tigon 1 boards * can't do header splitting. */ #ifdef TI_JUMBO_HDRSPLIT if (sc->ti_hwrev != TI_HWREV_TIGON) sc->ti_hdrsplit = 1; else device_printf(sc->ti_dev, "can't do header splitting on a Tigon I board\n"); #endif /* TI_JUMBO_HDRSPLIT */ /* Set up the PCI state register. */ CSR_WRITE_4(sc, TI_PCI_STATE, TI_PCI_READ_CMD|TI_PCI_WRITE_CMD); if (sc->ti_hwrev == TI_HWREV_TIGON_II) { TI_SETBIT(sc, TI_PCI_STATE, TI_PCISTATE_USE_MEM_RD_MULT); } /* Clear the read/write max DMA parameters. */ TI_CLRBIT(sc, TI_PCI_STATE, (TI_PCISTATE_WRITE_MAXDMA| TI_PCISTATE_READ_MAXDMA)); /* Get cache line size. */ cacheline = CSR_READ_4(sc, TI_PCI_BIST) & 0xFF; /* * If the system has set enabled the PCI memory write * and invalidate command in the command register, set * the write max parameter accordingly. This is necessary * to use MWI with the Tigon 2. */ if (CSR_READ_4(sc, TI_PCI_CMDSTAT) & PCIM_CMD_MWIEN) { switch (cacheline) { case 1: case 4: case 8: case 16: case 32: case 64: break; default: /* Disable PCI memory write and invalidate. */ if (bootverbose) device_printf(sc->ti_dev, "cache line size %d" " not supported; disabling PCI MWI\n", cacheline); CSR_WRITE_4(sc, TI_PCI_CMDSTAT, CSR_READ_4(sc, TI_PCI_CMDSTAT) & ~PCIM_CMD_MWIEN); break; } } TI_SETBIT(sc, TI_PCI_STATE, pci_writemax); /* This sets the min dma param all the way up (0xff). */ TI_SETBIT(sc, TI_PCI_STATE, TI_PCISTATE_MINDMA); if (sc->ti_hdrsplit) hdrsplit = TI_OPMODE_JUMBO_HDRSPLIT; else hdrsplit = 0; /* Configure DMA variables. */ #if BYTE_ORDER == BIG_ENDIAN CSR_WRITE_4(sc, TI_GCR_OPMODE, TI_OPMODE_BYTESWAP_BD | TI_OPMODE_BYTESWAP_DATA | TI_OPMODE_WORDSWAP_BD | TI_OPMODE_WARN_ENB | TI_OPMODE_FATAL_ENB | TI_OPMODE_DONT_FRAG_JUMBO | hdrsplit); #else /* BYTE_ORDER */ CSR_WRITE_4(sc, TI_GCR_OPMODE, TI_OPMODE_BYTESWAP_DATA| TI_OPMODE_WORDSWAP_BD|TI_OPMODE_DONT_FRAG_JUMBO| TI_OPMODE_WARN_ENB|TI_OPMODE_FATAL_ENB | hdrsplit); #endif /* BYTE_ORDER */ /* * Only allow 1 DMA channel to be active at a time. * I don't think this is a good idea, but without it * the firmware racks up lots of nicDmaReadRingFull * errors. This is not compatible with hardware checksums. */ if ((sc->ti_ifp->if_capenable & (IFCAP_TXCSUM | IFCAP_RXCSUM)) == 0) TI_SETBIT(sc, TI_GCR_OPMODE, TI_OPMODE_1_DMA_ACTIVE); /* Recommended settings from Tigon manual. */ CSR_WRITE_4(sc, TI_GCR_DMA_WRITECFG, TI_DMA_STATE_THRESH_8W); CSR_WRITE_4(sc, TI_GCR_DMA_READCFG, TI_DMA_STATE_THRESH_8W); if (ti_64bitslot_war(sc)) { device_printf(sc->ti_dev, "bios thinks we're in a 64 bit slot, " "but we aren't"); return (EINVAL); } return (0); } /* * Initialize the general information block and firmware, and * start the CPU(s) running. */ static int ti_gibinit(struct ti_softc *sc) { struct ifnet *ifp; struct ti_rcb *rcb; int i; TI_LOCK_ASSERT(sc); ifp = sc->ti_ifp; /* Disable interrupts for now. */ CSR_WRITE_4(sc, TI_MB_HOSTINTR, 1); /* Tell the chip where to find the general information block. */ CSR_WRITE_4(sc, TI_GCR_GENINFO_HI, (uint64_t)sc->ti_rdata.ti_info_paddr >> 32); CSR_WRITE_4(sc, TI_GCR_GENINFO_LO, sc->ti_rdata.ti_info_paddr & 0xFFFFFFFF); /* Load the firmware into SRAM. */ ti_loadfw(sc); /* Set up the contents of the general info and ring control blocks. */ /* Set up the event ring and producer pointer. */ bzero(sc->ti_rdata.ti_event_ring, TI_EVENT_RING_SZ); rcb = &sc->ti_rdata.ti_info->ti_ev_rcb; ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_event_ring_paddr); rcb->ti_flags = 0; ti_hostaddr64(&sc->ti_rdata.ti_info->ti_ev_prodidx_ptr, sc->ti_rdata.ti_status_paddr + offsetof(struct ti_status, ti_ev_prodidx_r)); sc->ti_ev_prodidx.ti_idx = 0; CSR_WRITE_4(sc, TI_GCR_EVENTCONS_IDX, 0); sc->ti_ev_saved_considx = 0; /* Set up the command ring and producer mailbox. */ rcb = &sc->ti_rdata.ti_info->ti_cmd_rcb; ti_hostaddr64(&rcb->ti_hostaddr, TI_GCR_NIC_ADDR(TI_GCR_CMDRING)); rcb->ti_flags = 0; rcb->ti_max_len = 0; for (i = 0; i < TI_CMD_RING_CNT; i++) { CSR_WRITE_4(sc, TI_GCR_CMDRING + (i * 4), 0); } CSR_WRITE_4(sc, TI_GCR_CMDCONS_IDX, 0); CSR_WRITE_4(sc, TI_MB_CMDPROD_IDX, 0); sc->ti_cmd_saved_prodidx = 0; /* * Assign the address of the stats refresh buffer. * We re-use the current stats buffer for this to * conserve memory. */ bzero(&sc->ti_rdata.ti_info->ti_stats, sizeof(struct ti_stats)); ti_hostaddr64(&sc->ti_rdata.ti_info->ti_refresh_stats_ptr, sc->ti_rdata.ti_info_paddr + offsetof(struct ti_gib, ti_stats)); /* Set up the standard receive ring. */ rcb = &sc->ti_rdata.ti_info->ti_std_rx_rcb; ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_rx_std_ring_paddr); rcb->ti_max_len = TI_FRAMELEN; rcb->ti_flags = 0; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) rcb->ti_flags |= TI_RCB_FLAG_TCP_UDP_CKSUM | TI_RCB_FLAG_IP_CKSUM | TI_RCB_FLAG_NO_PHDR_CKSUM; if (sc->ti_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) rcb->ti_flags |= TI_RCB_FLAG_VLAN_ASSIST; /* Set up the jumbo receive ring. */ rcb = &sc->ti_rdata.ti_info->ti_jumbo_rx_rcb; ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_rx_jumbo_ring_paddr); #ifndef TI_SF_BUF_JUMBO rcb->ti_max_len = MJUM9BYTES - ETHER_ALIGN; rcb->ti_flags = 0; #else rcb->ti_max_len = PAGE_SIZE; rcb->ti_flags = TI_RCB_FLAG_USE_EXT_RX_BD; #endif if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) rcb->ti_flags |= TI_RCB_FLAG_TCP_UDP_CKSUM | TI_RCB_FLAG_IP_CKSUM | TI_RCB_FLAG_NO_PHDR_CKSUM; if (sc->ti_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) rcb->ti_flags |= TI_RCB_FLAG_VLAN_ASSIST; /* * Set up the mini ring. Only activated on the * Tigon 2 but the slot in the config block is * still there on the Tigon 1. */ rcb = &sc->ti_rdata.ti_info->ti_mini_rx_rcb; ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_rx_mini_ring_paddr); rcb->ti_max_len = MHLEN - ETHER_ALIGN; if (sc->ti_hwrev == TI_HWREV_TIGON) rcb->ti_flags = TI_RCB_FLAG_RING_DISABLED; else rcb->ti_flags = 0; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) rcb->ti_flags |= TI_RCB_FLAG_TCP_UDP_CKSUM | TI_RCB_FLAG_IP_CKSUM | TI_RCB_FLAG_NO_PHDR_CKSUM; if (sc->ti_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) rcb->ti_flags |= TI_RCB_FLAG_VLAN_ASSIST; /* * Set up the receive return ring. */ rcb = &sc->ti_rdata.ti_info->ti_return_rcb; ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_rx_return_ring_paddr); rcb->ti_flags = 0; rcb->ti_max_len = TI_RETURN_RING_CNT; ti_hostaddr64(&sc->ti_rdata.ti_info->ti_return_prodidx_ptr, sc->ti_rdata.ti_status_paddr + offsetof(struct ti_status, ti_return_prodidx_r)); /* * Set up the tx ring. Note: for the Tigon 2, we have the option * of putting the transmit ring in the host's address space and * letting the chip DMA it instead of leaving the ring in the NIC's * memory and accessing it through the shared memory region. We * do this for the Tigon 2, but it doesn't work on the Tigon 1, * so we have to revert to the shared memory scheme if we detect * a Tigon 1 chip. */ CSR_WRITE_4(sc, TI_WINBASE, TI_TX_RING_BASE); if (sc->ti_rdata.ti_tx_ring != NULL) bzero(sc->ti_rdata.ti_tx_ring, TI_TX_RING_SZ); rcb = &sc->ti_rdata.ti_info->ti_tx_rcb; if (sc->ti_hwrev == TI_HWREV_TIGON) rcb->ti_flags = 0; else rcb->ti_flags = TI_RCB_FLAG_HOST_RING; if (sc->ti_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) rcb->ti_flags |= TI_RCB_FLAG_VLAN_ASSIST; if (sc->ti_ifp->if_capenable & IFCAP_TXCSUM) rcb->ti_flags |= TI_RCB_FLAG_TCP_UDP_CKSUM | TI_RCB_FLAG_IP_CKSUM | TI_RCB_FLAG_NO_PHDR_CKSUM; rcb->ti_max_len = TI_TX_RING_CNT; if (sc->ti_hwrev == TI_HWREV_TIGON) ti_hostaddr64(&rcb->ti_hostaddr, TI_TX_RING_BASE); else ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_tx_ring_paddr); ti_hostaddr64(&sc->ti_rdata.ti_info->ti_tx_considx_ptr, sc->ti_rdata.ti_status_paddr + offsetof(struct ti_status, ti_tx_considx_r)); bus_dmamap_sync(sc->ti_cdata.ti_gib_tag, sc->ti_cdata.ti_gib_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->ti_cdata.ti_status_tag, sc->ti_cdata.ti_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->ti_cdata.ti_event_ring_tag, sc->ti_cdata.ti_event_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if (sc->ti_rdata.ti_tx_ring != NULL) bus_dmamap_sync(sc->ti_cdata.ti_tx_ring_tag, sc->ti_cdata.ti_tx_ring_map, BUS_DMASYNC_PREWRITE); /* Set up tunables */ #if 0 if (ifp->if_mtu > ETHERMTU + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) CSR_WRITE_4(sc, TI_GCR_RX_COAL_TICKS, (sc->ti_rx_coal_ticks / 10)); else #endif CSR_WRITE_4(sc, TI_GCR_RX_COAL_TICKS, sc->ti_rx_coal_ticks); CSR_WRITE_4(sc, TI_GCR_TX_COAL_TICKS, sc->ti_tx_coal_ticks); CSR_WRITE_4(sc, TI_GCR_STAT_TICKS, sc->ti_stat_ticks); CSR_WRITE_4(sc, TI_GCR_RX_MAX_COAL_BD, sc->ti_rx_max_coal_bds); CSR_WRITE_4(sc, TI_GCR_TX_MAX_COAL_BD, sc->ti_tx_max_coal_bds); CSR_WRITE_4(sc, TI_GCR_TX_BUFFER_RATIO, sc->ti_tx_buf_ratio); /* Turn interrupts on. */ CSR_WRITE_4(sc, TI_GCR_MASK_INTRS, 0); CSR_WRITE_4(sc, TI_MB_HOSTINTR, 0); /* Start CPU. */ TI_CLRBIT(sc, TI_CPU_STATE, (TI_CPUSTATE_HALT|TI_CPUSTATE_STEP)); return (0); } /* * Probe for a Tigon chip. Check the PCI vendor and device IDs * against our list and return its name if we find a match. */ static int ti_probe(device_t dev) { const struct ti_type *t; t = ti_devs; while (t->ti_name != NULL) { if ((pci_get_vendor(dev) == t->ti_vid) && (pci_get_device(dev) == t->ti_did)) { device_set_desc(dev, t->ti_name); return (BUS_PROBE_DEFAULT); } t++; } return (ENXIO); } static int ti_attach(device_t dev) { struct ifnet *ifp; struct ti_softc *sc; int error = 0, rid; u_char eaddr[6]; sc = device_get_softc(dev); sc->ti_dev = dev; mtx_init(&sc->ti_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->ti_watchdog, &sc->ti_mtx, 0); ifmedia_init(&sc->ifmedia, IFM_IMASK, ti_ifmedia_upd, ti_ifmedia_sts); ifp = sc->ti_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } sc->ti_ifp->if_hwassist = TI_CSUM_FEATURES; sc->ti_ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_RXCSUM; sc->ti_ifp->if_capenable = sc->ti_ifp->if_capabilities; /* * Map control/status registers. */ pci_enable_busmaster(dev); rid = PCIR_BAR(0); sc->ti_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->ti_res == NULL) { device_printf(dev, "couldn't map memory\n"); error = ENXIO; goto fail; } sc->ti_btag = rman_get_bustag(sc->ti_res); sc->ti_bhandle = rman_get_bushandle(sc->ti_res); /* Allocate interrupt */ rid = 0; sc->ti_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->ti_irq == NULL) { device_printf(dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } if (ti_chipinit(sc)) { device_printf(dev, "chip initialization failed\n"); error = ENXIO; goto fail; } /* Zero out the NIC's on-board SRAM. */ ti_mem_zero(sc, 0x2000, 0x100000 - 0x2000); /* Init again -- zeroing memory may have clobbered some registers. */ if (ti_chipinit(sc)) { device_printf(dev, "chip initialization failed\n"); error = ENXIO; goto fail; } /* * Get station address from the EEPROM. Note: the manual states * that the MAC address is at offset 0x8c, however the data is * stored as two longwords (since that's how it's loaded into * the NIC). This means the MAC address is actually preceded * by two zero bytes. We need to skip over those. */ if (ti_read_eeprom(sc, eaddr, TI_EE_MAC_OFFSET + 2, ETHER_ADDR_LEN)) { device_printf(dev, "failed to read station address\n"); error = ENXIO; goto fail; } /* Allocate working area for memory dump. */ sc->ti_membuf = malloc(sizeof(uint8_t) * TI_WINLEN, M_DEVBUF, M_NOWAIT); sc->ti_membuf2 = malloc(sizeof(uint8_t) * TI_WINLEN, M_DEVBUF, M_NOWAIT); if (sc->ti_membuf == NULL || sc->ti_membuf2 == NULL) { device_printf(dev, "cannot allocate memory buffer\n"); error = ENOMEM; goto fail; } if ((error = ti_dma_alloc(sc)) != 0) goto fail; /* * We really need a better way to tell a 1000baseTX card * from a 1000baseSX one, since in theory there could be * OEMed 1000baseTX cards from lame vendors who aren't * clever enough to change the PCI ID. For the moment * though, the AceNIC is the only copper card available. */ if (pci_get_vendor(dev) == ALT_VENDORID && pci_get_device(dev) == ALT_DEVICEID_ACENIC_COPPER) sc->ti_copper = 1; /* Ok, it's not the only copper card available. */ if (pci_get_vendor(dev) == NG_VENDORID && pci_get_device(dev) == NG_DEVICEID_GA620T) sc->ti_copper = 1; /* Set default tunable values. */ ti_sysctl_node(sc); /* Set up ifnet structure */ ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ti_ioctl; ifp->if_start = ti_start; ifp->if_init = ti_init; ifp->if_get_counter = ti_get_counter; ifp->if_baudrate = IF_Gbps(1UL); ifp->if_snd.ifq_drv_maxlen = TI_TX_RING_CNT - 1; IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&ifp->if_snd); /* Set up ifmedia support. */ if (sc->ti_copper) { /* * Copper cards allow manual 10/100 mode selection, * but not manual 1000baseTX mode selection. Why? * Becuase currently there's no way to specify the * master/slave setting through the firmware interface, * so Alteon decided to just bag it and handle it * via autonegotiation. */ ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX|IFM_FDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_1000_T, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_1000_T|IFM_FDX, 0, NULL); } else { /* Fiber cards don't support 10/100 modes. */ ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_1000_SX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_1000_SX|IFM_FDX, 0, NULL); } ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_AUTO, 0, NULL); ifmedia_set(&sc->ifmedia, IFM_ETHER|IFM_AUTO); /* * We're assuming here that card initialization is a sequential * thing. If it isn't, multiple cards probing at the same time * could stomp on the list of softcs here. */ /* Register the device */ sc->dev = make_dev(&ti_cdevsw, device_get_unit(dev), UID_ROOT, GID_OPERATOR, 0600, "ti%d", device_get_unit(dev)); sc->dev->si_drv1 = sc; /* * Call MI attach routine. */ ether_ifattach(ifp, eaddr); /* VLAN capability setup. */ ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTAGGING; ifp->if_capenable = ifp->if_capabilities; /* Tell the upper layer we support VLAN over-sized frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); /* Driver supports link state tracking. */ ifp->if_capabilities |= IFCAP_LINKSTATE; ifp->if_capenable |= IFCAP_LINKSTATE; /* Hook interrupt last to avoid having to lock softc */ error = bus_setup_intr(dev, sc->ti_irq, INTR_TYPE_NET|INTR_MPSAFE, NULL, ti_intr, sc, &sc->ti_intrhand); if (error) { device_printf(dev, "couldn't set up irq\n"); goto fail; } fail: if (error) ti_detach(dev); return (error); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. */ static int ti_detach(device_t dev) { struct ti_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); if (sc->dev) destroy_dev(sc->dev); KASSERT(mtx_initialized(&sc->ti_mtx), ("ti mutex not initialized")); ifp = sc->ti_ifp; if (device_is_attached(dev)) { ether_ifdetach(ifp); TI_LOCK(sc); ti_stop(sc); TI_UNLOCK(sc); } /* These should only be active if attach succeeded */ callout_drain(&sc->ti_watchdog); bus_generic_detach(dev); ti_dma_free(sc); ifmedia_removeall(&sc->ifmedia); if (sc->ti_intrhand) bus_teardown_intr(dev, sc->ti_irq, sc->ti_intrhand); if (sc->ti_irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->ti_irq); if (sc->ti_res) { bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), sc->ti_res); } if (ifp) if_free(ifp); if (sc->ti_membuf) free(sc->ti_membuf, M_DEVBUF); if (sc->ti_membuf2) free(sc->ti_membuf2, M_DEVBUF); mtx_destroy(&sc->ti_mtx); return (0); } #ifdef TI_JUMBO_HDRSPLIT /* * If hdr_len is 0, that means that header splitting wasn't done on * this packet for some reason. The two most likely reasons are that * the protocol isn't a supported protocol for splitting, or this * packet had a fragment offset that wasn't 0. * * The header length, if it is non-zero, will always be the length of * the headers on the packet, but that length could be longer than the * first mbuf. So we take the minimum of the two as the actual * length. */ static __inline void ti_hdr_split(struct mbuf *top, int hdr_len, int pkt_len, int idx) { int i = 0; int lengths[4] = {0, 0, 0, 0}; struct mbuf *m, *mp; if (hdr_len != 0) top->m_len = min(hdr_len, top->m_len); pkt_len -= top->m_len; lengths[i++] = top->m_len; mp = top; for (m = top->m_next; m && pkt_len; m = m->m_next) { m->m_len = m->m_ext.ext_size = min(m->m_len, pkt_len); pkt_len -= m->m_len; lengths[i++] = m->m_len; mp = m; } #if 0 if (hdr_len != 0) printf("got split packet: "); else printf("got non-split packet: "); printf("%d,%d,%d,%d = %d\n", lengths[0], lengths[1], lengths[2], lengths[3], lengths[0] + lengths[1] + lengths[2] + lengths[3]); #endif if (pkt_len) panic("header splitting didn't"); if (m) { m_freem(m); mp->m_next = NULL; } if (mp->m_next != NULL) panic("ti_hdr_split: last mbuf in chain should be null"); } #endif /* TI_JUMBO_HDRSPLIT */ static void ti_discard_std(struct ti_softc *sc, int i) { struct ti_rx_desc *r; r = &sc->ti_rdata.ti_rx_std_ring[i]; r->ti_len = MCLBYTES - ETHER_ALIGN; r->ti_type = TI_BDTYPE_RECV_BD; r->ti_flags = 0; r->ti_vlan_tag = 0; r->ti_tcp_udp_cksum = 0; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM; r->ti_idx = i; } static void ti_discard_mini(struct ti_softc *sc, int i) { struct ti_rx_desc *r; r = &sc->ti_rdata.ti_rx_mini_ring[i]; r->ti_len = MHLEN - ETHER_ALIGN; r->ti_type = TI_BDTYPE_RECV_BD; r->ti_flags = TI_BDFLAG_MINI_RING; r->ti_vlan_tag = 0; r->ti_tcp_udp_cksum = 0; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM; r->ti_idx = i; } #ifndef TI_SF_BUF_JUMBO static void ti_discard_jumbo(struct ti_softc *sc, int i) { struct ti_rx_desc *r; r = &sc->ti_rdata.ti_rx_jumbo_ring[i]; r->ti_len = MJUM9BYTES - ETHER_ALIGN; r->ti_type = TI_BDTYPE_RECV_JUMBO_BD; r->ti_flags = TI_BDFLAG_JUMBO_RING; r->ti_vlan_tag = 0; r->ti_tcp_udp_cksum = 0; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM; r->ti_idx = i; } #endif /* * Frame reception handling. This is called if there's a frame * on the receive return list. * * Note: we have to be able to handle three possibilities here: * 1) the frame is from the mini receive ring (can only happen) * on Tigon 2 boards) * 2) the frame is from the jumbo recieve ring * 3) the frame is from the standard receive ring */ static void ti_rxeof(struct ti_softc *sc) { struct ifnet *ifp; #ifdef TI_SF_BUF_JUMBO bus_dmamap_t map; #endif struct ti_cmd_desc cmd; int jumbocnt, minicnt, stdcnt, ti_len; TI_LOCK_ASSERT(sc); ifp = sc->ti_ifp; bus_dmamap_sync(sc->ti_cdata.ti_rx_std_ring_tag, sc->ti_cdata.ti_rx_std_ring_map, BUS_DMASYNC_POSTWRITE); if (ifp->if_mtu > ETHERMTU + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_ring_tag, sc->ti_cdata.ti_rx_jumbo_ring_map, BUS_DMASYNC_POSTWRITE); if (sc->ti_rdata.ti_rx_mini_ring != NULL) bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_ring_tag, sc->ti_cdata.ti_rx_mini_ring_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_sync(sc->ti_cdata.ti_rx_return_ring_tag, sc->ti_cdata.ti_rx_return_ring_map, BUS_DMASYNC_POSTREAD); jumbocnt = minicnt = stdcnt = 0; while (sc->ti_rx_saved_considx != sc->ti_return_prodidx.ti_idx) { struct ti_rx_desc *cur_rx; uint32_t rxidx; struct mbuf *m = NULL; uint16_t vlan_tag = 0; int have_tag = 0; cur_rx = &sc->ti_rdata.ti_rx_return_ring[sc->ti_rx_saved_considx]; rxidx = cur_rx->ti_idx; ti_len = cur_rx->ti_len; TI_INC(sc->ti_rx_saved_considx, TI_RETURN_RING_CNT); if (cur_rx->ti_flags & TI_BDFLAG_VLAN_TAG) { have_tag = 1; vlan_tag = cur_rx->ti_vlan_tag; } if (cur_rx->ti_flags & TI_BDFLAG_JUMBO_RING) { jumbocnt++; TI_INC(sc->ti_jumbo, TI_JUMBO_RX_RING_CNT); m = sc->ti_cdata.ti_rx_jumbo_chain[rxidx]; #ifndef TI_SF_BUF_JUMBO if (cur_rx->ti_flags & TI_BDFLAG_ERROR) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); ti_discard_jumbo(sc, rxidx); continue; } if (ti_newbuf_jumbo(sc, rxidx, NULL) != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); ti_discard_jumbo(sc, rxidx); continue; } m->m_len = ti_len; #else /* !TI_SF_BUF_JUMBO */ sc->ti_cdata.ti_rx_jumbo_chain[rxidx] = NULL; map = sc->ti_cdata.ti_rx_jumbo_maps[rxidx]; bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_tag, map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->ti_cdata.ti_rx_jumbo_tag, map); if (cur_rx->ti_flags & TI_BDFLAG_ERROR) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); ti_newbuf_jumbo(sc, sc->ti_jumbo, m); continue; } if (ti_newbuf_jumbo(sc, sc->ti_jumbo, NULL) == ENOBUFS) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); ti_newbuf_jumbo(sc, sc->ti_jumbo, m); continue; } #ifdef TI_JUMBO_HDRSPLIT if (sc->ti_hdrsplit) ti_hdr_split(m, TI_HOSTADDR(cur_rx->ti_addr), ti_len, rxidx); else #endif /* TI_JUMBO_HDRSPLIT */ m_adj(m, ti_len - m->m_pkthdr.len); #endif /* TI_SF_BUF_JUMBO */ } else if (cur_rx->ti_flags & TI_BDFLAG_MINI_RING) { minicnt++; TI_INC(sc->ti_mini, TI_MINI_RX_RING_CNT); m = sc->ti_cdata.ti_rx_mini_chain[rxidx]; if (cur_rx->ti_flags & TI_BDFLAG_ERROR) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); ti_discard_mini(sc, rxidx); continue; } if (ti_newbuf_mini(sc, rxidx) != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); ti_discard_mini(sc, rxidx); continue; } m->m_len = ti_len; } else { stdcnt++; TI_INC(sc->ti_std, TI_STD_RX_RING_CNT); m = sc->ti_cdata.ti_rx_std_chain[rxidx]; if (cur_rx->ti_flags & TI_BDFLAG_ERROR) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); ti_discard_std(sc, rxidx); continue; } if (ti_newbuf_std(sc, rxidx) != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); ti_discard_std(sc, rxidx); continue; } m->m_len = ti_len; } m->m_pkthdr.len = ti_len; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); m->m_pkthdr.rcvif = ifp; if (ifp->if_capenable & IFCAP_RXCSUM) { if (cur_rx->ti_flags & TI_BDFLAG_IP_CKSUM) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if ((cur_rx->ti_ip_cksum ^ 0xffff) == 0) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; } if (cur_rx->ti_flags & TI_BDFLAG_TCP_UDP_CKSUM) { m->m_pkthdr.csum_data = cur_rx->ti_tcp_udp_cksum; m->m_pkthdr.csum_flags |= CSUM_DATA_VALID; } } /* * If we received a packet with a vlan tag, * tag it before passing the packet upward. */ if (have_tag) { m->m_pkthdr.ether_vtag = vlan_tag; m->m_flags |= M_VLANTAG; } TI_UNLOCK(sc); (*ifp->if_input)(ifp, m); TI_LOCK(sc); } bus_dmamap_sync(sc->ti_cdata.ti_rx_return_ring_tag, sc->ti_cdata.ti_rx_return_ring_map, BUS_DMASYNC_PREREAD); /* Only necessary on the Tigon 1. */ if (sc->ti_hwrev == TI_HWREV_TIGON) CSR_WRITE_4(sc, TI_GCR_RXRETURNCONS_IDX, sc->ti_rx_saved_considx); if (stdcnt > 0) { bus_dmamap_sync(sc->ti_cdata.ti_rx_std_ring_tag, sc->ti_cdata.ti_rx_std_ring_map, BUS_DMASYNC_PREWRITE); TI_UPDATE_STDPROD(sc, sc->ti_std); } if (minicnt > 0) { bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_ring_tag, sc->ti_cdata.ti_rx_mini_ring_map, BUS_DMASYNC_PREWRITE); TI_UPDATE_MINIPROD(sc, sc->ti_mini); } if (jumbocnt > 0) { bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_ring_tag, sc->ti_cdata.ti_rx_jumbo_ring_map, BUS_DMASYNC_PREWRITE); TI_UPDATE_JUMBOPROD(sc, sc->ti_jumbo); } } static void ti_txeof(struct ti_softc *sc) { struct ti_txdesc *txd; struct ti_tx_desc txdesc; struct ti_tx_desc *cur_tx = NULL; struct ifnet *ifp; int idx; ifp = sc->ti_ifp; txd = STAILQ_FIRST(&sc->ti_cdata.ti_txbusyq); if (txd == NULL) return; if (sc->ti_rdata.ti_tx_ring != NULL) bus_dmamap_sync(sc->ti_cdata.ti_tx_ring_tag, sc->ti_cdata.ti_tx_ring_map, BUS_DMASYNC_POSTWRITE); /* * Go through our tx ring and free mbufs for those * frames that have been sent. */ for (idx = sc->ti_tx_saved_considx; idx != sc->ti_tx_considx.ti_idx; TI_INC(idx, TI_TX_RING_CNT)) { if (sc->ti_hwrev == TI_HWREV_TIGON) { ti_mem_read(sc, TI_TX_RING_BASE + idx * sizeof(txdesc), sizeof(txdesc), &txdesc); cur_tx = &txdesc; } else cur_tx = &sc->ti_rdata.ti_tx_ring[idx]; sc->ti_txcnt--; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if ((cur_tx->ti_flags & TI_BDFLAG_END) == 0) continue; bus_dmamap_sync(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); m_freem(txd->tx_m); txd->tx_m = NULL; STAILQ_REMOVE_HEAD(&sc->ti_cdata.ti_txbusyq, tx_q); STAILQ_INSERT_TAIL(&sc->ti_cdata.ti_txfreeq, txd, tx_q); txd = STAILQ_FIRST(&sc->ti_cdata.ti_txbusyq); } sc->ti_tx_saved_considx = idx; if (sc->ti_txcnt == 0) sc->ti_timer = 0; } static void ti_intr(void *xsc) { struct ti_softc *sc; struct ifnet *ifp; sc = xsc; TI_LOCK(sc); ifp = sc->ti_ifp; /* Make sure this is really our interrupt. */ if (!(CSR_READ_4(sc, TI_MISC_HOST_CTL) & TI_MHC_INTSTATE)) { TI_UNLOCK(sc); return; } /* Ack interrupt and stop others from occuring. */ CSR_WRITE_4(sc, TI_MB_HOSTINTR, 1); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { bus_dmamap_sync(sc->ti_cdata.ti_status_tag, sc->ti_cdata.ti_status_map, BUS_DMASYNC_POSTREAD); /* Check RX return ring producer/consumer */ ti_rxeof(sc); /* Check TX ring producer/consumer */ ti_txeof(sc); bus_dmamap_sync(sc->ti_cdata.ti_status_tag, sc->ti_cdata.ti_status_map, BUS_DMASYNC_PREREAD); } ti_handle_events(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* Re-enable interrupts. */ CSR_WRITE_4(sc, TI_MB_HOSTINTR, 0); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) ti_start_locked(ifp); } TI_UNLOCK(sc); } static uint64_t ti_get_counter(struct ifnet *ifp, ift_counter cnt) { switch (cnt) { case IFCOUNTER_COLLISIONS: { struct ti_softc *sc; struct ti_stats *s; uint64_t rv; sc = if_getsoftc(ifp); s = &sc->ti_rdata.ti_info->ti_stats; TI_LOCK(sc); bus_dmamap_sync(sc->ti_cdata.ti_gib_tag, sc->ti_cdata.ti_gib_map, BUS_DMASYNC_POSTREAD); rv = s->dot3StatsSingleCollisionFrames + s->dot3StatsMultipleCollisionFrames + s->dot3StatsExcessiveCollisions + s->dot3StatsLateCollisions; bus_dmamap_sync(sc->ti_cdata.ti_gib_tag, sc->ti_cdata.ti_gib_map, BUS_DMASYNC_PREREAD); TI_UNLOCK(sc); return (rv); } default: return (if_get_counter_default(ifp, cnt)); } } /* * Encapsulate an mbuf chain in the tx ring by coupling the mbuf data * pointers to descriptors. */ static int ti_encap(struct ti_softc *sc, struct mbuf **m_head) { struct ti_txdesc *txd; struct ti_tx_desc *f; struct ti_tx_desc txdesc; struct mbuf *m; bus_dma_segment_t txsegs[TI_MAXTXSEGS]; uint16_t csum_flags; int error, frag, i, nseg; if ((txd = STAILQ_FIRST(&sc->ti_cdata.ti_txfreeq)) == NULL) return (ENOBUFS); error = bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap, *m_head, txsegs, &nseg, 0); if (error == EFBIG) { m = m_defrag(*m_head, M_NOWAIT); if (m == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOMEM); } *m_head = m; error = bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap, *m_head, txsegs, &nseg, 0); if (error) { m_freem(*m_head); *m_head = NULL; return (error); } } else if (error != 0) return (error); if (nseg == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } if (sc->ti_txcnt + nseg >= TI_TX_RING_CNT) { bus_dmamap_unload(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap); return (ENOBUFS); } bus_dmamap_sync(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap, BUS_DMASYNC_PREWRITE); m = *m_head; csum_flags = 0; if (m->m_pkthdr.csum_flags & CSUM_IP) csum_flags |= TI_BDFLAG_IP_CKSUM; if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)) csum_flags |= TI_BDFLAG_TCP_UDP_CKSUM; frag = sc->ti_tx_saved_prodidx; for (i = 0; i < nseg; i++) { if (sc->ti_hwrev == TI_HWREV_TIGON) { bzero(&txdesc, sizeof(txdesc)); f = &txdesc; } else f = &sc->ti_rdata.ti_tx_ring[frag]; ti_hostaddr64(&f->ti_addr, txsegs[i].ds_addr); f->ti_len = txsegs[i].ds_len; f->ti_flags = csum_flags; if (m->m_flags & M_VLANTAG) { f->ti_flags |= TI_BDFLAG_VLAN_TAG; f->ti_vlan_tag = m->m_pkthdr.ether_vtag; } else { f->ti_vlan_tag = 0; } if (sc->ti_hwrev == TI_HWREV_TIGON) ti_mem_write(sc, TI_TX_RING_BASE + frag * sizeof(txdesc), sizeof(txdesc), &txdesc); TI_INC(frag, TI_TX_RING_CNT); } sc->ti_tx_saved_prodidx = frag; /* set TI_BDFLAG_END on the last descriptor */ frag = (frag + TI_TX_RING_CNT - 1) % TI_TX_RING_CNT; if (sc->ti_hwrev == TI_HWREV_TIGON) { txdesc.ti_flags |= TI_BDFLAG_END; ti_mem_write(sc, TI_TX_RING_BASE + frag * sizeof(txdesc), sizeof(txdesc), &txdesc); } else sc->ti_rdata.ti_tx_ring[frag].ti_flags |= TI_BDFLAG_END; STAILQ_REMOVE_HEAD(&sc->ti_cdata.ti_txfreeq, tx_q); STAILQ_INSERT_TAIL(&sc->ti_cdata.ti_txbusyq, txd, tx_q); txd->tx_m = m; sc->ti_txcnt += nseg; return (0); } static void ti_start(struct ifnet *ifp) { struct ti_softc *sc; sc = ifp->if_softc; TI_LOCK(sc); ti_start_locked(ifp); TI_UNLOCK(sc); } /* * Main transmit routine. To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit descriptors. */ static void ti_start_locked(struct ifnet *ifp) { struct ti_softc *sc; struct mbuf *m_head = NULL; int enq = 0; sc = ifp->if_softc; for (; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->ti_txcnt < (TI_TX_RING_CNT - 16);) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* * Pack the data into the transmit ring. If we * don't have room, set the OACTIVE flag and wait * for the NIC to drain the ring. */ if (ti_encap(sc, &m_head)) { if (m_head == NULL) break; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } enq++; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ ETHER_BPF_MTAP(ifp, m_head); } if (enq > 0) { if (sc->ti_rdata.ti_tx_ring != NULL) bus_dmamap_sync(sc->ti_cdata.ti_tx_ring_tag, sc->ti_cdata.ti_tx_ring_map, BUS_DMASYNC_PREWRITE); /* Transmit */ CSR_WRITE_4(sc, TI_MB_SENDPROD_IDX, sc->ti_tx_saved_prodidx); /* * Set a timeout in case the chip goes out to lunch. */ sc->ti_timer = 5; } } static void ti_init(void *xsc) { struct ti_softc *sc; sc = xsc; TI_LOCK(sc); ti_init_locked(sc); TI_UNLOCK(sc); } static void ti_init_locked(void *xsc) { struct ti_softc *sc = xsc; if (sc->ti_ifp->if_drv_flags & IFF_DRV_RUNNING) return; /* Cancel pending I/O and flush buffers. */ ti_stop(sc); /* Init the gen info block, ring control blocks and firmware. */ if (ti_gibinit(sc)) { device_printf(sc->ti_dev, "initialization failure\n"); return; } } static void ti_init2(struct ti_softc *sc) { struct ti_cmd_desc cmd; struct ifnet *ifp; uint8_t *ea; struct ifmedia *ifm; int tmp; TI_LOCK_ASSERT(sc); ifp = sc->ti_ifp; /* Specify MTU and interface index. */ CSR_WRITE_4(sc, TI_GCR_IFINDEX, device_get_unit(sc->ti_dev)); CSR_WRITE_4(sc, TI_GCR_IFMTU, ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN); TI_DO_CMD(TI_CMD_UPDATE_GENCOM, 0, 0); /* Load our MAC address. */ ea = IF_LLADDR(sc->ti_ifp); CSR_WRITE_4(sc, TI_GCR_PAR0, (ea[0] << 8) | ea[1]); CSR_WRITE_4(sc, TI_GCR_PAR1, (ea[2] << 24) | (ea[3] << 16) | (ea[4] << 8) | ea[5]); TI_DO_CMD(TI_CMD_SET_MAC_ADDR, 0, 0); /* Enable or disable promiscuous mode as needed. */ if (ifp->if_flags & IFF_PROMISC) { TI_DO_CMD(TI_CMD_SET_PROMISC_MODE, TI_CMD_CODE_PROMISC_ENB, 0); } else { TI_DO_CMD(TI_CMD_SET_PROMISC_MODE, TI_CMD_CODE_PROMISC_DIS, 0); } /* Program multicast filter. */ ti_setmulti(sc); /* * If this is a Tigon 1, we should tell the * firmware to use software packet filtering. */ if (sc->ti_hwrev == TI_HWREV_TIGON) { TI_DO_CMD(TI_CMD_FDR_FILTERING, TI_CMD_CODE_FILT_ENB, 0); } /* Init RX ring. */ if (ti_init_rx_ring_std(sc) != 0) { /* XXX */ device_printf(sc->ti_dev, "no memory for std Rx buffers.\n"); return; } /* Init jumbo RX ring. */ if (ifp->if_mtu > ETHERMTU + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) { if (ti_init_rx_ring_jumbo(sc) != 0) { /* XXX */ device_printf(sc->ti_dev, "no memory for jumbo Rx buffers.\n"); return; } } /* * If this is a Tigon 2, we can also configure the * mini ring. */ if (sc->ti_hwrev == TI_HWREV_TIGON_II) { if (ti_init_rx_ring_mini(sc) != 0) { /* XXX */ device_printf(sc->ti_dev, "no memory for mini Rx buffers.\n"); return; } } CSR_WRITE_4(sc, TI_GCR_RXRETURNCONS_IDX, 0); sc->ti_rx_saved_considx = 0; /* Init TX ring. */ ti_init_tx_ring(sc); /* Tell firmware we're alive. */ TI_DO_CMD(TI_CMD_HOST_STATE, TI_CMD_CODE_STACK_UP, 0); /* Enable host interrupts. */ CSR_WRITE_4(sc, TI_MB_HOSTINTR, 0); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&sc->ti_watchdog, hz, ti_watchdog, sc); /* * Make sure to set media properly. We have to do this * here since we have to issue commands in order to set * the link negotiation and we can't issue commands until * the firmware is running. */ ifm = &sc->ifmedia; tmp = ifm->ifm_media; ifm->ifm_media = ifm->ifm_cur->ifm_media; ti_ifmedia_upd_locked(sc); ifm->ifm_media = tmp; } /* * Set media options. */ static int ti_ifmedia_upd(struct ifnet *ifp) { struct ti_softc *sc; int error; sc = ifp->if_softc; TI_LOCK(sc); error = ti_ifmedia_upd_locked(sc); TI_UNLOCK(sc); return (error); } static int ti_ifmedia_upd_locked(struct ti_softc *sc) { struct ifmedia *ifm; struct ti_cmd_desc cmd; uint32_t flowctl; ifm = &sc->ifmedia; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); flowctl = 0; switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: /* * Transmit flow control doesn't work on the Tigon 1. */ flowctl = TI_GLNK_RX_FLOWCTL_Y; /* * Transmit flow control can also cause problems on the * Tigon 2, apparantly with both the copper and fiber * boards. The symptom is that the interface will just * hang. This was reproduced with Alteon 180 switches. */ #if 0 if (sc->ti_hwrev != TI_HWREV_TIGON) flowctl |= TI_GLNK_TX_FLOWCTL_Y; #endif CSR_WRITE_4(sc, TI_GCR_GLINK, TI_GLNK_PREF|TI_GLNK_1000MB| TI_GLNK_FULL_DUPLEX| flowctl | TI_GLNK_AUTONEGENB|TI_GLNK_ENB); flowctl = TI_LNK_RX_FLOWCTL_Y; #if 0 if (sc->ti_hwrev != TI_HWREV_TIGON) flowctl |= TI_LNK_TX_FLOWCTL_Y; #endif CSR_WRITE_4(sc, TI_GCR_LINK, TI_LNK_100MB|TI_LNK_10MB| TI_LNK_FULL_DUPLEX|TI_LNK_HALF_DUPLEX| flowctl | TI_LNK_AUTONEGENB|TI_LNK_ENB); TI_DO_CMD(TI_CMD_LINK_NEGOTIATION, TI_CMD_CODE_NEGOTIATE_BOTH, 0); break; case IFM_1000_SX: case IFM_1000_T: flowctl = TI_GLNK_RX_FLOWCTL_Y; #if 0 if (sc->ti_hwrev != TI_HWREV_TIGON) flowctl |= TI_GLNK_TX_FLOWCTL_Y; #endif CSR_WRITE_4(sc, TI_GCR_GLINK, TI_GLNK_PREF|TI_GLNK_1000MB| flowctl |TI_GLNK_ENB); CSR_WRITE_4(sc, TI_GCR_LINK, 0); if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) { TI_SETBIT(sc, TI_GCR_GLINK, TI_GLNK_FULL_DUPLEX); } TI_DO_CMD(TI_CMD_LINK_NEGOTIATION, TI_CMD_CODE_NEGOTIATE_GIGABIT, 0); break; case IFM_100_FX: case IFM_10_FL: case IFM_100_TX: case IFM_10_T: flowctl = TI_LNK_RX_FLOWCTL_Y; #if 0 if (sc->ti_hwrev != TI_HWREV_TIGON) flowctl |= TI_LNK_TX_FLOWCTL_Y; #endif CSR_WRITE_4(sc, TI_GCR_GLINK, 0); CSR_WRITE_4(sc, TI_GCR_LINK, TI_LNK_ENB|TI_LNK_PREF|flowctl); if (IFM_SUBTYPE(ifm->ifm_media) == IFM_100_FX || IFM_SUBTYPE(ifm->ifm_media) == IFM_100_TX) { TI_SETBIT(sc, TI_GCR_LINK, TI_LNK_100MB); } else { TI_SETBIT(sc, TI_GCR_LINK, TI_LNK_10MB); } if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) { TI_SETBIT(sc, TI_GCR_LINK, TI_LNK_FULL_DUPLEX); } else { TI_SETBIT(sc, TI_GCR_LINK, TI_LNK_HALF_DUPLEX); } TI_DO_CMD(TI_CMD_LINK_NEGOTIATION, TI_CMD_CODE_NEGOTIATE_10_100, 0); break; } return (0); } /* * Report current media status. */ static void ti_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct ti_softc *sc; uint32_t media = 0; sc = ifp->if_softc; TI_LOCK(sc); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (sc->ti_linkstat == TI_EV_CODE_LINK_DOWN) { TI_UNLOCK(sc); return; } ifmr->ifm_status |= IFM_ACTIVE; if (sc->ti_linkstat == TI_EV_CODE_GIG_LINK_UP) { media = CSR_READ_4(sc, TI_GCR_GLINK_STAT); if (sc->ti_copper) ifmr->ifm_active |= IFM_1000_T; else ifmr->ifm_active |= IFM_1000_SX; if (media & TI_GLNK_FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } else if (sc->ti_linkstat == TI_EV_CODE_LINK_UP) { media = CSR_READ_4(sc, TI_GCR_LINK_STAT); if (sc->ti_copper) { if (media & TI_LNK_100MB) ifmr->ifm_active |= IFM_100_TX; if (media & TI_LNK_10MB) ifmr->ifm_active |= IFM_10_T; } else { if (media & TI_LNK_100MB) ifmr->ifm_active |= IFM_100_FX; if (media & TI_LNK_10MB) ifmr->ifm_active |= IFM_10_FL; } if (media & TI_LNK_FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; if (media & TI_LNK_HALF_DUPLEX) ifmr->ifm_active |= IFM_HDX; } TI_UNLOCK(sc); } static int ti_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ti_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; struct ti_cmd_desc cmd; int mask, error = 0; switch (command) { case SIOCSIFMTU: TI_LOCK(sc); if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TI_JUMBO_MTU) error = EINVAL; else { ifp->if_mtu = ifr->ifr_mtu; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ti_init_locked(sc); } } TI_UNLOCK(sc); break; case SIOCSIFFLAGS: TI_LOCK(sc); if (ifp->if_flags & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. */ if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->ti_if_flags & IFF_PROMISC)) { TI_DO_CMD(TI_CMD_SET_PROMISC_MODE, TI_CMD_CODE_PROMISC_ENB, 0); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->ti_if_flags & IFF_PROMISC) { TI_DO_CMD(TI_CMD_SET_PROMISC_MODE, TI_CMD_CODE_PROMISC_DIS, 0); } else ti_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ti_stop(sc); } } sc->ti_if_flags = ifp->if_flags; TI_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: TI_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) ti_setmulti(sc); TI_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command); break; case SIOCSIFCAP: TI_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if ((mask & IFCAP_TXCSUM) != 0 && (ifp->if_capabilities & IFCAP_TXCSUM) != 0) { ifp->if_capenable ^= IFCAP_TXCSUM; if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) ifp->if_hwassist |= TI_CSUM_FEATURES; else ifp->if_hwassist &= ~TI_CSUM_FEATURES; } if ((mask & IFCAP_RXCSUM) != 0 && (ifp->if_capabilities & IFCAP_RXCSUM) != 0) ifp->if_capenable ^= IFCAP_RXCSUM; if ((mask & IFCAP_VLAN_HWTAGGING) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if ((mask & IFCAP_VLAN_HWCSUM) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWCSUM) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; if ((mask & (IFCAP_TXCSUM | IFCAP_RXCSUM | IFCAP_VLAN_HWTAGGING)) != 0) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ti_init_locked(sc); } } TI_UNLOCK(sc); VLAN_CAPABILITIES(ifp); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int ti_open(struct cdev *dev, int flags, int fmt, struct thread *td) { struct ti_softc *sc; sc = dev->si_drv1; if (sc == NULL) return (ENODEV); TI_LOCK(sc); sc->ti_flags |= TI_FLAG_DEBUGING; TI_UNLOCK(sc); return (0); } static int ti_close(struct cdev *dev, int flag, int fmt, struct thread *td) { struct ti_softc *sc; sc = dev->si_drv1; if (sc == NULL) return (ENODEV); TI_LOCK(sc); sc->ti_flags &= ~TI_FLAG_DEBUGING; TI_UNLOCK(sc); return (0); } /* * This ioctl routine goes along with the Tigon character device. */ static int ti_ioctl2(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td) { struct ti_softc *sc; int error; sc = dev->si_drv1; if (sc == NULL) return (ENODEV); error = 0; switch (cmd) { case TIIOCGETSTATS: { struct ti_stats *outstats; outstats = (struct ti_stats *)addr; TI_LOCK(sc); bus_dmamap_sync(sc->ti_cdata.ti_gib_tag, sc->ti_cdata.ti_gib_map, BUS_DMASYNC_POSTREAD); bcopy(&sc->ti_rdata.ti_info->ti_stats, outstats, sizeof(struct ti_stats)); bus_dmamap_sync(sc->ti_cdata.ti_gib_tag, sc->ti_cdata.ti_gib_map, BUS_DMASYNC_PREREAD); TI_UNLOCK(sc); break; } case TIIOCGETPARAMS: { struct ti_params *params; params = (struct ti_params *)addr; TI_LOCK(sc); params->ti_stat_ticks = sc->ti_stat_ticks; params->ti_rx_coal_ticks = sc->ti_rx_coal_ticks; params->ti_tx_coal_ticks = sc->ti_tx_coal_ticks; params->ti_rx_max_coal_bds = sc->ti_rx_max_coal_bds; params->ti_tx_max_coal_bds = sc->ti_tx_max_coal_bds; params->ti_tx_buf_ratio = sc->ti_tx_buf_ratio; params->param_mask = TI_PARAM_ALL; TI_UNLOCK(sc); break; } case TIIOCSETPARAMS: { struct ti_params *params; params = (struct ti_params *)addr; TI_LOCK(sc); if (params->param_mask & TI_PARAM_STAT_TICKS) { sc->ti_stat_ticks = params->ti_stat_ticks; CSR_WRITE_4(sc, TI_GCR_STAT_TICKS, sc->ti_stat_ticks); } if (params->param_mask & TI_PARAM_RX_COAL_TICKS) { sc->ti_rx_coal_ticks = params->ti_rx_coal_ticks; CSR_WRITE_4(sc, TI_GCR_RX_COAL_TICKS, sc->ti_rx_coal_ticks); } if (params->param_mask & TI_PARAM_TX_COAL_TICKS) { sc->ti_tx_coal_ticks = params->ti_tx_coal_ticks; CSR_WRITE_4(sc, TI_GCR_TX_COAL_TICKS, sc->ti_tx_coal_ticks); } if (params->param_mask & TI_PARAM_RX_COAL_BDS) { sc->ti_rx_max_coal_bds = params->ti_rx_max_coal_bds; CSR_WRITE_4(sc, TI_GCR_RX_MAX_COAL_BD, sc->ti_rx_max_coal_bds); } if (params->param_mask & TI_PARAM_TX_COAL_BDS) { sc->ti_tx_max_coal_bds = params->ti_tx_max_coal_bds; CSR_WRITE_4(sc, TI_GCR_TX_MAX_COAL_BD, sc->ti_tx_max_coal_bds); } if (params->param_mask & TI_PARAM_TX_BUF_RATIO) { sc->ti_tx_buf_ratio = params->ti_tx_buf_ratio; CSR_WRITE_4(sc, TI_GCR_TX_BUFFER_RATIO, sc->ti_tx_buf_ratio); } TI_UNLOCK(sc); break; } case TIIOCSETTRACE: { ti_trace_type trace_type; trace_type = *(ti_trace_type *)addr; /* * Set tracing to whatever the user asked for. Setting * this register to 0 should have the effect of disabling * tracing. */ TI_LOCK(sc); CSR_WRITE_4(sc, TI_GCR_NIC_TRACING, trace_type); TI_UNLOCK(sc); break; } case TIIOCGETTRACE: { struct ti_trace_buf *trace_buf; uint32_t trace_start, cur_trace_ptr, trace_len; trace_buf = (struct ti_trace_buf *)addr; TI_LOCK(sc); trace_start = CSR_READ_4(sc, TI_GCR_NICTRACE_START); cur_trace_ptr = CSR_READ_4(sc, TI_GCR_NICTRACE_PTR); trace_len = CSR_READ_4(sc, TI_GCR_NICTRACE_LEN); #if 0 if_printf(sc->ti_ifp, "trace_start = %#x, cur_trace_ptr = %#x, " "trace_len = %d\n", trace_start, cur_trace_ptr, trace_len); if_printf(sc->ti_ifp, "trace_buf->buf_len = %d\n", trace_buf->buf_len); #endif error = ti_copy_mem(sc, trace_start, min(trace_len, trace_buf->buf_len), (caddr_t)trace_buf->buf, 1, 1); if (error == 0) { trace_buf->fill_len = min(trace_len, trace_buf->buf_len); if (cur_trace_ptr < trace_start) trace_buf->cur_trace_ptr = trace_start - cur_trace_ptr; else trace_buf->cur_trace_ptr = cur_trace_ptr - trace_start; } else trace_buf->fill_len = 0; TI_UNLOCK(sc); break; } /* * For debugging, five ioctls are needed: * ALT_ATTACH * ALT_READ_TG_REG * ALT_WRITE_TG_REG * ALT_READ_TG_MEM * ALT_WRITE_TG_MEM */ case ALT_ATTACH: /* * From what I can tell, Alteon's Solaris Tigon driver * only has one character device, so you have to attach * to the Tigon board you're interested in. This seems * like a not-so-good way to do things, since unless you * subsequently specify the unit number of the device * you're interested in every ioctl, you'll only be * able to debug one board at a time. */ break; case ALT_READ_TG_MEM: case ALT_WRITE_TG_MEM: { struct tg_mem *mem_param; uint32_t sram_end, scratch_end; mem_param = (struct tg_mem *)addr; if (sc->ti_hwrev == TI_HWREV_TIGON) { sram_end = TI_END_SRAM_I; scratch_end = TI_END_SCRATCH_I; } else { sram_end = TI_END_SRAM_II; scratch_end = TI_END_SCRATCH_II; } /* * For now, we'll only handle accessing regular SRAM, * nothing else. */ TI_LOCK(sc); if (mem_param->tgAddr >= TI_BEG_SRAM && mem_param->tgAddr + mem_param->len <= sram_end) { /* * In this instance, we always copy to/from user * space, so the user space argument is set to 1. */ error = ti_copy_mem(sc, mem_param->tgAddr, mem_param->len, mem_param->userAddr, 1, cmd == ALT_READ_TG_MEM ? 1 : 0); } else if (mem_param->tgAddr >= TI_BEG_SCRATCH && mem_param->tgAddr <= scratch_end) { error = ti_copy_scratch(sc, mem_param->tgAddr, mem_param->len, mem_param->userAddr, 1, cmd == ALT_READ_TG_MEM ? 1 : 0, TI_PROCESSOR_A); } else if (mem_param->tgAddr >= TI_BEG_SCRATCH_B_DEBUG && mem_param->tgAddr <= TI_BEG_SCRATCH_B_DEBUG) { if (sc->ti_hwrev == TI_HWREV_TIGON) { if_printf(sc->ti_ifp, "invalid memory range for Tigon I\n"); error = EINVAL; break; } error = ti_copy_scratch(sc, mem_param->tgAddr - TI_SCRATCH_DEBUG_OFF, mem_param->len, mem_param->userAddr, 1, cmd == ALT_READ_TG_MEM ? 1 : 0, TI_PROCESSOR_B); } else { if_printf(sc->ti_ifp, "memory address %#x len %d is " "out of supported range\n", mem_param->tgAddr, mem_param->len); error = EINVAL; } TI_UNLOCK(sc); break; } case ALT_READ_TG_REG: case ALT_WRITE_TG_REG: { struct tg_reg *regs; uint32_t tmpval; regs = (struct tg_reg *)addr; /* * Make sure the address in question isn't out of range. */ if (regs->addr > TI_REG_MAX) { error = EINVAL; break; } TI_LOCK(sc); if (cmd == ALT_READ_TG_REG) { bus_space_read_region_4(sc->ti_btag, sc->ti_bhandle, regs->addr, &tmpval, 1); regs->data = ntohl(tmpval); #if 0 if ((regs->addr == TI_CPU_STATE) || (regs->addr == TI_CPU_CTL_B)) { if_printf(sc->ti_ifp, "register %#x = %#x\n", regs->addr, tmpval); } #endif } else { tmpval = htonl(regs->data); bus_space_write_region_4(sc->ti_btag, sc->ti_bhandle, regs->addr, &tmpval, 1); } TI_UNLOCK(sc); break; } default: error = ENOTTY; break; } return (error); } static void ti_watchdog(void *arg) { struct ti_softc *sc; struct ifnet *ifp; sc = arg; TI_LOCK_ASSERT(sc); callout_reset(&sc->ti_watchdog, hz, ti_watchdog, sc); if (sc->ti_timer == 0 || --sc->ti_timer > 0) return; /* * When we're debugging, the chip is often stopped for long periods * of time, and that would normally cause the watchdog timer to fire. * Since that impedes debugging, we don't want to do that. */ if (sc->ti_flags & TI_FLAG_DEBUGING) return; ifp = sc->ti_ifp; if_printf(ifp, "watchdog timeout -- resetting\n"); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ti_init_locked(sc); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void ti_stop(struct ti_softc *sc) { struct ifnet *ifp; struct ti_cmd_desc cmd; TI_LOCK_ASSERT(sc); ifp = sc->ti_ifp; /* Disable host interrupts. */ CSR_WRITE_4(sc, TI_MB_HOSTINTR, 1); /* * Tell firmware we're shutting down. */ TI_DO_CMD(TI_CMD_HOST_STATE, TI_CMD_CODE_STACK_DOWN, 0); /* Halt and reinitialize. */ if (ti_chipinit(sc) == 0) { ti_mem_zero(sc, 0x2000, 0x100000 - 0x2000); /* XXX ignore init errors. */ ti_chipinit(sc); } /* Free the RX lists. */ ti_free_rx_ring_std(sc); /* Free jumbo RX list. */ ti_free_rx_ring_jumbo(sc); /* Free mini RX list. */ ti_free_rx_ring_mini(sc); /* Free TX buffers. */ ti_free_tx_ring(sc); sc->ti_ev_prodidx.ti_idx = 0; sc->ti_return_prodidx.ti_idx = 0; sc->ti_tx_considx.ti_idx = 0; sc->ti_tx_saved_considx = TI_TXCONS_UNSET; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); callout_stop(&sc->ti_watchdog); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static int ti_shutdown(device_t dev) { struct ti_softc *sc; sc = device_get_softc(dev); TI_LOCK(sc); ti_chipinit(sc); TI_UNLOCK(sc); return (0); } static void ti_sysctl_node(struct ti_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *child; char tname[32]; ctx = device_get_sysctl_ctx(sc->ti_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->ti_dev)); /* Use DAC */ sc->ti_dac = 1; snprintf(tname, sizeof(tname), "dev.ti.%d.dac", device_get_unit(sc->ti_dev)); TUNABLE_INT_FETCH(tname, &sc->ti_dac); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_coal_ticks", CTLFLAG_RW, &sc->ti_rx_coal_ticks, 0, "Receive coalcesced ticks"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_max_coal_bds", CTLFLAG_RW, &sc->ti_rx_max_coal_bds, 0, "Receive max coalcesced BDs"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_coal_ticks", CTLFLAG_RW, &sc->ti_tx_coal_ticks, 0, "Send coalcesced ticks"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_max_coal_bds", CTLFLAG_RW, &sc->ti_tx_max_coal_bds, 0, "Send max coalcesced BDs"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_buf_ratio", CTLFLAG_RW, &sc->ti_tx_buf_ratio, 0, "Ratio of NIC memory devoted to TX buffer"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "stat_ticks", CTLFLAG_RW, &sc->ti_stat_ticks, 0, "Number of clock ticks for statistics update interval"); /* Pull in device tunables. */ sc->ti_rx_coal_ticks = 170; resource_int_value(device_get_name(sc->ti_dev), device_get_unit(sc->ti_dev), "rx_coal_ticks", &sc->ti_rx_coal_ticks); sc->ti_rx_max_coal_bds = 64; resource_int_value(device_get_name(sc->ti_dev), device_get_unit(sc->ti_dev), "rx_max_coal_bds", &sc->ti_rx_max_coal_bds); sc->ti_tx_coal_ticks = TI_TICKS_PER_SEC / 500; resource_int_value(device_get_name(sc->ti_dev), device_get_unit(sc->ti_dev), "tx_coal_ticks", &sc->ti_tx_coal_ticks); sc->ti_tx_max_coal_bds = 32; resource_int_value(device_get_name(sc->ti_dev), device_get_unit(sc->ti_dev), "tx_max_coal_bds", &sc->ti_tx_max_coal_bds); sc->ti_tx_buf_ratio = 21; resource_int_value(device_get_name(sc->ti_dev), device_get_unit(sc->ti_dev), "tx_buf_ratio", &sc->ti_tx_buf_ratio); sc->ti_stat_ticks = 2 * TI_TICKS_PER_SEC; resource_int_value(device_get_name(sc->ti_dev), device_get_unit(sc->ti_dev), "stat_ticks", &sc->ti_stat_ticks); } Index: projects/clang380-import/sys/dev/vt/hw/ofwfb/ofwfb.c =================================================================== --- projects/clang380-import/sys/dev/vt/hw/ofwfb/ofwfb.c (revision 293686) +++ projects/clang380-import/sys/dev/vt/hw/ofwfb/ofwfb.c (revision 293687) @@ -1,481 +1,502 @@ /*- * Copyright (c) 2011 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #ifdef __sparc64__ #include #endif #include #include #include struct ofwfb_softc { struct fb_info fb; phandle_t sc_node; ihandle_t sc_handle; bus_space_tag_t sc_memt; int iso_palette; }; +static void ofwfb_initialize(struct vt_device *vd); static vd_probe_t ofwfb_probe; static vd_init_t ofwfb_init; static vd_bitblt_text_t ofwfb_bitblt_text; static vd_bitblt_bmp_t ofwfb_bitblt_bitmap; static const struct vt_driver vt_ofwfb_driver = { .vd_name = "ofwfb", .vd_probe = ofwfb_probe, .vd_init = ofwfb_init, .vd_blank = vt_fb_blank, .vd_bitblt_text = ofwfb_bitblt_text, .vd_bitblt_bmp = ofwfb_bitblt_bitmap, .vd_fb_ioctl = vt_fb_ioctl, .vd_fb_mmap = vt_fb_mmap, .vd_priority = VD_PRIORITY_GENERIC+1, }; static unsigned char ofw_colors[16] = { /* See "16-color Text Extension" Open Firmware document, page 4 */ 0, 4, 2, 6, 1, 5, 3, 7, 8, 12, 10, 14, 9, 13, 11, 15 }; static struct ofwfb_softc ofwfb_conssoftc; VT_DRIVER_DECLARE(vt_ofwfb, vt_ofwfb_driver); static int ofwfb_probe(struct vt_device *vd) { phandle_t chosen, node; ihandle_t stdout; char type[64]; chosen = OF_finddevice("/chosen"); OF_getprop(chosen, "stdout", &stdout, sizeof(stdout)); node = OF_instance_to_package(stdout); if (node == -1) { /* * The "/chosen/stdout" does not exist try * using "screen" directly. */ node = OF_finddevice("screen"); } OF_getprop(node, "device_type", type, sizeof(type)); if (strcmp(type, "display") != 0) return (CN_DEAD); /* Looks OK... */ return (CN_INTERNAL); } static void ofwfb_bitblt_bitmap(struct vt_device *vd, const struct vt_window *vw, const uint8_t *pattern, const uint8_t *mask, unsigned int width, unsigned int height, unsigned int x, unsigned int y, term_color_t fg, term_color_t bg) { struct fb_info *sc = vd->vd_softc; u_long line; uint32_t fgc, bgc; int c, l; uint8_t b, m; union { uint32_t l; uint8_t c[4]; } ch1, ch2; +#ifdef __powerpc__ + /* Deal with unmapped framebuffers */ + if (sc->fb_flags & FB_FLAG_NOWRITE) { + if (pmap_bootstrapped) { + sc->fb_flags &= ~FB_FLAG_NOWRITE; + ofwfb_initialize(vd); + } else { + return; + } + } +#endif + fgc = sc->fb_cmap[fg]; bgc = sc->fb_cmap[bg]; b = m = 0; if (((struct ofwfb_softc *)vd->vd_softc)->iso_palette) { fg = ofw_colors[fg]; bg = ofw_colors[bg]; } line = (sc->fb_stride * y) + x * sc->fb_bpp/8; if (mask == NULL && sc->fb_bpp == 8 && (width % 8 == 0)) { /* Don't try to put off screen pixels */ if (((x + width) > vd->vd_width) || ((y + height) > vd->vd_height)) return; for (; height > 0; height--) { for (c = 0; c < width; c += 8) { b = *pattern++; /* * Assume that there is more background than * foreground in characters and init accordingly */ ch1.l = ch2.l = (bg << 24) | (bg << 16) | (bg << 8) | bg; /* * Calculate 2 x 4-chars at a time, and then * write these out. */ if (b & 0x80) ch1.c[0] = fg; if (b & 0x40) ch1.c[1] = fg; if (b & 0x20) ch1.c[2] = fg; if (b & 0x10) ch1.c[3] = fg; if (b & 0x08) ch2.c[0] = fg; if (b & 0x04) ch2.c[1] = fg; if (b & 0x02) ch2.c[2] = fg; if (b & 0x01) ch2.c[3] = fg; *(uint32_t *)(sc->fb_vbase + line + c) = ch1.l; *(uint32_t *)(sc->fb_vbase + line + c + 4) = ch2.l; } line += sc->fb_stride; } } else { for (l = 0; l < height && y + l < vw->vw_draw_area.tr_end.tp_row; l++) { for (c = 0; c < width && x + c < vw->vw_draw_area.tr_end.tp_col; c++) { if (c % 8 == 0) b = *pattern++; else b <<= 1; if (mask != NULL) { if (c % 8 == 0) m = *mask++; else m <<= 1; /* Skip pixel write, if mask not set. */ if ((m & 0x80) == 0) continue; } switch(sc->fb_bpp) { case 8: *(uint8_t *)(sc->fb_vbase + line + c) = b & 0x80 ? fg : bg; break; case 32: *(uint32_t *)(sc->fb_vbase + line + 4*c) = (b & 0x80) ? fgc : bgc; break; default: /* panic? */ break; } } line += sc->fb_stride; } } } void ofwfb_bitblt_text(struct vt_device *vd, const struct vt_window *vw, const term_rect_t *area) { unsigned int col, row, x, y; struct vt_font *vf; term_char_t c; term_color_t fg, bg; const uint8_t *pattern; vf = vw->vw_font; for (row = area->tr_begin.tp_row; row < area->tr_end.tp_row; ++row) { for (col = area->tr_begin.tp_col; col < area->tr_end.tp_col; ++col) { x = col * vf->vf_width + vw->vw_draw_area.tr_begin.tp_col; y = row * vf->vf_height + vw->vw_draw_area.tr_begin.tp_row; c = VTBUF_GET_FIELD(&vw->vw_buf, row, col); pattern = vtfont_lookup(vf, c); vt_determine_colors(c, VTBUF_ISCURSOR(&vw->vw_buf, row, col), &fg, &bg); ofwfb_bitblt_bitmap(vd, vw, pattern, NULL, vf->vf_width, vf->vf_height, x, y, fg, bg); } } #ifndef SC_NO_CUTPASTE if (!vd->vd_mshown) return; term_rect_t drawn_area; drawn_area.tr_begin.tp_col = area->tr_begin.tp_col * vf->vf_width; drawn_area.tr_begin.tp_row = area->tr_begin.tp_row * vf->vf_height; drawn_area.tr_end.tp_col = area->tr_end.tp_col * vf->vf_width; drawn_area.tr_end.tp_row = area->tr_end.tp_row * vf->vf_height; if (vt_is_cursor_in_area(vd, &drawn_area)) { ofwfb_bitblt_bitmap(vd, vw, vd->vd_mcursor->map, vd->vd_mcursor->mask, vd->vd_mcursor->width, vd->vd_mcursor->height, vd->vd_mx_drawn + vw->vw_draw_area.tr_begin.tp_col, vd->vd_my_drawn + vw->vw_draw_area.tr_begin.tp_row, vd->vd_mcursor_fg, vd->vd_mcursor_bg); } #endif } static void ofwfb_initialize(struct vt_device *vd) { struct ofwfb_softc *sc = vd->vd_softc; int i, err; cell_t retval; uint32_t oldpix; + sc->fb.fb_cmsize = 16; + + if (sc->fb.fb_flags & FB_FLAG_NOWRITE) + return; + /* * Set up the color map */ sc->iso_palette = 0; switch (sc->fb.fb_bpp) { case 8: vt_generate_cons_palette(sc->fb.fb_cmap, COLOR_FORMAT_RGB, 255, 16, 255, 8, 255, 0); for (i = 0; i < 16; i++) { err = OF_call_method("color!", sc->sc_handle, 4, 1, (cell_t)((sc->fb.fb_cmap[i] >> 16) & 0xff), (cell_t)((sc->fb.fb_cmap[i] >> 8) & 0xff), (cell_t)((sc->fb.fb_cmap[i] >> 0) & 0xff), (cell_t)i, &retval); if (err) break; } if (i != 16) sc->iso_palette = 1; break; case 32: /* * We bypass the usual bus_space_() accessors here, mostly * for performance reasons. In particular, we don't want * any barrier operations that may be performed and handle * endianness slightly different. Figure out the host-view * endianness of the frame buffer. */ oldpix = bus_space_read_4(sc->sc_memt, sc->fb.fb_vbase, 0); bus_space_write_4(sc->sc_memt, sc->fb.fb_vbase, 0, 0xff000000); if (*(uint8_t *)(sc->fb.fb_vbase) == 0xff) vt_generate_cons_palette(sc->fb.fb_cmap, COLOR_FORMAT_RGB, 255, 0, 255, 8, 255, 16); else vt_generate_cons_palette(sc->fb.fb_cmap, COLOR_FORMAT_RGB, 255, 16, 255, 8, 255, 0); bus_space_write_4(sc->sc_memt, sc->fb.fb_vbase, 0, oldpix); break; default: panic("Unknown color space depth %d", sc->fb.fb_bpp); break; } - - sc->fb.fb_cmsize = 16; } static int ofwfb_init(struct vt_device *vd) { struct ofwfb_softc *sc; char type[64]; phandle_t chosen; phandle_t node; uint32_t depth, height, width, stride; uint32_t fb_phys; int i, len; #ifdef __sparc64__ static struct bus_space_tag ofwfb_memt[1]; bus_addr_t phys; int space; #endif /* Initialize softc */ vd->vd_softc = sc = &ofwfb_conssoftc; chosen = OF_finddevice("/chosen"); OF_getprop(chosen, "stdout", &sc->sc_handle, sizeof(ihandle_t)); node = OF_instance_to_package(sc->sc_handle); if (node == -1) { /* * The "/chosen/stdout" does not exist try * using "screen" directly. */ node = OF_finddevice("screen"); sc->sc_handle = OF_open("screen"); } OF_getprop(node, "device_type", type, sizeof(type)); if (strcmp(type, "display") != 0) return (CN_DEAD); /* Keep track of the OF node */ sc->sc_node = node; /* * Try to use a 32-bit framebuffer if possible. This may be * unimplemented and fail. That's fine -- it just means we are * stuck with the defaults. */ OF_call_method("set-depth", sc->sc_handle, 1, 1, (cell_t)32, &i); /* Make sure we have needed properties */ if (OF_getproplen(node, "height") != sizeof(height) || OF_getproplen(node, "width") != sizeof(width) || OF_getproplen(node, "depth") != sizeof(depth) || OF_getproplen(node, "linebytes") != sizeof(sc->fb.fb_stride)) return (CN_DEAD); /* Only support 8 and 32-bit framebuffers */ OF_getprop(node, "depth", &depth, sizeof(depth)); if (depth != 8 && depth != 32) return (CN_DEAD); sc->fb.fb_bpp = sc->fb.fb_depth = depth; OF_getprop(node, "height", &height, sizeof(height)); OF_getprop(node, "width", &width, sizeof(width)); OF_getprop(node, "linebytes", &stride, sizeof(stride)); sc->fb.fb_height = height; sc->fb.fb_width = width; sc->fb.fb_stride = stride; sc->fb.fb_size = sc->fb.fb_height * sc->fb.fb_stride; /* * Grab the physical address of the framebuffer, and then map it * into our memory space. If the MMU is not yet up, it will be * remapped for us when relocation turns on. */ if (OF_getproplen(node, "address") == sizeof(fb_phys)) { /* XXX We assume #address-cells is 1 at this point. */ OF_getprop(node, "address", &fb_phys, sizeof(fb_phys)); #if defined(__powerpc__) sc->sc_memt = &bs_be_tag; bus_space_map(sc->sc_memt, fb_phys, sc->fb.fb_size, BUS_SPACE_MAP_PREFETCHABLE, &sc->fb.fb_vbase); #elif defined(__sparc64__) OF_decode_addr(node, 0, &space, &phys); sc->sc_memt = &ofwfb_memt[0]; sc->fb.fb_vbase = sparc64_fake_bustag(space, fb_phys, sc->sc_memt); #elif defined(__arm__) sc->sc_memt = fdtbus_bs_tag; bus_space_map(sc->sc_memt, sc->fb.fb_pbase, sc->fb.fb_size, BUS_SPACE_MAP_PREFETCHABLE, (bus_space_handle_t *)&sc->fb.fb_vbase); #else #error Unsupported platform! #endif sc->fb.fb_pbase = fb_phys; } else { /* * Some IBM systems don't have an address property. Try to * guess the framebuffer region from the assigned addresses. * This is ugly, but there doesn't seem to be an alternative. * Linux does the same thing. */ struct ofw_pci_register pciaddrs[8]; int num_pciaddrs = 0; /* * Get the PCI addresses of the adapter, if present. The node * may be the child of the PCI device: in that case, try the * parent for the assigned-addresses property. */ len = OF_getprop(node, "assigned-addresses", pciaddrs, sizeof(pciaddrs)); if (len == -1) { len = OF_getprop(OF_parent(node), "assigned-addresses", pciaddrs, sizeof(pciaddrs)); } if (len == -1) len = 0; num_pciaddrs = len / sizeof(struct ofw_pci_register); fb_phys = num_pciaddrs; for (i = 0; i < num_pciaddrs; i++) { /* If it is too small, not the framebuffer */ if (pciaddrs[i].size_lo < sc->fb.fb_stride * height) continue; /* If it is not memory, it isn't either */ if (!(pciaddrs[i].phys_hi & OFW_PCI_PHYS_HI_SPACE_MEM32)) continue; /* This could be the framebuffer */ fb_phys = i; /* If it is prefetchable, it certainly is */ if (pciaddrs[i].phys_hi & OFW_PCI_PHYS_HI_PREFETCHABLE) break; } if (fb_phys == num_pciaddrs) /* No candidates found */ return (CN_DEAD); #if defined(__powerpc__) OF_decode_addr(node, fb_phys, &sc->sc_memt, &sc->fb.fb_vbase); sc->fb.fb_pbase = sc->fb.fb_vbase; /* 1:1 mapped */ + #ifdef __powerpc64__ + /* Real mode under a hypervisor probably doesn't cover FB */ + if (!(mfmsr() & (PSL_HV | PSL_DR))) + sc->fb.fb_flags |= FB_FLAG_NOWRITE; + #endif #else /* No ability to interpret assigned-addresses otherwise */ return (CN_DEAD); #endif } ofwfb_initialize(vd); vt_fb_init(vd); return (CN_INTERNAL); } Index: projects/clang380-import/sys/fs/ext2fs/ext2_bmap.c =================================================================== --- projects/clang380-import/sys/fs/ext2fs/ext2_bmap.c (revision 293686) +++ projects/clang380-import/sys/fs/ext2fs/ext2_bmap.c (revision 293687) @@ -1,369 +1,376 @@ /*- * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int ext4_bmapext(struct vnode *, int32_t, int64_t *, int *, int *); /* * Bmap converts the logical block number of a file to its physical block * number on the disk. The conversion is done by using the logical block * number to index into the array of block pointers described by the dinode. */ int ext2_bmap(struct vop_bmap_args *ap) { daddr_t blkno; int error; /* * Check for underlying vnode requests and ensure that logical * to physical mapping is requested. */ if (ap->a_bop != NULL) *ap->a_bop = &VTOI(ap->a_vp)->i_devvp->v_bufobj; if (ap->a_bnp == NULL) return (0); if (VTOI(ap->a_vp)->i_flag & IN_E4EXTENTS) error = ext4_bmapext(ap->a_vp, ap->a_bn, &blkno, ap->a_runp, ap->a_runb); else error = ext2_bmaparray(ap->a_vp, ap->a_bn, &blkno, ap->a_runp, ap->a_runb); *ap->a_bnp = blkno; return (error); } /* * This function converts the logical block number of a file to * its physical block number on the disk within ext4 extents. */ static int ext4_bmapext(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb) { struct inode *ip; struct m_ext2fs *fs; struct ext4_extent *ep; struct ext4_extent_path path = { .ep_bp = NULL }; daddr_t lbn; int ret = 0; ip = VTOI(vp); fs = ip->i_e2fs; lbn = bn; - /* - * TODO: need to implement read ahead to improve the performance. - */ if (runp != NULL) *runp = 0; if (runb != NULL) *runb = 0; ext4_ext_find_extent(fs, ip, lbn, &path); - ep = path.ep_ext; - if (ep == NULL) - ret = EIO; - else { - *bnp = fsbtodb(fs, lbn - ep->e_blk + - (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32)); + if (path.ep_is_sparse) { + *bnp = -1; + if (runp != NULL) + *runp = path.ep_sparse_ext.e_len - + (lbn - path.ep_sparse_ext.e_blk) - 1; + } else { + ep = path.ep_ext; + if (ep == NULL) + ret = EIO; + else { + *bnp = fsbtodb(fs, lbn - ep->e_blk + + (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32)); - if (*bnp == 0) - *bnp = -1; + if (*bnp == 0) + *bnp = -1; + + if (runp != NULL) + *runp = ep->e_len - (lbn - ep->e_blk) - 1; + } } if (path.ep_bp != NULL) { brelse(path.ep_bp); path.ep_bp = NULL; } return (ret); } /* * Indirect blocks are now on the vnode for the file. They are given negative * logical block numbers. Indirect blocks are addressed by the negative * address of the first data block to which they point. Double indirect blocks * are addressed by one less than the address of the first indirect block to * which they point. Triple indirect blocks are addressed by one less than * the address of the first double indirect block to which they point. * * ext2_bmaparray does the bmap conversion, and if requested returns the * array of logical blocks which must be traversed to get to a block. * Each entry contains the offset into that block that gets you to the * next block and the disk address of the block (if it is assigned). */ int ext2_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, int *runp, int *runb) { struct inode *ip; struct buf *bp; struct ext2mount *ump; struct mount *mp; struct indir a[NIADDR+1], *ap; daddr_t daddr; e2fs_lbn_t metalbn; int error, num, maxrun = 0, bsize; int *nump; ap = NULL; ip = VTOI(vp); mp = vp->v_mount; ump = VFSTOEXT2(mp); bsize = EXT2_BLOCK_SIZE(ump->um_e2fs); if (runp) { maxrun = mp->mnt_iosize_max / bsize - 1; *runp = 0; } if (runb) { *runb = 0; } ap = a; nump = # error = ext2_getlbns(vp, bn, ap, nump); if (error) return (error); num = *nump; if (num == 0) { *bnp = blkptrtodb(ump, ip->i_db[bn]); if (*bnp == 0) { *bnp = -1; } else if (runp) { daddr_t bnb = bn; for (++bn; bn < NDADDR && *runp < maxrun && is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]); ++bn, ++*runp); bn = bnb; if (runb && (bn > 0)) { for (--bn; (bn >= 0) && (*runb < maxrun) && is_sequential(ump, ip->i_db[bn], ip->i_db[bn + 1]); --bn, ++*runb); } } return (0); } /* Get disk address out of indirect block array */ daddr = ip->i_ib[ap->in_off]; for (bp = NULL, ++ap; --num; ++ap) { /* * Exit the loop if there is no disk address assigned yet and * the indirect block isn't in the cache, or if we were * looking for an indirect block and we've found it. */ metalbn = ap->in_lbn; if ((daddr == 0 && !incore(&vp->v_bufobj, metalbn)) || metalbn == bn) break; /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. */ if (bp) bqrelse(bp); bp = getblk(vp, metalbn, bsize, 0, 0, 0); if ((bp->b_flags & B_CACHE) == 0) { #ifdef INVARIANTS if (!daddr) panic("ext2_bmaparray: indirect block not in cache"); #endif bp->b_blkno = blkptrtodb(ump, daddr); bp->b_iocmd = BIO_READ; bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; vfs_busy_pages(bp, 0); bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); curthread->td_ru.ru_inblock++; error = bufwait(bp); if (error) { brelse(bp); return (error); } } daddr = ((e2fs_daddr_t *)bp->b_data)[ap->in_off]; if (num == 1 && daddr && runp) { for (bn = ap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((e2fs_daddr_t *)bp->b_data)[bn - 1], ((e2fs_daddr_t *)bp->b_data)[bn]); ++bn, ++*runp); bn = ap->in_off; if (runb && bn) { for (--bn; bn >= 0 && *runb < maxrun && is_sequential(ump, ((e2fs_daddr_t *)bp->b_data)[bn], ((e2fs_daddr_t *)bp->b_data)[bn + 1]); --bn, ++*runb); } } } if (bp) bqrelse(bp); /* * Since this is FFS independent code, we are out of scope for the * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they * will fall in the range 1..um_seqinc, so we use that test and * return a request for a zeroed out buffer if attempts are made * to read a BLK_NOCOPY or BLK_SNAP block. */ if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){ *bnp = -1; return (0); } *bnp = blkptrtodb(ump, daddr); if (*bnp == 0) { *bnp = -1; } return (0); } /* * Create an array of logical block number/offset pairs which represent the * path of indirect blocks required to access a data block. The first "pair" * contains the logical block number of the appropriate single, double or * triple indirect block and the offset into the inode indirect block array. * Note, the logical block number of the inode single/double/triple indirect * block appears twice in the array, once with the offset into the i_ib and * once with the offset into the page itself. */ int ext2_getlbns(struct vnode *vp, daddr_t bn, struct indir *ap, int *nump) { long blockcnt; e2fs_lbn_t metalbn, realbn; struct ext2mount *ump; int i, numlevels, off; int64_t qblockcnt; ump = VFSTOEXT2(vp->v_mount); if (nump) *nump = 0; numlevels = 0; realbn = bn; if ((long)bn < 0) bn = -(long)bn; /* The first NDADDR blocks are direct blocks. */ if (bn < NDADDR) return (0); /* * Determine the number of levels of indirection. After this loop * is done, blockcnt indicates the number of data blocks possible * at the previous level of indirection, and NIADDR - i is the number * of levels of indirection needed to locate the requested block. */ for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { if (i == 0) return (EFBIG); /* * Use int64_t's here to avoid overflow for triple indirect * blocks when longs have 32 bits and the block size is more * than 4K. */ qblockcnt = (int64_t)blockcnt * MNINDIR(ump); if (bn < qblockcnt) break; blockcnt = qblockcnt; } /* Calculate the address of the first meta-block. */ if (realbn >= 0) metalbn = -(realbn - bn + NIADDR - i); else metalbn = -(-realbn - bn + NIADDR - i); /* * At each iteration, off is the offset into the bap array which is * an array of disk addresses at the current level of indirection. * The logical block number and the offset in that block are stored * into the argument array. */ ap->in_lbn = metalbn; ap->in_off = off = NIADDR - i; ap++; for (++numlevels; i <= NIADDR; i++) { /* If searching for a meta-data block, quit when found. */ if (metalbn == realbn) break; off = (bn / blockcnt) % MNINDIR(ump); ++numlevels; ap->in_lbn = metalbn; ap->in_off = off; ++ap; metalbn -= -1 + off * blockcnt; blockcnt /= MNINDIR(ump); } if (nump) *nump = numlevels; return (0); } Index: projects/clang380-import/sys/fs/ext2fs/ext2_extents.c =================================================================== --- projects/clang380-import/sys/fs/ext2fs/ext2_extents.c (revision 293686) +++ projects/clang380-import/sys/fs/ext2fs/ext2_extents.c (revision 293687) @@ -1,175 +1,195 @@ /*- * Copyright (c) 2010 Zheng Liu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void ext4_ext_binsearch_index(struct inode *ip, struct ext4_extent_path *path, daddr_t lbn) { struct ext4_extent_header *ehp = path->ep_header; struct ext4_extent_index *l, *r, *m; l = (struct ext4_extent_index *)(char *)(ehp + 1); r = (struct ext4_extent_index *)(char *)(ehp + 1) + ehp->eh_ecount - 1; while (l <= r) { m = l + (r - l) / 2; if (lbn < m->ei_blk) r = m - 1; else l = m + 1; } path->ep_index = l - 1; } static void ext4_ext_binsearch(struct inode *ip, struct ext4_extent_path *path, daddr_t lbn) { struct ext4_extent_header *ehp = path->ep_header; - struct ext4_extent *l, *r, *m; + struct ext4_extent *first, *l, *r, *m; if (ehp->eh_ecount == 0) return; - l = (struct ext4_extent *)(char *)(ehp + 1); - r = (struct ext4_extent *)(char *)(ehp + 1) + ehp->eh_ecount - 1; + first = (struct ext4_extent *)(char *)(ehp + 1); + l = first; + r = first + ehp->eh_ecount - 1; while (l <= r) { m = l + (r - l) / 2; if (lbn < m->e_blk) r = m - 1; else l = m + 1; } + if (l == first) { + path->ep_sparse_ext.e_blk = lbn; + path->ep_sparse_ext.e_len = first->e_blk - lbn; + path->ep_sparse_ext.e_start_hi = 0; + path->ep_sparse_ext.e_start_lo = 0; + path->ep_is_sparse = 1; + return; + } path->ep_ext = l - 1; + if (path->ep_ext->e_blk + path->ep_ext->e_len <= lbn) { + path->ep_sparse_ext.e_blk = lbn; + if (l <= (first + ehp->eh_ecount - 1)) + path->ep_sparse_ext.e_len = l->e_blk - lbn; + else // XXX: where does it end? + path->ep_sparse_ext.e_len = 1; + path->ep_sparse_ext.e_start_hi = 0; + path->ep_sparse_ext.e_start_lo = 0; + path->ep_is_sparse = 1; + } } /* * Find a block in ext4 extent cache. */ int ext4_ext_in_cache(struct inode *ip, daddr_t lbn, struct ext4_extent *ep) { struct ext4_extent_cache *ecp; int ret = EXT4_EXT_CACHE_NO; ecp = &ip->i_ext_cache; /* cache is invalid */ if (ecp->ec_type == EXT4_EXT_CACHE_NO) return (ret); if (lbn >= ecp->ec_blk && lbn < ecp->ec_blk + ecp->ec_len) { ep->e_blk = ecp->ec_blk; ep->e_start_lo = ecp->ec_start & 0xffffffff; ep->e_start_hi = ecp->ec_start >> 32 & 0xffff; ep->e_len = ecp->ec_len; ret = ecp->ec_type; } return (ret); } /* * Put an ext4_extent structure in ext4 cache. */ void ext4_ext_put_cache(struct inode *ip, struct ext4_extent *ep, int type) { struct ext4_extent_cache *ecp; ecp = &ip->i_ext_cache; ecp->ec_type = type; ecp->ec_blk = ep->e_blk; ecp->ec_len = ep->e_len; ecp->ec_start = (daddr_t)ep->e_start_hi << 32 | ep->e_start_lo; } /* * Find an extent. */ struct ext4_extent_path * ext4_ext_find_extent(struct m_ext2fs *fs, struct inode *ip, daddr_t lbn, struct ext4_extent_path *path) { struct ext4_extent_header *ehp; uint16_t i; int error, size; daddr_t nblk; ehp = (struct ext4_extent_header *)(char *)ip->i_db; if (ehp->eh_magic != EXT4_EXT_MAGIC) return (NULL); path->ep_header = ehp; for (i = ehp->eh_depth; i != 0; --i) { ext4_ext_binsearch_index(ip, path, lbn); path->ep_depth = 0; path->ep_ext = NULL; nblk = (daddr_t)path->ep_index->ei_leaf_hi << 32 | path->ep_index->ei_leaf_lo; size = blksize(fs, ip, nblk); if (path->ep_bp != NULL) { brelse(path->ep_bp); path->ep_bp = NULL; } error = bread(ip->i_devvp, fsbtodb(fs, nblk), size, NOCRED, &path->ep_bp); if (error) { brelse(path->ep_bp); path->ep_bp = NULL; return (NULL); } ehp = (struct ext4_extent_header *)path->ep_bp->b_data; path->ep_header = ehp; } path->ep_depth = i; path->ep_ext = NULL; path->ep_index = NULL; + path->ep_is_sparse = 0; ext4_ext_binsearch(ip, path, lbn); return (path); } Index: projects/clang380-import/sys/fs/ext2fs/ext2_extents.h =================================================================== --- projects/clang380-import/sys/fs/ext2fs/ext2_extents.h (revision 293686) +++ projects/clang380-import/sys/fs/ext2fs/ext2_extents.h (revision 293687) @@ -1,99 +1,103 @@ /*- * Copyright (c) 2012, 2010 Zheng Liu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _FS_EXT2FS_EXT2_EXTENTS_H_ #define _FS_EXT2FS_EXT2_EXTENTS_H_ #include #define EXT4_EXT_MAGIC 0xf30a #define EXT4_EXT_CACHE_NO 0 #define EXT4_EXT_CACHE_GAP 1 #define EXT4_EXT_CACHE_IN 2 /* * Ext4 file system extent on disk. */ struct ext4_extent { uint32_t e_blk; /* first logical block */ uint16_t e_len; /* number of blocks */ uint16_t e_start_hi; /* high 16 bits of physical block */ uint32_t e_start_lo; /* low 32 bits of physical block */ }; /* * Extent index on disk. */ struct ext4_extent_index { uint32_t ei_blk; /* indexes logical blocks */ uint32_t ei_leaf_lo; /* points to physical block of the * next level */ uint16_t ei_leaf_hi; /* high 16 bits of physical block */ uint16_t ei_unused; }; /* * Extent tree header. */ struct ext4_extent_header { uint16_t eh_magic; /* magic number: 0xf30a */ uint16_t eh_ecount; /* number of valid entries */ uint16_t eh_max; /* capacity of store in entries */ uint16_t eh_depth; /* the depth of extent tree */ uint32_t eh_gen; /* generation of extent tree */ }; /* * Save cached extent. */ struct ext4_extent_cache { daddr_t ec_start; /* extent start */ uint32_t ec_blk; /* logical block */ uint32_t ec_len; uint32_t ec_type; }; /* * Save path to some extent. */ struct ext4_extent_path { uint16_t ep_depth; struct buf *ep_bp; - struct ext4_extent *ep_ext; + int ep_is_sparse; + union { + struct ext4_extent ep_sparse_ext; + struct ext4_extent *ep_ext; + }; struct ext4_extent_index *ep_index; struct ext4_extent_header *ep_header; }; struct inode; struct m_ext2fs; int ext4_ext_in_cache(struct inode *, daddr_t, struct ext4_extent *); void ext4_ext_put_cache(struct inode *, struct ext4_extent *, int); struct ext4_extent_path *ext4_ext_find_extent(struct m_ext2fs *fs, struct inode *, daddr_t, struct ext4_extent_path *); #endif /* !_FS_EXT2FS_EXT2_EXTENTS_H_ */ Index: projects/clang380-import/sys/fs/ext2fs/ext2_vfsops.c =================================================================== --- projects/clang380-import/sys/fs/ext2fs/ext2_vfsops.c (revision 293686) +++ projects/clang380-import/sys/fs/ext2fs/ext2_vfsops.c (revision 293687) @@ -1,1101 +1,1101 @@ /*- * modified for EXT2FS support in Lites 1.1 * * Aug 1995, Godmar Back (gback@cs.utah.edu) * University of Utah, Department of Computer Science */ /*- * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); static int ext2_mountfs(struct vnode *, struct mount *); static int ext2_reload(struct mount *mp, struct thread *td); static int ext2_sbupdate(struct ext2mount *, int); static int ext2_cgupdate(struct ext2mount *, int); static vfs_unmount_t ext2_unmount; static vfs_root_t ext2_root; static vfs_statfs_t ext2_statfs; static vfs_sync_t ext2_sync; static vfs_vget_t ext2_vget; static vfs_fhtovp_t ext2_fhtovp; static vfs_mount_t ext2_mount; MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); static struct vfsops ext2fs_vfsops = { .vfs_fhtovp = ext2_fhtovp, .vfs_mount = ext2_mount, .vfs_root = ext2_root, /* root inode via vget */ .vfs_statfs = ext2_statfs, .vfs_sync = ext2_sync, .vfs_unmount = ext2_unmount, .vfs_vget = ext2_vget, }; VFS_SET(ext2fs_vfsops, ext2fs, 0); static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly); static int compute_sb_data(struct vnode * devvp, struct ext2fs * es, struct m_ext2fs * fs); static const char *ext2_opts[] = { "acls", "async", "noatime", "noclusterr", "noclusterw", "noexec", "export", "force", "from", "multilabel", "suiddir", "nosymfollow", "sync", "union", NULL }; /* * VFS Operations. * * mount system call */ static int ext2_mount(struct mount *mp) { struct vfsoptlist *opts; struct vnode *devvp; struct thread *td; struct ext2mount *ump = NULL; struct m_ext2fs *fs; struct nameidata nd, *ndp = &nd; accmode_t accmode; char *path, *fspec; int error, flags, len; td = curthread; opts = mp->mnt_optnew; if (vfs_filteropt(opts, ext2_opts)) return (EINVAL); vfs_getopt(opts, "fspath", (void **)&path, NULL); /* Double-check the length of path.. */ if (strlen(path) >= MAXMNTLEN) return (ENAMETOOLONG); fspec = NULL; error = vfs_getopt(opts, "from", (void **)&fspec, &len); if (!error && fspec[len - 1] != '\0') return (EINVAL); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOEXT2(mp); fs = ump->um_e2fs; error = 0; if (fs->e2fs_ronly == 0 && vfs_flagopt(opts, "ro", NULL, 0)) { error = VFS_SYNC(mp, MNT_WAIT); if (error) return (error); flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; error = ext2_flushfiles(mp, flags, td); if ( error == 0 && fs->e2fs_wasvalid && ext2_cgupdate(ump, MNT_WAIT) == 0) { fs->e2fs->e2fs_state |= E2FS_ISCLEAN; ext2_sbupdate(ump, MNT_WAIT); } fs->e2fs_ronly = 1; vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY); DROP_GIANT(); g_topology_lock(); g_access(ump->um_cp, 0, -1, 0); g_topology_unlock(); PICKUP_GIANT(); } if (!error && (mp->mnt_flag & MNT_RELOAD)) error = ext2_reload(mp, td); if (error) return (error); devvp = ump->um_devvp; if (fs->e2fs_ronly && !vfs_flagopt(opts, "ro", NULL, 0)) { if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0)) return (EPERM); /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_ACCESS(devvp, VREAD | VWRITE, td->td_ucred, td); if (error) error = priv_check(td, PRIV_VFS_MOUNT_PERM); if (error) { VOP_UNLOCK(devvp, 0); return (error); } VOP_UNLOCK(devvp, 0); DROP_GIANT(); g_topology_lock(); error = g_access(ump->um_cp, 0, 1, 0); g_topology_unlock(); PICKUP_GIANT(); if (error) return (error); if ((fs->e2fs->e2fs_state & E2FS_ISCLEAN) == 0 || (fs->e2fs->e2fs_state & E2FS_ERRORS)) { if (mp->mnt_flag & MNT_FORCE) { printf( "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt); } else { printf( "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", fs->e2fs_fsmnt); return (EPERM); } } fs->e2fs->e2fs_state &= ~E2FS_ISCLEAN; (void)ext2_cgupdate(ump, MNT_WAIT); fs->e2fs_ronly = 0; MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_RDONLY; MNT_IUNLOCK(mp); } if (vfs_flagopt(opts, "export", NULL, 0)) { /* Process export requests in vfs_mount.c. */ return (error); } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible disk device. */ if (fspec == NULL) return (EINVAL); NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td); if ((error = namei(ndp)) != 0) return (error); NDFREE(ndp, NDF_ONLY_PNBUF); devvp = ndp->ni_vp; if (!vn_isdisk(devvp, &error)) { vput(devvp); return (error); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. * * XXXRW: VOP_ACCESS() enough? */ accmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accmode |= VWRITE; error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); if (error) error = priv_check(td, PRIV_VFS_MOUNT_PERM); if (error) { vput(devvp); return (error); } if ((mp->mnt_flag & MNT_UPDATE) == 0) { error = ext2_mountfs(devvp, mp); } else { if (devvp != ump->um_devvp) { vput(devvp); return (EINVAL); /* needs translation */ } else vput(devvp); } if (error) { vrele(devvp); return (error); } ump = VFSTOEXT2(mp); fs = ump->um_e2fs; /* * Note that this strncpy() is ok because of a check at the start * of ext2_mount(). */ strncpy(fs->e2fs_fsmnt, path, MAXMNTLEN); fs->e2fs_fsmnt[MAXMNTLEN - 1] = '\0'; vfs_mountedfrom(mp, fspec); return (0); } static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly) { if (es->e2fs_magic != E2FS_MAGIC) { printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", devtoname(dev), es->e2fs_magic, E2FS_MAGIC); return (1); } if (es->e2fs_rev > E2FS_REV0) { if (es->e2fs_features_incompat & ~(EXT2F_INCOMPAT_SUPP | EXT4F_RO_INCOMPAT_SUPP)) { printf( "WARNING: mount of %s denied due to unsupported optional features\n", devtoname(dev)); return (1); } if (!ronly && (es->e2fs_features_rocompat & ~EXT2F_ROCOMPAT_SUPP)) { printf("WARNING: R/W mount of %s denied due to " "unsupported optional features\n", devtoname(dev)); return (1); } } return (0); } /* * This computes the fields of the ext2_sb_info structure from the * data in the ext2_super_block structure read in. */ static int compute_sb_data(struct vnode *devvp, struct ext2fs *es, struct m_ext2fs *fs) { int db_count, error; int i; int logic_sb_block = 1; /* XXX for now */ struct buf *bp; uint32_t e2fs_descpb; fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->e2fs_log_bsize; fs->e2fs_bsize = 1U << fs->e2fs_bshift; fs->e2fs_fsbtodb = es->e2fs_log_bsize + 1; fs->e2fs_qbmask = fs->e2fs_bsize - 1; fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << es->e2fs_log_fsize; if (fs->e2fs_fsize) fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize; fs->e2fs_bpg = es->e2fs_bpg; fs->e2fs_fpg = es->e2fs_fpg; fs->e2fs_ipg = es->e2fs_ipg; if (es->e2fs_rev == E2FS_REV0) { fs->e2fs_isize = E2FS_REV0_INODE_SIZE ; } else { fs->e2fs_isize = es->e2fs_inode_size; /* * Simple sanity check for superblock inode size value. */ if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE || EXT2_INODE_SIZE(fs) > fs->e2fs_bsize || (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) { printf("ext2fs: invalid inode size %d\n", fs->e2fs_isize); return (EIO); } } /* Check for extra isize in big inodes. */ if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) && EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) { printf("ext2fs: no space for extra inode timestamps\n"); return (EINVAL); } fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs); fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb; /* s_resuid / s_resgid ? */ fs->e2fs_gcount = (es->e2fs_bcount - es->e2fs_first_dblock + EXT2_BLOCKS_PER_GROUP(fs) - 1) / EXT2_BLOCKS_PER_GROUP(fs); e2fs_descpb = fs->e2fs_bsize / sizeof(struct ext2_gd); db_count = (fs->e2fs_gcount + e2fs_descpb - 1) / e2fs_descpb; fs->e2fs_gdbcount = db_count; fs->e2fs_gd = malloc(db_count * fs->e2fs_bsize, M_EXT2MNT, M_WAITOK); fs->e2fs_contigdirs = malloc(fs->e2fs_gcount * sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO); /* * Adjust logic_sb_block. * Godmar thinks: if the blocksize is greater than 1024, then * the superblock is logically part of block zero. */ if(fs->e2fs_bsize > SBSIZE) logic_sb_block = 0; for (i = 0; i < db_count; i++) { error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1 ), fs->e2fs_bsize, NOCRED, &bp); if (error) { free(fs->e2fs_contigdirs, M_EXT2MNT); free(fs->e2fs_gd, M_EXT2MNT); brelse(bp); return (error); } e2fs_cgload((struct ext2_gd *)bp->b_data, &fs->e2fs_gd[ i * fs->e2fs_bsize / sizeof(struct ext2_gd)], fs->e2fs_bsize); brelse(bp); bp = NULL; } /* Initialization for the ext2 Orlov allocator variant. */ fs->e2fs_total_dir = 0; for (i = 0; i < fs->e2fs_gcount; i++) fs->e2fs_total_dir += fs->e2fs_gd[i].ext2bgd_ndirs; if (es->e2fs_rev == E2FS_REV0 || !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE)) fs->e2fs_maxfilesize = 0x7fffffff; else fs->e2fs_maxfilesize = 0x7fffffffffffffff; return (0); } /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must * be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) invalidate all cluster summary information. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. * XXX we are missing some steps, in particular # 3, this has to be reviewed. */ static int ext2_reload(struct mount *mp, struct thread *td) { struct vnode *vp, *mvp, *devvp; struct inode *ip; struct buf *bp; struct ext2fs *es; struct m_ext2fs *fs; struct csum *sump; int error, i; int32_t *lp; if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); /* * Step 1: invalidate all cached meta-data. */ devvp = VFSTOEXT2(mp)->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); if (vinvalbuf(devvp, 0, 0, 0) != 0) panic("ext2_reload: dirty1"); VOP_UNLOCK(devvp, 0); /* * Step 2: re-read superblock from disk. * constants have been adjusted for ext2 */ if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) return (error); es = (struct ext2fs *)bp->b_data; if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { brelse(bp); return (EIO); /* XXX needs translation */ } fs = VFSTOEXT2(mp)->um_e2fs; bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs)); if((error = compute_sb_data(devvp, es, fs)) != 0) { brelse(bp); return (error); } #ifdef UNKLAR if (fs->fs_sbsize < SBSIZE) bp->b_flags |= B_INVAL; #endif brelse(bp); /* * Step 3: invalidate all cluster summary information. */ if (fs->e2fs_contigsumsize > 0) { lp = fs->e2fs_maxcluster; sump = fs->e2fs_clustersum; for (i = 0; i < fs->e2fs_gcount; i++, sump++) { *lp++ = fs->e2fs_contigsumsize; sump->cs_init = 0; bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1); } } loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* * Step 4: invalidate all cached file data. */ if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } if (vinvalbuf(vp, 0, 0, 0)) panic("ext2_reload: dirty2"); /* * Step 5: re-read inode data for all active vnodes. */ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->e2fs_bsize, NOCRED, &bp); if (error) { VOP_UNLOCK(vp, 0); vrele(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } ext2_ei2i((struct ext2fs_dinode *) ((char *)bp->b_data + EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip); brelse(bp); VOP_UNLOCK(vp, 0); vrele(vp); } return (0); } /* * Common code for mount and mountroot. */ static int ext2_mountfs(struct vnode *devvp, struct mount *mp) { struct ext2mount *ump; struct buf *bp; struct m_ext2fs *fs; struct ext2fs *es; struct cdev *dev = devvp->v_rdev; struct g_consumer *cp; struct bufobj *bo; struct csum *sump; int error; int ronly; int i, size; int32_t *lp; int32_t e2fs_maxcontig; ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); /* XXX: use VOP_ACESS to check FS perms */ DROP_GIANT(); g_topology_lock(); error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1); g_topology_unlock(); PICKUP_GIANT(); VOP_UNLOCK(devvp, 0); if (error) return (error); /* XXX: should we check for some sectorsize or 512 instead? */ if (((SBSIZE % cp->provider->sectorsize) != 0) || (SBSIZE < cp->provider->sectorsize)) { DROP_GIANT(); g_topology_lock(); g_vfs_close(cp); g_topology_unlock(); PICKUP_GIANT(); return (EINVAL); } bo = &devvp->v_bufobj; bo->bo_private = cp; bo->bo_ops = g_vfs_bufops; if (devvp->v_rdev->si_iosize_max != 0) mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; if (mp->mnt_iosize_max > MAXPHYS) mp->mnt_iosize_max = MAXPHYS; bp = NULL; ump = NULL; if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) goto out; es = (struct ext2fs *)bp->b_data; if (ext2_check_sb_compat(es, dev, ronly) != 0) { error = EINVAL; /* XXX needs translation */ goto out; } if ((es->e2fs_state & E2FS_ISCLEAN) == 0 || (es->e2fs_state & E2FS_ERRORS)) { if (ronly || (mp->mnt_flag & MNT_FORCE)) { printf( "WARNING: Filesystem was not properly dismounted\n"); } else { printf( "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); error = EPERM; goto out; } } ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO); /* * I don't know whether this is the right strategy. Note that * we dynamically allocate both an ext2_sb_info and an ext2_super_block * while Linux keeps the super block in a locked buffer. */ ump->um_e2fs = malloc(sizeof(struct m_ext2fs), - M_EXT2MNT, M_WAITOK); + M_EXT2MNT, M_WAITOK | M_ZERO); ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs), M_EXT2MNT, M_WAITOK); mtx_init(EXT2_MTX(ump), "EXT2FS", "EXT2FS Lock", MTX_DEF); bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs)); if ((error = compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs))) goto out; /* * Calculate the maximum contiguous blocks and size of cluster summary * array. In FFS this is done by newfs; however, the superblock * in ext2fs doesn't have these variables, so we can calculate * them here. */ e2fs_maxcontig = MAX(1, MAXPHYS / ump->um_e2fs->e2fs_bsize); ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG); if (ump->um_e2fs->e2fs_contigsumsize > 0) { size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t); ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK); size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum); ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK); lp = ump->um_e2fs->e2fs_maxcluster; sump = ump->um_e2fs->e2fs_clustersum; for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) { *lp++ = ump->um_e2fs->e2fs_contigsumsize; sump->cs_init = 0; sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) * sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO); } } brelse(bp); bp = NULL; fs = ump->um_e2fs; fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */ /* * If the fs is not mounted read-only, make sure the super block is * always written back on a sync(). */ fs->e2fs_wasvalid = fs->e2fs->e2fs_state & E2FS_ISCLEAN ? 1 : 0; if (ronly == 0) { fs->e2fs_fmod = 1; /* mark it modified */ fs->e2fs->e2fs_state &= ~E2FS_ISCLEAN; /* set fs invalid */ } mp->mnt_data = ump; mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; MNT_ILOCK(mp); mp->mnt_flag |= MNT_LOCAL; MNT_IUNLOCK(mp); ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_bo = &devvp->v_bufobj; ump->um_cp = cp; /* * Setting those two parameters allowed us to use * ufs_bmap w/o changse! */ ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); ump->um_bptrtodb = fs->e2fs->e2fs_log_bsize + 1; ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); if (ronly == 0) ext2_sbupdate(ump, MNT_WAIT); /* * Initialize filesystem stat information in mount struct. */ MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | MNTK_USES_BCACHE; MNT_IUNLOCK(mp); return (0); out: if (bp) brelse(bp); if (cp != NULL) { DROP_GIANT(); g_topology_lock(); g_vfs_close(cp); g_topology_unlock(); PICKUP_GIANT(); } if (ump) { mtx_destroy(EXT2_MTX(ump)); free(ump->um_e2fs->e2fs_gd, M_EXT2MNT); free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT); free(ump->um_e2fs->e2fs, M_EXT2MNT); free(ump->um_e2fs, M_EXT2MNT); free(ump, M_EXT2MNT); mp->mnt_data = NULL; } return (error); } /* * Unmount system call. */ static int ext2_unmount(struct mount *mp, int mntflags) { struct ext2mount *ump; struct m_ext2fs *fs; struct csum *sump; int error, flags, i, ronly; flags = 0; if (mntflags & MNT_FORCE) { if (mp->mnt_flag & MNT_ROOTFS) return (EINVAL); flags |= FORCECLOSE; } if ((error = ext2_flushfiles(mp, flags, curthread)) != 0) return (error); ump = VFSTOEXT2(mp); fs = ump->um_e2fs; ronly = fs->e2fs_ronly; if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) { if (fs->e2fs_wasvalid) fs->e2fs->e2fs_state |= E2FS_ISCLEAN; ext2_sbupdate(ump, MNT_WAIT); } DROP_GIANT(); g_topology_lock(); g_vfs_close(ump->um_cp); g_topology_unlock(); PICKUP_GIANT(); vrele(ump->um_devvp); sump = fs->e2fs_clustersum; for (i = 0; i < fs->e2fs_gcount; i++, sump++) free(sump->cs_sum, M_EXT2MNT); free(fs->e2fs_clustersum, M_EXT2MNT); free(fs->e2fs_maxcluster, M_EXT2MNT); free(fs->e2fs_gd, M_EXT2MNT); free(fs->e2fs_contigdirs, M_EXT2MNT); free(fs->e2fs, M_EXT2MNT); free(fs, M_EXT2MNT); free(ump, M_EXT2MNT); mp->mnt_data = NULL; MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_LOCAL; MNT_IUNLOCK(mp); return (error); } /* * Flush out all the files in a filesystem. */ static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td) { int error; error = vflush(mp, 0, flags, td); return (error); } /* * Get filesystem statistics. */ int ext2_statfs(struct mount *mp, struct statfs *sbp) { struct ext2mount *ump; struct m_ext2fs *fs; uint32_t overhead, overhead_per_group, ngdb; int i, ngroups; ump = VFSTOEXT2(mp); fs = ump->um_e2fs; if (fs->e2fs->e2fs_magic != E2FS_MAGIC) panic("ext2_statfs"); /* * Compute the overhead (FS structures) */ overhead_per_group = 1 /* block bitmap */ + 1 /* inode bitmap */ + fs->e2fs_itpg; overhead = fs->e2fs->e2fs_first_dblock + fs->e2fs_gcount * overhead_per_group; if (fs->e2fs->e2fs_rev > E2FS_REV0 && fs->e2fs->e2fs_features_rocompat & EXT2F_ROCOMPAT_SPARSESUPER) { for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { if (cg_has_sb(i)) ngroups++; } } else { ngroups = fs->e2fs_gcount; } ngdb = fs->e2fs_gdbcount; if (fs->e2fs->e2fs_rev > E2FS_REV0 && fs->e2fs->e2fs_features_compat & EXT2F_COMPAT_RESIZE) ngdb += fs->e2fs->e2fs_reserved_ngdb; overhead += ngroups * (1 /* superblock */ + ngdb); sbp->f_bsize = EXT2_FRAG_SIZE(fs); sbp->f_iosize = EXT2_BLOCK_SIZE(fs); sbp->f_blocks = fs->e2fs->e2fs_bcount - overhead; sbp->f_bfree = fs->e2fs->e2fs_fbcount; sbp->f_bavail = sbp->f_bfree - fs->e2fs->e2fs_rbcount; sbp->f_files = fs->e2fs->e2fs_icount; sbp->f_ffree = fs->e2fs->e2fs_ficount; return (0); } /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Note: we are always called with the filesystem marked `MPBUSY'. */ static int ext2_sync(struct mount *mp, int waitfor) { struct vnode *mvp, *vp; struct thread *td; struct inode *ip; struct ext2mount *ump = VFSTOEXT2(mp); struct m_ext2fs *fs; int error, allerror = 0; td = curthread; fs = ump->um_e2fs; if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */ printf("fs = %s\n", fs->e2fs_fsmnt); panic("ext2_sync: rofs mod"); } /* * Write back each (modified) inode. */ loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } ip = VTOI(vp); if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && (vp->v_bufobj.bo_dirty.bv_cnt == 0 || waitfor == MNT_LAZY)) { VI_UNLOCK(vp); continue; } error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); if (error) { if (error == ENOENT) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } continue; } if ((error = VOP_FSYNC(vp, waitfor, td)) != 0) allerror = error; VOP_UNLOCK(vp, 0); vrele(vp); } /* * Force stale filesystem control information to be flushed. */ if (waitfor != MNT_LAZY) { vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0) allerror = error; VOP_UNLOCK(ump->um_devvp, 0); } /* * Write back modified superblock. */ if (fs->e2fs_fmod != 0) { fs->e2fs_fmod = 0; fs->e2fs->e2fs_wtime = time_second; if ((error = ext2_cgupdate(ump, waitfor)) != 0) allerror = error; } return (allerror); } /* * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it * in from disk. If it is in core, wait for the lock bit to clear, then * return the inode locked. Detection and handling of mount points must be * done by the calling routine. */ static int ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) { struct m_ext2fs *fs; struct inode *ip; struct ext2mount *ump; struct buf *bp; struct vnode *vp; struct thread *td; int i, error; int used_blocks; td = curthread; error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL); if (error || *vpp != NULL) return (error); ump = VFSTOEXT2(mp); ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); /* Allocate a new vnode/inode. */ if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) { *vpp = NULL; free(ip, M_EXT2NODE); return (error); } vp->v_data = ip; ip->i_vnode = vp; ip->i_e2fs = fs = ump->um_e2fs; ip->i_ump = ump; ip->i_number = ino; lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); error = insmntque(vp, mp); if (error != 0) { free(ip, M_EXT2NODE); *vpp = NULL; return (error); } error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL); if (error || *vpp != NULL) return (error); /* Read in the disk contents for the inode, copy into the inode. */ if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ brelse(bp); vput(vp); *vpp = NULL; return (error); } /* convert ext2 inode to dinode */ ext2_ei2i((struct ext2fs_dinode *) ((char *)bp->b_data + EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip); ip->i_block_group = ino_to_cg(fs, ino); ip->i_next_alloc_block = 0; ip->i_next_alloc_goal = 0; /* * Now we want to make sure that block pointers for unused * blocks are zeroed out - ext2_balloc depends on this * although for regular files and directories only * * If IN_E4EXTENTS is enabled, unused blocks are not zeroed * out because we could corrupt the extent tree. */ if (!(ip->i_flag & IN_E4EXTENTS) && (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) { used_blocks = (ip->i_size+fs->e2fs_bsize-1) / fs->e2fs_bsize; for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) ip->i_db[i] = 0; } #ifdef EXT2FS_DEBUG ext2_print_inode(ip); #endif bqrelse(bp); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) { vput(vp); *vpp = NULL; return (error); } /* * Finish inode initialization. */ /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (ip->i_gen == 0) { ip->i_gen = random() + 1; if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) ip->i_flag |= IN_MODIFIED; } *vpp = vp; return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is valid * - call ext2_vget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the given client host has export rights and return * those rights via. exflagsp and credanonp */ static int ext2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp) { struct inode *ip; struct ufid *ufhp; struct vnode *nvp; struct m_ext2fs *fs; int error; ufhp = (struct ufid *)fhp; fs = VFSTOEXT2(mp)->um_e2fs; if (ufhp->ufid_ino < EXT2_ROOTINO || ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs->e2fs_ipg) return (ESTALE); error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); if (error) { *vpp = NULLVP; return (error); } ip = VTOI(nvp); if (ip->i_mode == 0 || ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { vput(nvp); *vpp = NULLVP; return (ESTALE); } *vpp = nvp; vnode_create_vobject(*vpp, 0, curthread); return (0); } /* * Write a superblock and associated information back to disk. */ static int ext2_sbupdate(struct ext2mount *mp, int waitfor) { struct m_ext2fs *fs = mp->um_e2fs; struct ext2fs *es = fs->e2fs; struct buf *bp; int error = 0; bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs)); if (waitfor == MNT_WAIT) error = bwrite(bp); else bawrite(bp); /* * The buffers for group descriptors, inode bitmaps and block bitmaps * are not busy at this point and are (hopefully) written by the * usual sync mechanism. No need to write them here. */ return (error); } int ext2_cgupdate(struct ext2mount *mp, int waitfor) { struct m_ext2fs *fs = mp->um_e2fs; struct buf *bp; int i, error = 0, allerror = 0; allerror = ext2_sbupdate(mp, waitfor); for (i = 0; i < fs->e2fs_gdbcount; i++) { bp = getblk(mp->um_devvp, fsbtodb(fs, fs->e2fs->e2fs_first_dblock + 1 /* superblock */ + i), fs->e2fs_bsize, 0, 0, 0); e2fs_cgsave(&fs->e2fs_gd[ i * fs->e2fs_bsize / sizeof(struct ext2_gd)], (struct ext2_gd *)bp->b_data, fs->e2fs_bsize); if (waitfor == MNT_WAIT) error = bwrite(bp); else bawrite(bp); } if (!allerror && error) allerror = error; return (allerror); } /* * Return the root of a filesystem. */ static int ext2_root(struct mount *mp, int flags, struct vnode **vpp) { struct vnode *nvp; int error; error = VFS_VGET(mp, EXT2_ROOTINO, LK_EXCLUSIVE, &nvp); if (error) return (error); *vpp = nvp; return (0); } Index: projects/clang380-import/sys/fs/ext2fs/ext2_vnops.c =================================================================== --- projects/clang380-import/sys/fs/ext2fs/ext2_vnops.c (revision 293686) +++ projects/clang380-import/sys/fs/ext2fs/ext2_vnops.c (revision 293687) @@ -1,2064 +1,2079 @@ /*- * modified for EXT2FS support in Lites 1.1 * * Aug 1995, Godmar Back (gback@cs.utah.edu) * University of Utah, Department of Computer Science */ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_vnops.c 8.7 (Berkeley) 2/3/94 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 * $FreeBSD$ */ #include "opt_suiddir.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_directio.h" #include #include #include #include #include #include #include #include static int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *); static void ext2_itimes_locked(struct vnode *); static int ext4_ext_read(struct vop_read_args *); static int ext2_ind_read(struct vop_read_args *); static vop_access_t ext2_access; static int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *); static int ext2_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *); static vop_close_t ext2_close; static vop_create_t ext2_create; static vop_fsync_t ext2_fsync; static vop_getattr_t ext2_getattr; static vop_ioctl_t ext2_ioctl; static vop_link_t ext2_link; static vop_mkdir_t ext2_mkdir; static vop_mknod_t ext2_mknod; static vop_open_t ext2_open; static vop_pathconf_t ext2_pathconf; static vop_print_t ext2_print; static vop_read_t ext2_read; static vop_readlink_t ext2_readlink; static vop_remove_t ext2_remove; static vop_rename_t ext2_rename; static vop_rmdir_t ext2_rmdir; static vop_setattr_t ext2_setattr; static vop_strategy_t ext2_strategy; static vop_symlink_t ext2_symlink; static vop_write_t ext2_write; static vop_vptofh_t ext2_vptofh; static vop_close_t ext2fifo_close; static vop_kqfilter_t ext2fifo_kqfilter; /* Global vfs data structures for ext2. */ struct vop_vector ext2_vnodeops = { .vop_default = &default_vnodeops, .vop_access = ext2_access, .vop_bmap = ext2_bmap, .vop_cachedlookup = ext2_lookup, .vop_close = ext2_close, .vop_create = ext2_create, .vop_fsync = ext2_fsync, .vop_getpages = vnode_pager_local_getpages, .vop_getpages_async = vnode_pager_local_getpages_async, .vop_getattr = ext2_getattr, .vop_inactive = ext2_inactive, .vop_ioctl = ext2_ioctl, .vop_link = ext2_link, .vop_lookup = vfs_cache_lookup, .vop_mkdir = ext2_mkdir, .vop_mknod = ext2_mknod, .vop_open = ext2_open, .vop_pathconf = ext2_pathconf, .vop_poll = vop_stdpoll, .vop_print = ext2_print, .vop_read = ext2_read, .vop_readdir = ext2_readdir, .vop_readlink = ext2_readlink, .vop_reallocblks = ext2_reallocblks, .vop_reclaim = ext2_reclaim, .vop_remove = ext2_remove, .vop_rename = ext2_rename, .vop_rmdir = ext2_rmdir, .vop_setattr = ext2_setattr, .vop_strategy = ext2_strategy, .vop_symlink = ext2_symlink, .vop_write = ext2_write, .vop_vptofh = ext2_vptofh, }; struct vop_vector ext2_fifoops = { .vop_default = &fifo_specops, .vop_access = ext2_access, .vop_close = ext2fifo_close, .vop_fsync = ext2_fsync, .vop_getattr = ext2_getattr, .vop_inactive = ext2_inactive, .vop_kqfilter = ext2fifo_kqfilter, .vop_print = ext2_print, .vop_read = VOP_PANIC, .vop_reclaim = ext2_reclaim, .vop_setattr = ext2_setattr, .vop_write = VOP_PANIC, .vop_vptofh = ext2_vptofh, }; /* * A virgin directory (no blushing please). * Note that the type and namlen fields are reversed relative to ext2. * Also, we don't use `struct odirtemplate', since it would just cause * endianness problems. */ static struct dirtemplate mastertemplate = { 0, 12, 1, EXT2_FT_DIR, ".", 0, DIRBLKSIZ - 12, 2, EXT2_FT_DIR, ".." }; static struct dirtemplate omastertemplate = { 0, 12, 1, EXT2_FT_UNKNOWN, ".", 0, DIRBLKSIZ - 12, 2, EXT2_FT_UNKNOWN, ".." }; static void ext2_itimes_locked(struct vnode *vp) { struct inode *ip; struct timespec ts; ASSERT_VI_LOCKED(vp, __func__); ip = VTOI(vp); if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) return; if ((vp->v_type == VBLK || vp->v_type == VCHR)) ip->i_flag |= IN_LAZYMOD; else ip->i_flag |= IN_MODIFIED; if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { vfs_timestamp(&ts); if (ip->i_flag & IN_ACCESS) { ip->i_atime = ts.tv_sec; ip->i_atimensec = ts.tv_nsec; } if (ip->i_flag & IN_UPDATE) { ip->i_mtime = ts.tv_sec; ip->i_mtimensec = ts.tv_nsec; ip->i_modrev++; } if (ip->i_flag & IN_CHANGE) { ip->i_ctime = ts.tv_sec; ip->i_ctimensec = ts.tv_nsec; } } ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); } void ext2_itimes(struct vnode *vp) { VI_LOCK(vp); ext2_itimes_locked(vp); VI_UNLOCK(vp); } /* * Create a regular file */ static int ext2_create(struct vop_create_args *ap) { int error; error = ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), ap->a_dvp, ap->a_vpp, ap->a_cnp); if (error != 0) return (error); if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0) cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp); return (0); } static int ext2_open(struct vop_open_args *ap) { if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) return (EOPNOTSUPP); /* * Files marked append-only must be opened for appending. */ if ((VTOI(ap->a_vp)->i_flags & APPEND) && (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) return (EPERM); vnode_create_vobject(ap->a_vp, VTOI(ap->a_vp)->i_size, ap->a_td); return (0); } /* * Close called. * * Update the times on the inode. */ static int ext2_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; VI_LOCK(vp); if (vp->v_usecount > 1) ext2_itimes_locked(vp); VI_UNLOCK(vp); return (0); } static int ext2_access(struct vop_access_args *ap) { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); accmode_t accmode = ap->a_accmode; int error; if (vp->v_type == VBLK || vp->v_type == VCHR) return (EOPNOTSUPP); /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ if (accmode & VWRITE) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; default: break; } } /* If immutable bit set, nobody gets to write it. */ if ((accmode & VWRITE) && (ip->i_flags & (SF_IMMUTABLE | SF_SNAPSHOT))) return (EPERM); error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, ap->a_accmode, ap->a_cred, NULL); return (error); } static int ext2_getattr(struct vop_getattr_args *ap) { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct vattr *vap = ap->a_vap; ext2_itimes(vp); /* * Copy from inode table */ vap->va_fsid = dev2udev(ip->i_devvp->v_rdev); vap->va_fileid = ip->i_number; vap->va_mode = ip->i_mode & ~IFMT; vap->va_nlink = ip->i_nlink; vap->va_uid = ip->i_uid; vap->va_gid = ip->i_gid; vap->va_rdev = ip->i_rdev; vap->va_size = ip->i_size; vap->va_atime.tv_sec = ip->i_atime; vap->va_atime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_atimensec : 0; vap->va_mtime.tv_sec = ip->i_mtime; vap->va_mtime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_mtimensec : 0; vap->va_ctime.tv_sec = ip->i_ctime; vap->va_ctime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_ctimensec : 0; if E2DI_HAS_XTIME(ip) { vap->va_birthtime.tv_sec = ip->i_birthtime; vap->va_birthtime.tv_nsec = ip->i_birthnsec; } vap->va_flags = ip->i_flags; vap->va_gen = ip->i_gen; vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; vap->va_bytes = dbtob((u_quad_t)ip->i_blocks); vap->va_type = IFTOVT(ip->i_mode); vap->va_filerev = ip->i_modrev; return (0); } /* * Set attribute vnode op. called from several syscalls */ static int ext2_setattr(struct vop_setattr_args *ap) { struct vattr *vap = ap->a_vap; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct ucred *cred = ap->a_cred; struct thread *td = curthread; int error; /* * Check for unsettable attributes. */ if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { return (EINVAL); } if (vap->va_flags != VNOVAL) { /* Disallow flags not supported by ext2fs. */ if(vap->va_flags & ~(SF_APPEND | SF_IMMUTABLE | UF_NODUMP)) return (EOPNOTSUPP); if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); /* * Callers may only modify the file flags on objects they * have VADMIN rights for. */ if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) return (error); /* * Unprivileged processes and privileged processes in * jail() are not permitted to unset system flags, or * modify flags if any system flags are set. * Privileged non-jail processes may not modify system flags * if securelevel > 0 and any existing system flags are set. */ if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) { error = securelevel_gt(cred, 0); if (error) return (error); } } else { if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND) || ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE)) return (EPERM); } ip->i_flags = vap->va_flags; ip->i_flag |= IN_CHANGE; if (ip->i_flags & (IMMUTABLE | APPEND)) return (0); } if (ip->i_flags & (IMMUTABLE | APPEND)) return (EPERM); /* * Go through the fields and update iff not VNOVAL. */ if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((error = ext2_chown(vp, vap->va_uid, vap->va_gid, cred, td)) != 0) return (error); } if (vap->va_size != VNOVAL) { /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; default: break; } if ((error = ext2_truncate(vp, vap->va_size, 0, cred, td)) != 0) return (error); } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); /* * From utimes(2): * If times is NULL, ... The caller must be the owner of * the file, have permission to write the file, or be the * super-user. * If times is non-NULL, ... The caller must be the owner of * the file or be the super-user. */ if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) && ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || (error = VOP_ACCESS(vp, VWRITE, cred, td)))) return (error); if (vap->va_atime.tv_sec != VNOVAL) ip->i_flag |= IN_ACCESS; if (vap->va_mtime.tv_sec != VNOVAL) ip->i_flag |= IN_CHANGE | IN_UPDATE; ext2_itimes(vp); if (vap->va_atime.tv_sec != VNOVAL) { ip->i_atime = vap->va_atime.tv_sec; ip->i_atimensec = vap->va_atime.tv_nsec; } if (vap->va_mtime.tv_sec != VNOVAL) { ip->i_mtime = vap->va_mtime.tv_sec; ip->i_mtimensec = vap->va_mtime.tv_nsec; } ip->i_birthtime = vap->va_birthtime.tv_sec; ip->i_birthnsec = vap->va_birthtime.tv_nsec; error = ext2_update(vp, 0); if (error) return (error); } error = 0; if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); error = ext2_chmod(vp, (int)vap->va_mode, cred, td); } return (error); } /* * Change the mode on a file. * Inode must be locked before calling. */ static int ext2_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td) { struct inode *ip = VTOI(vp); int error; /* * To modify the permissions on a file, must possess VADMIN * for that file. */ if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) return (error); /* * Privileged processes may set the sticky bit on non-directories, * as well as set the setgid bit on a file with a group that the * process is not a member of. */ if (vp->v_type != VDIR && (mode & S_ISTXT)) { error = priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0); if (error) return (EFTYPE); } if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); if (error) return (error); } ip->i_mode &= ~ALLPERMS; ip->i_mode |= (mode & ALLPERMS); ip->i_flag |= IN_CHANGE; return (0); } /* * Perform chown operation on inode ip; * inode must be locked prior to call. */ static int ext2_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, struct thread *td) { struct inode *ip = VTOI(vp); uid_t ouid; gid_t ogid; int error = 0; if (uid == (uid_t)VNOVAL) uid = ip->i_uid; if (gid == (gid_t)VNOVAL) gid = ip->i_gid; /* * To modify the ownership of a file, must possess VADMIN * for that file. */ if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) return (error); /* * To change the owner of a file, or change the group of a file * to a group of which we are not a member, the caller must * have privilege. */ if (uid != ip->i_uid || (gid != ip->i_gid && !groupmember(gid, cred))) { error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0); if (error) return (error); } ogid = ip->i_gid; ouid = ip->i_uid; ip->i_gid = gid; ip->i_uid = uid; ip->i_flag |= IN_CHANGE; if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0) != 0) ip->i_mode &= ~(ISUID | ISGID); } return (0); } /* * Synch an open file. */ /* ARGSUSED */ static int ext2_fsync(struct vop_fsync_args *ap) { /* * Flush all dirty buffers associated with a vnode. */ vop_stdfsync(ap); return (ext2_update(ap->a_vp, ap->a_waitfor == MNT_WAIT)); } /* * Mknod vnode call */ /* ARGSUSED */ static int ext2_mknod(struct vop_mknod_args *ap) { struct vattr *vap = ap->a_vap; struct vnode **vpp = ap->a_vpp; struct inode *ip; ino_t ino; int error; error = ext2_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), ap->a_dvp, vpp, ap->a_cnp); if (error) return (error); ip = VTOI(*vpp); ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; if (vap->va_rdev != VNOVAL) { /* * Want to be able to use this to make badblock * inodes, so don't truncate the dev number. */ ip->i_rdev = vap->va_rdev; } /* * Remove inode, then reload it through VFS_VGET so it is * checked to see if it is an alias of an existing entry in * the inode cache. XXX I don't believe this is necessary now. */ (*vpp)->v_type = VNON; ino = ip->i_number; /* Save this before vgone() invalidates ip. */ vgone(*vpp); vput(*vpp); error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); if (error) { *vpp = NULL; return (error); } return (0); } static int ext2_remove(struct vop_remove_args *ap) { struct inode *ip; struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; int error; ip = VTOI(vp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(dvp)->i_flags & APPEND)) { error = EPERM; goto out; } error = ext2_dirremove(dvp, ap->a_cnp); if (error == 0) { ip->i_nlink--; ip->i_flag |= IN_CHANGE; } out: return (error); } /* * link vnode call */ static int ext2_link(struct vop_link_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct inode *ip; int error; #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("ext2_link: no name"); #endif ip = VTOI(vp); if ((nlink_t)ip->i_nlink >= EXT2_LINK_MAX) { error = EMLINK; goto out; } if (ip->i_flags & (IMMUTABLE | APPEND)) { error = EPERM; goto out; } ip->i_nlink++; ip->i_flag |= IN_CHANGE; error = ext2_update(vp, !DOINGASYNC(vp)); if (!error) error = ext2_direnter(ip, tdvp, cnp); if (error) { ip->i_nlink--; ip->i_flag |= IN_CHANGE; } out: return (error); } /* * Rename system call. * rename("foo", "bar"); * is essentially * unlink("bar"); * link("foo", "bar"); * unlink("foo"); * but ``atomically''. Can't do full commit without saving state in the * inode on disk which isn't feasible at this time. Best we can do is * always guarantee the target exists. * * Basic algorithm is: * * 1) Bump link count on source while we're linking it to the * target. This also ensure the inode won't be deleted out * from underneath us while we work (it may be truncated by * a concurrent `trunc' or `open' for creation). * 2) Link source to destination. If destination already exists, * delete it first. * 3) Unlink source reference to inode if still around. If a * directory was moved and the parent of the destination * is different from the source, patch the ".." entry in the * directory. */ static int ext2_rename(struct vop_rename_args *ap) { struct vnode *tvp = ap->a_tvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct inode *ip, *xp, *dp; struct dirtemplate dirbuf; int doingdirectory = 0, oldparent = 0, newparent = 0; int error = 0; u_char namlen; #ifdef INVARIANTS if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("ext2_rename: no name"); #endif /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; abortit: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); return (error); } if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(tdvp)->i_flags & APPEND))) { error = EPERM; goto abortit; } /* * Renaming a file to itself has no effect. The upper layers should * not call us in that case. Temporarily just warn if they do. */ if (fvp == tvp) { printf("ext2_rename: fvp == tvp (can't happen)\n"); error = 0; goto abortit; } if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) goto abortit; dp = VTOI(fdvp); ip = VTOI(fvp); if (ip->i_nlink >= EXT2_LINK_MAX) { VOP_UNLOCK(fvp, 0); error = EMLINK; goto abortit; } if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (dp->i_flags & APPEND)) { VOP_UNLOCK(fvp, 0); error = EPERM; goto abortit; } if ((ip->i_mode & IFMT) == IFDIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || (ip->i_flag & IN_RENAME)) { VOP_UNLOCK(fvp, 0); error = EINVAL; goto abortit; } ip->i_flag |= IN_RENAME; oldparent = dp->i_number; doingdirectory++; } vrele(fdvp); /* * When the target exists, both the directory * and target vnodes are returned locked. */ dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before * completing our work, the link count * may be wrong, but correctable. */ ip->i_nlink++; ip->i_flag |= IN_CHANGE; if ((error = ext2_update(fvp, !DOINGASYNC(fvp))) != 0) { VOP_UNLOCK(fvp, 0); goto bad; } /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory hierarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". We must repeat the call * to namei, as the parent directory is unlocked by the * call to checkpath(). */ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); VOP_UNLOCK(fvp, 0); if (oldparent != dp->i_number) newparent = dp->i_number; if (doingdirectory && newparent) { if (error) /* write access check above */ goto bad; if (xp != NULL) vput(tvp); error = ext2_checkpath(ip, dp, tcnp->cn_cred); if (error) goto out; VREF(tdvp); error = relookup(tdvp, &tvp, tcnp); if (error) goto out; vrele(tdvp); dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); } /* * 2) If target doesn't exist, link the target * to the source and unlink the source. * Otherwise, rewrite the target directory * entry to reference the source inode and * expunge the original entry's existence. */ if (xp == NULL) { if (dp->i_devvp != ip->i_devvp) panic("ext2_rename: EXDEV"); /* * Account for ".." in new directory. * When source and destination have the same * parent we don't fool with the link count. */ if (doingdirectory && newparent) { if ((nlink_t)dp->i_nlink >= EXT2_LINK_MAX) { error = EMLINK; goto bad; } dp->i_nlink++; dp->i_flag |= IN_CHANGE; error = ext2_update(tdvp, !DOINGASYNC(tdvp)); if (error) goto bad; } error = ext2_direnter(ip, tdvp, tcnp); if (error) { if (doingdirectory && newparent) { dp->i_nlink--; dp->i_flag |= IN_CHANGE; (void)ext2_update(tdvp, 1); } goto bad; } vput(tdvp); } else { if (xp->i_devvp != dp->i_devvp || xp->i_devvp != ip->i_devvp) panic("ext2_rename: EXDEV"); /* * Short circuit rename(foo, foo). */ if (xp->i_number == ip->i_number) panic("ext2_rename: same file"); /* * If the parent directory is "sticky", then the user must * own the parent directory, or the destination of the rename, * otherwise the destination may not be changed (except by * root). This implements append-only directories. */ if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && tcnp->cn_cred->cr_uid != dp->i_uid && xp->i_uid != tcnp->cn_cred->cr_uid) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if ((xp->i_mode&IFMT) == IFDIR) { if (! ext2_dirempty(xp, dp->i_number, tcnp->cn_cred) || xp->i_nlink > 2) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } error = ext2_dirrewrite(dp, ip, tcnp); if (error) goto bad; /* * If the target directory is in the same * directory as the source directory, * decrement the link count on the parent * of the target directory. */ if (doingdirectory && !newparent) { dp->i_nlink--; dp->i_flag |= IN_CHANGE; } vput(tdvp); /* * Adjust the link count of the target to * reflect the dirrewrite above. If this is * a directory it is empty and there are * no links to it, so we can squash the inode and * any space associated with it. We disallowed * renaming over top of a directory with links to * it above, as the remaining link would point to * a directory without "." or ".." entries. */ xp->i_nlink--; if (doingdirectory) { if (--xp->i_nlink != 0) panic("ext2_rename: linked directory"); error = ext2_truncate(tvp, (off_t)0, IO_SYNC, tcnp->cn_cred, tcnp->cn_thread); } xp->i_flag |= IN_CHANGE; vput(tvp); xp = NULL; } /* * 3) Unlink the source. */ fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; VREF(fdvp); error = relookup(fdvp, &fvp, fcnp); if (error == 0) vrele(fdvp); if (fvp != NULL) { xp = VTOI(fvp); dp = VTOI(fdvp); } else { /* * From name has disappeared. */ if (doingdirectory) panic("ext2_rename: lost dir entry"); vrele(ap->a_fvp); return (0); } /* * Ensure that the directory entry still exists and has not * changed while the new name has been entered. If the source is * a file then the entry may have been unlinked or renamed. In * either case there is no further work to be done. If the source * is a directory then it cannot have been rmdir'ed; its link * count of three would cause a rmdir to fail with ENOTEMPTY. * The IN_RENAME flag ensures that it cannot be moved by another * rename. */ if (xp != ip) { if (doingdirectory) panic("ext2_rename: lost dir entry"); } else { /* * If the source is a directory with a * new parent, the link count of the old * parent directory must be decremented * and ".." set to point to the new parent. */ if (doingdirectory && newparent) { dp->i_nlink--; dp->i_flag |= IN_CHANGE; error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, sizeof(struct dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, tcnp->cn_cred, NOCRED, NULL, NULL); if (error == 0) { /* Like ufs little-endian: */ namlen = dirbuf.dotdot_type; if (namlen != 2 || dirbuf.dotdot_name[0] != '.' || dirbuf.dotdot_name[1] != '.') { ext2_dirbad(xp, (doff_t)12, "rename: mangled dir"); } else { dirbuf.dotdot_ino = newparent; (void) vn_rdwr(UIO_WRITE, fvp, (caddr_t)&dirbuf, sizeof(struct dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_SYNC | IO_NOMACCHECK, tcnp->cn_cred, NOCRED, NULL, NULL); cache_purge(fdvp); } } } error = ext2_dirremove(fdvp, fcnp); if (!error) { xp->i_nlink--; xp->i_flag |= IN_CHANGE; } xp->i_flag &= ~IN_RENAME; } if (dp) vput(fdvp); if (xp) vput(fvp); vrele(ap->a_fvp); return (error); bad: if (xp) vput(ITOV(xp)); vput(ITOV(dp)); out: if (doingdirectory) ip->i_flag &= ~IN_RENAME; if (vn_lock(fvp, LK_EXCLUSIVE) == 0) { ip->i_nlink--; ip->i_flag |= IN_CHANGE; ip->i_flag &= ~IN_RENAME; vput(fvp); } else vrele(fvp); return (error); } /* * Mkdir system call */ static int ext2_mkdir(struct vop_mkdir_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; struct vnode *tvp; struct dirtemplate dirtemplate, *dtp; int error, dmode; #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("ext2_mkdir: no name"); #endif dp = VTOI(dvp); if ((nlink_t)dp->i_nlink >= EXT2_LINK_MAX) { error = EMLINK; goto out; } dmode = vap->va_mode & 0777; dmode |= IFDIR; /* * Must simulate part of ext2_makeinode here to acquire the inode, * but not have it entered in the parent directory. The entry is * made later after writing "." and ".." entries. */ error = ext2_valloc(dvp, dmode, cnp->cn_cred, &tvp); if (error) goto out; ip = VTOI(tvp); ip->i_gid = dp->i_gid; #ifdef SUIDDIR { /* * if we are hacking owners here, (only do this where told to) * and we are not giving it TOO root, (would subvert quotas) * then go ahead and give it to the other user. * The new directory also inherits the SUID bit. * If user's UID and dir UID are the same, * 'give it away' so that the SUID is still forced on. */ if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (dp->i_mode & ISUID) && dp->i_uid) { dmode |= ISUID; ip->i_uid = dp->i_uid; } else { ip->i_uid = cnp->cn_cred->cr_uid; } } #else ip->i_uid = cnp->cn_cred->cr_uid; #endif ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = dmode; tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ ip->i_nlink = 2; if (cnp->cn_flags & ISWHITEOUT) ip->i_flags |= UF_OPAQUE; error = ext2_update(tvp, 1); /* * Bump link count in parent directory * to reflect work done below. Should * be done before reference is created * so reparation is possible if we crash. */ dp->i_nlink++; dp->i_flag |= IN_CHANGE; error = ext2_update(dvp, !DOINGASYNC(dvp)); if (error) goto bad; /* Initialize directory with "." and ".." from static template. */ if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs, EXT2F_INCOMPAT_FTYPE)) dtp = &mastertemplate; else dtp = &omastertemplate; dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; /* note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE * so let's just redefine it - for this function only */ #undef DIRBLKSIZ #define DIRBLKSIZ VTOI(dvp)->i_e2fs->e2fs_bsize dirtemplate.dotdot_reclen = DIRBLKSIZ - 12; error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, sizeof(dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_SYNC | IO_NOMACCHECK, cnp->cn_cred, NOCRED, NULL, NULL); if (error) { dp->i_nlink--; dp->i_flag |= IN_CHANGE; goto bad; } if (DIRBLKSIZ > VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) /* XXX should grow with balloc() */ panic("ext2_mkdir: blksize"); else { ip->i_size = DIRBLKSIZ; ip->i_flag |= IN_CHANGE; } /* Directory set up, now install its entry in the parent directory. */ error = ext2_direnter(ip, dvp, cnp); if (error) { dp->i_nlink--; dp->i_flag |= IN_CHANGE; } bad: /* * No need to do an explicit VOP_TRUNCATE here, vrele will do this * for us because we set the link count to 0. */ if (error) { ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp); } else *ap->a_vpp = tvp; out: return (error); #undef DIRBLKSIZ #define DIRBLKSIZ DEV_BSIZE } /* * Rmdir system call. */ static int ext2_rmdir(struct vop_rmdir_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; int error; ip = VTOI(vp); dp = VTOI(dvp); /* * Verify the directory is empty (and valid). * (Rmdir ".." won't be valid since * ".." will contain a reference to * the current directory and thus be * non-empty.) */ if (ip->i_nlink != 2 || !ext2_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; } if ((dp->i_flags & APPEND) || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { error = EPERM; goto out; } /* * Delete reference to directory before purging * inode. If we crash in between, the directory * will be reattached to lost+found, */ error = ext2_dirremove(dvp, cnp); if (error) goto out; dp->i_nlink--; dp->i_flag |= IN_CHANGE; cache_purge(dvp); VOP_UNLOCK(dvp, 0); /* * Truncate inode. The only stuff left * in the directory is "." and "..". The * "." reference is inconsequential since * we're quashing it. The ".." reference * has already been adjusted above. We've * removed the "." reference and the reference * in the parent directory, but there may be * other hard links so decrement by 2 and * worry about them later. */ ip->i_nlink -= 2; error = ext2_truncate(vp, (off_t)0, IO_SYNC, cnp->cn_cred, cnp->cn_thread); cache_purge(ITOV(ip)); if (vn_lock(dvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { VOP_UNLOCK(vp, 0); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } out: return (error); } /* * symlink -- make a symbolic link */ static int ext2_symlink(struct vop_symlink_args *ap) { struct vnode *vp, **vpp = ap->a_vpp; struct inode *ip; int len, error; error = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, vpp, ap->a_cnp); if (error) return (error); vp = *vpp; len = strlen(ap->a_target); if (len < vp->v_mount->mnt_maxsymlinklen) { ip = VTOI(vp); bcopy(ap->a_target, (char *)ip->i_shortlink, len); ip->i_size = len; ip->i_flag |= IN_CHANGE | IN_UPDATE; } else error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, ap->a_cnp->cn_cred, NOCRED, NULL, NULL); if (error) vput(vp); return (error); } /* * Return target name of a symbolic link */ static int ext2_readlink(struct vop_readlink_args *ap) { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); int isize; isize = ip->i_size; if (isize < vp->v_mount->mnt_maxsymlinklen) { uiomove((char *)ip->i_shortlink, isize, ap->a_uio); return (0); } return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); } /* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. * * In order to be able to swap to a file, the ext2_bmaparray() operation may not * deadlock on memory. See ext2_bmap() for details. */ static int ext2_strategy(struct vop_strategy_args *ap) { struct buf *bp = ap->a_bp; struct vnode *vp = ap->a_vp; struct bufobj *bo; daddr_t blkno; int error; if (vp->v_type == VBLK || vp->v_type == VCHR) panic("ext2_strategy: spec"); if (bp->b_blkno == bp->b_lblkno) { error = ext2_bmaparray(vp, bp->b_lblkno, &blkno, NULL, NULL); bp->b_blkno = blkno; if (error) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return (0); } if ((long)bp->b_blkno == -1) vfs_bio_clrbuf(bp); } if ((long)bp->b_blkno == -1) { bufdone(bp); return (0); } bp->b_iooffset = dbtob(bp->b_blkno); bo = VFSTOEXT2(vp->v_mount)->um_bo; BO_STRATEGY(bo, bp); return (0); } /* * Print out the contents of an inode. */ static int ext2_print(struct vop_print_args *ap) { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); vn_printf(ip->i_devvp, "\tino %ju", (uintmax_t)ip->i_number); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * Close wrapper for fifos. * * Update the times on the inode then do device close. */ static int ext2fifo_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; VI_LOCK(vp); if (vp->v_usecount > 1) ext2_itimes_locked(vp); VI_UNLOCK(vp); return (fifo_specops.vop_close(ap)); } /* * Kqfilter wrapper for fifos. * * Fall through to ext2 kqfilter routines if needed */ static int ext2fifo_kqfilter(struct vop_kqfilter_args *ap) { int error; error = fifo_specops.vop_kqfilter(ap); if (error) error = vfs_kqfilter(ap); return (error); } /* * Return POSIX pathconf information applicable to ext2 filesystems. */ static int ext2_pathconf(struct vop_pathconf_args *ap) { int error = 0; switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = EXT2_LINK_MAX; break; case _PC_NAME_MAX: *ap->a_retval = NAME_MAX; break; case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; break; case _PC_PIPE_BUF: *ap->a_retval = PIPE_BUF; break; case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; break; case _PC_NO_TRUNC: *ap->a_retval = 1; break; case _PC_MIN_HOLE_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_ASYNC_IO: /* _PC_ASYNC_IO should have been handled by upper layers. */ KASSERT(0, ("_PC_ASYNC_IO should not get here")); error = EINVAL; break; case _PC_PRIO_IO: *ap->a_retval = 0; break; case _PC_SYNC_IO: *ap->a_retval = 0; break; case _PC_ALLOC_SIZE_MIN: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; break; case _PC_FILESIZEBITS: *ap->a_retval = 64; break; case _PC_REC_INCR_XFER_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_MAX_XFER_SIZE: *ap->a_retval = -1; /* means ``unlimited'' */ break; case _PC_REC_MIN_XFER_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_XFER_ALIGN: *ap->a_retval = PAGE_SIZE; break; case _PC_SYMLINK_MAX: *ap->a_retval = MAXPATHLEN; break; default: error = EINVAL; break; } return (error); } /* * Vnode pointer to File handle */ /* ARGSUSED */ static int ext2_vptofh(struct vop_vptofh_args *ap) { struct inode *ip; struct ufid *ufhp; ip = VTOI(ap->a_vp); ufhp = (struct ufid *)ap->a_fhp; ufhp->ufid_len = sizeof(struct ufid); ufhp->ufid_ino = ip->i_number; ufhp->ufid_gen = ip->i_gen; return (0); } /* * Initialize the vnode associated with a new inode, handle aliased * vnodes. */ int ext2_vinit(struct mount *mntp, struct vop_vector *fifoops, struct vnode **vpp) { struct inode *ip; struct vnode *vp; vp = *vpp; ip = VTOI(vp); vp->v_type = IFTOVT(ip->i_mode); if (vp->v_type == VFIFO) vp->v_op = fifoops; if (ip->i_number == EXT2_ROOTINO) vp->v_vflag |= VV_ROOT; ip->i_modrev = init_va_filerev(); *vpp = vp; return (0); } /* * Allocate a new inode. */ static int ext2_makeinode(int mode, struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) { struct inode *ip, *pdir; struct vnode *tvp; int error; pdir = VTOI(dvp); #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("ext2_makeinode: no name"); #endif *vpp = NULL; if ((mode & IFMT) == 0) mode |= IFREG; error = ext2_valloc(dvp, mode, cnp->cn_cred, &tvp); if (error) { return (error); } ip = VTOI(tvp); ip->i_gid = pdir->i_gid; #ifdef SUIDDIR { /* * if we are * not the owner of the directory, * and we are hacking owners here, (only do this where told to) * and we are not giving it TOO root, (would subvert quotas) * then go ahead and give it to the other user. * Note that this drops off the execute bits for security. */ if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (pdir->i_mode & ISUID) && (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { ip->i_uid = pdir->i_uid; mode &= ~07111; } else { ip->i_uid = cnp->cn_cred->cr_uid; } } #else ip->i_uid = cnp->cn_cred->cr_uid; #endif ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = mode; tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ ip->i_nlink = 1; if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred)) { if (priv_check_cred(cnp->cn_cred, PRIV_VFS_RETAINSUGID, 0)) ip->i_mode &= ~ISGID; } if (cnp->cn_flags & ISWHITEOUT) ip->i_flags |= UF_OPAQUE; /* * Make sure inode goes to disk before directory entry. */ error = ext2_update(tvp, !DOINGASYNC(tvp)); if (error) goto bad; error = ext2_direnter(ip, dvp, cnp); if (error) goto bad; *vpp = tvp; return (0); bad: /* * Write error occurred trying to update the inode * or the directory so must deallocate the inode. */ ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp); return (error); } /* * Vnode op for reading. */ static int ext2_read(struct vop_read_args *ap) { struct vnode *vp; struct inode *ip; int error; vp = ap->a_vp; ip = VTOI(vp); /*EXT4_EXT_LOCK(ip);*/ if (ip->i_flag & IN_E4EXTENTS) error = ext4_ext_read(ap); else error = ext2_ind_read(ap); /*EXT4_EXT_UNLOCK(ip);*/ return (error); } /* * Vnode op for reading. */ static int ext2_ind_read(struct vop_read_args *ap) { struct vnode *vp; struct inode *ip; struct uio *uio; struct m_ext2fs *fs; struct buf *bp; daddr_t lbn, nextlbn; off_t bytesinfile; long size, xfersize, blkoffset; int error, orig_resid, seqcount; int ioflag; vp = ap->a_vp; uio = ap->a_uio; ioflag = ap->a_ioflag; seqcount = ap->a_ioflag >> IO_SEQSHIFT; ip = VTOI(vp); #ifdef INVARIANTS if (uio->uio_rw != UIO_READ) panic("%s: mode", "ext2_read"); if (vp->v_type == VLNK) { if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) panic("%s: short symlink", "ext2_read"); } else if (vp->v_type != VREG && vp->v_type != VDIR) panic("%s: type %d", "ext2_read", vp->v_type); #endif orig_resid = uio->uio_resid; KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0")); if (orig_resid == 0) return (0); KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0")); fs = ip->i_e2fs; if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize) return (EOVERFLOW); for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) break; lbn = lblkno(fs, uio->uio_offset); nextlbn = lbn + 1; size = blksize(fs, ip, lbn); blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->e2fs_fsize - blkoffset; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; if (bytesinfile < xfersize) xfersize = bytesinfile; if (lblktosize(fs, nextlbn) >= ip->i_size) error = bread(vp, lbn, size, NOCRED, &bp); else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, ip->i_size, lbn, size, NOCRED, blkoffset + uio->uio_resid, seqcount, 0, &bp); } else if (seqcount > 1) { u_int nextsize = blksize(fs, ip, nextlbn); error = breadn(vp, lbn, size, &nextlbn, &nextsize, 1, NOCRED, &bp); } else error = bread(vp, lbn, size, NOCRED, &bp); if (error) { brelse(bp); bp = NULL; break; } /* * If IO_DIRECT then set B_DIRECT for the buffer. This * will cause us to attempt to release the buffer later on * and will cause the buffer cache to attempt to free the * underlying pages. */ if (ioflag & IO_DIRECT) bp->b_flags |= B_DIRECT; /* * We should only get non-zero b_resid when an I/O error * has occurred, which should cause us to break above. * However, if the short read did not cause an error, * then we want to ensure that we do not uiomove bad * or uninitialized data. */ size -= bp->b_resid; if (size < xfersize) { if (size == 0) break; xfersize = size; } error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); if (error) break; if (ioflag & (IO_VMIO|IO_DIRECT)) { /* * If it's VMIO or direct I/O, then we don't * need the buf, mark it available for * freeing. If it's non-direct VMIO, the VM has * the data. */ bp->b_flags |= B_RELBUF; brelse(bp); } else { /* * Otherwise let whoever * made the request take care of * freeing it. We just queue * it onto another list. */ bqrelse(bp); } } /* * This can only happen in the case of an error * because the loop above resets bp to NULL on each iteration * and on normal completion has not set a new value into it. * so it must have come from a 'break' statement */ if (bp != NULL) { if (ioflag & (IO_VMIO|IO_DIRECT)) { bp->b_flags |= B_RELBUF; brelse(bp); } else { bqrelse(bp); } } if ((error == 0 || uio->uio_resid != orig_resid) && (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) ip->i_flag |= IN_ACCESS; return (error); } static int ext2_ioctl(struct vop_ioctl_args *ap) { switch (ap->a_command) { case FIOSEEKDATA: case FIOSEEKHOLE: return (vn_bmap_seekhole(ap->a_vp, ap->a_command, (off_t *)ap->a_data, ap->a_cred)); default: return (ENOTTY); } } /* * this function handles ext4 extents block mapping */ static int ext4_ext_read(struct vop_read_args *ap) { + static unsigned char zeroes[EXT2_MAX_BLOCK_SIZE]; struct vnode *vp; struct inode *ip; struct uio *uio; struct m_ext2fs *fs; struct buf *bp; struct ext4_extent nex, *ep; struct ext4_extent_path path; daddr_t lbn, newblk; off_t bytesinfile; int cache_type; ssize_t orig_resid; int error; long size, xfersize, blkoffset; vp = ap->a_vp; ip = VTOI(vp); uio = ap->a_uio; memset(&path, 0, sizeof(path)); orig_resid = uio->uio_resid; KASSERT(orig_resid >= 0, ("%s: uio->uio_resid < 0", __func__)); if (orig_resid == 0) return (0); KASSERT(uio->uio_offset >= 0, ("%s: uio->uio_offset < 0", __func__)); fs = ip->i_e2fs; if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize) return (EOVERFLOW); while (uio->uio_resid > 0) { if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) break; lbn = lblkno(fs, uio->uio_offset); size = blksize(fs, ip, lbn); blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->e2fs_fsize - blkoffset; xfersize = MIN(xfersize, uio->uio_resid); xfersize = MIN(xfersize, bytesinfile); /* get block from ext4 extent cache */ cache_type = ext4_ext_in_cache(ip, lbn, &nex); switch (cache_type) { case EXT4_EXT_CACHE_NO: ext4_ext_find_extent(fs, ip, lbn, &path); - ep = path.ep_ext; + if (path.ep_is_sparse) + ep = &path.ep_sparse_ext; + else + ep = path.ep_ext; if (ep == NULL) return (EIO); - ext4_ext_put_cache(ip, ep, EXT4_EXT_CACHE_IN); + ext4_ext_put_cache(ip, ep, + path.ep_is_sparse ? EXT4_EXT_CACHE_GAP : EXT4_EXT_CACHE_IN); newblk = lbn - ep->e_blk + (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32); if (path.ep_bp != NULL) { brelse(path.ep_bp); path.ep_bp = NULL; } break; case EXT4_EXT_CACHE_GAP: /* block has not been allocated yet */ - return (0); + break; case EXT4_EXT_CACHE_IN: newblk = lbn - nex.e_blk + (nex.e_start_lo | (daddr_t)nex.e_start_hi << 32); break; default: panic("%s: invalid cache type", __func__); } - error = bread(ip->i_devvp, fsbtodb(fs, newblk), size, NOCRED, &bp); - if (error) { - brelse(bp); - return (error); - } + if (cache_type == EXT4_EXT_CACHE_GAP || + (cache_type == EXT4_EXT_CACHE_NO && path.ep_is_sparse)) { + if (xfersize > sizeof(zeroes)) + xfersize = sizeof(zeroes); + error = uiomove(zeroes, xfersize, uio); + if (error) + return (error); + } else { + error = bread(ip->i_devvp, fsbtodb(fs, newblk), size, + NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } - size -= bp->b_resid; - if (size < xfersize) { - if (size == 0) { - bqrelse(bp); - break; + size -= bp->b_resid; + if (size < xfersize) { + if (size == 0) { + bqrelse(bp); + break; + } + xfersize = size; } - xfersize = size; + error = uiomove(bp->b_data + blkoffset, xfersize, uio); + bqrelse(bp); + if (error) + return (error); } - error = uiomove(bp->b_data + blkoffset, (int)xfersize, uio); - bqrelse(bp); - if (error) - return (error); } return (0); } /* * Vnode op for writing. */ static int ext2_write(struct vop_write_args *ap) { struct vnode *vp; struct uio *uio; struct inode *ip; struct m_ext2fs *fs; struct buf *bp; daddr_t lbn; off_t osize; int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize; ioflag = ap->a_ioflag; uio = ap->a_uio; vp = ap->a_vp; seqcount = ioflag >> IO_SEQSHIFT; ip = VTOI(vp); #ifdef INVARIANTS if (uio->uio_rw != UIO_WRITE) panic("%s: mode", "ext2_write"); #endif switch (vp->v_type) { case VREG: if (ioflag & IO_APPEND) uio->uio_offset = ip->i_size; if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) return (EPERM); /* FALLTHROUGH */ case VLNK: break; case VDIR: /* XXX differs from ffs -- this is called from ext2_mkdir(). */ if ((ioflag & IO_SYNC) == 0) panic("ext2_write: nonsync dir write"); break; default: panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp, vp->v_type, (intmax_t)uio->uio_offset, (intmax_t)uio->uio_resid); } KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0")); KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0")); fs = ip->i_e2fs; if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize) return (EFBIG); /* * Maybe this should be above the vnode op call, but so long as * file servers have no limits, I don't think it matters. */ if (vn_rlimit_fsize(vp, uio, uio->uio_td)) return (EFBIG); resid = uio->uio_resid; osize = ip->i_size; if (seqcount > BA_SEQMAX) flags = BA_SEQMAX << BA_SEQSHIFT; else flags = seqcount << BA_SEQSHIFT; if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) flags |= IO_SYNC; for (error = 0; uio->uio_resid > 0;) { lbn = lblkno(fs, uio->uio_offset); blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->e2fs_fsize - blkoffset; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; if (uio->uio_offset + xfersize > ip->i_size) vnode_pager_setsize(vp, uio->uio_offset + xfersize); /* * We must perform a read-before-write if the transfer size * does not cover the entire buffer. */ if (fs->e2fs_bsize > xfersize) flags |= BA_CLRBUF; else flags &= ~BA_CLRBUF; error = ext2_balloc(ip, lbn, blkoffset + xfersize, ap->a_cred, &bp, flags); if (error != 0) break; if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL)) bp->b_flags |= B_NOCACHE; if (uio->uio_offset + xfersize > ip->i_size) ip->i_size = uio->uio_offset + xfersize; size = blksize(fs, ip, lbn) - bp->b_resid; if (size < xfersize) xfersize = size; error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); /* * If the buffer is not already filled and we encounter an * error while trying to fill it, we have to clear out any * garbage data from the pages instantiated for the buffer. * If we do not, a failed uiomove() during a write can leave * the prior contents of the pages exposed to a userland mmap. * * Note that we need only clear buffers with a transfer size * equal to the block size because buffers with a shorter * transfer size were cleared above by the call to ext2_balloc() * with the BA_CLRBUF flag set. * * If the source region for uiomove identically mmaps the * buffer, uiomove() performed the NOP copy, and the buffer * content remains valid because the page fault handler * validated the pages. */ if (error != 0 && (bp->b_flags & B_CACHE) == 0 && fs->e2fs_bsize == xfersize) vfs_bio_clrbuf(bp); if (ioflag & (IO_VMIO|IO_DIRECT)) { bp->b_flags |= B_RELBUF; } /* * If IO_SYNC each buffer is written synchronously. Otherwise * if we have a severe page deficiency write the buffer * asynchronously. Otherwise try to cluster, and if that * doesn't do it then either do an async write (if O_DIRECT), * or a delayed write (if not). */ if (ioflag & IO_SYNC) { (void)bwrite(bp); } else if (vm_page_count_severe() || buf_dirty_count_severe() || (ioflag & IO_ASYNC)) { bp->b_flags |= B_CLUSTEROK; bawrite(bp); } else if (xfersize + blkoffset == fs->e2fs_fsize) { if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { bp->b_flags |= B_CLUSTEROK; cluster_write(vp, bp, ip->i_size, seqcount, 0); } else { bawrite(bp); } } else if (ioflag & IO_DIRECT) { bp->b_flags |= B_CLUSTEROK; bawrite(bp); } else { bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } if (error || xfersize == 0) break; } /* * If we successfully wrote any data, and we are not the superuser * we clear the setuid and setgid bits as a precaution against * tampering. */ if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ap->a_cred) { if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) ip->i_mode &= ~(ISUID | ISGID); } if (error) { if (ioflag & IO_UNIT) { (void)ext2_truncate(vp, osize, ioflag & IO_SYNC, ap->a_cred, uio->uio_td); uio->uio_offset -= resid - uio->uio_resid; uio->uio_resid = resid; } } if (uio->uio_resid != resid) { ip->i_flag |= IN_CHANGE | IN_UPDATE; if (ioflag & IO_SYNC) error = ext2_update(vp, 1); } return (error); } Index: projects/clang380-import/sys/fs/smbfs/smbfs_smb.c =================================================================== --- projects/clang380-import/sys/fs/smbfs/smbfs_smb.c (revision 293686) +++ projects/clang380-import/sys/fs/smbfs/smbfs_smb.c (revision 293687) @@ -1,1472 +1,1472 @@ /*- * Copyright (c) 2000-2001 Boris Popov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #ifdef USE_MD5_HASH #include #endif #include #include #include #include #include #include #include /* * Lack of inode numbers leads us to the problem of generating them. * Partially this problem can be solved by having a dir/file cache * with inode numbers generated from the incremented by one counter. * However this way will require too much kernel memory, gives all * sorts of locking and consistency problems, not to mentinon counter overflows. * So, I'm decided to use a hash function to generate pseudo random (and unique) * inode numbers. */ static long smbfs_getino(struct smbnode *dnp, const char *name, int nmlen) { #ifdef USE_MD5_HASH MD5_CTX md5; u_int32_t state[4]; long ino; int i; MD5Init(&md5); MD5Update(&md5, name, nmlen); MD5Final((u_char *)state, &md5); for (i = 0, ino = 0; i < 4; i++) ino += state[i]; return dnp->n_ino + ino; #endif u_int32_t ino; ino = dnp->n_ino + smbfs_hash(name, nmlen); if (ino <= 2) ino += 3; return ino; } static int smbfs_smb_lockandx(struct smbnode *np, int op, u_int32_t pid, off_t start, off_t end, struct smb_cred *scred) { struct smb_share *ssp = np->n_mount->sm_share; struct smb_rq *rqp; struct mbchain *mbp; u_char ltype = 0; int error; if (op == SMB_LOCK_SHARED) ltype |= SMB_LOCKING_ANDX_SHARED_LOCK; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_LOCKING_ANDX, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_uint8(mbp, 0xff); /* secondary command */ mb_put_uint8(mbp, 0); /* MBZ */ mb_put_uint16le(mbp, 0); mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM); mb_put_uint8(mbp, ltype); /* locktype */ mb_put_uint8(mbp, 0); /* oplocklevel - 0 seems is NO_OPLOCK */ mb_put_uint32le(mbp, 0); /* timeout - break immediately */ mb_put_uint16le(mbp, op == SMB_LOCK_RELEASE ? 1 : 0); mb_put_uint16le(mbp, op == SMB_LOCK_RELEASE ? 0 : 1); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint16le(mbp, pid); mb_put_uint32le(mbp, start); mb_put_uint32le(mbp, end - start); smb_rq_bend(rqp); error = smb_rq_simple(rqp); smb_rq_done(rqp); return error; } int smbfs_smb_lock(struct smbnode *np, int op, caddr_t id, off_t start, off_t end, struct smb_cred *scred) { struct smb_share *ssp = np->n_mount->sm_share; if (SMB_DIALECT(SSTOVC(ssp)) < SMB_DIALECT_LANMAN1_0) /* * TODO: use LOCK_BYTE_RANGE here. */ return EINVAL; else return smbfs_smb_lockandx(np, op, (uintptr_t)id, start, end, scred); } static int smbfs_query_info_fs(struct smb_share *ssp, struct statfs *sbp, struct smb_cred *scred) { struct smb_t2rq *t2p; struct mbchain *mbp; struct mdchain *mdp; uint32_t bsize, bpu; int64_t units, funits; int error; error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_QUERY_FS_INFORMATION, scred, &t2p); if (error) return (error); mbp = &t2p->t2_tparam; mb_init(mbp); mb_put_uint16le(mbp, SMB_QUERY_FS_SIZE_INFO); t2p->t2_maxpcount = 2; t2p->t2_maxdcount = sizeof(int64_t) * 2 + sizeof(uint32_t) * 2; error = smb_t2_request(t2p); if (error) { smb_t2_done(t2p); return (error); } mdp = &t2p->t2_rdata; md_get_int64le(mdp, &units); md_get_int64le(mdp, &funits); md_get_uint32le(mdp, &bpu); md_get_uint32le(mdp, &bsize); sbp->f_bsize = bpu * bsize; /* fundamental filesystem block size */ sbp->f_blocks= (uint64_t)units; /* total data blocks in filesystem */ sbp->f_bfree = (uint64_t)funits;/* free blocks in fs */ sbp->f_bavail= (uint64_t)funits;/* free blocks avail to non-superuser */ sbp->f_files = 0xffff; /* total file nodes in filesystem */ sbp->f_ffree = 0xffff; /* free file nodes in fs */ smb_t2_done(t2p); return (0); } static int smbfs_query_info_alloc(struct smb_share *ssp, struct statfs *sbp, struct smb_cred *scred) { struct smb_t2rq *t2p; struct mbchain *mbp; struct mdchain *mdp; u_int16_t bsize; u_int32_t units, bpu, funits; int error; error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_QUERY_FS_INFORMATION, scred, &t2p); if (error) return error; mbp = &t2p->t2_tparam; mb_init(mbp); mb_put_uint16le(mbp, SMB_INFO_ALLOCATION); t2p->t2_maxpcount = 4; t2p->t2_maxdcount = 4 * 4 + 2; error = smb_t2_request(t2p); if (error) { smb_t2_done(t2p); return error; } mdp = &t2p->t2_rdata; md_get_uint32(mdp, NULL); /* fs id */ md_get_uint32le(mdp, &bpu); md_get_uint32le(mdp, &units); md_get_uint32le(mdp, &funits); md_get_uint16le(mdp, &bsize); sbp->f_bsize = bpu * bsize; /* fundamental filesystem block size */ sbp->f_blocks= units; /* total data blocks in filesystem */ sbp->f_bfree = funits; /* free blocks in fs */ sbp->f_bavail= funits; /* free blocks avail to non-superuser */ sbp->f_files = 0xffff; /* total file nodes in filesystem */ sbp->f_ffree = 0xffff; /* free file nodes in fs */ smb_t2_done(t2p); return 0; } static int smbfs_query_info_disk(struct smb_share *ssp, struct statfs *sbp, struct smb_cred *scred) { struct smb_rq *rqp; struct mdchain *mdp; u_int16_t units, bpu, bsize, funits; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_QUERY_INFORMATION_DISK, scred, &rqp); if (error) return (error); smb_rq_wstart(rqp); smb_rq_wend(rqp); smb_rq_bstart(rqp); smb_rq_bend(rqp); error = smb_rq_simple(rqp); if (error) { smb_rq_done(rqp); return error; } smb_rq_getreply(rqp, &mdp); md_get_uint16le(mdp, &units); md_get_uint16le(mdp, &bpu); md_get_uint16le(mdp, &bsize); md_get_uint16le(mdp, &funits); sbp->f_bsize = bpu * bsize; /* fundamental filesystem block size */ sbp->f_blocks= units; /* total data blocks in filesystem */ sbp->f_bfree = funits; /* free blocks in fs */ sbp->f_bavail= funits; /* free blocks avail to non-superuser */ sbp->f_files = 0xffff; /* total file nodes in filesystem */ sbp->f_ffree = 0xffff; /* free file nodes in fs */ smb_rq_done(rqp); return 0; } int smbfs_smb_statfs(struct smb_share *ssp, struct statfs *sbp, struct smb_cred *scred) { if (SMB_DIALECT(SSTOVC(ssp)) >= SMB_DIALECT_LANMAN2_0) { if (smbfs_query_info_fs(ssp, sbp, scred) == 0) return (0); if (smbfs_query_info_alloc(ssp, sbp, scred) == 0) return (0); } return (smbfs_query_info_disk(ssp, sbp, scred)); } static int smbfs_smb_seteof(struct smbnode *np, int64_t newsize, struct smb_cred *scred) { struct smb_t2rq *t2p; struct smb_share *ssp = np->n_mount->sm_share; struct mbchain *mbp; int error; error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_SET_FILE_INFORMATION, scred, &t2p); if (error) return error; mbp = &t2p->t2_tparam; mb_init(mbp); mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM); mb_put_uint16le(mbp, SMB_SET_FILE_END_OF_FILE_INFO); mb_put_uint32le(mbp, 0); mbp = &t2p->t2_tdata; mb_init(mbp); mb_put_int64le(mbp, newsize); mb_put_uint32le(mbp, 0); /* padding */ mb_put_uint16le(mbp, 0); t2p->t2_maxpcount = 2; t2p->t2_maxdcount = 0; error = smb_t2_request(t2p); smb_t2_done(t2p); return error; } static int smb_smb_flush(struct smbnode *np, struct smb_cred *scred) { struct smb_share *ssp = np->n_mount->sm_share; struct smb_rq *rqp; struct mbchain *mbp; int error; if ((np->n_flag & NOPEN) == 0 || !SMBTOV(np) || SMBTOV(np)->v_type != VREG) return 0; /* not a regular open file */ error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_FLUSH, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM); smb_rq_wend(rqp); smb_rq_bstart(rqp); smb_rq_bend(rqp); error = smb_rq_simple(rqp); smb_rq_done(rqp); if (!error) np->n_flag &= ~NFLUSHWIRE; return (error); } int smbfs_smb_flush(struct smbnode *np, struct smb_cred *scred) { if (np->n_flag & NFLUSHWIRE) return (smb_smb_flush(np, scred)); return (0); } int -smbfs_smb_setfsize(struct smbnode *np, int newsize, struct smb_cred *scred) +smbfs_smb_setfsize(struct smbnode *np, int64_t newsize, struct smb_cred *scred) { struct smb_share *ssp = np->n_mount->sm_share; struct smb_rq *rqp; struct mbchain *mbp; int error; - if (!smbfs_smb_seteof(np, (int64_t) newsize, scred)) { + if (!smbfs_smb_seteof(np, newsize, scred)) { np->n_flag |= NFLUSHWIRE; return (0); } - + /* XXX: We should use SMB_COM_WRITE_ANDX to support large offsets */ error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_WRITE, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM); mb_put_uint16le(mbp, 0); - mb_put_uint32le(mbp, newsize); + mb_put_uint32le(mbp, (uint32_t)newsize); mb_put_uint16le(mbp, 0); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_DATA); mb_put_uint16le(mbp, 0); smb_rq_bend(rqp); error = smb_rq_simple(rqp); smb_rq_done(rqp); return error; } int smbfs_smb_query_info(struct smbnode *np, const char *name, int len, struct smbfattr *fap, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = np->n_mount->sm_share; struct mbchain *mbp; struct mdchain *mdp; u_int8_t wc; int error; u_int16_t wattr; u_int32_t lint; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_QUERY_INFORMATION, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); do { error = smbfs_fullpath(mbp, SSTOVC(ssp), np, name, len); if (error) break; smb_rq_bend(rqp); error = smb_rq_simple(rqp); if (error) break; smb_rq_getreply(rqp, &mdp); if (md_get_uint8(mdp, &wc) != 0 || wc != 10) { error = EBADRPC; break; } md_get_uint16le(mdp, &wattr); fap->fa_attr = wattr; /* * Be careful using the time returned here, as * with FAT on NT4SP6, at least, the time returned is low * 32 bits of 100s of nanoseconds (since 1601) so it rolls * over about every seven minutes! */ md_get_uint32le(mdp, &lint); /* specs: secs since 1970 */ if (lint) /* avoid bogus zero returns */ smb_time_server2local(lint, SSTOVC(ssp)->vc_sopt.sv_tz, &fap->fa_mtime); md_get_uint32le(mdp, &lint); fap->fa_size = lint; } while(0); smb_rq_done(rqp); return error; } /* * Set DOS file attributes. mtime should be NULL for dialects above lm10 */ int smbfs_smb_setpattr(struct smbnode *np, u_int16_t attr, struct timespec *mtime, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = np->n_mount->sm_share; struct mbchain *mbp; u_long time; int error, svtz; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_SET_INFORMATION, scred, &rqp); if (error) return (error); svtz = SSTOVC(ssp)->vc_sopt.sv_tz; smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_uint16le(mbp, attr); if (mtime) { smb_time_local2server(mtime, svtz, &time); } else time = 0; mb_put_uint32le(mbp, time); /* mtime */ mb_put_mem(mbp, NULL, 5 * 2, MB_MZERO); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); do { error = smbfs_fullpath(mbp, SSTOVC(ssp), np, NULL, 0); if (error) break; mb_put_uint8(mbp, SMB_DT_ASCII); if (SMB_UNICODE_STRINGS(SSTOVC(ssp))) { mb_put_padbyte(mbp); mb_put_uint8(mbp, 0); /* 1st byte of NULL Unicode char */ } mb_put_uint8(mbp, 0); smb_rq_bend(rqp); error = smb_rq_simple(rqp); if (error) { SMBERROR("smb_rq_simple(rqp) => error %d\n", error); break; } } while(0); smb_rq_done(rqp); return error; } /* * Note, win95 doesn't support this call. */ int smbfs_smb_setptime2(struct smbnode *np, struct timespec *mtime, struct timespec *atime, int attr, struct smb_cred *scred) { struct smb_t2rq *t2p; struct smb_share *ssp = np->n_mount->sm_share; struct smb_vc *vcp = SSTOVC(ssp); struct mbchain *mbp; u_int16_t date, time; int error, tzoff; error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_SET_PATH_INFORMATION, scred, &t2p); if (error) return error; mbp = &t2p->t2_tparam; mb_init(mbp); mb_put_uint16le(mbp, SMB_INFO_STANDARD); mb_put_uint32le(mbp, 0); /* MBZ */ /* mb_put_uint8(mbp, SMB_DT_ASCII); specs incorrect */ error = smbfs_fullpath(mbp, vcp, np, NULL, 0); if (error) { smb_t2_done(t2p); return error; } tzoff = vcp->vc_sopt.sv_tz; mbp = &t2p->t2_tdata; mb_init(mbp); mb_put_uint32le(mbp, 0); /* creation time */ if (atime) smb_time_unix2dos(atime, tzoff, &date, &time, NULL); else time = date = 0; mb_put_uint16le(mbp, date); mb_put_uint16le(mbp, time); if (mtime) smb_time_unix2dos(mtime, tzoff, &date, &time, NULL); else time = date = 0; mb_put_uint16le(mbp, date); mb_put_uint16le(mbp, time); mb_put_uint32le(mbp, 0); /* file size */ mb_put_uint32le(mbp, 0); /* allocation unit size */ mb_put_uint16le(mbp, attr); /* DOS attr */ mb_put_uint32le(mbp, 0); /* EA size */ t2p->t2_maxpcount = 5 * 2; t2p->t2_maxdcount = vcp->vc_txmax; error = smb_t2_request(t2p); smb_t2_done(t2p); return error; } /* * NT level. Specially for win9x */ int smbfs_smb_setpattrNT(struct smbnode *np, u_short attr, struct timespec *mtime, struct timespec *atime, struct smb_cred *scred) { struct smb_t2rq *t2p; struct smb_share *ssp = np->n_mount->sm_share; struct smb_vc *vcp = SSTOVC(ssp); struct mbchain *mbp; int64_t tm; int error, tzoff; error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_SET_PATH_INFORMATION, scred, &t2p); if (error) return error; mbp = &t2p->t2_tparam; mb_init(mbp); mb_put_uint16le(mbp, SMB_SET_FILE_BASIC_INFO); mb_put_uint32le(mbp, 0); /* MBZ */ /* mb_put_uint8(mbp, SMB_DT_ASCII); specs incorrect */ error = smbfs_fullpath(mbp, vcp, np, NULL, 0); if (error) { smb_t2_done(t2p); return error; } tzoff = vcp->vc_sopt.sv_tz; mbp = &t2p->t2_tdata; mb_init(mbp); mb_put_int64le(mbp, 0); /* creation time */ if (atime) { smb_time_local2NT(atime, tzoff, &tm); } else tm = 0; mb_put_int64le(mbp, tm); if (mtime) { smb_time_local2NT(mtime, tzoff, &tm); } else tm = 0; mb_put_int64le(mbp, tm); mb_put_int64le(mbp, tm); /* change time */ mb_put_uint32le(mbp, attr); /* attr */ t2p->t2_maxpcount = 24; t2p->t2_maxdcount = 56; error = smb_t2_request(t2p); smb_t2_done(t2p); return error; } /* * Set file atime and mtime. Doesn't supported by core dialect. */ int smbfs_smb_setftime(struct smbnode *np, struct timespec *mtime, struct timespec *atime, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = np->n_mount->sm_share; struct mbchain *mbp; u_int16_t date, time; int error, tzoff; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_SET_INFORMATION2, scred, &rqp); if (error) return (error); tzoff = SSTOVC(ssp)->vc_sopt.sv_tz; smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM); mb_put_uint32le(mbp, 0); /* creation time */ if (atime) smb_time_unix2dos(atime, tzoff, &date, &time, NULL); else time = date = 0; mb_put_uint16le(mbp, date); mb_put_uint16le(mbp, time); if (mtime) smb_time_unix2dos(mtime, tzoff, &date, &time, NULL); else time = date = 0; mb_put_uint16le(mbp, date); mb_put_uint16le(mbp, time); smb_rq_wend(rqp); smb_rq_bstart(rqp); smb_rq_bend(rqp); error = smb_rq_simple(rqp); SMBSDEBUG("%d\n", error); smb_rq_done(rqp); return error; } /* * Set DOS file attributes. * Looks like this call can be used only if SMB_CAP_NT_SMBS bit is on. */ int smbfs_smb_setfattrNT(struct smbnode *np, u_int16_t attr, struct timespec *mtime, struct timespec *atime, struct smb_cred *scred) { struct smb_t2rq *t2p; struct smb_share *ssp = np->n_mount->sm_share; struct mbchain *mbp; int64_t tm; int error, svtz; error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_SET_FILE_INFORMATION, scred, &t2p); if (error) return error; svtz = SSTOVC(ssp)->vc_sopt.sv_tz; mbp = &t2p->t2_tparam; mb_init(mbp); mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM); mb_put_uint16le(mbp, SMB_SET_FILE_BASIC_INFO); mb_put_uint32le(mbp, 0); mbp = &t2p->t2_tdata; mb_init(mbp); mb_put_int64le(mbp, 0); /* creation time */ if (atime) { smb_time_local2NT(atime, svtz, &tm); } else tm = 0; mb_put_int64le(mbp, tm); if (mtime) { smb_time_local2NT(mtime, svtz, &tm); } else tm = 0; mb_put_int64le(mbp, tm); mb_put_int64le(mbp, tm); /* change time */ mb_put_uint16le(mbp, attr); mb_put_uint32le(mbp, 0); /* padding */ mb_put_uint16le(mbp, 0); t2p->t2_maxpcount = 2; t2p->t2_maxdcount = 0; error = smb_t2_request(t2p); smb_t2_done(t2p); return error; } int smbfs_smb_open(struct smbnode *np, int accmode, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = np->n_mount->sm_share; struct mbchain *mbp; struct mdchain *mdp; u_int8_t wc; u_int16_t fid, wattr, grantedmode; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_OPEN, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_uint16le(mbp, accmode); mb_put_uint16le(mbp, SMB_FA_SYSTEM | SMB_FA_HIDDEN); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); do { error = smbfs_fullpath(mbp, SSTOVC(ssp), np, NULL, 0); if (error) break; smb_rq_bend(rqp); error = smb_rq_simple(rqp); if (error) break; smb_rq_getreply(rqp, &mdp); if (md_get_uint8(mdp, &wc) != 0 || wc != 7) { error = EBADRPC; break; } md_get_uint16(mdp, &fid); md_get_uint16le(mdp, &wattr); md_get_uint32(mdp, NULL); /* mtime */ md_get_uint32(mdp, NULL); /* fsize */ md_get_uint16le(mdp, &grantedmode); /* * TODO: refresh attributes from this reply */ } while(0); smb_rq_done(rqp); if (error) return error; np->n_fid = fid; np->n_rwstate = grantedmode; return 0; } int smbfs_smb_close(struct smb_share *ssp, u_int16_t fid, struct timespec *mtime, struct smb_cred *scred) { struct smb_rq *rqp; struct mbchain *mbp; u_long time; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_CLOSE, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_mem(mbp, (caddr_t)&fid, sizeof(fid), MB_MSYSTEM); if (mtime) { smb_time_local2server(mtime, SSTOVC(ssp)->vc_sopt.sv_tz, &time); } else time = 0; mb_put_uint32le(mbp, time); smb_rq_wend(rqp); smb_rq_bstart(rqp); smb_rq_bend(rqp); error = smb_rq_simple(rqp); smb_rq_done(rqp); return error; } int smbfs_smb_create(struct smbnode *dnp, const char *name, int nmlen, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = dnp->n_mount->sm_share; struct mbchain *mbp; struct mdchain *mdp; struct timespec ctime; u_int8_t wc; u_int16_t fid; u_long tm; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_CREATE, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_uint16le(mbp, SMB_FA_ARCHIVE); /* attributes */ nanotime(&ctime); smb_time_local2server(&ctime, SSTOVC(ssp)->vc_sopt.sv_tz, &tm); mb_put_uint32le(mbp, tm); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); error = smbfs_fullpath(mbp, SSTOVC(ssp), dnp, name, nmlen); if (!error) { smb_rq_bend(rqp); error = smb_rq_simple(rqp); if (!error) { smb_rq_getreply(rqp, &mdp); md_get_uint8(mdp, &wc); if (wc == 1) md_get_uint16(mdp, &fid); else error = EBADRPC; } } smb_rq_done(rqp); if (error) return error; smbfs_smb_close(ssp, fid, &ctime, scred); return error; } int smbfs_smb_delete(struct smbnode *np, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = np->n_mount->sm_share; struct mbchain *mbp; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_DELETE, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_uint16le(mbp, SMB_FA_SYSTEM | SMB_FA_HIDDEN); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); error = smbfs_fullpath(mbp, SSTOVC(ssp), np, NULL, 0); if (!error) { smb_rq_bend(rqp); error = smb_rq_simple(rqp); } smb_rq_done(rqp); return error; } int smbfs_smb_rename(struct smbnode *src, struct smbnode *tdnp, const char *tname, int tnmlen, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = src->n_mount->sm_share; struct mbchain *mbp; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_RENAME, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_uint16le(mbp, SMB_FA_SYSTEM | SMB_FA_HIDDEN); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); do { error = smbfs_fullpath(mbp, SSTOVC(ssp), src, NULL, 0); if (error) break; mb_put_uint8(mbp, SMB_DT_ASCII); error = smbfs_fullpath(mbp, SSTOVC(ssp), tdnp, tname, tnmlen); if (error) break; smb_rq_bend(rqp); error = smb_rq_simple(rqp); } while(0); smb_rq_done(rqp); return error; } int smbfs_smb_move(struct smbnode *src, struct smbnode *tdnp, const char *tname, int tnmlen, u_int16_t flags, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = src->n_mount->sm_share; struct mbchain *mbp; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_MOVE, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_uint16le(mbp, SMB_TID_UNKNOWN); mb_put_uint16le(mbp, 0x20); /* delete target file */ mb_put_uint16le(mbp, flags); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); do { error = smbfs_fullpath(mbp, SSTOVC(ssp), src, NULL, 0); if (error) break; mb_put_uint8(mbp, SMB_DT_ASCII); error = smbfs_fullpath(mbp, SSTOVC(ssp), tdnp, tname, tnmlen); if (error) break; smb_rq_bend(rqp); error = smb_rq_simple(rqp); } while(0); smb_rq_done(rqp); return error; } int smbfs_smb_mkdir(struct smbnode *dnp, const char *name, int len, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = dnp->n_mount->sm_share; struct mbchain *mbp; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_CREATE_DIRECTORY, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); error = smbfs_fullpath(mbp, SSTOVC(ssp), dnp, name, len); if (!error) { smb_rq_bend(rqp); error = smb_rq_simple(rqp); } smb_rq_done(rqp); return error; } int smbfs_smb_rmdir(struct smbnode *np, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = np->n_mount->sm_share; struct mbchain *mbp; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_DELETE_DIRECTORY, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); error = smbfs_fullpath(mbp, SSTOVC(ssp), np, NULL, 0); if (!error) { smb_rq_bend(rqp); error = smb_rq_simple(rqp); } smb_rq_done(rqp); return error; } static int smbfs_smb_search(struct smbfs_fctx *ctx) { struct smb_vc *vcp = SSTOVC(ctx->f_ssp); struct smb_rq *rqp; struct mbchain *mbp; struct mdchain *mdp; u_int8_t wc, bt; u_int16_t ec, dlen, bc; int maxent, error, iseof = 0; maxent = min(ctx->f_left, (vcp->vc_txmax - SMB_HDRLEN - 3) / SMB_DENTRYLEN); if (ctx->f_rq) { smb_rq_done(ctx->f_rq); ctx->f_rq = NULL; } error = smb_rq_alloc(SSTOCP(ctx->f_ssp), SMB_COM_SEARCH, ctx->f_scred, &rqp); if (error) return (error); ctx->f_rq = rqp; smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_uint16le(mbp, maxent); /* max entries to return */ mb_put_uint16le(mbp, ctx->f_attrmask); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); /* buffer format */ if (ctx->f_flags & SMBFS_RDD_FINDFIRST) { error = smbfs_fullpath(mbp, vcp, ctx->f_dnp, ctx->f_wildcard, ctx->f_wclen); if (error) return error; mb_put_uint8(mbp, SMB_DT_VARIABLE); mb_put_uint16le(mbp, 0); /* context length */ ctx->f_flags &= ~SMBFS_RDD_FINDFIRST; } else { if (SMB_UNICODE_STRINGS(vcp)) { mb_put_padbyte(mbp); mb_put_uint8(mbp, 0); } mb_put_uint8(mbp, 0); /* file name length */ mb_put_uint8(mbp, SMB_DT_VARIABLE); mb_put_uint16le(mbp, SMB_SKEYLEN); mb_put_mem(mbp, ctx->f_skey, SMB_SKEYLEN, MB_MSYSTEM); } smb_rq_bend(rqp); error = smb_rq_simple(rqp); if (error) { if (rqp->sr_errclass == ERRDOS && rqp->sr_serror == ERRnofiles) { error = 0; iseof = 1; ctx->f_flags |= SMBFS_RDD_EOF; } else return error; } smb_rq_getreply(rqp, &mdp); md_get_uint8(mdp, &wc); if (wc != 1) return iseof ? ENOENT : EBADRPC; md_get_uint16le(mdp, &ec); if (ec == 0) return ENOENT; ctx->f_ecnt = ec; md_get_uint16le(mdp, &bc); if (bc < 3) return EBADRPC; bc -= 3; md_get_uint8(mdp, &bt); if (bt != SMB_DT_VARIABLE) return EBADRPC; md_get_uint16le(mdp, &dlen); if (dlen != bc || dlen % SMB_DENTRYLEN != 0) return EBADRPC; return 0; } static int smbfs_findopenLM1(struct smbfs_fctx *ctx, struct smbnode *dnp, const char *wildcard, int wclen, int attr, struct smb_cred *scred) { ctx->f_attrmask = attr; if (wildcard) { if (wclen == 1 && wildcard[0] == '*') { ctx->f_wildcard = "*.*"; ctx->f_wclen = 3; } else { ctx->f_wildcard = wildcard; ctx->f_wclen = wclen; } } else { ctx->f_wildcard = NULL; ctx->f_wclen = 0; } ctx->f_name = ctx->f_fname; return 0; } static int smbfs_findnextLM1(struct smbfs_fctx *ctx, int limit) { struct mdchain *mbp; struct smb_rq *rqp; char *cp; u_int8_t battr; u_int16_t date, time; u_int32_t size; int error; if (ctx->f_ecnt == 0) { if (ctx->f_flags & SMBFS_RDD_EOF) return ENOENT; ctx->f_left = ctx->f_limit = limit; error = smbfs_smb_search(ctx); if (error) return error; } rqp = ctx->f_rq; smb_rq_getreply(rqp, &mbp); md_get_mem(mbp, ctx->f_skey, SMB_SKEYLEN, MB_MSYSTEM); md_get_uint8(mbp, &battr); md_get_uint16le(mbp, &time); md_get_uint16le(mbp, &date); md_get_uint32le(mbp, &size); cp = ctx->f_name; md_get_mem(mbp, cp, sizeof(ctx->f_fname), MB_MSYSTEM); cp[sizeof(ctx->f_fname) - 1] = 0; cp += strlen(cp) - 1; while (*cp == ' ' && cp >= ctx->f_name) *cp-- = 0; ctx->f_attr.fa_attr = battr; smb_dos2unixtime(date, time, 0, rqp->sr_vc->vc_sopt.sv_tz, &ctx->f_attr.fa_mtime); ctx->f_attr.fa_size = size; ctx->f_nmlen = strlen(ctx->f_name); ctx->f_ecnt--; ctx->f_left--; return 0; } static int smbfs_findcloseLM1(struct smbfs_fctx *ctx) { if (ctx->f_rq) smb_rq_done(ctx->f_rq); return 0; } /* * TRANS2_FIND_FIRST2/NEXT2, used for NT LM12 dialect */ static int smbfs_smb_trans2find2(struct smbfs_fctx *ctx) { struct smb_t2rq *t2p; struct smb_vc *vcp = SSTOVC(ctx->f_ssp); struct mbchain *mbp; struct mdchain *mdp; u_int16_t tw, flags; int error; if (ctx->f_t2) { smb_t2_done(ctx->f_t2); ctx->f_t2 = NULL; } ctx->f_flags &= ~SMBFS_RDD_GOTRNAME; flags = 8 | 2; /* | */ if (ctx->f_flags & SMBFS_RDD_FINDSINGLE) { flags |= 1; /* close search after this request */ ctx->f_flags |= SMBFS_RDD_NOCLOSE; } if (ctx->f_flags & SMBFS_RDD_FINDFIRST) { error = smb_t2_alloc(SSTOCP(ctx->f_ssp), SMB_TRANS2_FIND_FIRST2, ctx->f_scred, &t2p); if (error) return error; ctx->f_t2 = t2p; mbp = &t2p->t2_tparam; mb_init(mbp); mb_put_uint16le(mbp, ctx->f_attrmask); mb_put_uint16le(mbp, ctx->f_limit); mb_put_uint16le(mbp, flags); mb_put_uint16le(mbp, ctx->f_infolevel); mb_put_uint32le(mbp, 0); error = smbfs_fullpath(mbp, vcp, ctx->f_dnp, ctx->f_wildcard, ctx->f_wclen); if (error) return error; } else { error = smb_t2_alloc(SSTOCP(ctx->f_ssp), SMB_TRANS2_FIND_NEXT2, ctx->f_scred, &t2p); if (error) return error; ctx->f_t2 = t2p; mbp = &t2p->t2_tparam; mb_init(mbp); mb_put_mem(mbp, (caddr_t)&ctx->f_Sid, 2, MB_MSYSTEM); mb_put_uint16le(mbp, ctx->f_limit); mb_put_uint16le(mbp, ctx->f_infolevel); mb_put_uint32le(mbp, 0); /* resume key */ mb_put_uint16le(mbp, flags); if (ctx->f_rname) mb_put_mem(mbp, ctx->f_rname, ctx->f_rnamelen + 1, MB_MSYSTEM); else mb_put_uint8(mbp, 0); /* resume file name */ #if 0 struct timeval tv; tv.tv_sec = 0; tv.tv_usec = 200 * 1000; /* 200ms */ if (vcp->vc_flags & SMBC_WIN95) { /* * some implementations suggests to sleep here * for 200ms, due to the bug in the Win95. * I've didn't notice any problem, but put code * for it. */ pause("fix95", tvtohz(&tv)); } #endif } t2p->t2_maxpcount = 5 * 2; t2p->t2_maxdcount = vcp->vc_txmax; error = smb_t2_request(t2p); if (error) return error; mdp = &t2p->t2_rparam; if (ctx->f_flags & SMBFS_RDD_FINDFIRST) { if ((error = md_get_uint16(mdp, &ctx->f_Sid)) != 0) return error; ctx->f_flags &= ~SMBFS_RDD_FINDFIRST; } if ((error = md_get_uint16le(mdp, &tw)) != 0) return error; ctx->f_ecnt = tw; if ((error = md_get_uint16le(mdp, &tw)) != 0) return error; if (tw) ctx->f_flags |= SMBFS_RDD_EOF | SMBFS_RDD_NOCLOSE; if ((error = md_get_uint16le(mdp, &tw)) != 0) return error; if ((error = md_get_uint16le(mdp, &tw)) != 0) return error; if (ctx->f_ecnt == 0) { ctx->f_flags |= SMBFS_RDD_EOF | SMBFS_RDD_NOCLOSE; return ENOENT; } ctx->f_rnameofs = tw; mdp = &t2p->t2_rdata; if (mdp->md_top == NULL) { printf("bug: ecnt = %d, but data is NULL (please report)\n", ctx->f_ecnt); return ENOENT; } if (mdp->md_top->m_len == 0) { printf("bug: ecnt = %d, but m_len = 0 and m_next = %p (please report)\n", ctx->f_ecnt,mbp->mb_top->m_next); return ENOENT; } ctx->f_eofs = 0; return 0; } static int smbfs_smb_findclose2(struct smbfs_fctx *ctx) { struct smb_rq *rqp; struct mbchain *mbp; int error; error = smb_rq_alloc(SSTOCP(ctx->f_ssp), SMB_COM_FIND_CLOSE2, ctx->f_scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_mem(mbp, (caddr_t)&ctx->f_Sid, 2, MB_MSYSTEM); smb_rq_wend(rqp); smb_rq_bstart(rqp); smb_rq_bend(rqp); error = smb_rq_simple(rqp); smb_rq_done(rqp); return error; } static int smbfs_findopenLM2(struct smbfs_fctx *ctx, struct smbnode *dnp, const char *wildcard, int wclen, int attr, struct smb_cred *scred) { if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) { ctx->f_name = malloc(SMB_MAXFNAMELEN * 2, M_SMBFSDATA, M_WAITOK); } else ctx->f_name = malloc(SMB_MAXFNAMELEN, M_SMBFSDATA, M_WAITOK); ctx->f_infolevel = SMB_DIALECT(SSTOVC(ctx->f_ssp)) < SMB_DIALECT_NTLM0_12 ? SMB_INFO_STANDARD : SMB_FIND_FILE_DIRECTORY_INFO; ctx->f_attrmask = attr; ctx->f_wildcard = wildcard; ctx->f_wclen = wclen; return 0; } static int smbfs_findnextLM2(struct smbfs_fctx *ctx, int limit) { struct mdchain *mbp; struct smb_t2rq *t2p; char *cp; u_int8_t tb; u_int16_t date, time, wattr; u_int32_t size, next, dattr; int64_t lint; int error, svtz, cnt, fxsz, nmlen, recsz; if (ctx->f_ecnt == 0) { if (ctx->f_flags & SMBFS_RDD_EOF) return ENOENT; ctx->f_left = ctx->f_limit = limit; error = smbfs_smb_trans2find2(ctx); if (error) return error; } t2p = ctx->f_t2; mbp = &t2p->t2_rdata; svtz = SSTOVC(ctx->f_ssp)->vc_sopt.sv_tz; switch (ctx->f_infolevel) { case SMB_INFO_STANDARD: next = 0; fxsz = 0; md_get_uint16le(mbp, &date); md_get_uint16le(mbp, &time); /* creation time */ md_get_uint16le(mbp, &date); md_get_uint16le(mbp, &time); /* access time */ smb_dos2unixtime(date, time, 0, svtz, &ctx->f_attr.fa_atime); md_get_uint16le(mbp, &date); md_get_uint16le(mbp, &time); /* access time */ smb_dos2unixtime(date, time, 0, svtz, &ctx->f_attr.fa_mtime); md_get_uint32le(mbp, &size); ctx->f_attr.fa_size = size; md_get_uint32(mbp, NULL); /* allocation size */ md_get_uint16le(mbp, &wattr); ctx->f_attr.fa_attr = wattr; md_get_uint8(mbp, &tb); size = nmlen = tb; fxsz = 23; recsz = next = 24 + nmlen; /* docs misses zero byte at end */ break; case SMB_FIND_FILE_DIRECTORY_INFO: md_get_uint32le(mbp, &next); md_get_uint32(mbp, NULL); /* file index */ md_get_int64(mbp, NULL); /* creation time */ md_get_int64le(mbp, &lint); smb_time_NT2local(lint, svtz, &ctx->f_attr.fa_atime); md_get_int64le(mbp, &lint); smb_time_NT2local(lint, svtz, &ctx->f_attr.fa_mtime); md_get_int64le(mbp, &lint); smb_time_NT2local(lint, svtz, &ctx->f_attr.fa_ctime); md_get_int64le(mbp, &lint); /* file size */ ctx->f_attr.fa_size = lint; md_get_int64(mbp, NULL); /* real size (should use) */ md_get_uint32le(mbp, &dattr); /* EA */ ctx->f_attr.fa_attr = dattr; md_get_uint32le(mbp, &size); /* name len */ fxsz = 64; recsz = next ? next : fxsz + size; break; default: SMBERROR("unexpected info level %d\n", ctx->f_infolevel); return EINVAL; } if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) { nmlen = min(size, SMB_MAXFNAMELEN * 2); } else nmlen = min(size, SMB_MAXFNAMELEN); cp = ctx->f_name; error = md_get_mem(mbp, cp, nmlen, MB_MSYSTEM); if (error) return error; if (next) { cnt = next - nmlen - fxsz; if (cnt > 0) md_get_mem(mbp, NULL, cnt, MB_MSYSTEM); else if (cnt < 0) { SMBERROR("out of sync\n"); return EBADRPC; } } if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) { if (nmlen > 1 && cp[nmlen - 1] == 0 && cp[nmlen - 2] == 0) nmlen -= 2; } else if (nmlen && cp[nmlen - 1] == 0) nmlen--; if (nmlen == 0) return EBADRPC; next = ctx->f_eofs + recsz; if (ctx->f_rnameofs && (ctx->f_flags & SMBFS_RDD_GOTRNAME) == 0 && (ctx->f_rnameofs >= ctx->f_eofs && ctx->f_rnameofs < next)) { /* * Server needs a resume filename. */ if (ctx->f_rnamelen <= nmlen) { if (ctx->f_rname) free(ctx->f_rname, M_SMBFSDATA); ctx->f_rname = malloc(nmlen + 1, M_SMBFSDATA, M_WAITOK); ctx->f_rnamelen = nmlen; } bcopy(ctx->f_name, ctx->f_rname, nmlen); ctx->f_rname[nmlen] = 0; ctx->f_flags |= SMBFS_RDD_GOTRNAME; } ctx->f_nmlen = nmlen; ctx->f_eofs = next; ctx->f_ecnt--; ctx->f_left--; return 0; } static int smbfs_findcloseLM2(struct smbfs_fctx *ctx) { if (ctx->f_name) free(ctx->f_name, M_SMBFSDATA); if (ctx->f_t2) smb_t2_done(ctx->f_t2); if ((ctx->f_flags & SMBFS_RDD_NOCLOSE) == 0) smbfs_smb_findclose2(ctx); return 0; } int smbfs_findopen(struct smbnode *dnp, const char *wildcard, int wclen, int attr, struct smb_cred *scred, struct smbfs_fctx **ctxpp) { struct smbfs_fctx *ctx; int error; ctx = malloc(sizeof(*ctx), M_SMBFSDATA, M_WAITOK | M_ZERO); ctx->f_ssp = dnp->n_mount->sm_share; ctx->f_dnp = dnp; ctx->f_flags = SMBFS_RDD_FINDFIRST; ctx->f_scred = scred; if (SMB_DIALECT(SSTOVC(ctx->f_ssp)) < SMB_DIALECT_LANMAN2_0 || (dnp->n_mount->sm_flags & SMBFS_MOUNT_NO_LONG)) { ctx->f_flags |= SMBFS_RDD_USESEARCH; error = smbfs_findopenLM1(ctx, dnp, wildcard, wclen, attr, scred); } else error = smbfs_findopenLM2(ctx, dnp, wildcard, wclen, attr, scred); if (error) smbfs_findclose(ctx, scred); else *ctxpp = ctx; return error; } int smbfs_findnext(struct smbfs_fctx *ctx, int limit, struct smb_cred *scred) { int error; if (limit == 0) limit = 1000000; else if (limit > 1) limit *= 4; /* imperical */ ctx->f_scred = scred; for (;;) { if (ctx->f_flags & SMBFS_RDD_USESEARCH) { error = smbfs_findnextLM1(ctx, limit); } else error = smbfs_findnextLM2(ctx, limit); if (error) return error; if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) { if ((ctx->f_nmlen == 2 && *(u_int16_t *)ctx->f_name == htole16(0x002e)) || (ctx->f_nmlen == 4 && *(u_int32_t *)ctx->f_name == htole32(0x002e002e))) continue; } else if ((ctx->f_nmlen == 1 && ctx->f_name[0] == '.') || (ctx->f_nmlen == 2 && ctx->f_name[0] == '.' && ctx->f_name[1] == '.')) continue; break; } smbfs_fname_tolocal(SSTOVC(ctx->f_ssp), ctx->f_name, &ctx->f_nmlen, ctx->f_dnp->n_mount->sm_caseopt); ctx->f_attr.fa_ino = smbfs_getino(ctx->f_dnp, ctx->f_name, ctx->f_nmlen); return 0; } int smbfs_findclose(struct smbfs_fctx *ctx, struct smb_cred *scred) { ctx->f_scred = scred; if (ctx->f_flags & SMBFS_RDD_USESEARCH) { smbfs_findcloseLM1(ctx); } else smbfs_findcloseLM2(ctx); if (ctx->f_rname) free(ctx->f_rname, M_SMBFSDATA); free(ctx, M_SMBFSDATA); return 0; } int smbfs_smb_lookup(struct smbnode *dnp, const char *name, int nmlen, struct smbfattr *fap, struct smb_cred *scred) { struct smbfs_fctx *ctx; int error; if (dnp == NULL || (dnp->n_ino == 2 && name == NULL)) { bzero(fap, sizeof(*fap)); fap->fa_attr = SMB_FA_DIR; fap->fa_ino = 2; return 0; } MPASS(!(nmlen == 2 && name[0] == '.' && name[1] == '.')); MPASS(!(nmlen == 1 && name[0] == '.')); ASSERT_VOP_ELOCKED(dnp->n_vnode, "smbfs_smb_lookup"); error = smbfs_findopen(dnp, name, nmlen, SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR, scred, &ctx); if (error) return error; ctx->f_flags |= SMBFS_RDD_FINDSINGLE; error = smbfs_findnext(ctx, 1, scred); if (error == 0) { *fap = ctx->f_attr; if (name == NULL) fap->fa_ino = dnp->n_ino; } smbfs_findclose(ctx, scred); return error; } Index: projects/clang380-import/sys/fs/smbfs/smbfs_subr.h =================================================================== --- projects/clang380-import/sys/fs/smbfs/smbfs_subr.h (revision 293686) +++ projects/clang380-import/sys/fs/smbfs/smbfs_subr.h (revision 293687) @@ -1,182 +1,183 @@ /*- * Copyright (c) 2000-2001 Boris Popov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _FS_SMBFS_SMBFS_SUBR_H_ #define _FS_SMBFS_SMBFS_SUBR_H_ #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_SMBFSDATA); MALLOC_DECLARE(M_SMBFSCRED); #endif #define SMBFSERR(format, args...) printf("%s: "format, __func__ ,## args) #ifdef SMB_VNODE_DEBUG #define SMBVDEBUG(format, args...) printf("%s: "format, __func__ ,## args) #else #define SMBVDEBUG(format, args...) #endif /* * Possible lock commands */ #define SMB_LOCK_EXCL 0 #define SMB_LOCK_SHARED 1 #define SMB_LOCK_RELEASE 2 struct smbmount; struct proc; struct timespec; struct ucred; struct vattr; struct vnode; struct statfs; struct smbfattr { int fa_attr; int64_t fa_size; struct timespec fa_atime; struct timespec fa_ctime; struct timespec fa_mtime; long fa_ino; }; /* * Context to perform findfirst/findnext/findclose operations */ #define SMBFS_RDD_FINDFIRST 0x01 #define SMBFS_RDD_EOF 0x02 #define SMBFS_RDD_FINDSINGLE 0x04 #define SMBFS_RDD_USESEARCH 0x08 #define SMBFS_RDD_NOCLOSE 0x10 #define SMBFS_RDD_GOTRNAME 0x1000 /* * Search context supplied by server */ #define SMB_SKEYLEN 21 /* search context */ #define SMB_DENTRYLEN (SMB_SKEYLEN + 22) /* entire entry */ struct smbfs_fctx { /* * Setable values */ int f_flags; /* SMBFS_RDD_ */ /* * Return values */ struct smbfattr f_attr; /* current attributes */ char * f_name; /* current file name */ int f_nmlen; /* name len */ /* * Internal variables */ int f_limit; /* maximum number of entries */ int f_attrmask; /* SMB_FA_ */ int f_wclen; const char * f_wildcard; struct smbnode* f_dnp; struct smb_cred*f_scred; struct smb_share *f_ssp; union { struct smb_rq * uf_rq; struct smb_t2rq * uf_t2; } f_urq; int f_left; /* entries left */ int f_ecnt; /* entries left in the current reponse */ int f_eofs; /* entry offset in the parameter block */ u_char f_skey[SMB_SKEYLEN]; /* server side search context */ u_char f_fname[8 + 1 + 3 + 1]; /* common case for 8.3 filenames */ u_int16_t f_Sid; u_int16_t f_infolevel; int f_rnamelen; char * f_rname; /* resume name/key */ int f_rnameofs; }; #define f_rq f_urq.uf_rq #define f_t2 f_urq.uf_t2 /* * smb level */ int smbfs_smb_lock(struct smbnode *np, int op, caddr_t id, off_t start, off_t end, struct smb_cred *scred); int smbfs_smb_statfs(struct smb_share *ssp, struct statfs *sbp, struct smb_cred *scred); -int smbfs_smb_setfsize(struct smbnode *np, int newsize, struct smb_cred *scred); +int smbfs_smb_setfsize(struct smbnode *np, int64_t newsize, + struct smb_cred *scred); int smbfs_smb_query_info(struct smbnode *np, const char *name, int len, struct smbfattr *fap, struct smb_cred *scred); int smbfs_smb_setpattr(struct smbnode *np, u_int16_t attr, struct timespec *mtime, struct smb_cred *scred); int smbfs_smb_setptime2(struct smbnode *np, struct timespec *mtime, struct timespec *atime, int attr, struct smb_cred *scred); int smbfs_smb_setpattrNT(struct smbnode *np, u_int16_t attr, struct timespec *mtime, struct timespec *atime, struct smb_cred *scred); int smbfs_smb_setftime(struct smbnode *np, struct timespec *mtime, struct timespec *atime, struct smb_cred *scred); int smbfs_smb_setfattrNT(struct smbnode *np, u_int16_t attr, struct timespec *mtime, struct timespec *atime, struct smb_cred *scred); int smbfs_smb_open(struct smbnode *np, int accmode, struct smb_cred *scred); int smbfs_smb_close(struct smb_share *ssp, u_int16_t fid, struct timespec *mtime, struct smb_cred *scred); int smbfs_smb_create(struct smbnode *dnp, const char *name, int len, struct smb_cred *scred); int smbfs_smb_delete(struct smbnode *np, struct smb_cred *scred); int smbfs_smb_flush(struct smbnode *np, struct smb_cred *scred); int smbfs_smb_rename(struct smbnode *src, struct smbnode *tdnp, const char *tname, int tnmlen, struct smb_cred *scred); int smbfs_smb_move(struct smbnode *src, struct smbnode *tdnp, const char *tname, int tnmlen, u_int16_t flags, struct smb_cred *scred); int smbfs_smb_mkdir(struct smbnode *dnp, const char *name, int len, struct smb_cred *scred); int smbfs_smb_rmdir(struct smbnode *np, struct smb_cred *scred); int smbfs_findopen(struct smbnode *dnp, const char *wildcard, int wclen, int attr, struct smb_cred *scred, struct smbfs_fctx **ctxpp); int smbfs_findnext(struct smbfs_fctx *ctx, int limit, struct smb_cred *scred); int smbfs_findclose(struct smbfs_fctx *ctx, struct smb_cred *scred); int smbfs_fullpath(struct mbchain *mbp, struct smb_vc *vcp, struct smbnode *dnp, const char *name, int nmlen); int smbfs_smb_lookup(struct smbnode *dnp, const char *name, int nmlen, struct smbfattr *fap, struct smb_cred *scred); int smbfs_fname_tolocal(struct smb_vc *vcp, char *name, int *nmlen, int caseopt); void smb_time_local2server(struct timespec *tsp, int tzoff, u_long *seconds); void smb_time_server2local(u_long seconds, int tzoff, struct timespec *tsp); void smb_time_NT2local(int64_t nsec, int tzoff, struct timespec *tsp); void smb_time_local2NT(struct timespec *tsp, int tzoff, int64_t *nsec); void smb_time_unix2dos(struct timespec *tsp, int tzoff, u_int16_t *ddp, u_int16_t *dtp, u_int8_t *dhp); void smb_dos2unixtime (u_int dd, u_int dt, u_int dh, int tzoff, struct timespec *tsp); void *smbfs_malloc_scred(void); void smbfs_free_scred(void *); #endif /* !_FS_SMBFS_SMBFS_SUBR_H_ */ Index: projects/clang380-import/sys/fs/smbfs/smbfs_vnops.c =================================================================== --- projects/clang380-import/sys/fs/smbfs/smbfs_vnops.c (revision 293686) +++ projects/clang380-import/sys/fs/smbfs/smbfs_vnops.c (revision 293687) @@ -1,1373 +1,1374 @@ /*- * Copyright (c) 2000-2001 Boris Popov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Prototypes for SMBFS vnode operations */ static vop_create_t smbfs_create; static vop_mknod_t smbfs_mknod; static vop_open_t smbfs_open; static vop_close_t smbfs_close; static vop_access_t smbfs_access; static vop_getattr_t smbfs_getattr; static vop_setattr_t smbfs_setattr; static vop_read_t smbfs_read; static vop_write_t smbfs_write; static vop_fsync_t smbfs_fsync; static vop_remove_t smbfs_remove; static vop_link_t smbfs_link; static vop_lookup_t smbfs_lookup; static vop_rename_t smbfs_rename; static vop_mkdir_t smbfs_mkdir; static vop_rmdir_t smbfs_rmdir; static vop_symlink_t smbfs_symlink; static vop_readdir_t smbfs_readdir; static vop_strategy_t smbfs_strategy; static vop_print_t smbfs_print; static vop_pathconf_t smbfs_pathconf; static vop_advlock_t smbfs_advlock; static vop_getextattr_t smbfs_getextattr; struct vop_vector smbfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = smbfs_access, .vop_advlock = smbfs_advlock, .vop_close = smbfs_close, .vop_create = smbfs_create, .vop_fsync = smbfs_fsync, .vop_getattr = smbfs_getattr, .vop_getextattr = smbfs_getextattr, .vop_getpages = smbfs_getpages, .vop_inactive = smbfs_inactive, .vop_ioctl = smbfs_ioctl, .vop_link = smbfs_link, .vop_lookup = smbfs_lookup, .vop_mkdir = smbfs_mkdir, .vop_mknod = smbfs_mknod, .vop_open = smbfs_open, .vop_pathconf = smbfs_pathconf, .vop_print = smbfs_print, .vop_putpages = smbfs_putpages, .vop_read = smbfs_read, .vop_readdir = smbfs_readdir, .vop_reclaim = smbfs_reclaim, .vop_remove = smbfs_remove, .vop_rename = smbfs_rename, .vop_rmdir = smbfs_rmdir, .vop_setattr = smbfs_setattr, /* .vop_setextattr = smbfs_setextattr,*/ .vop_strategy = smbfs_strategy, .vop_symlink = smbfs_symlink, .vop_write = smbfs_write, }; static int smbfs_access(ap) struct vop_access_args /* { struct vnode *a_vp; accmode_t a_accmode; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; accmode_t accmode = ap->a_accmode; mode_t mpmode; struct smbmount *smp = VTOSMBFS(vp); SMBVDEBUG("\n"); if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return EROFS; default: break; } } mpmode = vp->v_type == VREG ? smp->sm_file_mode : smp->sm_dir_mode; return (vaccess(vp->v_type, mpmode, smp->sm_uid, smp->sm_gid, ap->a_accmode, ap->a_cred, NULL)); } /* ARGSUSED */ static int smbfs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; struct smbnode *np = VTOSMB(vp); struct smb_cred *scred; struct vattr vattr; int mode = ap->a_mode; int error, accmode; SMBVDEBUG("%s,%d\n", np->n_name, (np->n_flag & NOPEN) != 0); if (vp->v_type != VREG && vp->v_type != VDIR) { SMBFSERR("open eacces vtype=%d\n", vp->v_type); return EACCES; } if (vp->v_type == VDIR) { np->n_flag |= NOPEN; return 0; } if (np->n_flag & NMODIFIED) { if ((error = smbfs_vinvalbuf(vp, ap->a_td)) == EINTR) return error; smbfs_attr_cacheremove(vp); error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) return error; np->n_mtime.tv_sec = vattr.va_mtime.tv_sec; } else { error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) return error; if (np->n_mtime.tv_sec != vattr.va_mtime.tv_sec) { error = smbfs_vinvalbuf(vp, ap->a_td); if (error == EINTR) return error; np->n_mtime.tv_sec = vattr.va_mtime.tv_sec; } } if ((np->n_flag & NOPEN) != 0) return 0; /* * Use DENYNONE to give unixy semantics of permitting * everything not forbidden by permissions. Ie denial * is up to server with clients/openers needing to use * advisory locks for further control. */ accmode = SMB_SM_DENYNONE|SMB_AM_OPENREAD; if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) accmode = SMB_SM_DENYNONE|SMB_AM_OPENRW; scred = smbfs_malloc_scred(); smb_makescred(scred, ap->a_td, ap->a_cred); error = smbfs_smb_open(np, accmode, scred); if (error) { if (mode & FWRITE) return EACCES; else if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { accmode = SMB_SM_DENYNONE|SMB_AM_OPENREAD; error = smbfs_smb_open(np, accmode, scred); } } if (error == 0) { np->n_flag |= NOPEN; vnode_create_vobject(ap->a_vp, vattr.va_size, ap->a_td); } smbfs_attr_cacheremove(vp); smbfs_free_scred(scred); return error; } static int smbfs_close(ap) struct vop_close_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; struct thread *td = ap->a_td; struct smbnode *np = VTOSMB(vp); struct smb_cred *scred; if (vp->v_type == VDIR && (np->n_flag & NOPEN) != 0 && np->n_dirseq != NULL) { scred = smbfs_malloc_scred(); smb_makescred(scred, td, ap->a_cred); smbfs_findclose(np->n_dirseq, scred); smbfs_free_scred(scred); np->n_dirseq = NULL; } return 0; } /* * smbfs_getattr call from vfs. */ static int smbfs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap; { struct vnode *vp = ap->a_vp; struct smbnode *np = VTOSMB(vp); struct vattr *va=ap->a_vap; struct smbfattr fattr; struct smb_cred *scred; u_quad_t oldsize; int error; SMBVDEBUG("%lx: '%s' %d\n", (long)vp, np->n_name, (vp->v_vflag & VV_ROOT) != 0); error = smbfs_attr_cachelookup(vp, va); if (!error) return 0; SMBVDEBUG("not in the cache\n"); scred = smbfs_malloc_scred(); smb_makescred(scred, curthread, ap->a_cred); oldsize = np->n_size; error = smbfs_smb_lookup(np, NULL, 0, &fattr, scred); if (error) { SMBVDEBUG("error %d\n", error); smbfs_free_scred(scred); return error; } smbfs_attr_cacheenter(vp, &fattr); smbfs_attr_cachelookup(vp, va); if (np->n_flag & NOPEN) np->n_size = oldsize; smbfs_free_scred(scred); return 0; } static int smbfs_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap; { struct vnode *vp = ap->a_vp; struct smbnode *np = VTOSMB(vp); struct vattr *vap = ap->a_vap; struct timespec *mtime, *atime; struct smb_cred *scred; struct smb_share *ssp = np->n_mount->sm_share; struct smb_vc *vcp = SSTOVC(ssp); struct thread *td = curthread; u_quad_t tsize = 0; int isreadonly, doclose, error = 0; int old_n_dosattr; SMBVDEBUG("\n"); isreadonly = (vp->v_mount->mnt_flag & MNT_RDONLY); /* * Disallow write attempts if the filesystem is mounted read-only. */ if ((vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL || vap->va_flags != VNOVAL) && isreadonly) return EROFS; /* * We only support setting four flags. Don't allow setting others. * * We map UF_READONLY to SMB_FA_RDONLY, unlike the MacOS X version * of this code, which maps both UF_IMMUTABLE AND SF_IMMUTABLE to * SMB_FA_RDONLY. The immutable flags have different semantics * than readonly, which is the reason for the difference. */ if (vap->va_flags != VNOVAL) { if (vap->va_flags & ~(UF_HIDDEN|UF_SYSTEM|UF_ARCHIVE| UF_READONLY)) return EINVAL; } scred = smbfs_malloc_scred(); smb_makescred(scred, td, ap->a_cred); if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: error = EISDIR; goto out; case VREG: break; default: error = EINVAL; goto out; }; if (isreadonly) { error = EROFS; goto out; } doclose = 0; vnode_pager_setsize(vp, (u_long)vap->va_size); tsize = np->n_size; np->n_size = vap->va_size; if ((np->n_flag & NOPEN) == 0) { error = smbfs_smb_open(np, SMB_SM_DENYNONE|SMB_AM_OPENRW, scred); if (error == 0) doclose = 1; } if (error == 0) - error = smbfs_smb_setfsize(np, vap->va_size, scred); + error = smbfs_smb_setfsize(np, + (int64_t)vap->va_size, scred); if (doclose) smbfs_smb_close(ssp, np->n_fid, NULL, scred); if (error) { np->n_size = tsize; vnode_pager_setsize(vp, (u_long)tsize); goto out; } } if ((vap->va_flags != VNOVAL) || (vap->va_mode != (mode_t)VNOVAL)) { old_n_dosattr = np->n_dosattr; if (vap->va_mode != (mode_t)VNOVAL) { if (vap->va_mode & S_IWUSR) np->n_dosattr &= ~SMB_FA_RDONLY; else np->n_dosattr |= SMB_FA_RDONLY; } if (vap->va_flags != VNOVAL) { if (vap->va_flags & UF_HIDDEN) np->n_dosattr |= SMB_FA_HIDDEN; else np->n_dosattr &= ~SMB_FA_HIDDEN; if (vap->va_flags & UF_SYSTEM) np->n_dosattr |= SMB_FA_SYSTEM; else np->n_dosattr &= ~SMB_FA_SYSTEM; if (vap->va_flags & UF_ARCHIVE) np->n_dosattr |= SMB_FA_ARCHIVE; else np->n_dosattr &= ~SMB_FA_ARCHIVE; /* * We only support setting the immutable / readonly * bit for regular files. According to comments in * the MacOS X version of this code, supporting the * readonly bit on directories doesn't do the same * thing in Windows as in Unix. */ if (vp->v_type == VREG) { if (vap->va_flags & UF_READONLY) np->n_dosattr |= SMB_FA_RDONLY; else np->n_dosattr &= ~SMB_FA_RDONLY; } } if (np->n_dosattr != old_n_dosattr) { error = smbfs_smb_setpattr(np, np->n_dosattr, NULL, scred); if (error) goto out; } } mtime = atime = NULL; if (vap->va_mtime.tv_sec != VNOVAL) mtime = &vap->va_mtime; if (vap->va_atime.tv_sec != VNOVAL) atime = &vap->va_atime; if (mtime != atime) { if (vap->va_vaflags & VA_UTIMES_NULL) { error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td); if (error) error = VOP_ACCESS(vp, VWRITE, ap->a_cred, td); } else error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td); #if 0 if (mtime == NULL) mtime = &np->n_mtime; if (atime == NULL) atime = &np->n_atime; #endif /* * If file is opened, then we can use handle based calls. * If not, use path based ones. */ if ((np->n_flag & NOPEN) == 0) { if (vcp->vc_flags & SMBV_WIN95) { error = VOP_OPEN(vp, FWRITE, ap->a_cred, td, NULL); if (!error) { /* error = smbfs_smb_setfattrNT(np, 0, mtime, atime, scred); VOP_GETATTR(vp, &vattr, ap->a_cred); */ if (mtime) np->n_mtime = *mtime; VOP_CLOSE(vp, FWRITE, ap->a_cred, td); } } else if ((vcp->vc_sopt.sv_caps & SMB_CAP_NT_SMBS)) { error = smbfs_smb_setptime2(np, mtime, atime, 0, scred); /* error = smbfs_smb_setpattrNT(np, 0, mtime, atime, scred);*/ } else if (SMB_DIALECT(vcp) >= SMB_DIALECT_LANMAN2_0) { error = smbfs_smb_setptime2(np, mtime, atime, 0, scred); } else { error = smbfs_smb_setpattr(np, 0, mtime, scred); } } else { if (vcp->vc_sopt.sv_caps & SMB_CAP_NT_SMBS) { error = smbfs_smb_setfattrNT(np, 0, mtime, atime, scred); } else if (SMB_DIALECT(vcp) >= SMB_DIALECT_LANMAN1_0) { error = smbfs_smb_setftime(np, mtime, atime, scred); } else { /* * I have no idea how to handle this for core * level servers. The possible solution is to * update mtime after file is closed. */ SMBERROR("can't update times on an opened file\n"); } } } /* * Invalidate attribute cache in case if server doesn't set * required attributes. */ smbfs_attr_cacheremove(vp); /* invalidate cache */ VOP_GETATTR(vp, vap, ap->a_cred); np->n_mtime.tv_sec = vap->va_mtime.tv_sec; out: smbfs_free_scred(scred); return error; } /* * smbfs_read call. */ static int smbfs_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; SMBVDEBUG("\n"); if (vp->v_type != VREG && vp->v_type != VDIR) return EPERM; return smbfs_readvnode(vp, uio, ap->a_cred); } static int smbfs_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; SMBVDEBUG("%d,ofs=%jd,sz=%zd\n",vp->v_type, (intmax_t)uio->uio_offset, uio->uio_resid); if (vp->v_type != VREG) return (EPERM); return smbfs_writevnode(vp, uio, ap->a_cred,ap->a_ioflag); } /* * smbfs_create call * Create a regular file. On entry the directory to contain the file being * created is locked. We must release before we return. We must also free * the pathname buffer pointed at by cnp->cn_pnbuf, always on error, or * only if the SAVESTART bit in cn_flags is clear on success. */ static int smbfs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct vnode **vpp=ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct smbnode *dnp = VTOSMB(dvp); struct vnode *vp; struct vattr vattr; struct smbfattr fattr; struct smb_cred *scred; char *name = cnp->cn_nameptr; int nmlen = cnp->cn_namelen; int error; SMBVDEBUG("\n"); *vpp = NULL; if (vap->va_type != VREG) return EOPNOTSUPP; if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) return error; scred = smbfs_malloc_scred(); smb_makescred(scred, cnp->cn_thread, cnp->cn_cred); error = smbfs_smb_create(dnp, name, nmlen, scred); if (error) goto out; error = smbfs_smb_lookup(dnp, name, nmlen, &fattr, scred); if (error) goto out; error = smbfs_nget(VTOVFS(dvp), dvp, name, nmlen, &fattr, &vp); if (error) goto out; *vpp = vp; if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, vp, cnp); out: smbfs_free_scred(scred); return error; } static int smbfs_remove(ap) struct vop_remove_args /* { struct vnodeop_desc *a_desc; struct vnode * a_dvp; struct vnode * a_vp; struct componentname * a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; /* struct vnode *dvp = ap->a_dvp;*/ struct componentname *cnp = ap->a_cnp; struct smbnode *np = VTOSMB(vp); struct smb_cred *scred; int error; if (vp->v_type == VDIR || (np->n_flag & NOPEN) != 0 || vrefcnt(vp) != 1) return EPERM; scred = smbfs_malloc_scred(); smb_makescred(scred, cnp->cn_thread, cnp->cn_cred); error = smbfs_smb_delete(np, scred); if (error == 0) np->n_flag |= NGONE; cache_purge(vp); smbfs_free_scred(scred); return error; } /* * smbfs_file rename call */ static int smbfs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { struct vnode *fvp = ap->a_fvp; struct vnode *tvp = ap->a_tvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *tdvp = ap->a_tdvp; struct componentname *tcnp = ap->a_tcnp; /* struct componentname *fcnp = ap->a_fcnp;*/ struct smb_cred *scred; u_int16_t flags = 6; int error=0; scred = NULL; /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } if (tvp && vrefcnt(tvp) > 1) { error = EBUSY; goto out; } flags = 0x10; /* verify all writes */ if (fvp->v_type == VDIR) { flags |= 2; } else if (fvp->v_type == VREG) { flags |= 1; } else { return EINVAL; } scred = smbfs_malloc_scred(); smb_makescred(scred, tcnp->cn_thread, tcnp->cn_cred); /* * It seems that Samba doesn't implement SMB_COM_MOVE call... */ #ifdef notnow if (SMB_DIALECT(SSTOCN(smp->sm_share)) >= SMB_DIALECT_LANMAN1_0) { error = smbfs_smb_move(VTOSMB(fvp), VTOSMB(tdvp), tcnp->cn_nameptr, tcnp->cn_namelen, flags, scred); } else #endif { /* * We have to do the work atomicaly */ if (tvp && tvp != fvp) { error = smbfs_smb_delete(VTOSMB(tvp), scred); if (error) goto out_cacherem; VTOSMB(fvp)->n_flag |= NGONE; } error = smbfs_smb_rename(VTOSMB(fvp), VTOSMB(tdvp), tcnp->cn_nameptr, tcnp->cn_namelen, scred); } if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out_cacherem: smbfs_attr_cacheremove(fdvp); smbfs_attr_cacheremove(tdvp); out: smbfs_free_scred(scred); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); #ifdef possible_mistake vgone(fvp); if (tvp) vgone(tvp); #endif return error; } /* * somtime it will come true... */ static int smbfs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { return EOPNOTSUPP; } /* * smbfs_symlink link create call. * Sometime it will be functional... */ static int smbfs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { return EOPNOTSUPP; } static int smbfs_mknod(ap) struct vop_mknod_args /* { } */ *ap; { return EOPNOTSUPP; } static int smbfs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vnode *dvp = ap->a_dvp; /* struct vattr *vap = ap->a_vap;*/ struct vnode *vp; struct componentname *cnp = ap->a_cnp; struct smbnode *dnp = VTOSMB(dvp); struct vattr vattr; struct smb_cred *scred; struct smbfattr fattr; char *name = cnp->cn_nameptr; int len = cnp->cn_namelen; int error; if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) { return error; } if ((name[0] == '.') && ((len == 1) || ((len == 2) && (name[1] == '.')))) return EEXIST; scred = smbfs_malloc_scred(); smb_makescred(scred, cnp->cn_thread, cnp->cn_cred); error = smbfs_smb_mkdir(dnp, name, len, scred); if (error) goto out; error = smbfs_smb_lookup(dnp, name, len, &fattr, scred); if (error) goto out; error = smbfs_nget(VTOVFS(dvp), dvp, name, len, &fattr, &vp); if (error) goto out; *ap->a_vpp = vp; out: smbfs_free_scred(scred); return error; } /* * smbfs_remove directory call */ static int smbfs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; /* struct smbmount *smp = VTOSMBFS(vp);*/ struct smbnode *dnp = VTOSMB(dvp); struct smbnode *np = VTOSMB(vp); struct smb_cred *scred; int error; if (dvp == vp) return EINVAL; scred = smbfs_malloc_scred(); smb_makescred(scred, cnp->cn_thread, cnp->cn_cred); error = smbfs_smb_rmdir(np, scred); if (error == 0) np->n_flag |= NGONE; dnp->n_flag |= NMODIFIED; smbfs_attr_cacheremove(dvp); /* cache_purge(dvp);*/ cache_purge(vp); smbfs_free_scred(scred); return error; } /* * smbfs_readdir call */ static int smbfs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; u_long *a_cookies; int a_ncookies; } */ *ap; { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; int error; if (vp->v_type != VDIR) return (EPERM); #ifdef notnow if (ap->a_ncookies) { printf("smbfs_readdir: no support for cookies now..."); return (EOPNOTSUPP); } #endif error = smbfs_readvnode(vp, uio, ap->a_cred); return error; } /* ARGSUSED */ static int smbfs_fsync(ap) struct vop_fsync_args /* { struct vnodeop_desc *a_desc; struct vnode * a_vp; struct ucred * a_cred; int a_waitfor; struct thread * a_td; } */ *ap; { /* return (smb_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_td, 1));*/ return (0); } static int smbfs_print (ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; struct smbnode *np = VTOSMB(vp); if (np == NULL) { printf("no smbnode data\n"); return (0); } printf("\tname = %s, parent = %p, open = %d\n", np->n_name, np->n_parent ? np->n_parent : NULL, (np->n_flag & NOPEN) != 0); return (0); } static int smbfs_pathconf (ap) struct vop_pathconf_args /* { struct vnode *vp; int name; register_t *retval; } */ *ap; { struct smbmount *smp = VFSTOSMBFS(VTOVFS(ap->a_vp)); struct smb_vc *vcp = SSTOVC(smp->sm_share); register_t *retval = ap->a_retval; int error = 0; switch (ap->a_name) { case _PC_LINK_MAX: *retval = 0; break; case _PC_NAME_MAX: *retval = (vcp->vc_hflags2 & SMB_FLAGS2_KNOWS_LONG_NAMES) ? 255 : 12; break; case _PC_PATH_MAX: *retval = 800; /* XXX: a correct one ? */ break; default: error = EINVAL; } return error; } static int smbfs_strategy (ap) struct vop_strategy_args /* { struct buf *a_bp } */ *ap; { struct buf *bp=ap->a_bp; struct ucred *cr; struct thread *td; SMBVDEBUG("\n"); if (bp->b_flags & B_ASYNC) td = (struct thread *)0; else td = curthread; /* XXX */ if (bp->b_iocmd == BIO_READ) cr = bp->b_rcred; else cr = bp->b_wcred; if ((bp->b_flags & B_ASYNC) == 0 ) (void)smbfs_doio(ap->a_vp, bp, cr, td); return (0); } int smbfs_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; u_long a_command; caddr_t a_data; int fflag; struct ucred *cred; struct thread *td; } */ *ap; { return ENOTTY; } static char smbfs_atl[] = "rhsvda"; static int smbfs_getextattr(struct vop_getextattr_args *ap) /* { IN struct vnode *a_vp; IN char *a_name; INOUT struct uio *a_uio; IN struct ucred *a_cred; IN struct thread *a_td; }; */ { struct vnode *vp = ap->a_vp; struct thread *td = ap->a_td; struct ucred *cred = ap->a_cred; struct uio *uio = ap->a_uio; const char *name = ap->a_name; struct smbnode *np = VTOSMB(vp); struct vattr vattr; char buf[10]; int i, attr, error; error = VOP_ACCESS(vp, VREAD, cred, td); if (error) return error; error = VOP_GETATTR(vp, &vattr, cred); if (error) return error; if (strcmp(name, "dosattr") == 0) { attr = np->n_dosattr; for (i = 0; i < 6; i++, attr >>= 1) buf[i] = (attr & 1) ? smbfs_atl[i] : '-'; buf[i] = 0; error = uiomove(buf, i, uio); } else error = EINVAL; return error; } /* * Since we expected to support F_GETLK (and SMB protocol has no such function), * it is necessary to use lf_advlock(). It would be nice if this function had * a callback mechanism because it will help to improve a level of consistency. */ int smbfs_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { struct vnode *vp = ap->a_vp; struct smbnode *np = VTOSMB(vp); struct flock *fl = ap->a_fl; caddr_t id = (caddr_t)1 /* ap->a_id */; /* int flags = ap->a_flags;*/ struct thread *td = curthread; struct smb_cred *scred; u_quad_t size; off_t start, end, oadd; int error, lkop; if (vp->v_type == VDIR) { /* * SMB protocol have no support for directory locking. * Although locks can be processed on local machine, I don't * think that this is a good idea, because some programs * can work wrong assuming directory is locked. So, we just * return 'operation not supported */ return EOPNOTSUPP; } size = np->n_size; switch (fl->l_whence) { case SEEK_SET: case SEEK_CUR: start = fl->l_start; break; case SEEK_END: if (size > OFF_MAX || (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) return EOVERFLOW; start = size + fl->l_start; break; default: return EINVAL; } if (start < 0) return EINVAL; if (fl->l_len < 0) { if (start == 0) return EINVAL; end = start - 1; start += fl->l_len; if (start < 0) return EINVAL; } else if (fl->l_len == 0) end = -1; else { oadd = fl->l_len - 1; if (oadd > OFF_MAX - start) return EOVERFLOW; end = start + oadd; } scred = smbfs_malloc_scred(); smb_makescred(scred, td, td->td_ucred); switch (ap->a_op) { case F_SETLK: switch (fl->l_type) { case F_WRLCK: lkop = SMB_LOCK_EXCL; break; case F_RDLCK: lkop = SMB_LOCK_SHARED; break; case F_UNLCK: lkop = SMB_LOCK_RELEASE; break; default: smbfs_free_scred(scred); return EINVAL; } error = lf_advlock(ap, &vp->v_lockf, size); if (error) break; lkop = SMB_LOCK_EXCL; error = smbfs_smb_lock(np, lkop, id, start, end, scred); if (error) { int oldtype = fl->l_type; fl->l_type = F_UNLCK; ap->a_op = F_UNLCK; lf_advlock(ap, &vp->v_lockf, size); fl->l_type = oldtype; } break; case F_UNLCK: lf_advlock(ap, &vp->v_lockf, size); error = smbfs_smb_lock(np, SMB_LOCK_RELEASE, id, start, end, scred); break; case F_GETLK: error = lf_advlock(ap, &vp->v_lockf, size); break; default: smbfs_free_scred(scred); return EINVAL; } smbfs_free_scred(scred); return error; } static int smbfs_pathcheck(struct smbmount *smp, const char *name, int nmlen, int nameiop) { static const char *badchars = "*/:<>;?"; static const char *badchars83 = " +|,[]="; const char *cp; int i, error; /* * Backslash characters, being a path delimiter, are prohibited * within a path component even for LOOKUP operations. */ if (strchr(name, '\\') != NULL) return ENOENT; if (nameiop == LOOKUP) return 0; error = ENOENT; if (SMB_DIALECT(SSTOVC(smp->sm_share)) < SMB_DIALECT_LANMAN2_0) { /* * Name should conform 8.3 format */ if (nmlen > 12) return ENAMETOOLONG; cp = strchr(name, '.'); if (cp == NULL) return error; if (cp == name || (cp - name) > 8) return error; cp = strchr(cp + 1, '.'); if (cp != NULL) return error; for (cp = name, i = 0; i < nmlen; i++, cp++) if (strchr(badchars83, *cp) != NULL) return error; } for (cp = name, i = 0; i < nmlen; i++, cp++) if (strchr(badchars, *cp) != NULL) return error; return 0; } /* * Things go even weird without fixed inode numbers... */ int smbfs_lookup(ap) struct vop_lookup_args /* { struct vnodeop_desc *a_desc; struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { struct componentname *cnp = ap->a_cnp; struct thread *td = cnp->cn_thread; struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct vnode *vp; struct smbmount *smp; struct mount *mp = dvp->v_mount; struct smbnode *dnp; struct smbfattr fattr, *fap; struct smb_cred *scred; char *name = cnp->cn_nameptr; int flags = cnp->cn_flags; int nameiop = cnp->cn_nameiop; int nmlen = cnp->cn_namelen; int error, islastcn, isdot; int killit; SMBVDEBUG("\n"); if (dvp->v_type != VDIR) return ENOTDIR; if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT)) { SMBFSERR("invalid '..'\n"); return EIO; } islastcn = flags & ISLASTCN; if (islastcn && (mp->mnt_flag & MNT_RDONLY) && (nameiop != LOOKUP)) return EROFS; if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) return error; smp = VFSTOSMBFS(mp); dnp = VTOSMB(dvp); isdot = (nmlen == 1 && name[0] == '.'); error = smbfs_pathcheck(smp, cnp->cn_nameptr, cnp->cn_namelen, nameiop); if (error) return ENOENT; error = cache_lookup(dvp, vpp, cnp, NULL, NULL); SMBVDEBUG("cache_lookup returned %d\n", error); if (error > 0) return error; if (error) { /* name was found */ struct vattr vattr; killit = 0; vp = *vpp; error = VOP_GETATTR(vp, &vattr, cnp->cn_cred); /* * If the file type on the server is inconsistent * with what it was when we created the vnode, * kill the bogus vnode now and fall through to * the code below to create a new one with the * right type. */ if (error == 0 && ((vp->v_type == VDIR && (VTOSMB(vp)->n_dosattr & SMB_FA_DIR) == 0) || (vp->v_type == VREG && (VTOSMB(vp)->n_dosattr & SMB_FA_DIR) != 0))) killit = 1; else if (error == 0 /* && vattr.va_ctime.tv_sec == VTOSMB(vp)->n_ctime*/) { if (nameiop != LOOKUP && islastcn) cnp->cn_flags |= SAVENAME; SMBVDEBUG("use cached vnode\n"); return (0); } cache_purge(vp); /* * XXX This is not quite right, if '.' is * inconsistent, we really need to start the lookup * all over again. Hopefully there is some other * guarantee that prevents this case from happening. */ if (killit && vp != dvp) vgone(vp); if (vp != dvp) vput(vp); else vrele(vp); *vpp = NULLVP; } /* * entry is not in the cache or has been expired */ error = 0; *vpp = NULLVP; scred = smbfs_malloc_scred(); smb_makescred(scred, td, cnp->cn_cred); fap = &fattr; if (flags & ISDOTDOT) { /* * In the DOTDOT case, don't go over-the-wire * in order to request attributes. We already * know it's a directory and subsequent call to * smbfs_getattr() will restore consistency. * */ SMBVDEBUG("smbfs_smb_lookup: dotdot\n"); } else if (isdot) { error = smbfs_smb_lookup(dnp, NULL, 0, fap, scred); SMBVDEBUG("result of smbfs_smb_lookup: %d\n", error); } else { error = smbfs_smb_lookup(dnp, name, nmlen, fap, scred); SMBVDEBUG("result of smbfs_smb_lookup: %d\n", error); } if (error && error != ENOENT) goto out; if (error) { /* entry not found */ /* * Handle RENAME or CREATE case... */ if ((nameiop == CREATE || nameiop == RENAME) && islastcn) { error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td); if (error) goto out; cnp->cn_flags |= SAVENAME; error = EJUSTRETURN; goto out; } error = ENOENT; goto out; }/* else { SMBVDEBUG("Found entry %s with id=%d\n", fap->entryName, fap->dirEntNum); }*/ /* * handle DELETE case ... */ if (nameiop == DELETE && islastcn) { /* delete last component */ error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td); if (error) goto out; if (isdot) { VREF(dvp); *vpp = dvp; goto out; } error = smbfs_nget(mp, dvp, name, nmlen, fap, &vp); if (error) goto out; *vpp = vp; cnp->cn_flags |= SAVENAME; goto out; } if (nameiop == RENAME && islastcn) { error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td); if (error) goto out; if (isdot) { error = EISDIR; goto out; } error = smbfs_nget(mp, dvp, name, nmlen, fap, &vp); if (error) goto out; *vpp = vp; cnp->cn_flags |= SAVENAME; goto out; } if (flags & ISDOTDOT) { mp = dvp->v_mount; error = vfs_busy(mp, MBF_NOWAIT); if (error != 0) { vfs_ref(mp); VOP_UNLOCK(dvp, 0); error = vfs_busy(mp, 0); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); vfs_rel(mp); if (error) { error = ENOENT; goto out; } if ((dvp->v_iflag & VI_DOOMED) != 0) { vfs_unbusy(mp); error = ENOENT; goto out; } } VOP_UNLOCK(dvp, 0); error = smbfs_nget(mp, dvp, name, nmlen, NULL, &vp); vfs_unbusy(mp); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); if ((dvp->v_iflag & VI_DOOMED) != 0) { if (error == 0) vput(vp); error = ENOENT; } if (error) goto out; *vpp = vp; } else if (isdot) { vref(dvp); *vpp = dvp; } else { error = smbfs_nget(mp, dvp, name, nmlen, fap, &vp); if (error) goto out; *vpp = vp; SMBVDEBUG("lookup: getnewvp!\n"); } if ((cnp->cn_flags & MAKEENTRY)/* && !islastcn*/) { /* VTOSMB(*vpp)->n_ctime = VTOSMB(*vpp)->n_vattr.va_ctime.tv_sec;*/ cache_enter(dvp, *vpp, cnp); } out: smbfs_free_scred(scred); return (error); } Index: projects/clang380-import/sys/i386/i386/elf_machdep.c =================================================================== --- projects/clang380-import/sys/i386/i386/elf_machdep.c (revision 293686) +++ projects/clang380-import/sys/i386/i386/elf_machdep.c (revision 293687) @@ -1,288 +1,289 @@ /*- * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static Elf32_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_oinfo); static Elf32_Brandinfo kfreebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/lib/ld.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &kfreebsd_brand_info); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { #ifdef CPU_ENABLE_SSE void *buf; #endif size_t len; len = 0; #ifdef CPU_ENABLE_SSE if (use_xsave) { if (dst != NULL) { npxgetregs(td); len += elf32_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf32_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } #endif *off = len; } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where; Elf_Addr addr; Elf_Addr addend; Elf_Word rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); addend = *where; rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if (local) { if (rtype == R_386_RELATIVE) { /* A + B */ addr = elf_relocaddr(lf, relocbase + addend); if (*where != addr) *where = addr; } return (0); } switch (rtype) { case R_386_NONE: /* none */ break; case R_386_32: /* S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; addr += addend; if (*where != addr) *where = addr; break; case R_386_PC32: /* S + A - P */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; addr += addend - (Elf_Addr)where; if (*where != addr) *where = addr; break; case R_386_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation\n"); return -1; break; case R_386_GLOB_DAT: /* S */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; if (*where != addr) *where = addr; break; case R_386_RELATIVE: break; default: printf("kldload: unexpected relocation type %d\n", rtype); return -1; } return(0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: projects/clang380-import/sys/i386/ibcs2/ibcs2_sysvec.c =================================================================== --- projects/clang380-import/sys/i386/ibcs2/ibcs2_sysvec.c (revision 293686) +++ projects/clang380-import/sys/i386/ibcs2/ibcs2_sysvec.c (revision 293687) @@ -1,137 +1,138 @@ /*- * Copyright (c) 1995 Steven Wallace * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Steven Wallace. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(ibcs2, 1); extern int bsd_to_ibcs2_errno[]; extern struct sysent ibcs2_sysent[IBCS2_SYS_MAXSYSCALL]; static int ibcs2_fixup(register_t **, struct image_params *); struct sysentvec ibcs2_svr3_sysvec = { .sv_size = sizeof (ibcs2_sysent) / sizeof (ibcs2_sysent[0]), .sv_table = ibcs2_sysent, .sv_mask = 0xff, .sv_sigsize = IBCS2_SIGTBLSZ, .sv_sigtbl = bsd_to_ibcs2_sig, .sv_errsize = ELAST + 1, .sv_errtbl = bsd_to_ibcs2_errno, .sv_transtrap = NULL, .sv_fixup = ibcs2_fixup, .sv_sendsig = sendsig, .sv_sigcode = sigcode, /* use generic trampoline */ .sv_szsigcode = &szsigcode, .sv_prepsyscall = NULL, .sv_name = "IBCS2 COFF", .sv_coredump = NULL, /* we don't have a COFF coredump function */ .sv_imgact_try = NULL, .sv_minsigstksz = IBCS2_MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_UNDEF | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = NULL, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; static int ibcs2_fixup(register_t **stack_base, struct image_params *imgp) { return (suword(--(*stack_base), imgp->args->argc)); } /* * Create an "ibcs2" module that does nothing but allow checking for * the presence of the subsystem. */ static int ibcs2_modevent(module_t mod, int type, void *unused) { struct proc *p = NULL; int rval = 0; switch(type) { case MOD_LOAD: break; case MOD_UNLOAD: /* if this was an ELF module we'd use elf_brand_inuse()... */ sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { if (p->p_sysent == &ibcs2_svr3_sysvec) { rval = EBUSY; break; } } sx_sunlock(&allproc_lock); break; default: rval = EOPNOTSUPP; break; } return (rval); } static moduledata_t ibcs2_mod = { "ibcs2", ibcs2_modevent, 0 }; DECLARE_MODULE_TIED(ibcs2, ibcs2_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); Index: projects/clang380-import/sys/i386/linux/linux_sysvec.c =================================================================== --- projects/clang380-import/sys/i386/linux/linux_sysvec.c (revision 293686) +++ projects/clang380-import/sys/i386/linux/linux_sysvec.c (revision 293687) @@ -1,1195 +1,1197 @@ /*- * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux, 1); #if BYTE_ORDER == LITTLE_ENDIAN #define SHELLMAGIC 0x2123 /* #! */ #else #define SHELLMAGIC 0x2321 #endif #if defined(DEBUG) SYSCTL_PROC(_compat_linux, OID_AUTO, debug, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, linux_sysctl_debug, "A", "Linux debugging control"); #endif /* * Allow the sendsig functions to use the ldebug() facility * even though they are not syscalls themselves. Map them * to syscall 0. This is slightly less bogus than using * ldebug(sigreturn). */ #define LINUX_SYS_linux_rt_sendsig 0 #define LINUX_SYS_linux_sendsig 0 #define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings)) static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux_locore_o_start; extern char _binary_linux_locore_o_end; extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static int linux_fixup(register_t **stack_base, struct image_params *iparams); static int elf_linux_fixup(register_t **stack_base, struct image_params *iparams); static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); static void exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack); static register_t *linux_copyout_strings(struct image_params *imgp); static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); static int linux_szplatform; const char *linux_kplatform; static eventhandler_tag linux_exit_tag; static eventhandler_tag linux_exec_tag; static eventhandler_tag linux_thread_dtor_tag; /* * Linux syscalls return negative errno's, we do positive and map them * Reference: * FreeBSD: src/sys/sys/errno.h * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h * linux-2.6.17.8/include/asm-generic/errno.h */ static int bsd_to_linux_errno[ELAST + 1] = { -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, -72, -67, -71 }; #define LINUX_T_UNKNOWN 255 static int _bsd_to_linux_trapcode[] = { LINUX_T_UNKNOWN, /* 0 */ 6, /* 1 T_PRIVINFLT */ LINUX_T_UNKNOWN, /* 2 */ 3, /* 3 T_BPTFLT */ LINUX_T_UNKNOWN, /* 4 */ LINUX_T_UNKNOWN, /* 5 */ 16, /* 6 T_ARITHTRAP */ 254, /* 7 T_ASTFLT */ LINUX_T_UNKNOWN, /* 8 */ 13, /* 9 T_PROTFLT */ 1, /* 10 T_TRCTRAP */ LINUX_T_UNKNOWN, /* 11 */ 14, /* 12 T_PAGEFLT */ LINUX_T_UNKNOWN, /* 13 */ 17, /* 14 T_ALIGNFLT */ LINUX_T_UNKNOWN, /* 15 */ LINUX_T_UNKNOWN, /* 16 */ LINUX_T_UNKNOWN, /* 17 */ 0, /* 18 T_DIVIDE */ 2, /* 19 T_NMI */ 4, /* 20 T_OFLOW */ 5, /* 21 T_BOUND */ 7, /* 22 T_DNA */ 8, /* 23 T_DOUBLEFLT */ 9, /* 24 T_FPOPFLT */ 10, /* 25 T_TSSFLT */ 11, /* 26 T_SEGNPFLT */ 12, /* 27 T_STKFLT */ 18, /* 28 T_MCHK */ 19, /* 29 T_XMMFLT */ 15 /* 30 T_RESERVED */ }; #define bsd_to_linux_trapcode(code) \ ((code)args->argc + 1); (*stack_base)--; suword(*stack_base, (intptr_t)(void *)envp); (*stack_base)--; suword(*stack_base, (intptr_t)(void *)argv); (*stack_base)--; suword(*stack_base, imgp->args->argc); return (0); } static int elf_linux_fixup(register_t **stack_base, struct image_params *imgp) { struct proc *p; Elf32_Auxargs *args; Elf32_Addr *uplatform; struct ps_strings *arginfo; register_t *pos; KASSERT(curthread->td_proc == imgp->proc, ("unsafe elf_linux_fixup(), should be curproc")); p = imgp->proc; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform); args = (Elf32_Auxargs *)imgp->auxargs; pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2); AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall); AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); /* * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, * as it has appeared in the 2.4.0-rc7 first time. * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), * glibc falls back to the hard-coded CLK_TCK value when aux entry * is not present. * Also see linux_times() implementation. */ if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform)); AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary); if (imgp->execpathp != 0) AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; (*stack_base)--; suword(*stack_base, (register_t)imgp->args->argc); return (0); } /* * Copied from kern/kern_exec.c */ static register_t * linux_copyout_strings(struct image_params *imgp) { int argc, envc; char **vectp; char *stringp, *destp; register_t *stack_base; struct ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; struct proc *p; /* * Calculate string base and vector table pointers. */ p = imgp->proc; if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform - roundup(sizeof(canary), sizeof(char *)) - roundup(execpath_len, sizeof(char *)) - roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); /* * install LINUX_PLATFORM */ copyout(linux_kplatform, ((caddr_t)arginfo - linux_szplatform), linux_szplatform); if (execpath_len != 0) { imgp->execpathp = (uintptr_t)arginfo - linux_szplatform - execpath_len; copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); imgp->canary = (uintptr_t)arginfo - linux_szplatform - roundup(execpath_len, sizeof(char *)) - roundup(sizeof(canary), sizeof(char *)); copyout(canary, (void *)imgp->canary, sizeof(canary)); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for * lower compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (LINUX_AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets,and imgp->auxarg_size is room * for argument of Runtime loader. */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *)); } else { /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(char *)); } /* * vectp also becomes our initial stack base */ stack_base = (register_t *)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); suword(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword(vectp++, 0); suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); suword(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword(vectp, 0); return (stack_base); } static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_rt_sigframe *fp, frame; int sig, code; int oonstack; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); #ifdef DEBUG if (ldebug(rt_sendsig)) printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), catcher, sig, (void*)mask, code); #endif /* * Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); } else fp = (struct l_rt_sigframe *)regs->tf_esp - 1; mtx_unlock(&psp->ps_mtx); /* * Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = catcher; frame.sf_sig = sig; frame.sf_siginfo = &fp->sf_si; frame.sf_ucontext = &fp->sf_sc; /* Fill in POSIX parts */ ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); /* * Build the signal context to be used by sigreturn. */ frame.sf_sc.uc_flags = 0; /* XXX ??? */ frame.sf_sc.uc_link = NULL; /* XXX ??? */ frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__mask; frame.sf_sc.uc_mcontext.sc_gs = rgs(); frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; frame.sf_sc.uc_mcontext.sc_esp = regs->tf_esp; frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); #ifdef DEBUG if (ldebug(rt_sendsig)) printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); #endif if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ #ifdef DEBUG if (ldebug(rt_sendsig)) printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), fp, oonstack); #endif PROC_LOCK(p); sigexit(td, SIGILL); } /* * Build context to run handler in. */ regs->tf_esp = (int)fp; regs->tf_eip = linux_rt_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * in u. to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_sigframe *fp, frame; l_sigset_t lmask; int sig, code; int oonstack; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; sig = ksi->ksi_signo; code = ksi->ksi_code; mtx_assert(&psp->ps_mtx, MA_OWNED); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ linux_rt_sendsig(catcher, ksi, mask); return; } regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); #ifdef DEBUG if (ldebug(sendsig)) printf(ARGS(sendsig, "%p, %d, %p, %u"), catcher, sig, (void*)mask, code); #endif /* * Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_sigframe)); } else fp = (struct l_sigframe *)regs->tf_esp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = catcher; frame.sf_sig = sig; bsd_to_linux_sigset(mask, &lmask); /* * Build the signal context to be used by sigreturn. */ frame.sf_sc.sc_mask = lmask.__mask; frame.sf_sc.sc_gs = rgs(); frame.sf_sc.sc_fs = regs->tf_fs; frame.sf_sc.sc_es = regs->tf_es; frame.sf_sc.sc_ds = regs->tf_ds; frame.sf_sc.sc_edi = regs->tf_edi; frame.sf_sc.sc_esi = regs->tf_esi; frame.sf_sc.sc_ebp = regs->tf_ebp; frame.sf_sc.sc_ebx = regs->tf_ebx; frame.sf_sc.sc_esp = regs->tf_esp; frame.sf_sc.sc_edx = regs->tf_edx; frame.sf_sc.sc_ecx = regs->tf_ecx; frame.sf_sc.sc_eax = regs->tf_eax; frame.sf_sc.sc_eip = regs->tf_eip; frame.sf_sc.sc_cs = regs->tf_cs; frame.sf_sc.sc_eflags = regs->tf_eflags; frame.sf_sc.sc_esp_at_signal = regs->tf_esp; frame.sf_sc.sc_ss = regs->tf_ss; frame.sf_sc.sc_err = regs->tf_err; frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr; frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); frame.sf_extramask[0] = lmask.__mask; if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); } /* * Build context to run handler in. */ regs->tf_esp = (int)fp; regs->tf_eip = linux_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) { struct l_sigframe frame; struct trapframe *regs; l_sigset_t lmask; sigset_t bmask; int eflags; ksiginfo_t ksi; regs = td->td_frame; #ifdef DEBUG if (ldebug(sigreturn)) printf(ARGS(sigreturn, "%p"), (void *)args->sfp); #endif /* * The trampoline code hands us the sigframe. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->sfp, &frame, sizeof(frame)) != 0) return (EFAULT); /* * Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = frame.sf_sc.sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_eflags)) return (EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(frame.sf_sc.sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } lmask.__mask = frame.sf_sc.sc_mask; linux_to_bsd_sigset(&lmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* * Restore signal context. */ /* %gs was restored by the trampoline. */ regs->tf_fs = frame.sf_sc.sc_fs; regs->tf_es = frame.sf_sc.sc_es; regs->tf_ds = frame.sf_sc.sc_ds; regs->tf_edi = frame.sf_sc.sc_edi; regs->tf_esi = frame.sf_sc.sc_esi; regs->tf_ebp = frame.sf_sc.sc_ebp; regs->tf_ebx = frame.sf_sc.sc_ebx; regs->tf_edx = frame.sf_sc.sc_edx; regs->tf_ecx = frame.sf_sc.sc_ecx; regs->tf_eax = frame.sf_sc.sc_eax; regs->tf_eip = frame.sf_sc.sc_eip; regs->tf_cs = frame.sf_sc.sc_cs; regs->tf_eflags = eflags; regs->tf_esp = frame.sf_sc.sc_esp_at_signal; regs->tf_ss = frame.sf_sc.sc_ss; return (EJUSTRETURN); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by rt_sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { struct l_ucontext uc; struct l_sigcontext *context; sigset_t bmask; l_stack_t *lss; stack_t ss; struct trapframe *regs; int eflags; ksiginfo_t ksi; regs = td->td_frame; #ifdef DEBUG if (ldebug(rt_sigreturn)) printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); #endif /* * The trampoline code hands us the ucontext. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->ucp, &uc, sizeof(uc)) != 0) return (EFAULT); context = &uc.uc_mcontext; /* * Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = context->sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_eflags)) return (EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(context->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* * Restore signal context */ /* %gs was restored by the trampoline. */ regs->tf_fs = context->sc_fs; regs->tf_es = context->sc_es; regs->tf_ds = context->sc_ds; regs->tf_edi = context->sc_edi; regs->tf_esi = context->sc_esi; regs->tf_ebp = context->sc_ebp; regs->tf_ebx = context->sc_ebx; regs->tf_edx = context->sc_edx; regs->tf_ecx = context->sc_ecx; regs->tf_eax = context->sc_eax; regs->tf_eip = context->sc_eip; regs->tf_cs = context->sc_cs; regs->tf_eflags = eflags; regs->tf_esp = context->sc_esp_at_signal; regs->tf_ss = context->sc_ss; /* * call sigaltstack & ignore results.. */ lss = &uc.uc_stack; ss.ss_sp = lss->ss_sp; ss.ss_size = lss->ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); #ifdef DEBUG if (ldebug(rt_sigreturn)) printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); #endif (void)kern_sigaltstack(td, &ss, NULL); return (EJUSTRETURN); } static int linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa) { struct proc *p; struct trapframe *frame; p = td->td_proc; frame = td->td_frame; sa->code = frame->tf_eax; sa->args[0] = frame->tf_ebx; sa->args[1] = frame->tf_ecx; sa->args[2] = frame->tf_edx; sa->args[3] = frame->tf_esi; sa->args[4] = frame->tf_edi; sa->args[5] = frame->tf_ebp; /* Unconfirmed */ if (sa->code >= p->p_sysent->sv_size) /* nosys */ sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; td->td_retval[0] = 0; td->td_retval[1] = frame->tf_edx; return (0); } /* * If a linux binary is exec'ing something, try this image activator * first. We override standard shell script execution in order to * be able to modify the interpreter path. We only do this if a linux * binary is doing the exec, so we do not create an EXEC module for it. */ static int exec_linux_imgact_try(struct image_params *iparams); static int exec_linux_imgact_try(struct image_params *imgp) { const char *head = (const char *)imgp->image_header; char *rpath; int error = -1; /* * The interpreter for shell scripts run from a linux binary needs * to be located in /compat/linux if possible in order to recursively * maintain linux path emulation. */ if (((const short *)head)[0] == SHELLMAGIC) { /* * Run our normal shell image activator. If it succeeds attempt * to use the alternate path for the interpreter. If an alternate * path is found, use our stringspace to store it. */ if ((error = exec_shell_imgact(imgp)) == 0) { linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD); if (rpath != NULL) imgp->args->fname_buf = imgp->interpreter_name = rpath; } } return (error); } /* * exec_setregs may initialize some registers differently than Linux * does, thus potentially confusing Linux binaries. If necessary, we * override the exec_setregs default(s) here. */ static void exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct pcb *pcb = td->td_pcb; exec_setregs(td, imgp, stack); /* Linux sets %gs to 0, we default to _udatasel */ pcb->pcb_gs = 0; load_gs(0); pcb->pcb_initial_npxcw = __LINUX_NPXCW__; } static void linux_get_machine(const char **dst) { switch (cpu_class) { case CPUCLASS_686: *dst = "i686"; break; case CPUCLASS_586: *dst = "i586"; break; case CPUCLASS_486: *dst = "i486"; break; default: *dst = "i386"; } } struct sysentvec linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, .sv_mask = 0, .sv_errsize = ELAST + 1, .sv_errtbl = bsd_to_linux_errno, .sv_transtrap = translate_traps, .sv_fixup = linux_fixup, .sv_sendsig = linux_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux a.out", .sv_coredump = NULL, .sv_imgact_try = exec_linux_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = LINUX_USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_linux_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = LINUX_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, + .sv_trap = NULL, }; INIT_SYSENTVEC(aout_sysvec, &linux_sysvec); struct sysentvec elf_linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, .sv_mask = 0, .sv_errsize = ELAST + 1, .sv_errtbl = bsd_to_linux_errno, .sv_transtrap = translate_traps, .sv_fixup = elf_linux_fixup, .sv_sendsig = linux_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF", .sv_coredump = elf32_coredump, .sv_imgact_try = exec_linux_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = LINUX_USRSTACK, .sv_psstrings = LINUX_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = exec_linux_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = LINUX_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, + .sv_trap = NULL, }; static void linux_vdso_install(void *param) { linux_szsigcode = (&_binary_linux_locore_o_end - &_binary_linux_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("Linux invalid vdso size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX_SHAREDPAGE); bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)linux_vdso_install, NULL); static void linux_vdso_deinstall(void *param) { __elfN(linux_shared_page_fini)(linux_shared_page_obj); }; SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)linux_vdso_deinstall, NULL); static char GNU_ABI_VENDOR[] = "GNU"; static int GNULINUX_ABI_DESC = 0; static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNULINUX_ABI_DESC) return (FALSE); /* * For linux we encode osrel as follows (see linux_mib.c): * VVVMMMIII (version, major, minor), see linux_mib.c. */ *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; return (TRUE); } static Elf_Brandnote linux_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux_trans_osrel }; static Elf32_Brandinfo linux_brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib/ld-linux.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf32_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib/ld-linux.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf32_Brandinfo *linux_brandlist[] = { &linux_brand, &linux_glibc2brand, NULL }; static int linux_elf_modevent(module_t mod, int type, void *data) { Elf32_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_register_handler(*lihp); LIST_INIT(&futex_list); mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, NULL, 1000); linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, NULL, 1000); linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); linux_get_machine(&linux_kplatform); linux_szplatform = roundup(strlen(linux_kplatform) + 1, sizeof(char *)); linux_osd_jail_register(); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux ELF exec handler installed\n"); } else printf("cannot insert Linux ELF brand handler\n"); break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_unregister_handler(*lihp); mtx_destroy(&futex_mtx); EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag); linux_osd_jail_deregister(); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux_elf_mod = { "linuxelf", linux_elf_modevent, 0 }; DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); Index: projects/clang380-import/sys/kern/imgact_aout.c =================================================================== --- projects/clang380-import/sys/kern/imgact_aout.c (revision 293686) +++ projects/clang380-import/sys/kern/imgact_aout.c (revision 293687) @@ -1,338 +1,339 @@ /*- * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __amd64__ #include #include #include #include #include #endif static int exec_aout_imgact(struct image_params *imgp); static int aout_fixup(register_t **stack_base, struct image_params *imgp); #if defined(__i386__) struct sysentvec aout_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = aout_fixup, .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD a.out", .sv_coredump = NULL, .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_AOUT | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; #elif defined(__amd64__) #define AOUT32_USRSTACK 0xbfc00000 #define AOUT32_PS_STRINGS \ (AOUT32_USRSTACK - sizeof(struct freebsd32_ps_strings)) #define AOUT32_MINUSER FREEBSD32_MINUSER extern const char *freebsd32_syscallnames[]; extern u_long ia32_maxssiz; struct sysentvec aout_sysvec = { .sv_size = FREEBSD32_SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = aout_fixup, .sv_sendsig = ia32_sendsig, .sv_sigcode = ia32_sigcode, .sv_szsigcode = &sz_ia32_sigcode, .sv_name = "FreeBSD a.out", .sv_coredump = NULL, .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = IA32_PAGE_SIZE, .sv_minuser = AOUT32_MINUSER, .sv_maxuser = AOUT32_USRSTACK, .sv_usrstack = AOUT32_USRSTACK, .sv_psstrings = AOUT32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = ia32_setregs, .sv_fixlimit = ia32_fixlimit, .sv_maxssiz = &ia32_maxssiz, .sv_flags = SV_ABI_FREEBSD | SV_AOUT | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = ia32_set_syscall_retval, .sv_fetch_syscall_args = ia32_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, }; #else #error "Port me" #endif static int aout_fixup(register_t **stack_base, struct image_params *imgp) { *(char **)stack_base -= sizeof(uint32_t); return (suword32(*stack_base, imgp->args->argc)); } static int exec_aout_imgact(struct image_params *imgp) { const struct exec *a_out = (const struct exec *) imgp->image_header; struct vmspace *vmspace; vm_map_t map; vm_object_t object; vm_offset_t text_end, data_end; unsigned long virtual_offset; unsigned long file_offset; unsigned long bss_size; int error; /* * Linux and *BSD binaries look very much alike, * only the machine id is different: * 0x64 for Linux, 0x86 for *BSD, 0x00 for BSDI. * NetBSD is in network byte order.. ugh. */ if (((a_out->a_midmag >> 16) & 0xff) != 0x86 && ((a_out->a_midmag >> 16) & 0xff) != 0 && ((((int)ntohl(a_out->a_midmag)) >> 16) & 0xff) != 0x86) return -1; /* * Set file/virtual offset based on a.out variant. * We do two cases: host byte order and network byte order * (for NetBSD compatibility) */ switch ((int)(a_out->a_midmag & 0xffff)) { case ZMAGIC: virtual_offset = 0; if (a_out->a_text) { file_offset = PAGE_SIZE; } else { /* Bill's "screwball mode" */ file_offset = 0; } break; case QMAGIC: virtual_offset = PAGE_SIZE; file_offset = 0; /* Pass PS_STRINGS for BSD/OS binaries only. */ if (N_GETMID(*a_out) == MID_ZERO) imgp->ps_strings = aout_sysvec.sv_psstrings; break; default: /* NetBSD compatibility */ switch ((int)(ntohl(a_out->a_midmag) & 0xffff)) { case ZMAGIC: case QMAGIC: virtual_offset = PAGE_SIZE; file_offset = 0; break; default: return (-1); } } bss_size = roundup(a_out->a_bss, PAGE_SIZE); /* * Check various fields in header for validity/bounds. */ if (/* entry point must lay with text region */ a_out->a_entry < virtual_offset || a_out->a_entry >= virtual_offset + a_out->a_text || /* text and data size must each be page rounded */ a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK #ifdef __amd64__ || /* overflows */ virtual_offset + a_out->a_text + a_out->a_data + bss_size > UINT_MAX #endif ) return (-1); /* text + data can't exceed file size */ if (a_out->a_data + a_out->a_text > imgp->attr->va_size) return (EFAULT); /* * text/data/bss must not exceed limits */ PROC_LOCK(imgp->proc); if (/* text can't exceed maximum text size */ a_out->a_text > maxtsiz || /* data + bss can't exceed rlimit */ a_out->a_data + bss_size > lim_cur_proc(imgp->proc, RLIMIT_DATA) || racct_set(imgp->proc, RACCT_DATA, a_out->a_data + bss_size) != 0) { PROC_UNLOCK(imgp->proc); return (ENOMEM); } PROC_UNLOCK(imgp->proc); /* * Avoid a possible deadlock if the current address space is destroyed * and that address space maps the locked vnode. In the common case, * the locked vnode's v_usecount is decremented but remains greater * than zero. Consequently, the vnode lock is not needed by vrele(). * However, in cases where the vnode lock is external, such as nullfs, * v_usecount may become zero. */ VOP_UNLOCK(imgp->vp, 0); /* * Destroy old process VM and create a new one (with a new stack) */ error = exec_new_vmspace(imgp, &aout_sysvec); vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); if (error) return (error); /* * The vm space can be changed by exec_new_vmspace */ vmspace = imgp->proc->p_vmspace; object = imgp->object; map = &vmspace->vm_map; vm_map_lock(map); vm_object_reference(object); text_end = virtual_offset + a_out->a_text; error = vm_map_insert(map, object, file_offset, virtual_offset, text_end, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT); if (error) { vm_map_unlock(map); vm_object_deallocate(object); return (error); } data_end = text_end + a_out->a_data; if (a_out->a_data) { vm_object_reference(object); error = vm_map_insert(map, object, file_offset + a_out->a_text, text_end, data_end, VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT); if (error) { vm_map_unlock(map); vm_object_deallocate(object); return (error); } } if (bss_size) { error = vm_map_insert(map, NULL, 0, data_end, data_end + bss_size, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) { vm_map_unlock(map); return (error); } } vm_map_unlock(map); /* Fill in process VM information */ vmspace->vm_tsize = a_out->a_text >> PAGE_SHIFT; vmspace->vm_dsize = (a_out->a_data + bss_size) >> PAGE_SHIFT; vmspace->vm_taddr = (caddr_t) (uintptr_t) virtual_offset; vmspace->vm_daddr = (caddr_t) (uintptr_t) (virtual_offset + a_out->a_text); /* Fill in image_params */ imgp->interpreted = 0; imgp->entry_addr = a_out->a_entry; imgp->proc->p_sysent = &aout_sysvec; return (0); } /* * Tell kern_execve.c about it, with a little help from the linker. */ static struct execsw aout_execsw = { exec_aout_imgact, "a.out" }; EXEC_SET(aout, aout_execsw); Index: projects/clang380-import/sys/kern/init_main.c =================================================================== --- projects/clang380-import/sys/kern/init_main.c (revision 293686) +++ projects/clang380-import/sys/kern/init_main.c (revision 293687) @@ -1,879 +1,880 @@ /*- * Copyright (c) 1995 Terrence R. Lambert * All rights reserved. * * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)init_main.c 8.9 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_init_path.h" #include "opt_verbose_sysinit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void mi_startup(void); /* Should be elsewhere */ /* Components of the first process -- never freed. */ static struct session session0; static struct pgrp pgrp0; struct proc proc0; struct thread thread0 __aligned(16); struct vmspace vmspace0; struct proc *initproc; #ifndef BOOTHOWTO #define BOOTHOWTO 0 #endif int boothowto = BOOTHOWTO; /* initialized so that it can be patched */ SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "Boot control flags, passed from loader"); #ifndef BOOTVERBOSE #define BOOTVERBOSE 0 #endif int bootverbose = BOOTVERBOSE; SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "Control the output of verbose kernel messages"); #ifdef INVARIANTS FEATURE(invariants, "Kernel compiled with INVARIANTS, may affect performance"); #endif /* * This ensures that there is at least one entry so that the sysinit_set * symbol is not undefined. A sybsystem ID of SI_SUB_DUMMY is never * executed. */ SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL); /* * The sysinit table itself. Items are checked off as the are run. * If we want to register new sysinit types, add them to newsysinit. */ SET_DECLARE(sysinit_set, struct sysinit); struct sysinit **sysinit, **sysinit_end; struct sysinit **newsysinit, **newsysinit_end; /* * Merge a new sysinit set into the current set, reallocating it if * necessary. This can only be called after malloc is running. */ void sysinit_add(struct sysinit **set, struct sysinit **set_end) { struct sysinit **newset; struct sysinit **sipp; struct sysinit **xipp; int count; count = set_end - set; if (newsysinit) count += newsysinit_end - newsysinit; else count += sysinit_end - sysinit; newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT); if (newset == NULL) panic("cannot malloc for sysinit"); xipp = newset; if (newsysinit) for (sipp = newsysinit; sipp < newsysinit_end; sipp++) *xipp++ = *sipp; else for (sipp = sysinit; sipp < sysinit_end; sipp++) *xipp++ = *sipp; for (sipp = set; sipp < set_end; sipp++) *xipp++ = *sipp; if (newsysinit) free(newsysinit, M_TEMP); newsysinit = newset; newsysinit_end = newset + count; } #if defined (DDB) && defined(VERBOSE_SYSINIT) static const char * symbol_name(vm_offset_t va, db_strategy_t strategy) { const char *name; c_db_sym_t sym; db_expr_t offset; if (va == 0) return (NULL); sym = db_search_symbol(va, strategy, &offset); if (offset != 0) return (NULL); db_symbol_values(sym, &name, NULL); return (name); } #endif /* * System startup; initialize the world, create process 0, mount root * filesystem, and fork to create init and pagedaemon. Most of the * hard work is done in the lower-level initialization routines including * startup(), which does memory initialization and autoconfiguration. * * This allows simple addition of new kernel subsystems that require * boot time initialization. It also allows substitution of subsystem * (for instance, a scheduler, kernel profiler, or VM system) by object * module. Finally, it allows for optional "kernel threads". */ void mi_startup(void) { register struct sysinit **sipp; /* system initialization*/ register struct sysinit **xipp; /* interior loop of sort*/ register struct sysinit *save; /* bubble*/ #if defined(VERBOSE_SYSINIT) int last; int verbose; #endif if (boothowto & RB_VERBOSE) bootverbose++; if (sysinit == NULL) { sysinit = SET_BEGIN(sysinit_set); sysinit_end = SET_LIMIT(sysinit_set); } restart: /* * Perform a bubble sort of the system initialization objects by * their subsystem (primary key) and order (secondary key). */ for (sipp = sysinit; sipp < sysinit_end; sipp++) { for (xipp = sipp + 1; xipp < sysinit_end; xipp++) { if ((*sipp)->subsystem < (*xipp)->subsystem || ((*sipp)->subsystem == (*xipp)->subsystem && (*sipp)->order <= (*xipp)->order)) continue; /* skip*/ save = *sipp; *sipp = *xipp; *xipp = save; } } #if defined(VERBOSE_SYSINIT) last = SI_SUB_COPYRIGHT; verbose = 0; #if !defined(DDB) printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n"); #endif #endif /* * Traverse the (now) ordered list of system initialization tasks. * Perform each task, and continue on to the next task. */ for (sipp = sysinit; sipp < sysinit_end; sipp++) { if ((*sipp)->subsystem == SI_SUB_DUMMY) continue; /* skip dummy task(s)*/ if ((*sipp)->subsystem == SI_SUB_DONE) continue; #if defined(VERBOSE_SYSINIT) if ((*sipp)->subsystem > last) { verbose = 1; last = (*sipp)->subsystem; printf("subsystem %x\n", last); } if (verbose) { #if defined(DDB) const char *func, *data; func = symbol_name((vm_offset_t)(*sipp)->func, DB_STGY_PROC); data = symbol_name((vm_offset_t)(*sipp)->udata, DB_STGY_ANY); if (func != NULL && data != NULL) printf(" %s(&%s)... ", func, data); else if (func != NULL) printf(" %s(%p)... ", func, (*sipp)->udata); else #endif printf(" %p(%p)... ", (*sipp)->func, (*sipp)->udata); } #endif /* Call function */ (*((*sipp)->func))((*sipp)->udata); #if defined(VERBOSE_SYSINIT) if (verbose) printf("done.\n"); #endif /* Check off the one we're just done */ (*sipp)->subsystem = SI_SUB_DONE; /* Check if we've installed more sysinit items via KLD */ if (newsysinit != NULL) { if (sysinit != SET_BEGIN(sysinit_set)) free(sysinit, M_TEMP); sysinit = newsysinit; sysinit_end = newsysinit_end; newsysinit = NULL; newsysinit_end = NULL; goto restart; } } mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED); mtx_unlock(&Giant); /* * Now hand over this thread to swapper. */ swapper(); /* NOTREACHED*/ } /* *************************************************************************** **** **** The following SYSINIT's belong elsewhere, but have not yet **** been moved. **** *************************************************************************** */ static void print_caddr_t(void *data) { printf("%s", (char *)data); } static void print_version(void *data __unused) { int len; /* Strip a trailing newline from version. */ len = strlen(version); while (len > 0 && version[len - 1] == '\n') len--; printf("%.*s %s\n", len, version, machine); printf("%s\n", compiler_version); } SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright); SYSINIT(trademark, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, trademark); SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_THIRD, print_version, NULL); #ifdef WITNESS static char wit_warn[] = "WARNING: WITNESS option enabled, expect reduced performance.\n"; SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1, print_caddr_t, wit_warn); SYSINIT(witwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 1, print_caddr_t, wit_warn); #endif #ifdef DIAGNOSTIC static char diag_warn[] = "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n"; SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 2, print_caddr_t, diag_warn); SYSINIT(diagwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 2, print_caddr_t, diag_warn); #endif static int null_fetch_syscall_args(struct thread *td __unused, struct syscall_args *sa __unused) { panic("null_fetch_syscall_args"); } static void null_set_syscall_retval(struct thread *td __unused, int error __unused) { panic("null_set_syscall_retval"); } struct sysentvec null_sysvec = { .sv_size = 0, .sv_table = NULL, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = NULL, .sv_sendsig = NULL, .sv_sigcode = NULL, .sv_szsigcode = NULL, .sv_name = "null", .sv_coredump = NULL, .sv_imgact_try = NULL, .sv_minsigstksz = 0, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = NULL, .sv_setregs = NULL, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = 0, .sv_set_syscall_retval = null_set_syscall_retval, .sv_fetch_syscall_args = null_fetch_syscall_args, .sv_syscallnames = NULL, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; /* *************************************************************************** **** **** The two following SYSINIT's are proc0 specific glue code. I am not **** convinced that they can not be safely combined, but their order of **** operation has been maintained as the same as the original init_main.c **** for right now. **** **** These probably belong in init_proc.c or kern_proc.c, since they **** deal with proc0 (the fork template process). **** *************************************************************************** */ /* ARGSUSED*/ static void proc0_init(void *dummy __unused) { struct proc *p; struct thread *td; struct ucred *newcred; vm_paddr_t pageablemem; int i; GIANT_REQUIRED; p = &proc0; td = &thread0; /* * Initialize magic number and osrel. */ p->p_magic = P_MAGIC; p->p_osrel = osreldate; /* * Initialize thread and process structures. */ procinit(); /* set up proc zone */ threadinit(); /* set up UMA zones */ /* * Initialise scheduler resources. * Add scheduler specific parts to proc, thread as needed. */ schedinit(); /* scheduler gets its house in order */ /* * Create process 0 (the swapper). */ LIST_INSERT_HEAD(&allproc, p, p_list); LIST_INSERT_HEAD(PIDHASH(0), p, p_hash); mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); p->p_pgrp = &pgrp0; LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); LIST_INIT(&pgrp0.pg_members); LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist); pgrp0.pg_session = &session0; mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF); refcount_init(&session0.s_count, 1); session0.s_leader = p; p->p_sysent = &null_sysvec; p->p_flag = P_SYSTEM | P_INMEM; p->p_flag2 = 0; p->p_state = PRS_NORMAL; knlist_init_mtx(&p->p_klist, &p->p_mtx); STAILQ_INIT(&p->p_ktr); p->p_nice = NZERO; /* pid_max cannot be greater than PID_MAX */ td->td_tid = PID_MAX + 1; LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash); td->td_state = TDS_RUNNING; td->td_pri_class = PRI_TIMESHARE; td->td_user_pri = PUSER; td->td_base_user_pri = PUSER; td->td_lend_user_pri = PRI_MAX; td->td_priority = PVM; td->td_base_pri = PVM; td->td_oncpu = 0; td->td_flags = TDF_INMEM; td->td_pflags = TDP_KTHREAD; td->td_cpuset = cpuset_thread0(); vm_domain_policy_init(&td->td_vm_dom_policy); vm_domain_policy_set(&td->td_vm_dom_policy, VM_POLICY_NONE, -1); vm_domain_policy_init(&p->p_vm_dom_policy); vm_domain_policy_set(&p->p_vm_dom_policy, VM_POLICY_NONE, -1); prison0_init(); p->p_peers = 0; p->p_leader = p; p->p_reaper = p; LIST_INIT(&p->p_reaplist); strncpy(p->p_comm, "kernel", sizeof (p->p_comm)); strncpy(td->td_name, "swapper", sizeof (td->td_name)); callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0); callout_init_mtx(&p->p_limco, &p->p_mtx, 0); callout_init(&td->td_slpcallout, 1); /* Create credentials. */ newcred = crget(); newcred->cr_ngroups = 1; /* group 0 */ newcred->cr_uidinfo = uifind(0); newcred->cr_ruidinfo = uifind(0); newcred->cr_prison = &prison0; newcred->cr_loginclass = loginclass_find("default"); proc_set_cred_init(p, newcred); #ifdef AUDIT audit_cred_kproc0(newcred); #endif #ifdef MAC mac_cred_create_swapper(newcred); #endif /* Create sigacts. */ p->p_sigacts = sigacts_alloc(); /* Initialize signal state for process 0. */ siginit(&proc0); /* Create the file descriptor table. */ p->p_fd = fdinit(NULL, false); p->p_fdtol = NULL; /* Create the limits structures. */ p->p_limit = lim_alloc(); for (i = 0; i < RLIM_NLIMITS; i++) p->p_limit->pl_rlimit[i].rlim_cur = p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY; p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles; p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz; p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_max = maxdsiz; p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz; p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz; /* Cast to avoid overflow on i386/PAE. */ pageablemem = ptoa((vm_paddr_t)vm_cnt.v_free_count); p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem; p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3; p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem; p->p_cpulimit = RLIM_INFINITY; PROC_LOCK(p); thread_cow_get_proc(td, p); PROC_UNLOCK(p); /* Initialize resource accounting structures. */ racct_create(&p->p_racct); p->p_stats = pstats_alloc(); /* Allocate a prototype map so we have something to fork. */ p->p_vmspace = &vmspace0; vmspace0.vm_refcnt = 1; pmap_pinit0(vmspace_pmap(&vmspace0)); /* * proc0 is not expected to enter usermode, so there is no special * handling for sv_minuser here, like is done for exec_new_vmspace(). */ vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0), p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser); /* * Call the init and ctor for the new thread and proc. We wait * to do this until all other structures are fairly sane. */ EVENTHANDLER_INVOKE(process_init, p); EVENTHANDLER_INVOKE(thread_init, td); EVENTHANDLER_INVOKE(process_ctor, p); EVENTHANDLER_INVOKE(thread_ctor, td); /* * Charge root for one process. */ (void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0); PROC_LOCK(p); racct_add_force(p, RACCT_NPROC, 1); PROC_UNLOCK(p); } SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL); /* ARGSUSED*/ static void proc0_post(void *dummy __unused) { struct timespec ts; struct proc *p; struct rusage ru; struct thread *td; /* * Now we can look at the time, having had a chance to verify the * time from the filesystem. Pretend that proc0 started now. */ sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { microuptime(&p->p_stats->p_start); PROC_STATLOCK(p); rufetch(p, &ru); /* Clears thread stats */ PROC_STATUNLOCK(p); p->p_rux.rux_runtime = 0; p->p_rux.rux_uticks = 0; p->p_rux.rux_sticks = 0; p->p_rux.rux_iticks = 0; FOREACH_THREAD_IN_PROC(p, td) { td->td_runtime = 0; } } sx_sunlock(&allproc_lock); PCPU_SET(switchtime, cpu_ticks()); PCPU_SET(switchticks, ticks); /* * Give the ``random'' number generator a thump. */ nanotime(&ts); srandom(ts.tv_sec ^ ts.tv_nsec); } SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL); static void random_init(void *dummy __unused) { /* * After CPU has been started we have some randomness on most * platforms via get_cyclecount(). For platforms that don't * we will reseed random(9) in proc0_post() as well. */ srandom(get_cyclecount()); } SYSINIT(random, SI_SUB_RANDOM, SI_ORDER_FIRST, random_init, NULL); /* *************************************************************************** **** **** The following SYSINIT's and glue code should be moved to the **** respective files on a per subsystem basis. **** *************************************************************************** */ /* *************************************************************************** **** **** The following code probably belongs in another file, like **** kern/init_init.c. **** *************************************************************************** */ /* * List of paths to try when searching for "init". */ static char init_path[MAXPATHLEN] = #ifdef INIT_PATH __XSTRING(INIT_PATH); #else "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init"; #endif SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0, "Path used to search the init process"); /* * Shutdown timeout of init(8). * Unused within kernel, but used to control init(8), hence do not remove. */ #ifndef INIT_SHUTDOWN_TIMEOUT #define INIT_SHUTDOWN_TIMEOUT 120 #endif static int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT; SYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout, CTLFLAG_RW, &init_shutdown_timeout, 0, "Shutdown timeout of init(8). " "Unused within kernel, but used to control init(8)"); /* * Start the initial user process; try exec'ing each pathname in init_path. * The program is invoked with one argument containing the boot flags. */ static void start_init(void *dummy) { vm_offset_t addr; struct execve_args args; int options, error; char *var, *path, *next, *s; char *ucp, **uap, *arg0, *arg1; struct thread *td; struct proc *p; mtx_lock(&Giant); GIANT_REQUIRED; td = curthread; p = td->td_proc; vfs_mountroot(); /* Wipe GELI passphrase from the environment. */ kern_unsetenv("kern.geom.eli.passphrase"); /* * Need just enough stack to hold the faked-up "execve()" arguments. */ addr = p->p_sysent->sv_usrstack - PAGE_SIZE; if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE, 0, VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0) panic("init: couldn't allocate argument space"); p->p_vmspace->vm_maxsaddr = (caddr_t)addr; p->p_vmspace->vm_ssize = 1; if ((var = kern_getenv("init_path")) != NULL) { strlcpy(init_path, var, sizeof(init_path)); freeenv(var); } for (path = init_path; *path != '\0'; path = next) { while (*path == ':') path++; if (*path == '\0') break; for (next = path; *next != '\0' && *next != ':'; next++) /* nothing */ ; if (bootverbose) printf("start_init: trying %.*s\n", (int)(next - path), path); /* * Move out the boot flag argument. */ options = 0; ucp = (char *)p->p_sysent->sv_usrstack; (void)subyte(--ucp, 0); /* trailing zero */ if (boothowto & RB_SINGLE) { (void)subyte(--ucp, 's'); options = 1; } #ifdef notyet if (boothowto & RB_FASTBOOT) { (void)subyte(--ucp, 'f'); options = 1; } #endif #ifdef BOOTCDROM (void)subyte(--ucp, 'C'); options = 1; #endif if (options == 0) (void)subyte(--ucp, '-'); (void)subyte(--ucp, '-'); /* leading hyphen */ arg1 = ucp; /* * Move out the file name (also arg 0). */ (void)subyte(--ucp, 0); for (s = next - 1; s >= path; s--) (void)subyte(--ucp, *s); arg0 = ucp; /* * Move out the arg pointers. */ uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1)); (void)suword((caddr_t)--uap, (long)0); /* terminator */ (void)suword((caddr_t)--uap, (long)(intptr_t)arg1); (void)suword((caddr_t)--uap, (long)(intptr_t)arg0); /* * Point at the arguments. */ args.fname = arg0; args.argv = uap; args.envv = NULL; /* * Now try to exec the program. If can't for any reason * other than it doesn't exist, complain. * * Otherwise, return via fork_trampoline() all the way * to user mode as init! */ if ((error = sys_execve(td, &args)) == 0) { mtx_unlock(&Giant); return; } if (error != ENOENT) printf("exec %.*s: error %d\n", (int)(next - path), path, error); } printf("init: not found in path %s\n", init_path); panic("no init"); } /* * Like kproc_create(), but runs in it's own address space. * We do this early to reserve pid 1. * * Note special case - do not make it runnable yet. Other work * in progress will change this more. */ static void create_init(const void *udata __unused) { struct ucred *newcred, *oldcred; struct thread *td; int error; error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc, NULL, 0, NULL); if (error) panic("cannot fork init: %d\n", error); KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1")); /* divorce init's credentials from the kernel's */ newcred = crget(); sx_xlock(&proctree_lock); PROC_LOCK(initproc); initproc->p_flag |= P_SYSTEM | P_INMEM; initproc->p_treeflag |= P_TREE_REAPER; LIST_INSERT_HEAD(&initproc->p_reaplist, &proc0, p_reapsibling); oldcred = initproc->p_ucred; crcopy(newcred, oldcred); #ifdef MAC mac_cred_create_init(newcred); #endif #ifdef AUDIT audit_cred_proc1(newcred); #endif proc_set_cred(initproc, newcred); td = FIRST_THREAD_IN_PROC(initproc); crfree(td->td_ucred); td->td_ucred = crhold(initproc->p_ucred); PROC_UNLOCK(initproc); sx_xunlock(&proctree_lock); crfree(oldcred); cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL); } SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL); /* * Make it runnable now. */ static void kick_init(const void *udata __unused) { struct thread *td; td = FIRST_THREAD_IN_PROC(initproc); thread_lock(td); TD_SET_CAN_RUN(td); sched_add(td, SRQ_BORING); thread_unlock(td); } SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL); Index: projects/clang380-import/sys/kern/kern_condvar.c =================================================================== --- projects/clang380-import/sys/kern/kern_condvar.c (revision 293686) +++ projects/clang380-import/sys/kern/kern_condvar.c (revision 293687) @@ -1,457 +1,476 @@ /*- * Copyright (c) 2000 Jake Burkholder . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include #include +#include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #include #endif /* + * A bound below which cv_waiters is valid. Once cv_waiters reaches this bound, + * cv_signal must manually check the wait queue for threads. + */ +#define CV_WAITERS_BOUND INT_MAX + +#define CV_WAITERS_INC(cvp) do { \ + if ((cvp)->cv_waiters < CV_WAITERS_BOUND) \ + (cvp)->cv_waiters++; \ +} while (0) + +/* * Common sanity checks for cv_wait* functions. */ #define CV_ASSERT(cvp, lock, td) do { \ KASSERT((td) != NULL, ("%s: td NULL", __func__)); \ KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__)); \ KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__)); \ KASSERT((lock) != NULL, ("%s: lock NULL", __func__)); \ } while (0) /* * Initialize a condition variable. Must be called before use. */ void cv_init(struct cv *cvp, const char *desc) { cvp->cv_description = desc; cvp->cv_waiters = 0; } /* * Destroy a condition variable. The condition variable must be re-initialized * in order to be re-used. */ void cv_destroy(struct cv *cvp) { #ifdef INVARIANTS struct sleepqueue *sq; sleepq_lock(cvp); sq = sleepq_lookup(cvp); sleepq_release(cvp); KASSERT(sq == NULL, ("%s: associated sleep queue non-empty", __func__)); #endif } /* * Wait on a condition variable. The current thread is placed on the condition * variable's wait queue and suspended. A cv_signal or cv_broadcast on the same * condition variable will resume the thread. The mutex is released before * sleeping and will be held on return. It is recommended that the mutex be * held when cv_signal or cv_broadcast are called. */ void _cv_wait(struct cv *cvp, struct lock_object *lock) { WITNESS_SAVE_DECL(lock_witness); struct lock_class *class; struct thread *td; uintptr_t lock_state; td = curthread; lock_state = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0, cv_wmesg(cvp)); #endif CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); class = LOCK_CLASS(lock); if (cold || panicstr) { /* * During autoconfiguration, just give interrupts * a chance, then just return. Don't run any other * thread or panic below, in case this is the idle * process and already asleep. */ return; } sleepq_lock(cvp); - cvp->cv_waiters++; + CV_WAITERS_INC(cvp); if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); if (lock != &Giant.lock_object) { if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); if (class->lc_flags & LC_SLEEPABLE) sleepq_lock(cvp); } sleepq_wait(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, cv_wmesg(cvp)); #endif PICKUP_GIANT(); if (lock != &Giant.lock_object) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } } /* * Wait on a condition variable. This function differs from cv_wait by * not aquiring the mutex after condition variable was signaled. */ void _cv_wait_unlock(struct cv *cvp, struct lock_object *lock) { struct lock_class *class; struct thread *td; td = curthread; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0, cv_wmesg(cvp)); #endif CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); KASSERT(lock != &Giant.lock_object, ("cv_wait_unlock cannot be used with Giant")); class = LOCK_CLASS(lock); if (cold || panicstr) { /* * During autoconfiguration, just give interrupts * a chance, then just return. Don't run any other * thread or panic below, in case this is the idle * process and already asleep. */ class->lc_unlock(lock); return; } sleepq_lock(cvp); - cvp->cv_waiters++; + CV_WAITERS_INC(cvp); DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); class->lc_unlock(lock); if (class->lc_flags & LC_SLEEPABLE) sleepq_lock(cvp); sleepq_wait(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, cv_wmesg(cvp)); #endif PICKUP_GIANT(); } /* * Wait on a condition variable, allowing interruption by signals. Return 0 if * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if * a signal was caught. If ERESTART is returned the system call should be * restarted if possible. */ int _cv_wait_sig(struct cv *cvp, struct lock_object *lock) { WITNESS_SAVE_DECL(lock_witness); struct lock_class *class; struct thread *td; uintptr_t lock_state; int rval; td = curthread; lock_state = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0, cv_wmesg(cvp)); #endif CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); class = LOCK_CLASS(lock); if (cold || panicstr) { /* * After a panic, or during autoconfiguration, just give * interrupts a chance, then just return; don't run any other * procs or panic below, in case this is the idle process and * already asleep. */ return (0); } sleepq_lock(cvp); - cvp->cv_waiters++; + CV_WAITERS_INC(cvp); if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR | SLEEPQ_INTERRUPTIBLE, 0); if (lock != &Giant.lock_object) { if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); if (class->lc_flags & LC_SLEEPABLE) sleepq_lock(cvp); } rval = sleepq_wait_sig(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, cv_wmesg(cvp)); #endif PICKUP_GIANT(); if (lock != &Giant.lock_object) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } return (rval); } /* * Wait on a condition variable for (at most) the value specified in sbt * argument. Returns 0 if the process was resumed by cv_signal or cv_broadcast, * EWOULDBLOCK if the timeout expires. */ int _cv_timedwait_sbt(struct cv *cvp, struct lock_object *lock, sbintime_t sbt, sbintime_t pr, int flags) { WITNESS_SAVE_DECL(lock_witness); struct lock_class *class; struct thread *td; int lock_state, rval; td = curthread; lock_state = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0, cv_wmesg(cvp)); #endif CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); class = LOCK_CLASS(lock); if (cold || panicstr) { /* * After a panic, or during autoconfiguration, just give * interrupts a chance, then just return; don't run any other * thread or panic below, in case this is the idle process and * already asleep. */ return 0; } sleepq_lock(cvp); - cvp->cv_waiters++; + CV_WAITERS_INC(cvp); if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); sleepq_set_timeout_sbt(cvp, sbt, pr, flags); if (lock != &Giant.lock_object) { if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); if (class->lc_flags & LC_SLEEPABLE) sleepq_lock(cvp); } rval = sleepq_timedwait(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, cv_wmesg(cvp)); #endif PICKUP_GIANT(); if (lock != &Giant.lock_object) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } return (rval); } /* * Wait on a condition variable for (at most) the value specified in sbt * argument, allowing interruption by signals. * Returns 0 if the thread was resumed by cv_signal or cv_broadcast, * EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if a signal * was caught. */ int _cv_timedwait_sig_sbt(struct cv *cvp, struct lock_object *lock, sbintime_t sbt, sbintime_t pr, int flags) { WITNESS_SAVE_DECL(lock_witness); struct lock_class *class; struct thread *td; int lock_state, rval; td = curthread; lock_state = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0, cv_wmesg(cvp)); #endif CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); class = LOCK_CLASS(lock); if (cold || panicstr) { /* * After a panic, or during autoconfiguration, just give * interrupts a chance, then just return; don't run any other * thread or panic below, in case this is the idle process and * already asleep. */ return 0; } sleepq_lock(cvp); - cvp->cv_waiters++; + CV_WAITERS_INC(cvp); if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR | SLEEPQ_INTERRUPTIBLE, 0); sleepq_set_timeout_sbt(cvp, sbt, pr, flags); if (lock != &Giant.lock_object) { if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); if (class->lc_flags & LC_SLEEPABLE) sleepq_lock(cvp); } rval = sleepq_timedwait_sig(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, cv_wmesg(cvp)); #endif PICKUP_GIANT(); if (lock != &Giant.lock_object) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } return (rval); } /* * Signal a condition variable, wakes up one waiting thread. Will also wakeup * the swapper if the process is not in memory, so that it can bring the * sleeping process in. Note that this may also result in additional threads * being made runnable. Should be called with the same mutex as was passed to * cv_wait held. */ void cv_signal(struct cv *cvp) { int wakeup_swapper; wakeup_swapper = 0; sleepq_lock(cvp); if (cvp->cv_waiters > 0) { - cvp->cv_waiters--; - wakeup_swapper = sleepq_signal(cvp, SLEEPQ_CONDVAR, 0, 0); + if (cvp->cv_waiters == CV_WAITERS_BOUND && + sleepq_lookup(cvp) == NULL) { + cvp->cv_waiters = 0; + } else { + if (cvp->cv_waiters < CV_WAITERS_BOUND) + cvp->cv_waiters--; + wakeup_swapper = sleepq_signal(cvp, SLEEPQ_CONDVAR, 0, + 0); + } } sleepq_release(cvp); if (wakeup_swapper) kick_proc0(); } /* * Broadcast a signal to a condition variable. Wakes up all waiting threads. * Should be called with the same mutex as was passed to cv_wait held. */ void cv_broadcastpri(struct cv *cvp, int pri) { int wakeup_swapper; /* * XXX sleepq_broadcast pri argument changed from -1 meaning * no pri to 0 meaning no pri. */ wakeup_swapper = 0; if (pri == -1) pri = 0; sleepq_lock(cvp); if (cvp->cv_waiters > 0) { cvp->cv_waiters = 0; wakeup_swapper = sleepq_broadcast(cvp, SLEEPQ_CONDVAR, pri, 0); } sleepq_release(cvp); if (wakeup_swapper) kick_proc0(); } Index: projects/clang380-import/sys/kern/uipc_mbuf.c =================================================================== --- projects/clang380-import/sys/kern/uipc_mbuf.c (revision 293686) +++ projects/clang380-import/sys/kern/uipc_mbuf.c (revision 293687) @@ -1,2061 +1,2065 @@ /*- * Copyright (c) 1982, 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_param.h" #include "opt_mbuf_stress_test.h" #include "opt_mbuf_profiling.h" #include #include #include #include #include #include #include #include #include #include #include int max_linkhdr; int max_protohdr; int max_hdr; int max_datalen; #ifdef MBUF_STRESS_TEST int m_defragpackets; int m_defragbytes; int m_defraguseless; int m_defragfailure; int m_defragrandomfailures; #endif /* * sysctl(8) exported objects */ SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RD, &max_linkhdr, 0, "Size of largest link layer header"); SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RD, &max_protohdr, 0, "Size of largest protocol layer header"); SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RD, &max_hdr, 0, "Size of largest link plus protocol header"); SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RD, &max_datalen, 0, "Minimum space left in mbuf after max_hdr"); #ifdef MBUF_STRESS_TEST SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD, &m_defragpackets, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD, &m_defragbytes, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD, &m_defraguseless, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD, &m_defragfailure, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW, &m_defragrandomfailures, 0, ""); #endif /* * Ensure the correct size of various mbuf parameters. It could be off due * to compiler-induced padding and alignment artifacts. */ CTASSERT(MSIZE - offsetof(struct mbuf, m_dat) == MLEN); CTASSERT(MSIZE - offsetof(struct mbuf, m_pktdat) == MHLEN); /* * mbuf data storage should be 64-bit aligned regardless of architectural * pointer size; check this is the case with and without a packet header. */ CTASSERT(offsetof(struct mbuf, m_dat) % 8 == 0); CTASSERT(offsetof(struct mbuf, m_pktdat) % 8 == 0); /* * While the specific values here don't matter too much (i.e., +/- a few * words), we do want to ensure that changes to these values are carefully * reasoned about and properly documented. This is especially the case as * network-protocol and device-driver modules encode these layouts, and must * be recompiled if the structures change. Check these values at compile time * against the ones documented in comments in mbuf.h. * * NB: Possibly they should be documented there via #define's and not just * comments. */ #if defined(__LP64__) CTASSERT(offsetof(struct mbuf, m_dat) == 32); CTASSERT(sizeof(struct pkthdr) == 56); CTASSERT(sizeof(struct m_ext) == 48); #else CTASSERT(offsetof(struct mbuf, m_dat) == 24); CTASSERT(sizeof(struct pkthdr) == 48); CTASSERT(sizeof(struct m_ext) == 28); #endif /* * Assert that the queue(3) macros produce code of the same size as an old * plain pointer does. */ #ifdef INVARIANTS static struct mbuf m_assertbuf; CTASSERT(sizeof(m_assertbuf.m_slist) == sizeof(m_assertbuf.m_next)); CTASSERT(sizeof(m_assertbuf.m_stailq) == sizeof(m_assertbuf.m_next)); CTASSERT(sizeof(m_assertbuf.m_slistpkt) == sizeof(m_assertbuf.m_nextpkt)); CTASSERT(sizeof(m_assertbuf.m_stailqpkt) == sizeof(m_assertbuf.m_nextpkt)); #endif /* * m_get2() allocates minimum mbuf that would fit "size" argument. */ struct mbuf * m_get2(int size, int how, short type, int flags) { struct mb_args args; struct mbuf *m, *n; args.flags = flags; args.type = type; if (size <= MHLEN || (size <= MLEN && (flags & M_PKTHDR) == 0)) return (uma_zalloc_arg(zone_mbuf, &args, how)); if (size <= MCLBYTES) return (uma_zalloc_arg(zone_pack, &args, how)); if (size > MJUMPAGESIZE) return (NULL); m = uma_zalloc_arg(zone_mbuf, &args, how); if (m == NULL) return (NULL); n = uma_zalloc_arg(zone_jumbop, m, how); if (n == NULL) { uma_zfree(zone_mbuf, m); return (NULL); } return (m); } /* * m_getjcl() returns an mbuf with a cluster of the specified size attached. * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. */ struct mbuf * m_getjcl(int how, short type, int flags, int size) { struct mb_args args; struct mbuf *m, *n; uma_zone_t zone; if (size == MCLBYTES) return m_getcl(how, type, flags); args.flags = flags; args.type = type; m = uma_zalloc_arg(zone_mbuf, &args, how); if (m == NULL) return (NULL); zone = m_getzone(size); n = uma_zalloc_arg(zone, m, how); if (n == NULL) { uma_zfree(zone_mbuf, m); return (NULL); } return (m); } /* * Allocate a given length worth of mbufs and/or clusters (whatever fits * best) and return a pointer to the top of the allocated chain. If an * existing mbuf chain is provided, then we will append the new chain * to the existing one but still return the top of the newly allocated * chain. */ struct mbuf * m_getm2(struct mbuf *m, int len, int how, short type, int flags) { struct mbuf *mb, *nm = NULL, *mtail = NULL; KASSERT(len >= 0, ("%s: len is < 0", __func__)); /* Validate flags. */ flags &= (M_PKTHDR | M_EOR); /* Packet header mbuf must be first in chain. */ if ((flags & M_PKTHDR) && m != NULL) flags &= ~M_PKTHDR; /* Loop and append maximum sized mbufs to the chain tail. */ while (len > 0) { if (len > MCLBYTES) mb = m_getjcl(how, type, (flags & M_PKTHDR), MJUMPAGESIZE); else if (len >= MINCLSIZE) mb = m_getcl(how, type, (flags & M_PKTHDR)); else if (flags & M_PKTHDR) mb = m_gethdr(how, type); else mb = m_get(how, type); /* Fail the whole operation if one mbuf can't be allocated. */ if (mb == NULL) { if (nm != NULL) m_freem(nm); return (NULL); } /* Book keeping. */ len -= M_SIZE(mb); if (mtail != NULL) mtail->m_next = mb; else nm = mb; mtail = mb; flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */ } if (flags & M_EOR) mtail->m_flags |= M_EOR; /* Only valid on the last mbuf. */ /* If mbuf was supplied, append new chain to the end of it. */ if (m != NULL) { for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next) ; mtail->m_next = nm; mtail->m_flags &= ~M_EOR; } else m = nm; return (m); } /* * Free an entire chain of mbufs and associated external buffers, if * applicable. */ void m_freem(struct mbuf *mb) { while (mb != NULL) mb = m_free(mb); } /*- * Configure a provided mbuf to refer to the provided external storage * buffer and setup a reference count for said buffer. If the setting * up of the reference count fails, the M_EXT bit will not be set. If * successfull, the M_EXT bit is set in the mbuf's flags. * * Arguments: * mb The existing mbuf to which to attach the provided buffer. * buf The address of the provided external storage buffer. * size The size of the provided buffer. * freef A pointer to a routine that is responsible for freeing the * provided external storage buffer. * args A pointer to an argument structure (of any type) to be passed * to the provided freef routine (may be NULL). * flags Any other flags to be passed to the provided mbuf. * type The type that the external storage buffer should be * labeled with. * * Returns: * Nothing. */ int m_extadd(struct mbuf *mb, caddr_t buf, u_int size, void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2, int flags, int type, int wait) { KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__)); if (type != EXT_EXTREF) mb->m_ext.ext_cnt = uma_zalloc(zone_ext_refcnt, wait); if (mb->m_ext.ext_cnt == NULL) return (ENOMEM); *(mb->m_ext.ext_cnt) = 1; mb->m_flags |= (M_EXT | flags); mb->m_ext.ext_buf = buf; mb->m_data = mb->m_ext.ext_buf; mb->m_ext.ext_size = size; mb->m_ext.ext_free = freef; mb->m_ext.ext_arg1 = arg1; mb->m_ext.ext_arg2 = arg2; mb->m_ext.ext_type = type; mb->m_ext.ext_flags = 0; return (0); } /* * Non-directly-exported function to clean up after mbufs with M_EXT * storage attached to them if the reference count hits 1. */ void mb_free_ext(struct mbuf *m) { int freembuf; KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m)); /* * Check if the header is embedded in the cluster. */ freembuf = (m->m_flags & M_NOFREE) ? 0 : 1; switch (m->m_ext.ext_type) { case EXT_SFBUF: sf_ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2); break; + case EXT_SFBUF_NOCACHE: + sf_ext_free_nocache(m->m_ext.ext_arg1, m->m_ext.ext_arg2); + break; default: KASSERT(m->m_ext.ext_cnt != NULL, ("%s: no refcounting pointer on %p", __func__, m)); /* * Free attached storage if this mbuf is the only * reference to it. */ if (*(m->m_ext.ext_cnt) != 1) { if (atomic_fetchadd_int(m->m_ext.ext_cnt, -1) != 1) break; } switch (m->m_ext.ext_type) { case EXT_PACKET: /* The packet zone is special. */ if (*(m->m_ext.ext_cnt) == 0) *(m->m_ext.ext_cnt) = 1; uma_zfree(zone_pack, m); return; /* Job done. */ case EXT_CLUSTER: uma_zfree(zone_clust, m->m_ext.ext_buf); break; case EXT_JUMBOP: uma_zfree(zone_jumbop, m->m_ext.ext_buf); break; case EXT_JUMBO9: uma_zfree(zone_jumbo9, m->m_ext.ext_buf); break; case EXT_JUMBO16: uma_zfree(zone_jumbo16, m->m_ext.ext_buf); break; case EXT_NET_DRV: case EXT_MOD_TYPE: case EXT_DISPOSABLE: *(m->m_ext.ext_cnt) = 0; uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *, m->m_ext.ext_cnt)); /* FALLTHROUGH */ case EXT_EXTREF: KASSERT(m->m_ext.ext_free != NULL, ("%s: ext_free not set", __func__)); (*(m->m_ext.ext_free))(m, m->m_ext.ext_arg1, m->m_ext.ext_arg2); break; default: KASSERT(m->m_ext.ext_type == 0, ("%s: unknown ext_type", __func__)); } } if (freembuf) uma_zfree(zone_mbuf, m); } /* * Attach the cluster from *m to *n, set up m_ext in *n * and bump the refcount of the cluster. */ void mb_dupcl(struct mbuf *n, const struct mbuf *m) { KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m)); KASSERT(!(n->m_flags & M_EXT), ("%s: M_EXT set on %p", __func__, n)); switch (m->m_ext.ext_type) { case EXT_SFBUF: + case EXT_SFBUF_NOCACHE: sf_ext_ref(m->m_ext.ext_arg1, m->m_ext.ext_arg2); break; default: KASSERT(m->m_ext.ext_cnt != NULL, ("%s: no refcounting pointer on %p", __func__, m)); if (*(m->m_ext.ext_cnt) == 1) *(m->m_ext.ext_cnt) += 1; else atomic_add_int(m->m_ext.ext_cnt, 1); } n->m_ext = m->m_ext; n->m_flags |= M_EXT; n->m_flags |= m->m_flags & M_RDONLY; } void m_demote_pkthdr(struct mbuf *m) { M_ASSERTPKTHDR(m); m_tag_delete_chain(m, NULL); m->m_flags &= ~M_PKTHDR; bzero(&m->m_pkthdr, sizeof(struct pkthdr)); } /* * Clean up mbuf (chain) from any tags and packet headers. * If "all" is set then the first mbuf in the chain will be * cleaned too. */ void m_demote(struct mbuf *m0, int all, int flags) { struct mbuf *m; for (m = all ? m0 : m0->m_next; m != NULL; m = m->m_next) { KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt in m %p, m0 %p", __func__, m, m0)); if (m->m_flags & M_PKTHDR) m_demote_pkthdr(m); m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE | flags); } } /* * Sanity checks on mbuf (chain) for use in KASSERT() and general * debugging. * Returns 0 or panics when bad and 1 on all tests passed. * Sanitize, 0 to run M_SANITY_ACTION, 1 to garble things so they * blow up later. */ int m_sanity(struct mbuf *m0, int sanitize) { struct mbuf *m; caddr_t a, b; int pktlen = 0; #ifdef INVARIANTS #define M_SANITY_ACTION(s) panic("mbuf %p: " s, m) #else #define M_SANITY_ACTION(s) printf("mbuf %p: " s, m) #endif for (m = m0; m != NULL; m = m->m_next) { /* * Basic pointer checks. If any of these fails then some * unrelated kernel memory before or after us is trashed. * No way to recover from that. */ a = M_START(m); b = a + M_SIZE(m); if ((caddr_t)m->m_data < a) M_SANITY_ACTION("m_data outside mbuf data range left"); if ((caddr_t)m->m_data > b) M_SANITY_ACTION("m_data outside mbuf data range right"); if ((caddr_t)m->m_data + m->m_len > b) M_SANITY_ACTION("m_data + m_len exeeds mbuf space"); /* m->m_nextpkt may only be set on first mbuf in chain. */ if (m != m0 && m->m_nextpkt != NULL) { if (sanitize) { m_freem(m->m_nextpkt); m->m_nextpkt = (struct mbuf *)0xDEADC0DE; } else M_SANITY_ACTION("m->m_nextpkt on in-chain mbuf"); } /* packet length (not mbuf length!) calculation */ if (m0->m_flags & M_PKTHDR) pktlen += m->m_len; /* m_tags may only be attached to first mbuf in chain. */ if (m != m0 && m->m_flags & M_PKTHDR && !SLIST_EMPTY(&m->m_pkthdr.tags)) { if (sanitize) { m_tag_delete_chain(m, NULL); /* put in 0xDEADC0DE perhaps? */ } else M_SANITY_ACTION("m_tags on in-chain mbuf"); } /* M_PKTHDR may only be set on first mbuf in chain */ if (m != m0 && m->m_flags & M_PKTHDR) { if (sanitize) { bzero(&m->m_pkthdr, sizeof(m->m_pkthdr)); m->m_flags &= ~M_PKTHDR; /* put in 0xDEADCODE and leave hdr flag in */ } else M_SANITY_ACTION("M_PKTHDR on in-chain mbuf"); } } m = m0; if (pktlen && pktlen != m->m_pkthdr.len) { if (sanitize) m->m_pkthdr.len = 0; else M_SANITY_ACTION("m_pkthdr.len != mbuf chain length"); } return 1; #undef M_SANITY_ACTION } /* * "Move" mbuf pkthdr from "from" to "to". * "from" must have M_PKTHDR set, and "to" must be empty. */ void m_move_pkthdr(struct mbuf *to, struct mbuf *from) { #if 0 /* see below for why these are not enabled */ M_ASSERTPKTHDR(to); /* Note: with MAC, this may not be a good assertion. */ KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_move_pkthdr: to has tags")); #endif #ifdef MAC /* * XXXMAC: It could be this should also occur for non-MAC? */ if (to->m_flags & M_PKTHDR) m_tag_delete_chain(to, NULL); #endif to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); if ((to->m_flags & M_EXT) == 0) to->m_data = to->m_pktdat; to->m_pkthdr = from->m_pkthdr; /* especially tags */ SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */ from->m_flags &= ~M_PKTHDR; } /* * Duplicate "from"'s mbuf pkthdr in "to". * "from" must have M_PKTHDR set, and "to" must be empty. * In particular, this does a deep copy of the packet tags. */ int m_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how) { #if 0 /* * The mbuf allocator only initializes the pkthdr * when the mbuf is allocated with m_gethdr(). Many users * (e.g. m_copy*, m_prepend) use m_get() and then * smash the pkthdr as needed causing these * assertions to trip. For now just disable them. */ M_ASSERTPKTHDR(to); /* Note: with MAC, this may not be a good assertion. */ KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags")); #endif MBUF_CHECKSLEEP(how); #ifdef MAC if (to->m_flags & M_PKTHDR) m_tag_delete_chain(to, NULL); #endif to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); if ((to->m_flags & M_EXT) == 0) to->m_data = to->m_pktdat; to->m_pkthdr = from->m_pkthdr; SLIST_INIT(&to->m_pkthdr.tags); return (m_tag_copy_chain(to, from, how)); } /* * Lesser-used path for M_PREPEND: * allocate new mbuf to prepend to chain, * copy junk along. */ struct mbuf * m_prepend(struct mbuf *m, int len, int how) { struct mbuf *mn; if (m->m_flags & M_PKTHDR) mn = m_gethdr(how, m->m_type); else mn = m_get(how, m->m_type); if (mn == NULL) { m_freem(m); return (NULL); } if (m->m_flags & M_PKTHDR) m_move_pkthdr(mn, m); mn->m_next = m; m = mn; if (len < M_SIZE(m)) M_ALIGN(m, len); m->m_len = len; return (m); } /* * Make a copy of an mbuf chain starting "off0" bytes from the beginning, * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. * The wait parameter is a choice of M_WAITOK/M_NOWAIT from caller. * Note that the copy is read-only, because clusters are not copied, * only their reference counts are incremented. */ struct mbuf * m_copym(const struct mbuf *m, int off0, int len, int wait) { struct mbuf *n, **np; int off = off0; struct mbuf *top; int copyhdr = 0; KASSERT(off >= 0, ("m_copym, negative off %d", off)); KASSERT(len >= 0, ("m_copym, negative len %d", len)); MBUF_CHECKSLEEP(wait); if (off == 0 && m->m_flags & M_PKTHDR) copyhdr = 1; while (off > 0) { KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain")); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } np = ⊤ top = 0; while (len > 0) { if (m == NULL) { KASSERT(len == M_COPYALL, ("m_copym, length > size of mbuf chain")); break; } if (copyhdr) n = m_gethdr(wait, m->m_type); else n = m_get(wait, m->m_type); *np = n; if (n == NULL) goto nospace; if (copyhdr) { if (!m_dup_pkthdr(n, m, wait)) goto nospace; if (len == M_COPYALL) n->m_pkthdr.len -= off0; else n->m_pkthdr.len = len; copyhdr = 0; } n->m_len = min(len, m->m_len - off); if (m->m_flags & M_EXT) { n->m_data = m->m_data + off; mb_dupcl(n, m); } else bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), (u_int)n->m_len); if (len != M_COPYALL) len -= n->m_len; off = 0; m = m->m_next; np = &n->m_next; } return (top); nospace: m_freem(top); return (NULL); } /* * Copy an entire packet, including header (which must be present). * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. * Note that the copy is read-only, because clusters are not copied, * only their reference counts are incremented. * Preserve alignment of the first mbuf so if the creator has left * some room at the beginning (e.g. for inserting protocol headers) * the copies still have the room available. */ struct mbuf * m_copypacket(struct mbuf *m, int how) { struct mbuf *top, *n, *o; MBUF_CHECKSLEEP(how); n = m_get(how, m->m_type); top = n; if (n == NULL) goto nospace; if (!m_dup_pkthdr(n, m, how)) goto nospace; n->m_len = m->m_len; if (m->m_flags & M_EXT) { n->m_data = m->m_data; mb_dupcl(n, m); } else { n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat ); bcopy(mtod(m, char *), mtod(n, char *), n->m_len); } m = m->m_next; while (m) { o = m_get(how, m->m_type); if (o == NULL) goto nospace; n->m_next = o; n = n->m_next; n->m_len = m->m_len; if (m->m_flags & M_EXT) { n->m_data = m->m_data; mb_dupcl(n, m); } else { bcopy(mtod(m, char *), mtod(n, char *), n->m_len); } m = m->m_next; } return top; nospace: m_freem(top); return (NULL); } /* * Copy data from an mbuf chain starting "off" bytes from the beginning, * continuing for "len" bytes, into the indicated buffer. */ void m_copydata(const struct mbuf *m, int off, int len, caddr_t cp) { u_int count; KASSERT(off >= 0, ("m_copydata, negative off %d", off)); KASSERT(len >= 0, ("m_copydata, negative len %d", len)); while (off > 0) { KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain")); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } while (len > 0) { KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain")); count = min(m->m_len - off, len); bcopy(mtod(m, caddr_t) + off, cp, count); len -= count; cp += count; off = 0; m = m->m_next; } } /* * Copy a packet header mbuf chain into a completely new chain, including * copying any mbuf clusters. Use this instead of m_copypacket() when * you need a writable copy of an mbuf chain. */ struct mbuf * m_dup(const struct mbuf *m, int how) { struct mbuf **p, *top = NULL; int remain, moff, nsize; MBUF_CHECKSLEEP(how); /* Sanity check */ if (m == NULL) return (NULL); M_ASSERTPKTHDR(m); /* While there's more data, get a new mbuf, tack it on, and fill it */ remain = m->m_pkthdr.len; moff = 0; p = ⊤ while (remain > 0 || top == NULL) { /* allow m->m_pkthdr.len == 0 */ struct mbuf *n; /* Get the next new mbuf */ if (remain >= MINCLSIZE) { n = m_getcl(how, m->m_type, 0); nsize = MCLBYTES; } else { n = m_get(how, m->m_type); nsize = MLEN; } if (n == NULL) goto nospace; if (top == NULL) { /* First one, must be PKTHDR */ if (!m_dup_pkthdr(n, m, how)) { m_free(n); goto nospace; } if ((n->m_flags & M_EXT) == 0) nsize = MHLEN; n->m_flags &= ~M_RDONLY; } n->m_len = 0; /* Link it into the new chain */ *p = n; p = &n->m_next; /* Copy data from original mbuf(s) into new mbuf */ while (n->m_len < nsize && m != NULL) { int chunk = min(nsize - n->m_len, m->m_len - moff); bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); moff += chunk; n->m_len += chunk; remain -= chunk; if (moff == m->m_len) { m = m->m_next; moff = 0; } } /* Check correct total mbuf length */ KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL), ("%s: bogus m_pkthdr.len", __func__)); } return (top); nospace: m_freem(top); return (NULL); } /* * Concatenate mbuf chain n to m. * Both chains must be of the same type (e.g. MT_DATA). * Any m_pkthdr is not updated. */ void m_cat(struct mbuf *m, struct mbuf *n) { while (m->m_next) m = m->m_next; while (n) { if (!M_WRITABLE(m) || M_TRAILINGSPACE(m) < n->m_len) { /* just join the two chains */ m->m_next = n; return; } /* splat the data from one into the other */ bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, (u_int)n->m_len); m->m_len += n->m_len; n = m_free(n); } } /* * Concatenate two pkthdr mbuf chains. */ void m_catpkt(struct mbuf *m, struct mbuf *n) { M_ASSERTPKTHDR(m); M_ASSERTPKTHDR(n); m->m_pkthdr.len += n->m_pkthdr.len; m_demote(n, 1, 0); m_cat(m, n); } void m_adj(struct mbuf *mp, int req_len) { int len = req_len; struct mbuf *m; int count; if ((m = mp) == NULL) return; if (len >= 0) { /* * Trim from head. */ while (m != NULL && len > 0) { if (m->m_len <= len) { len -= m->m_len; m->m_len = 0; m = m->m_next; } else { m->m_len -= len; m->m_data += len; len = 0; } } if (mp->m_flags & M_PKTHDR) mp->m_pkthdr.len -= (req_len - len); } else { /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. */ len = -len; count = 0; for (;;) { count += m->m_len; if (m->m_next == (struct mbuf *)0) break; m = m->m_next; } if (m->m_len >= len) { m->m_len -= len; if (mp->m_flags & M_PKTHDR) mp->m_pkthdr.len -= len; return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ m = mp; if (m->m_flags & M_PKTHDR) m->m_pkthdr.len = count; for (; m; m = m->m_next) { if (m->m_len >= count) { m->m_len = count; if (m->m_next != NULL) { m_freem(m->m_next); m->m_next = NULL; } break; } count -= m->m_len; } } } /* * Rearange an mbuf chain so that len bytes are contiguous * and in the data area of an mbuf (so that mtod will work * for a structure of size len). Returns the resulting * mbuf chain on success, frees it and returns null on failure. * If there is room, it will add up to max_protohdr-len extra bytes to the * contiguous region in an attempt to avoid being called next time. */ struct mbuf * m_pullup(struct mbuf *n, int len) { struct mbuf *m; int count; int space; /* * If first mbuf has no cluster, and has room for len bytes * without shifting current data, pullup into it, * otherwise allocate a new mbuf to prepend to the chain. */ if ((n->m_flags & M_EXT) == 0 && n->m_data + len < &n->m_dat[MLEN] && n->m_next) { if (n->m_len >= len) return (n); m = n; n = n->m_next; len -= m->m_len; } else { if (len > MHLEN) goto bad; m = m_get(M_NOWAIT, n->m_type); if (m == NULL) goto bad; if (n->m_flags & M_PKTHDR) m_move_pkthdr(m, n); } space = &m->m_dat[MLEN] - (m->m_data + m->m_len); do { count = min(min(max(len, max_protohdr), space), n->m_len); bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, (u_int)count); len -= count; m->m_len += count; n->m_len -= count; space -= count; if (n->m_len) n->m_data += count; else n = m_free(n); } while (len > 0 && n); if (len > 0) { (void) m_free(m); goto bad; } m->m_next = n; return (m); bad: m_freem(n); return (NULL); } /* * Like m_pullup(), except a new mbuf is always allocated, and we allow * the amount of empty space before the data in the new mbuf to be specified * (in the event that the caller expects to prepend later). */ struct mbuf * m_copyup(struct mbuf *n, int len, int dstoff) { struct mbuf *m; int count, space; if (len > (MHLEN - dstoff)) goto bad; m = m_get(M_NOWAIT, n->m_type); if (m == NULL) goto bad; if (n->m_flags & M_PKTHDR) m_move_pkthdr(m, n); m->m_data += dstoff; space = &m->m_dat[MLEN] - (m->m_data + m->m_len); do { count = min(min(max(len, max_protohdr), space), n->m_len); memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t), (unsigned)count); len -= count; m->m_len += count; n->m_len -= count; space -= count; if (n->m_len) n->m_data += count; else n = m_free(n); } while (len > 0 && n); if (len > 0) { (void) m_free(m); goto bad; } m->m_next = n; return (m); bad: m_freem(n); return (NULL); } /* * Partition an mbuf chain in two pieces, returning the tail -- * all but the first len0 bytes. In case of failure, it returns NULL and * attempts to restore the chain to its original state. * * Note that the resulting mbufs might be read-only, because the new * mbuf can end up sharing an mbuf cluster with the original mbuf if * the "breaking point" happens to lie within a cluster mbuf. Use the * M_WRITABLE() macro to check for this case. */ struct mbuf * m_split(struct mbuf *m0, int len0, int wait) { struct mbuf *m, *n; u_int len = len0, remain; MBUF_CHECKSLEEP(wait); for (m = m0; m && len > m->m_len; m = m->m_next) len -= m->m_len; if (m == NULL) return (NULL); remain = m->m_len - len; if (m0->m_flags & M_PKTHDR && remain == 0) { n = m_gethdr(wait, m0->m_type); if (n == NULL) return (NULL); n->m_next = m->m_next; m->m_next = NULL; n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; n->m_pkthdr.len = m0->m_pkthdr.len - len0; m0->m_pkthdr.len = len0; return (n); } else if (m0->m_flags & M_PKTHDR) { n = m_gethdr(wait, m0->m_type); if (n == NULL) return (NULL); n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; n->m_pkthdr.len = m0->m_pkthdr.len - len0; m0->m_pkthdr.len = len0; if (m->m_flags & M_EXT) goto extpacket; if (remain > MHLEN) { /* m can't be the lead packet */ M_ALIGN(n, 0); n->m_next = m_split(m, len, wait); if (n->m_next == NULL) { (void) m_free(n); return (NULL); } else { n->m_len = 0; return (n); } } else M_ALIGN(n, remain); } else if (remain == 0) { n = m->m_next; m->m_next = NULL; return (n); } else { n = m_get(wait, m->m_type); if (n == NULL) return (NULL); M_ALIGN(n, remain); } extpacket: if (m->m_flags & M_EXT) { n->m_data = m->m_data + len; mb_dupcl(n, m); } else { bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); } n->m_len = remain; m->m_len = len; n->m_next = m->m_next; m->m_next = NULL; return (n); } /* * Routine to copy from device local memory into mbufs. * Note that `off' argument is offset into first mbuf of target chain from * which to begin copying the data to. */ struct mbuf * m_devget(char *buf, int totlen, int off, struct ifnet *ifp, void (*copy)(char *from, caddr_t to, u_int len)) { struct mbuf *m; struct mbuf *top = NULL, **mp = ⊤ int len; if (off < 0 || off > MHLEN) return (NULL); while (totlen > 0) { if (top == NULL) { /* First one, must be PKTHDR */ if (totlen + off >= MINCLSIZE) { m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); len = MCLBYTES; } else { m = m_gethdr(M_NOWAIT, MT_DATA); len = MHLEN; /* Place initial small packet/header at end of mbuf */ if (m && totlen + off + max_linkhdr <= MLEN) { m->m_data += max_linkhdr; len -= max_linkhdr; } } if (m == NULL) return NULL; m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = totlen; } else { if (totlen + off >= MINCLSIZE) { m = m_getcl(M_NOWAIT, MT_DATA, 0); len = MCLBYTES; } else { m = m_get(M_NOWAIT, MT_DATA); len = MLEN; } if (m == NULL) { m_freem(top); return NULL; } } if (off) { m->m_data += off; len -= off; off = 0; } m->m_len = len = min(totlen, len); if (copy) copy(buf, mtod(m, caddr_t), (u_int)len); else bcopy(buf, mtod(m, caddr_t), (u_int)len); buf += len; *mp = m; mp = &m->m_next; totlen -= len; } return (top); } /* * Copy data from a buffer back into the indicated mbuf chain, * starting "off" bytes from the beginning, extending the mbuf * chain if necessary. */ void m_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp) { int mlen; struct mbuf *m = m0, *n; int totlen = 0; if (m0 == NULL) return; while (off > (mlen = m->m_len)) { off -= mlen; totlen += mlen; if (m->m_next == NULL) { n = m_get(M_NOWAIT, m->m_type); if (n == NULL) goto out; bzero(mtod(n, caddr_t), MLEN); n->m_len = min(MLEN, len + off); m->m_next = n; } m = m->m_next; } while (len > 0) { if (m->m_next == NULL && (len > m->m_len - off)) { m->m_len += min(len - (m->m_len - off), M_TRAILINGSPACE(m)); } mlen = min (m->m_len - off, len); bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen); cp += mlen; len -= mlen; mlen += off; off = 0; totlen += mlen; if (len == 0) break; if (m->m_next == NULL) { n = m_get(M_NOWAIT, m->m_type); if (n == NULL) break; n->m_len = min(MLEN, len); m->m_next = n; } m = m->m_next; } out: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) m->m_pkthdr.len = totlen; } /* * Append the specified data to the indicated mbuf chain, * Extend the mbuf chain if the new data does not fit in * existing space. * * Return 1 if able to complete the job; otherwise 0. */ int m_append(struct mbuf *m0, int len, c_caddr_t cp) { struct mbuf *m, *n; int remainder, space; for (m = m0; m->m_next != NULL; m = m->m_next) ; remainder = len; space = M_TRAILINGSPACE(m); if (space > 0) { /* * Copy into available space. */ if (space > remainder) space = remainder; bcopy(cp, mtod(m, caddr_t) + m->m_len, space); m->m_len += space; cp += space, remainder -= space; } while (remainder > 0) { /* * Allocate a new mbuf; could check space * and allocate a cluster instead. */ n = m_get(M_NOWAIT, m->m_type); if (n == NULL) break; n->m_len = min(MLEN, remainder); bcopy(cp, mtod(n, caddr_t), n->m_len); cp += n->m_len, remainder -= n->m_len; m->m_next = n; m = n; } if (m0->m_flags & M_PKTHDR) m0->m_pkthdr.len += len - remainder; return (remainder == 0); } /* * Apply function f to the data in an mbuf chain starting "off" bytes from * the beginning, continuing for "len" bytes. */ int m_apply(struct mbuf *m, int off, int len, int (*f)(void *, void *, u_int), void *arg) { u_int count; int rval; KASSERT(off >= 0, ("m_apply, negative off %d", off)); KASSERT(len >= 0, ("m_apply, negative len %d", len)); while (off > 0) { KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain")); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } while (len > 0) { KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain")); count = min(m->m_len - off, len); rval = (*f)(arg, mtod(m, caddr_t) + off, count); if (rval) return (rval); len -= count; off = 0; m = m->m_next; } return (0); } /* * Return a pointer to mbuf/offset of location in mbuf chain. */ struct mbuf * m_getptr(struct mbuf *m, int loc, int *off) { while (loc >= 0) { /* Normal end of search. */ if (m->m_len > loc) { *off = loc; return (m); } else { loc -= m->m_len; if (m->m_next == NULL) { if (loc == 0) { /* Point at the end of valid data. */ *off = m->m_len; return (m); } return (NULL); } m = m->m_next; } } return (NULL); } void m_print(const struct mbuf *m, int maxlen) { int len; int pdata; const struct mbuf *m2; if (m == NULL) { printf("mbuf: %p\n", m); return; } if (m->m_flags & M_PKTHDR) len = m->m_pkthdr.len; else len = -1; m2 = m; while (m2 != NULL && (len == -1 || len)) { pdata = m2->m_len; if (maxlen != -1 && pdata > maxlen) pdata = maxlen; printf("mbuf: %p len: %d, next: %p, %b%s", m2, m2->m_len, m2->m_next, m2->m_flags, "\20\20freelist\17skipfw" "\11proto5\10proto4\7proto3\6proto2\5proto1\4rdonly" "\3eor\2pkthdr\1ext", pdata ? "" : "\n"); if (pdata) printf(", %*D\n", pdata, (u_char *)m2->m_data, "-"); if (len != -1) len -= m2->m_len; m2 = m2->m_next; } if (len > 0) printf("%d bytes unaccounted for.\n", len); return; } u_int m_fixhdr(struct mbuf *m0) { u_int len; len = m_length(m0, NULL); m0->m_pkthdr.len = len; return (len); } u_int m_length(struct mbuf *m0, struct mbuf **last) { struct mbuf *m; u_int len; len = 0; for (m = m0; m != NULL; m = m->m_next) { len += m->m_len; if (m->m_next == NULL) break; } if (last != NULL) *last = m; return (len); } /* * Defragment a mbuf chain, returning the shortest possible * chain of mbufs and clusters. If allocation fails and * this cannot be completed, NULL will be returned, but * the passed in chain will be unchanged. Upon success, * the original chain will be freed, and the new chain * will be returned. * * If a non-packet header is passed in, the original * mbuf (chain?) will be returned unharmed. */ struct mbuf * m_defrag(struct mbuf *m0, int how) { struct mbuf *m_new = NULL, *m_final = NULL; int progress = 0, length; MBUF_CHECKSLEEP(how); if (!(m0->m_flags & M_PKTHDR)) return (m0); m_fixhdr(m0); /* Needed sanity check */ #ifdef MBUF_STRESS_TEST if (m_defragrandomfailures) { int temp = arc4random() & 0xff; if (temp == 0xba) goto nospace; } #endif if (m0->m_pkthdr.len > MHLEN) m_final = m_getcl(how, MT_DATA, M_PKTHDR); else m_final = m_gethdr(how, MT_DATA); if (m_final == NULL) goto nospace; if (m_dup_pkthdr(m_final, m0, how) == 0) goto nospace; m_new = m_final; while (progress < m0->m_pkthdr.len) { length = m0->m_pkthdr.len - progress; if (length > MCLBYTES) length = MCLBYTES; if (m_new == NULL) { if (length > MLEN) m_new = m_getcl(how, MT_DATA, 0); else m_new = m_get(how, MT_DATA); if (m_new == NULL) goto nospace; } m_copydata(m0, progress, length, mtod(m_new, caddr_t)); progress += length; m_new->m_len = length; if (m_new != m_final) m_cat(m_final, m_new); m_new = NULL; } #ifdef MBUF_STRESS_TEST if (m0->m_next == NULL) m_defraguseless++; #endif m_freem(m0); m0 = m_final; #ifdef MBUF_STRESS_TEST m_defragpackets++; m_defragbytes += m0->m_pkthdr.len; #endif return (m0); nospace: #ifdef MBUF_STRESS_TEST m_defragfailure++; #endif if (m_final) m_freem(m_final); return (NULL); } /* * Defragment an mbuf chain, returning at most maxfrags separate * mbufs+clusters. If this is not possible NULL is returned and * the original mbuf chain is left in it's present (potentially * modified) state. We use two techniques: collapsing consecutive * mbufs and replacing consecutive mbufs by a cluster. * * NB: this should really be named m_defrag but that name is taken */ struct mbuf * m_collapse(struct mbuf *m0, int how, int maxfrags) { struct mbuf *m, *n, *n2, **prev; u_int curfrags; /* * Calculate the current number of frags. */ curfrags = 0; for (m = m0; m != NULL; m = m->m_next) curfrags++; /* * First, try to collapse mbufs. Note that we always collapse * towards the front so we don't need to deal with moving the * pkthdr. This may be suboptimal if the first mbuf has much * less data than the following. */ m = m0; again: for (;;) { n = m->m_next; if (n == NULL) break; if (M_WRITABLE(m) && n->m_len < M_TRAILINGSPACE(m)) { bcopy(mtod(n, void *), mtod(m, char *) + m->m_len, n->m_len); m->m_len += n->m_len; m->m_next = n->m_next; m_free(n); if (--curfrags <= maxfrags) return m0; } else m = n; } KASSERT(maxfrags > 1, ("maxfrags %u, but normal collapse failed", maxfrags)); /* * Collapse consecutive mbufs to a cluster. */ prev = &m0->m_next; /* NB: not the first mbuf */ while ((n = *prev) != NULL) { if ((n2 = n->m_next) != NULL && n->m_len + n2->m_len < MCLBYTES) { m = m_getcl(how, MT_DATA, 0); if (m == NULL) goto bad; bcopy(mtod(n, void *), mtod(m, void *), n->m_len); bcopy(mtod(n2, void *), mtod(m, char *) + n->m_len, n2->m_len); m->m_len = n->m_len + n2->m_len; m->m_next = n2->m_next; *prev = m; m_free(n); m_free(n2); if (--curfrags <= maxfrags) /* +1 cl -2 mbufs */ return m0; /* * Still not there, try the normal collapse * again before we allocate another cluster. */ goto again; } prev = &n->m_next; } /* * No place where we can collapse to a cluster; punt. * This can occur if, for example, you request 2 frags * but the packet requires that both be clusters (we * never reallocate the first mbuf to avoid moving the * packet header). */ bad: return NULL; } #ifdef MBUF_STRESS_TEST /* * Fragment an mbuf chain. There's no reason you'd ever want to do * this in normal usage, but it's great for stress testing various * mbuf consumers. * * If fragmentation is not possible, the original chain will be * returned. * * Possible length values: * 0 no fragmentation will occur * > 0 each fragment will be of the specified length * -1 each fragment will be the same random value in length * -2 each fragment's length will be entirely random * (Random values range from 1 to 256) */ struct mbuf * m_fragment(struct mbuf *m0, int how, int length) { struct mbuf *m_new = NULL, *m_final = NULL; int progress = 0; if (!(m0->m_flags & M_PKTHDR)) return (m0); if ((length == 0) || (length < -2)) return (m0); m_fixhdr(m0); /* Needed sanity check */ m_final = m_getcl(how, MT_DATA, M_PKTHDR); if (m_final == NULL) goto nospace; if (m_dup_pkthdr(m_final, m0, how) == 0) goto nospace; m_new = m_final; if (length == -1) length = 1 + (arc4random() & 255); while (progress < m0->m_pkthdr.len) { int fraglen; if (length > 0) fraglen = length; else fraglen = 1 + (arc4random() & 255); if (fraglen > m0->m_pkthdr.len - progress) fraglen = m0->m_pkthdr.len - progress; if (fraglen > MCLBYTES) fraglen = MCLBYTES; if (m_new == NULL) { m_new = m_getcl(how, MT_DATA, 0); if (m_new == NULL) goto nospace; } m_copydata(m0, progress, fraglen, mtod(m_new, caddr_t)); progress += fraglen; m_new->m_len = fraglen; if (m_new != m_final) m_cat(m_final, m_new); m_new = NULL; } m_freem(m0); m0 = m_final; return (m0); nospace: if (m_final) m_freem(m_final); /* Return the original chain on failure */ return (m0); } #endif /* * Copy the contents of uio into a properly sized mbuf chain. */ struct mbuf * m_uiotombuf(struct uio *uio, int how, int len, int align, int flags) { struct mbuf *m, *mb; int error, length; ssize_t total; int progress = 0; /* * len can be zero or an arbitrary large value bound by * the total data supplied by the uio. */ if (len > 0) total = min(uio->uio_resid, len); else total = uio->uio_resid; /* * The smallest unit returned by m_getm2() is a single mbuf * with pkthdr. We can't align past it. */ if (align >= MHLEN) return (NULL); /* * Give us the full allocation or nothing. * If len is zero return the smallest empty mbuf. */ m = m_getm2(NULL, max(total + align, 1), how, MT_DATA, flags); if (m == NULL) return (NULL); m->m_data += align; /* Fill all mbufs with uio data and update header information. */ for (mb = m; mb != NULL; mb = mb->m_next) { length = min(M_TRAILINGSPACE(mb), total - progress); error = uiomove(mtod(mb, void *), length, uio); if (error) { m_freem(m); return (NULL); } mb->m_len = length; progress += length; if (flags & M_PKTHDR) m->m_pkthdr.len += length; } KASSERT(progress == total, ("%s: progress != total", __func__)); return (m); } /* * Copy an mbuf chain into a uio limited by len if set. */ int m_mbuftouio(struct uio *uio, struct mbuf *m, int len) { int error, length, total; int progress = 0; if (len > 0) total = min(uio->uio_resid, len); else total = uio->uio_resid; /* Fill the uio with data from the mbufs. */ for (; m != NULL; m = m->m_next) { length = min(m->m_len, total - progress); error = uiomove(mtod(m, void *), length, uio); if (error) return (error); progress += length; } return (0); } /* * Create a writable copy of the mbuf chain. While doing this * we compact the chain with a goal of producing a chain with * at most two mbufs. The second mbuf in this chain is likely * to be a cluster. The primary purpose of this work is to create * a writable packet for encryption, compression, etc. The * secondary goal is to linearize the data so the data can be * passed to crypto hardware in the most efficient manner possible. */ struct mbuf * m_unshare(struct mbuf *m0, int how) { struct mbuf *m, *mprev; struct mbuf *n, *mfirst, *mlast; int len, off; mprev = NULL; for (m = m0; m != NULL; m = mprev->m_next) { /* * Regular mbufs are ignored unless there's a cluster * in front of it that we can use to coalesce. We do * the latter mainly so later clusters can be coalesced * also w/o having to handle them specially (i.e. convert * mbuf+cluster -> cluster). This optimization is heavily * influenced by the assumption that we're running over * Ethernet where MCLBYTES is large enough that the max * packet size will permit lots of coalescing into a * single cluster. This in turn permits efficient * crypto operations, especially when using hardware. */ if ((m->m_flags & M_EXT) == 0) { if (mprev && (mprev->m_flags & M_EXT) && m->m_len <= M_TRAILINGSPACE(mprev)) { /* XXX: this ignores mbuf types */ memcpy(mtod(mprev, caddr_t) + mprev->m_len, mtod(m, caddr_t), m->m_len); mprev->m_len += m->m_len; mprev->m_next = m->m_next; /* unlink from chain */ m_free(m); /* reclaim mbuf */ #if 0 newipsecstat.ips_mbcoalesced++; #endif } else { mprev = m; } continue; } /* * Writable mbufs are left alone (for now). */ if (M_WRITABLE(m)) { mprev = m; continue; } /* * Not writable, replace with a copy or coalesce with * the previous mbuf if possible (since we have to copy * it anyway, we try to reduce the number of mbufs and * clusters so that future work is easier). */ KASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags)); /* NB: we only coalesce into a cluster or larger */ if (mprev != NULL && (mprev->m_flags & M_EXT) && m->m_len <= M_TRAILINGSPACE(mprev)) { /* XXX: this ignores mbuf types */ memcpy(mtod(mprev, caddr_t) + mprev->m_len, mtod(m, caddr_t), m->m_len); mprev->m_len += m->m_len; mprev->m_next = m->m_next; /* unlink from chain */ m_free(m); /* reclaim mbuf */ #if 0 newipsecstat.ips_clcoalesced++; #endif continue; } /* * Allocate new space to hold the copy and copy the data. * We deal with jumbo mbufs (i.e. m_len > MCLBYTES) by * splitting them into clusters. We could just malloc a * buffer and make it external but too many device drivers * don't know how to break up the non-contiguous memory when * doing DMA. */ n = m_getcl(how, m->m_type, m->m_flags); if (n == NULL) { m_freem(m0); return (NULL); } if (m->m_flags & M_PKTHDR) { KASSERT(mprev == NULL, ("%s: m0 %p, m %p has M_PKTHDR", __func__, m0, m)); m_move_pkthdr(n, m); } len = m->m_len; off = 0; mfirst = n; mlast = NULL; for (;;) { int cc = min(len, MCLBYTES); memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc); n->m_len = cc; if (mlast != NULL) mlast->m_next = n; mlast = n; #if 0 newipsecstat.ips_clcopied++; #endif len -= cc; if (len <= 0) break; off += cc; n = m_getcl(how, m->m_type, m->m_flags); if (n == NULL) { m_freem(mfirst); m_freem(m0); return (NULL); } } n->m_next = m->m_next; if (mprev == NULL) m0 = mfirst; /* new head of chain */ else mprev->m_next = mfirst; /* replace old mbuf */ m_free(m); /* release old mbuf */ mprev = mfirst; } return (m0); } #ifdef MBUF_PROFILING #define MP_BUCKETS 32 /* don't just change this as things may overflow.*/ struct mbufprofile { uintmax_t wasted[MP_BUCKETS]; uintmax_t used[MP_BUCKETS]; uintmax_t segments[MP_BUCKETS]; } mbprof; #define MP_MAXDIGITS 21 /* strlen("16,000,000,000,000,000,000") == 21 */ #define MP_NUMLINES 6 #define MP_NUMSPERLINE 16 #define MP_EXTRABYTES 64 /* > strlen("used:\nwasted:\nsegments:\n") */ /* work out max space needed and add a bit of spare space too */ #define MP_MAXLINE ((MP_MAXDIGITS+1) * MP_NUMSPERLINE) #define MP_BUFSIZE ((MP_MAXLINE * MP_NUMLINES) + 1 + MP_EXTRABYTES) char mbprofbuf[MP_BUFSIZE]; void m_profile(struct mbuf *m) { int segments = 0; int used = 0; int wasted = 0; while (m) { segments++; used += m->m_len; if (m->m_flags & M_EXT) { wasted += MHLEN - sizeof(m->m_ext) + m->m_ext.ext_size - m->m_len; } else { if (m->m_flags & M_PKTHDR) wasted += MHLEN - m->m_len; else wasted += MLEN - m->m_len; } m = m->m_next; } /* be paranoid.. it helps */ if (segments > MP_BUCKETS - 1) segments = MP_BUCKETS - 1; if (used > 100000) used = 100000; if (wasted > 100000) wasted = 100000; /* store in the appropriate bucket */ /* don't bother locking. if it's slightly off, so what? */ mbprof.segments[segments]++; mbprof.used[fls(used)]++; mbprof.wasted[fls(wasted)]++; } static void mbprof_textify(void) { int offset; char *c; uint64_t *p; p = &mbprof.wasted[0]; c = mbprofbuf; offset = snprintf(c, MP_MAXLINE + 10, "wasted:\n" "%ju %ju %ju %ju %ju %ju %ju %ju " "%ju %ju %ju %ju %ju %ju %ju %ju\n", p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); #ifdef BIG_ARRAY p = &mbprof.wasted[16]; c += offset; offset = snprintf(c, MP_MAXLINE, "%ju %ju %ju %ju %ju %ju %ju %ju " "%ju %ju %ju %ju %ju %ju %ju %ju\n", p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); #endif p = &mbprof.used[0]; c += offset; offset = snprintf(c, MP_MAXLINE + 10, "used:\n" "%ju %ju %ju %ju %ju %ju %ju %ju " "%ju %ju %ju %ju %ju %ju %ju %ju\n", p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); #ifdef BIG_ARRAY p = &mbprof.used[16]; c += offset; offset = snprintf(c, MP_MAXLINE, "%ju %ju %ju %ju %ju %ju %ju %ju " "%ju %ju %ju %ju %ju %ju %ju %ju\n", p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); #endif p = &mbprof.segments[0]; c += offset; offset = snprintf(c, MP_MAXLINE + 10, "segments:\n" "%ju %ju %ju %ju %ju %ju %ju %ju " "%ju %ju %ju %ju %ju %ju %ju %ju\n", p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); #ifdef BIG_ARRAY p = &mbprof.segments[16]; c += offset; offset = snprintf(c, MP_MAXLINE, "%ju %ju %ju %ju %ju %ju %ju %ju " "%ju %ju %ju %ju %ju %ju %ju %jju", p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); #endif } static int mbprof_handler(SYSCTL_HANDLER_ARGS) { int error; mbprof_textify(); error = SYSCTL_OUT(req, mbprofbuf, strlen(mbprofbuf) + 1); return (error); } static int mbprof_clr_handler(SYSCTL_HANDLER_ARGS) { int clear, error; clear = 0; error = sysctl_handle_int(oidp, &clear, 0, req); if (error || !req->newptr) return (error); if (clear) { bzero(&mbprof, sizeof(mbprof)); } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofile, CTLTYPE_STRING|CTLFLAG_RD, NULL, 0, mbprof_handler, "A", "mbuf profiling statistics"); SYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofileclr, CTLTYPE_INT|CTLFLAG_RW, NULL, 0, mbprof_clr_handler, "I", "clear mbuf profiling statistics"); #endif Index: projects/clang380-import/sys/kern/uipc_sockbuf.c =================================================================== --- projects/clang380-import/sys/kern/uipc_sockbuf.c (revision 293686) +++ projects/clang380-import/sys/kern/uipc_sockbuf.c (revision 293687) @@ -1,1314 +1,1331 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 */ #include __FBSDID("$FreeBSD$"); #include "opt_param.h" #include #include /* for aio_swake proto */ #include #include #include #include #include #include #include #include #include #include #include #include /* * Function pointer set by the AIO routines so that the socket buffer code * can call back into the AIO module if it is loaded. */ void (*aio_swake)(struct socket *, struct sockbuf *); /* * Primitive routines for operating on socket buffers */ u_long sb_max = SB_MAX; u_long sb_max_adj = (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */ static u_long sb_efficiency = 8; /* parameter for sbreserve() */ static struct mbuf *sbcut_internal(struct sockbuf *sb, int len); static void sbflush_internal(struct sockbuf *sb); /* + * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY. + */ +static void +sbm_clrprotoflags(struct mbuf *m, int flags) +{ + int mask; + + mask = ~M_PROTOFLAGS; + if (flags & PRUS_NOTREADY) + mask |= M_NOTREADY; + while (m) { + m->m_flags &= mask; + m = m->m_next; + } +} + +/* * Mark ready "count" mbufs starting with "m". */ int sbready(struct sockbuf *sb, struct mbuf *m, int count) { u_int blocker; SOCKBUF_LOCK_ASSERT(sb); KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb)); blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0; for (int i = 0; i < count; i++, m = m->m_next) { KASSERT(m->m_flags & M_NOTREADY, ("%s: m %p !M_NOTREADY", __func__, m)); m->m_flags &= ~(M_NOTREADY | blocker); if (blocker) sb->sb_acc += m->m_len; } if (!blocker) return (EINPROGRESS); /* This one was blocking all the queue. */ for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) { KASSERT(m->m_flags & M_BLOCKED, ("%s: m %p !M_BLOCKED", __func__, m)); m->m_flags &= ~M_BLOCKED; sb->sb_acc += m->m_len; } sb->sb_fnrdy = m; return (0); } /* * Adjust sockbuf state reflecting allocation of m. */ void sballoc(struct sockbuf *sb, struct mbuf *m) { SOCKBUF_LOCK_ASSERT(sb); sb->sb_ccc += m->m_len; if (sb->sb_fnrdy == NULL) { if (m->m_flags & M_NOTREADY) sb->sb_fnrdy = m; else sb->sb_acc += m->m_len; } else m->m_flags |= M_BLOCKED; if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) sb->sb_ctl += m->m_len; sb->sb_mbcnt += MSIZE; sb->sb_mcnt += 1; if (m->m_flags & M_EXT) { sb->sb_mbcnt += m->m_ext.ext_size; sb->sb_ccnt += 1; } } /* * Adjust sockbuf state reflecting freeing of m. */ void sbfree(struct sockbuf *sb, struct mbuf *m) { #if 0 /* XXX: not yet: soclose() call path comes here w/o lock. */ SOCKBUF_LOCK_ASSERT(sb); #endif sb->sb_ccc -= m->m_len; if (!(m->m_flags & M_NOTAVAIL)) sb->sb_acc -= m->m_len; if (m == sb->sb_fnrdy) { struct mbuf *n; KASSERT(m->m_flags & M_NOTREADY, ("%s: m %p !M_NOTREADY", __func__, m)); n = m->m_next; while (n != NULL && !(n->m_flags & M_NOTREADY)) { n->m_flags &= ~M_BLOCKED; sb->sb_acc += n->m_len; n = n->m_next; } sb->sb_fnrdy = n; } if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) sb->sb_ctl -= m->m_len; sb->sb_mbcnt -= MSIZE; sb->sb_mcnt -= 1; if (m->m_flags & M_EXT) { sb->sb_mbcnt -= m->m_ext.ext_size; sb->sb_ccnt -= 1; } if (sb->sb_sndptr == m) { sb->sb_sndptr = NULL; sb->sb_sndptroff = 0; } if (sb->sb_sndptroff != 0) sb->sb_sndptroff -= m->m_len; } /* * Socantsendmore indicates that no more data will be sent on the socket; it * would normally be applied to a socket when the user informs the system * that no more data is to be sent, by the protocol code (in case * PRU_SHUTDOWN). Socantrcvmore indicates that no more data will be * received, and will normally be applied to the socket by a protocol when it * detects that the peer will send no more data. Data queued for reading in * the socket may yet be read. */ void socantsendmore_locked(struct socket *so) { SOCKBUF_LOCK_ASSERT(&so->so_snd); so->so_snd.sb_state |= SBS_CANTSENDMORE; sowwakeup_locked(so); mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); } void socantsendmore(struct socket *so) { SOCKBUF_LOCK(&so->so_snd); socantsendmore_locked(so); mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); } void socantrcvmore_locked(struct socket *so) { SOCKBUF_LOCK_ASSERT(&so->so_rcv); so->so_rcv.sb_state |= SBS_CANTRCVMORE; sorwakeup_locked(so); mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); } void socantrcvmore(struct socket *so) { SOCKBUF_LOCK(&so->so_rcv); socantrcvmore_locked(so); mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); } /* * Wait for data to arrive at/drain from a socket buffer. */ int sbwait(struct sockbuf *sb) { SOCKBUF_LOCK_ASSERT(sb); sb->sb_flags |= SB_WAIT; return (msleep_sbt(&sb->sb_acc, &sb->sb_mtx, (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", sb->sb_timeo, 0, 0)); } int sblock(struct sockbuf *sb, int flags) { KASSERT((flags & SBL_VALID) == flags, ("sblock: flags invalid (0x%x)", flags)); if (flags & SBL_WAIT) { if ((sb->sb_flags & SB_NOINTR) || (flags & SBL_NOINTR)) { sx_xlock(&sb->sb_sx); return (0); } return (sx_xlock_sig(&sb->sb_sx)); } else { if (sx_try_xlock(&sb->sb_sx) == 0) return (EWOULDBLOCK); return (0); } } void sbunlock(struct sockbuf *sb) { sx_xunlock(&sb->sb_sx); } /* * Wakeup processes waiting on a socket buffer. Do asynchronous notification * via SIGIO if the socket has the SS_ASYNC flag set. * * Called with the socket buffer lock held; will release the lock by the end * of the function. This allows the caller to acquire the socket buffer lock * while testing for the need for various sorts of wakeup and hold it through * to the point where it's no longer required. We currently hold the lock * through calls out to other subsystems (with the exception of kqueue), and * then release it to avoid lock order issues. It's not clear that's * correct. */ void sowakeup(struct socket *so, struct sockbuf *sb) { int ret; SOCKBUF_LOCK_ASSERT(sb); selwakeuppri(&sb->sb_sel, PSOCK); if (!SEL_WAITING(&sb->sb_sel)) sb->sb_flags &= ~SB_SEL; if (sb->sb_flags & SB_WAIT) { sb->sb_flags &= ~SB_WAIT; wakeup(&sb->sb_acc); } KNOTE_LOCKED(&sb->sb_sel.si_note, 0); if (sb->sb_upcall != NULL) { ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT); if (ret == SU_ISCONNECTED) { KASSERT(sb == &so->so_rcv, ("SO_SND upcall returned SU_ISCONNECTED")); soupcall_clear(so, SO_RCV); } } else ret = SU_OK; if (sb->sb_flags & SB_AIO) aio_swake(so, sb); SOCKBUF_UNLOCK(sb); if (ret == SU_ISCONNECTED) soisconnected(so); if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) pgsigio(&so->so_sigio, SIGIO, 0); mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED); } /* * Socket buffer (struct sockbuf) utility routines. * * Each socket contains two socket buffers: one for sending data and one for * receiving data. Each buffer contains a queue of mbufs, information about * the number of mbufs and amount of data in the queue, and other fields * allowing select() statements and notification on data availability to be * implemented. * * Data stored in a socket buffer is maintained as a list of records. Each * record is a list of mbufs chained together with the m_next field. Records * are chained together with the m_nextpkt field. The upper level routine * soreceive() expects the following conventions to be observed when placing * information in the receive buffer: * * 1. If the protocol requires each message be preceded by the sender's name, * then a record containing that name must be present before any * associated data (mbuf's must be of type MT_SONAME). * 2. If the protocol supports the exchange of ``access rights'' (really just * additional data associated with the message), and there are ``rights'' * to be received, then a record containing this data should be present * (mbuf's must be of type MT_RIGHTS). * 3. If a name or rights record exists, then it must be followed by a data * record, perhaps of zero length. * * Before using a new socket structure it is first necessary to reserve * buffer space to the socket, by calling sbreserve(). This should commit * some of the available buffer space in the system buffer pool for the * socket (currently, it does nothing but enforce limits). The space should * be released by calling sbrelease() when the socket is destroyed. */ int soreserve(struct socket *so, u_long sndcc, u_long rcvcc) { struct thread *td = curthread; SOCKBUF_LOCK(&so->so_snd); SOCKBUF_LOCK(&so->so_rcv); if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0) goto bad; if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0) goto bad2; if (so->so_rcv.sb_lowat == 0) so->so_rcv.sb_lowat = 1; if (so->so_snd.sb_lowat == 0) so->so_snd.sb_lowat = MCLBYTES; if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) so->so_snd.sb_lowat = so->so_snd.sb_hiwat; SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_snd); return (0); bad2: sbrelease_locked(&so->so_snd, so); bad: SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_snd); return (ENOBUFS); } static int sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS) { int error = 0; u_long tmp_sb_max = sb_max; error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req); if (error || !req->newptr) return (error); if (tmp_sb_max < MSIZE + MCLBYTES) return (EINVAL); sb_max = tmp_sb_max; sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES); return (0); } /* * Allot mbufs to a sockbuf. Attempt to scale mbmax so that mbcnt doesn't * become limiting if buffering efficiency is near the normal case. */ int sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so, struct thread *td) { rlim_t sbsize_limit; SOCKBUF_LOCK_ASSERT(sb); /* * When a thread is passed, we take into account the thread's socket * buffer size limit. The caller will generally pass curthread, but * in the TCP input path, NULL will be passed to indicate that no * appropriate thread resource limits are available. In that case, * we don't apply a process limit. */ if (cc > sb_max_adj) return (0); if (td != NULL) { sbsize_limit = lim_cur(td, RLIMIT_SBSIZE); } else sbsize_limit = RLIM_INFINITY; if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc, sbsize_limit)) return (0); sb->sb_mbmax = min(cc * sb_efficiency, sb_max); if (sb->sb_lowat > sb->sb_hiwat) sb->sb_lowat = sb->sb_hiwat; return (1); } int sbreserve(struct sockbuf *sb, u_long cc, struct socket *so, struct thread *td) { int error; SOCKBUF_LOCK(sb); error = sbreserve_locked(sb, cc, so, td); SOCKBUF_UNLOCK(sb); return (error); } /* * Free mbufs held by a socket, and reserved mbuf space. */ void sbrelease_internal(struct sockbuf *sb, struct socket *so) { sbflush_internal(sb); (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY); sb->sb_mbmax = 0; } void sbrelease_locked(struct sockbuf *sb, struct socket *so) { SOCKBUF_LOCK_ASSERT(sb); sbrelease_internal(sb, so); } void sbrelease(struct sockbuf *sb, struct socket *so) { SOCKBUF_LOCK(sb); sbrelease_locked(sb, so); SOCKBUF_UNLOCK(sb); } void sbdestroy(struct sockbuf *sb, struct socket *so) { sbrelease_internal(sb, so); } /* * Routines to add and remove data from an mbuf queue. * * The routines sbappend() or sbappendrecord() are normally called to append * new mbufs to a socket buffer, after checking that adequate space is * available, comparing the function sbspace() with the amount of data to be * added. sbappendrecord() differs from sbappend() in that data supplied is * treated as the beginning of a new record. To place a sender's address, * optional access rights, and data in a socket receive buffer, * sbappendaddr() should be used. To place access rights and data in a * socket receive buffer, sbappendrights() should be used. In either case, * the new data begins a new record. Note that unlike sbappend() and * sbappendrecord(), these routines check for the caller that there will be * enough space to store the data. Each fails if there is not enough space, * or if it cannot find mbufs to store additional information in. * * Reliable protocols may use the socket send buffer to hold data awaiting * acknowledgement. Data is normally copied from a socket send buffer in a * protocol with m_copy for output to a peer, and then removing the data from * the socket buffer with sbdrop() or sbdroprecord() when the data is * acknowledged by the peer. */ #ifdef SOCKBUF_DEBUG void sblastrecordchk(struct sockbuf *sb, const char *file, int line) { struct mbuf *m = sb->sb_mb; SOCKBUF_LOCK_ASSERT(sb); while (m && m->m_nextpkt) m = m->m_nextpkt; if (m != sb->sb_lastrecord) { printf("%s: sb_mb %p sb_lastrecord %p last %p\n", __func__, sb->sb_mb, sb->sb_lastrecord, m); printf("packet chain:\n"); for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) printf("\t%p\n", m); panic("%s from %s:%u", __func__, file, line); } } void sblastmbufchk(struct sockbuf *sb, const char *file, int line) { struct mbuf *m = sb->sb_mb; struct mbuf *n; SOCKBUF_LOCK_ASSERT(sb); while (m && m->m_nextpkt) m = m->m_nextpkt; while (m && m->m_next) m = m->m_next; if (m != sb->sb_mbtail) { printf("%s: sb_mb %p sb_mbtail %p last %p\n", __func__, sb->sb_mb, sb->sb_mbtail, m); printf("packet tree:\n"); for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { printf("\t"); for (n = m; n != NULL; n = n->m_next) printf("%p ", n); printf("\n"); } panic("%s from %s:%u", __func__, file, line); } } #endif /* SOCKBUF_DEBUG */ #define SBLINKRECORD(sb, m0) do { \ SOCKBUF_LOCK_ASSERT(sb); \ if ((sb)->sb_lastrecord != NULL) \ (sb)->sb_lastrecord->m_nextpkt = (m0); \ else \ (sb)->sb_mb = (m0); \ (sb)->sb_lastrecord = (m0); \ } while (/*CONSTCOND*/0) /* * Append mbuf chain m to the last record in the socket buffer sb. The * additional space associated the mbuf chain is recorded in sb. Empty mbufs * are discarded and mbufs are compacted where possible. */ void -sbappend_locked(struct sockbuf *sb, struct mbuf *m) +sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags) { struct mbuf *n; SOCKBUF_LOCK_ASSERT(sb); if (m == 0) return; - m_clrprotoflags(m); + sbm_clrprotoflags(m, flags); SBLASTRECORDCHK(sb); n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; do { if (n->m_flags & M_EOR) { sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); } else { /* * XXX Would like to simply use sb_mbtail here, but * XXX I need to verify that I won't miss an EOR that * XXX way. */ if ((n = sb->sb_lastrecord) != NULL) { do { if (n->m_flags & M_EOR) { sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); } else { /* * If this is the first record in the socket buffer, * it's also the last record. */ sb->sb_lastrecord = m; } } sbcompress(sb, m, n); SBLASTRECORDCHK(sb); } /* * Append mbuf chain m to the last record in the socket buffer sb. The * additional space associated the mbuf chain is recorded in sb. Empty mbufs * are discarded and mbufs are compacted where possible. */ void -sbappend(struct sockbuf *sb, struct mbuf *m) +sbappend(struct sockbuf *sb, struct mbuf *m, int flags) { SOCKBUF_LOCK(sb); - sbappend_locked(sb, m); + sbappend_locked(sb, m, flags); SOCKBUF_UNLOCK(sb); } /* * This version of sbappend() should only be used when the caller absolutely * knows that there will never be more than one record in the socket buffer, * that is, a stream protocol (such as TCP). */ void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags) { SOCKBUF_LOCK_ASSERT(sb); KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); SBLASTMBUFCHK(sb); /* Remove all packet headers and mbuf tags to get a pure data chain. */ m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0); sbcompress(sb, m, sb->sb_mbtail); sb->sb_lastrecord = sb->sb_mb; SBLASTRECORDCHK(sb); } /* * This version of sbappend() should only be used when the caller absolutely * knows that there will never be more than one record in the socket buffer, * that is, a stream protocol (such as TCP). */ void sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags) { SOCKBUF_LOCK(sb); sbappendstream_locked(sb, m, flags); SOCKBUF_UNLOCK(sb); } #ifdef SOCKBUF_DEBUG void sbcheck(struct sockbuf *sb, const char *file, int line) { struct mbuf *m, *n, *fnrdy; u_long acc, ccc, mbcnt; SOCKBUF_LOCK_ASSERT(sb); acc = ccc = mbcnt = 0; fnrdy = NULL; for (m = sb->sb_mb; m; m = n) { n = m->m_nextpkt; for (; m; m = m->m_next) { if (m->m_len == 0) { printf("sb %p empty mbuf %p\n", sb, m); goto fail; } if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) { if (m != sb->sb_fnrdy) { printf("sb %p: fnrdy %p != m %p\n", sb, sb->sb_fnrdy, m); goto fail; } fnrdy = m; } if (fnrdy) { if (!(m->m_flags & M_NOTAVAIL)) { printf("sb %p: fnrdy %p, m %p is avail\n", sb, sb->sb_fnrdy, m); goto fail; } } else acc += m->m_len; ccc += m->m_len; mbcnt += MSIZE; if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ mbcnt += m->m_ext.ext_size; } } if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) { printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n", acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt); goto fail; } return; fail: panic("%s from %s:%u", __func__, file, line); } #endif /* * As above, except the mbuf chain begins a new record. */ void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0) { struct mbuf *m; SOCKBUF_LOCK_ASSERT(sb); if (m0 == 0) return; m_clrprotoflags(m0); /* * Put the first mbuf on the queue. Note this permits zero length * records. */ sballoc(sb, m0); SBLASTRECORDCHK(sb); SBLINKRECORD(sb, m0); sb->sb_mbtail = m0; m = m0->m_next; m0->m_next = 0; if (m && (m0->m_flags & M_EOR)) { m0->m_flags &= ~M_EOR; m->m_flags |= M_EOR; } /* always call sbcompress() so it can do SBLASTMBUFCHK() */ sbcompress(sb, m, m0); } /* * As above, except the mbuf chain begins a new record. */ void sbappendrecord(struct sockbuf *sb, struct mbuf *m0) { SOCKBUF_LOCK(sb); sbappendrecord_locked(sb, m0); SOCKBUF_UNLOCK(sb); } /* Helper routine that appends data, control, and address to a sockbuf. */ static int sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last) { struct mbuf *m, *n, *nlast; #if MSIZE <= 256 if (asa->sa_len > MLEN) return (0); #endif m = m_get(M_NOWAIT, MT_SONAME); if (m == NULL) return (0); m->m_len = asa->sa_len; bcopy(asa, mtod(m, caddr_t), asa->sa_len); if (m0) m_clrprotoflags(m0); if (ctrl_last) ctrl_last->m_next = m0; /* concatenate data to control */ else control = m0; m->m_next = control; for (n = m; n->m_next != NULL; n = n->m_next) sballoc(sb, n); sballoc(sb, n); nlast = n; SBLINKRECORD(sb, m); sb->sb_mbtail = nlast; SBLASTMBUFCHK(sb); SBLASTRECORDCHK(sb); return (1); } /* * Append address and data, and optionally, control (ancillary) data to the * receive queue of a socket. If present, m0 must include a packet header * with total length. Returns 0 if no space in sockbuf or insufficient * mbufs. */ int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { struct mbuf *ctrl_last; int space = asa->sa_len; SOCKBUF_LOCK_ASSERT(sb); if (m0 && (m0->m_flags & M_PKTHDR) == 0) panic("sbappendaddr_locked"); if (m0) space += m0->m_pkthdr.len; space += m_length(control, &ctrl_last); if (space > sbspace(sb)) return (0); return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); } /* * Append address and data, and optionally, control (ancillary) data to the * receive queue of a socket. If present, m0 must include a packet header * with total length. Returns 0 if insufficient mbufs. Does not validate space * on the receiving sockbuf. */ int sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { struct mbuf *ctrl_last; SOCKBUF_LOCK_ASSERT(sb); ctrl_last = (control == NULL) ? NULL : m_last(control); return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); } /* * Append address and data, and optionally, control (ancillary) data to the * receive queue of a socket. If present, m0 must include a packet header * with total length. Returns 0 if no space in sockbuf or insufficient * mbufs. */ int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { int retval; SOCKBUF_LOCK(sb); retval = sbappendaddr_locked(sb, asa, m0, control); SOCKBUF_UNLOCK(sb); return (retval); } int sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) { struct mbuf *m, *n, *mlast; int space; SOCKBUF_LOCK_ASSERT(sb); if (control == 0) panic("sbappendcontrol_locked"); space = m_length(control, &n) + m_length(m0, NULL); if (space > sbspace(sb)) return (0); m_clrprotoflags(m0); n->m_next = m0; /* concatenate data to control */ SBLASTRECORDCHK(sb); for (m = control; m->m_next; m = m->m_next) sballoc(sb, m); sballoc(sb, m); mlast = m; SBLINKRECORD(sb, control); sb->sb_mbtail = mlast; SBLASTMBUFCHK(sb); SBLASTRECORDCHK(sb); return (1); } int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) { int retval; SOCKBUF_LOCK(sb); retval = sbappendcontrol_locked(sb, m0, control); SOCKBUF_UNLOCK(sb); return (retval); } /* * Append the data in mbuf chain (m) into the socket buffer sb following mbuf * (n). If (n) is NULL, the buffer is presumed empty. * * When the data is compressed, mbufs in the chain may be handled in one of * three ways: * * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no * record boundary, and no change in data type). * * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into * an mbuf already in the socket buffer. This can occur if an * appropriate mbuf exists, there is room, both mbufs are not marked as * not ready, and no merging of data types will occur. * * (3) The mbuf may be appended to the end of the existing mbuf chain. * * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as * end-of-record. */ void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) { int eor = 0; struct mbuf *o; SOCKBUF_LOCK_ASSERT(sb); while (m) { eor |= m->m_flags & M_EOR; if (m->m_len == 0 && (eor == 0 || (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) { if (sb->sb_lastrecord == m) sb->sb_lastrecord = m->m_next; m = m_free(m); continue; } if (n && (n->m_flags & M_EOR) == 0 && M_WRITABLE(n) && ((sb->sb_flags & SB_NOCOALESCE) == 0) && !(m->m_flags & M_NOTREADY) && !(n->m_flags & M_NOTREADY) && m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ m->m_len <= M_TRAILINGSPACE(n) && n->m_type == m->m_type) { bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, (unsigned)m->m_len); n->m_len += m->m_len; sb->sb_ccc += m->m_len; if (sb->sb_fnrdy == NULL) sb->sb_acc += m->m_len; if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) /* XXX: Probably don't need.*/ sb->sb_ctl += m->m_len; m = m_free(m); continue; } if (n) n->m_next = m; else sb->sb_mb = m; sb->sb_mbtail = m; sballoc(sb, m); n = m; m->m_flags &= ~M_EOR; m = m->m_next; n->m_next = 0; } if (eor) { KASSERT(n != NULL, ("sbcompress: eor && n == NULL")); n->m_flags |= eor; } SBLASTMBUFCHK(sb); } /* * Free all mbufs in a sockbuf. Check that all resources are reclaimed. */ static void sbflush_internal(struct sockbuf *sb) { while (sb->sb_mbcnt) { /* * Don't call sbcut(sb, 0) if the leading mbuf is non-empty: * we would loop forever. Panic instead. */ if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len)) break; m_freem(sbcut_internal(sb, (int)sb->sb_ccc)); } KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0, ("%s: ccc %u mb %p mbcnt %u", __func__, sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt)); } void sbflush_locked(struct sockbuf *sb) { SOCKBUF_LOCK_ASSERT(sb); sbflush_internal(sb); } void sbflush(struct sockbuf *sb) { SOCKBUF_LOCK(sb); sbflush_locked(sb); SOCKBUF_UNLOCK(sb); } /* * Cut data from (the front of) a sockbuf. */ static struct mbuf * sbcut_internal(struct sockbuf *sb, int len) { struct mbuf *m, *next, *mfree; next = (m = sb->sb_mb) ? m->m_nextpkt : 0; mfree = NULL; while (len > 0) { if (m == NULL) { KASSERT(next, ("%s: no next, len %d", __func__, len)); m = next; next = m->m_nextpkt; } if (m->m_len > len) { KASSERT(!(m->m_flags & M_NOTAVAIL), ("%s: m %p M_NOTAVAIL", __func__, m)); m->m_len -= len; m->m_data += len; sb->sb_ccc -= len; sb->sb_acc -= len; if (sb->sb_sndptroff != 0) sb->sb_sndptroff -= len; if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) sb->sb_ctl -= len; break; } len -= m->m_len; sbfree(sb, m); /* * Do not put M_NOTREADY buffers to the free list, they * are referenced from outside. */ if (m->m_flags & M_NOTREADY) m = m->m_next; else { struct mbuf *n; n = m->m_next; m->m_next = mfree; mfree = m; m = n; } } /* * Free any zero-length mbufs from the buffer. * For SOCK_DGRAM sockets such mbufs represent empty records. * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer, * when sosend_generic() needs to send only control data. */ while (m && m->m_len == 0) { struct mbuf *n; sbfree(sb, m); n = m->m_next; m->m_next = mfree; mfree = m; m = n; } if (m) { sb->sb_mb = m; m->m_nextpkt = next; } else sb->sb_mb = next; /* * First part is an inline SB_EMPTY_FIXUP(). Second part makes sure * sb_lastrecord is up-to-date if we dropped part of the last record. */ m = sb->sb_mb; if (m == NULL) { sb->sb_mbtail = NULL; sb->sb_lastrecord = NULL; } else if (m->m_nextpkt == NULL) { sb->sb_lastrecord = m; } return (mfree); } /* * Drop data from (the front of) a sockbuf. */ void sbdrop_locked(struct sockbuf *sb, int len) { SOCKBUF_LOCK_ASSERT(sb); m_freem(sbcut_internal(sb, len)); } /* * Drop data from (the front of) a sockbuf, * and return it to caller. */ struct mbuf * sbcut_locked(struct sockbuf *sb, int len) { SOCKBUF_LOCK_ASSERT(sb); return (sbcut_internal(sb, len)); } void sbdrop(struct sockbuf *sb, int len) { struct mbuf *mfree; SOCKBUF_LOCK(sb); mfree = sbcut_internal(sb, len); SOCKBUF_UNLOCK(sb); m_freem(mfree); } /* * Maintain a pointer and offset pair into the socket buffer mbuf chain to * avoid traversal of the entire socket buffer for larger offsets. */ struct mbuf * sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff) { struct mbuf *m, *ret; KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); KASSERT(off + len <= sb->sb_acc, ("%s: beyond sb", __func__)); KASSERT(sb->sb_sndptroff <= sb->sb_acc, ("%s: sndptroff broken", __func__)); /* * Is off below stored offset? Happens on retransmits. * Just return, we can't help here. */ if (sb->sb_sndptroff > off) { *moff = off; return (sb->sb_mb); } /* Return closest mbuf in chain for current offset. */ *moff = off - sb->sb_sndptroff; m = ret = sb->sb_sndptr ? sb->sb_sndptr : sb->sb_mb; if (*moff == m->m_len) { *moff = 0; sb->sb_sndptroff += m->m_len; m = ret = m->m_next; KASSERT(ret->m_len > 0, ("mbuf %p in sockbuf %p chain has no valid data", ret, sb)); } /* Advance by len to be as close as possible for the next transmit. */ for (off = off - sb->sb_sndptroff + len - 1; off > 0 && m != NULL && off >= m->m_len; m = m->m_next) { sb->sb_sndptroff += m->m_len; off -= m->m_len; } if (off > 0 && m == NULL) panic("%s: sockbuf %p and mbuf %p clashing", __func__, sb, ret); sb->sb_sndptr = m; return (ret); } /* * Return the first mbuf and the mbuf data offset for the provided * send offset without changing the "sb_sndptroff" field. */ struct mbuf * sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff) { struct mbuf *m; KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); /* * If the "off" is below the stored offset, which happens on * retransmits, just use "sb_mb": */ if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { m = sb->sb_mb; } else { m = sb->sb_sndptr; off -= sb->sb_sndptroff; } while (off > 0 && m != NULL) { if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } *moff = off; return (m); } /* * Drop a record off the front of a sockbuf and move the next record to the * front. */ void sbdroprecord_locked(struct sockbuf *sb) { struct mbuf *m; SOCKBUF_LOCK_ASSERT(sb); m = sb->sb_mb; if (m) { sb->sb_mb = m->m_nextpkt; do { sbfree(sb, m); m = m_free(m); } while (m); } SB_EMPTY_FIXUP(sb); } /* * Drop a record off the front of a sockbuf and move the next record to the * front. */ void sbdroprecord(struct sockbuf *sb) { SOCKBUF_LOCK(sb); sbdroprecord_locked(sb); SOCKBUF_UNLOCK(sb); } /* * Create a "control" mbuf containing the specified data with the specified * type for presentation on a socket buffer. */ struct mbuf * sbcreatecontrol(caddr_t p, int size, int type, int level) { struct cmsghdr *cp; struct mbuf *m; if (CMSG_SPACE((u_int)size) > MCLBYTES) return ((struct mbuf *) NULL); if (CMSG_SPACE((u_int)size) > MLEN) m = m_getcl(M_NOWAIT, MT_CONTROL, 0); else m = m_get(M_NOWAIT, MT_CONTROL); if (m == NULL) return ((struct mbuf *) NULL); cp = mtod(m, struct cmsghdr *); m->m_len = 0; KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m), ("sbcreatecontrol: short mbuf")); /* * Don't leave the padding between the msg header and the * cmsg data and the padding after the cmsg data un-initialized. */ bzero(cp, CMSG_SPACE((u_int)size)); if (p != NULL) (void)memcpy(CMSG_DATA(cp), p, size); m->m_len = CMSG_SPACE(size); cp->cmsg_len = CMSG_LEN(size); cp->cmsg_level = level; cp->cmsg_type = type; return (m); } /* * This does the same for socket buffers that sotoxsocket does for sockets: * generate an user-format data structure describing the socket buffer. Note * that the xsockbuf structure, since it is always embedded in a socket, does * not include a self pointer nor a length. We make this entry point public * in case some other mechanism needs it. */ void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) { xsb->sb_cc = sb->sb_ccc; xsb->sb_hiwat = sb->sb_hiwat; xsb->sb_mbcnt = sb->sb_mbcnt; xsb->sb_mcnt = sb->sb_mcnt; xsb->sb_ccnt = sb->sb_ccnt; xsb->sb_mbmax = sb->sb_mbmax; xsb->sb_lowat = sb->sb_lowat; xsb->sb_flags = sb->sb_flags; xsb->sb_timeo = sb->sb_timeo; } /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */ static int dummy; SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, ""); SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW, &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size"); SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, &sb_efficiency, 0, "Socket buffer size waste factor"); Index: projects/clang380-import/sys/kern/uipc_syscalls.c =================================================================== --- projects/clang380-import/sys/kern/uipc_syscalls.c (revision 293686) +++ projects/clang380-import/sys/kern/uipc_syscalls.c (revision 293687) @@ -1,2570 +1,2748 @@ /*- * Copyright (c) 1982, 1986, 1989, 1990, 1993 * The Regents of the University of California. All rights reserved. * * sendfile(2) and related extensions: * Copyright (c) 1998, David Greenman. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_compat.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #ifdef COMPAT_FREEBSD32 #include #endif #include #include #include #include #include #include #include #include #include #include #include /* * Flags for accept1() and kern_accept4(), in addition to SOCK_CLOEXEC * and SOCK_NONBLOCK. */ #define ACCEPT4_INHERIT 0x1 #define ACCEPT4_COMPAT 0x2 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); static int accept1(struct thread *td, int s, struct sockaddr *uname, socklen_t *anamelen, int flags); static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); static int getsockname1(struct thread *td, struct getsockname_args *uap, int compat); static int getpeername1(struct thread *td, struct getpeername_args *uap, int compat); counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; -/* - * sendfile(2)-related variables and associated sysctls - */ -static SYSCTL_NODE(_kern_ipc, OID_AUTO, sendfile, CTLFLAG_RW, 0, - "sendfile(2) tunables"); -static int sfreadahead = 1; -SYSCTL_INT(_kern_ipc_sendfile, OID_AUTO, readahead, CTLFLAG_RW, - &sfreadahead, 0, "Number of sendfile(2) read-ahead MAXBSIZE blocks"); - static void sfstat_init(const void *unused) { COUNTER_ARRAY_ALLOC(sfstat, sizeof(struct sfstat) / sizeof(uint64_t), M_WAITOK); } SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL); static int sfstat_sysctl(SYSCTL_HANDLER_ARGS) { struct sfstat s; COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t)); if (req->newptr) COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t)); return (SYSCTL_OUT(req, &s, sizeof(s))); } SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW, NULL, 0, sfstat_sysctl, "I", "sendfile statistics"); /* * Convert a user file descriptor to a kernel file entry and check if required * capability rights are present. * A reference on the file entry is held upon returning. */ int getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp, u_int *fflagp) { struct file *fp; int error; error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); if (error != 0) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); return (ENOTSOCK); } if (fflagp != NULL) *fflagp = fp->f_flag; *fpp = fp; return (0); } /* * System call interface to the socket abstraction. */ #if defined(COMPAT_43) #define COMPAT_OLDSOCK #endif int sys_socket(td, uap) struct thread *td; struct socket_args /* { int domain; int type; int protocol; } */ *uap; { struct socket *so; struct file *fp; int fd, error, type, oflag, fflag; AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol); type = uap->type; oflag = 0; fflag = 0; if ((type & SOCK_CLOEXEC) != 0) { type &= ~SOCK_CLOEXEC; oflag |= O_CLOEXEC; } if ((type & SOCK_NONBLOCK) != 0) { type &= ~SOCK_NONBLOCK; fflag |= FNONBLOCK; } #ifdef MAC error = mac_socket_check_create(td->td_ucred, uap->domain, type, uap->protocol); if (error != 0) return (error); #endif error = falloc(td, &fp, &fd, oflag); if (error != 0) return (error); /* An extra reference on `fp' has been held for us by falloc(). */ error = socreate(uap->domain, &so, type, uap->protocol, td->td_ucred, td); if (error != 0) { fdclose(td, fp, fd); } else { finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops); if ((fflag & FNONBLOCK) != 0) (void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td); td->td_retval[0] = fd; } fdrop(fp, td); return (error); } /* ARGSUSED */ int sys_bind(td, uap) struct thread *td; struct bind_args /* { int s; caddr_t name; int namelen; } */ *uap; { struct sockaddr *sa; int error; error = getsockaddr(&sa, uap->name, uap->namelen); if (error == 0) { error = kern_bindat(td, AT_FDCWD, uap->s, sa); free(sa, M_SONAME); } return (error); } int kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) { struct socket *so; struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(fd); AUDIT_ARG_SOCKADDR(td, dirfd, sa); error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_BIND), &fp, NULL); if (error != 0) return (error); so = fp->f_data; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(sa); #endif #ifdef MAC error = mac_socket_check_bind(td->td_ucred, so, sa); if (error == 0) { #endif if (dirfd == AT_FDCWD) error = sobind(so, sa, td); else error = sobindat(dirfd, so, sa, td); #ifdef MAC } #endif fdrop(fp, td); return (error); } /* ARGSUSED */ int sys_bindat(td, uap) struct thread *td; struct bindat_args /* { int fd; int s; caddr_t name; int namelen; } */ *uap; { struct sockaddr *sa; int error; error = getsockaddr(&sa, uap->name, uap->namelen); if (error == 0) { error = kern_bindat(td, uap->fd, uap->s, sa); free(sa, M_SONAME); } return (error); } /* ARGSUSED */ int sys_listen(td, uap) struct thread *td; struct listen_args /* { int s; int backlog; } */ *uap; { struct socket *so; struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(uap->s); error = getsock_cap(td, uap->s, cap_rights_init(&rights, CAP_LISTEN), &fp, NULL); if (error == 0) { so = fp->f_data; #ifdef MAC error = mac_socket_check_listen(td->td_ucred, so); if (error == 0) #endif error = solisten(so, uap->backlog, td); fdrop(fp, td); } return(error); } /* * accept1() */ static int accept1(td, s, uname, anamelen, flags) struct thread *td; int s; struct sockaddr *uname; socklen_t *anamelen; int flags; { struct sockaddr *name; socklen_t namelen; struct file *fp; int error; if (uname == NULL) return (kern_accept4(td, s, NULL, NULL, flags, NULL)); error = copyin(anamelen, &namelen, sizeof (namelen)); if (error != 0) return (error); error = kern_accept4(td, s, &name, &namelen, flags, &fp); if (error != 0) return (error); if (error == 0 && uname != NULL) { #ifdef COMPAT_OLDSOCK if (flags & ACCEPT4_COMPAT) ((struct osockaddr *)name)->sa_family = name->sa_family; #endif error = copyout(name, uname, namelen); } if (error == 0) error = copyout(&namelen, anamelen, sizeof(namelen)); if (error != 0) fdclose(td, fp, td->td_retval[0]); fdrop(fp, td); free(name, M_SONAME); return (error); } int kern_accept(struct thread *td, int s, struct sockaddr **name, socklen_t *namelen, struct file **fp) { return (kern_accept4(td, s, name, namelen, ACCEPT4_INHERIT, fp)); } int kern_accept4(struct thread *td, int s, struct sockaddr **name, socklen_t *namelen, int flags, struct file **fp) { struct file *headfp, *nfp = NULL; struct sockaddr *sa = NULL; struct socket *head, *so; cap_rights_t rights; u_int fflag; pid_t pgid; int error, fd, tmp; if (name != NULL) *name = NULL; AUDIT_ARG_FD(s); error = getsock_cap(td, s, cap_rights_init(&rights, CAP_ACCEPT), &headfp, &fflag); if (error != 0) return (error); head = headfp->f_data; if ((head->so_options & SO_ACCEPTCONN) == 0) { error = EINVAL; goto done; } #ifdef MAC error = mac_socket_check_accept(td->td_ucred, head); if (error != 0) goto done; #endif error = falloc(td, &nfp, &fd, (flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0); if (error != 0) goto done; ACCEPT_LOCK(); if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { ACCEPT_UNLOCK(); error = EWOULDBLOCK; goto noconnection; } while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { head->so_error = ECONNABORTED; break; } error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, "accept", 0); if (error != 0) { ACCEPT_UNLOCK(); goto noconnection; } } if (head->so_error) { error = head->so_error; head->so_error = 0; ACCEPT_UNLOCK(); goto noconnection; } so = TAILQ_FIRST(&head->so_comp); KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); /* * Before changing the flags on the socket, we have to bump the * reference count. Otherwise, if the protocol calls sofree(), * the socket will be released due to a zero refcount. */ SOCK_LOCK(so); /* soref() and so_state update */ soref(so); /* file descriptor reference */ TAILQ_REMOVE(&head->so_comp, so, so_list); head->so_qlen--; if (flags & ACCEPT4_INHERIT) so->so_state |= (head->so_state & SS_NBIO); else so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0; so->so_qstate &= ~SQ_COMP; so->so_head = NULL; SOCK_UNLOCK(so); ACCEPT_UNLOCK(); /* An extra reference on `nfp' has been held for us by falloc(). */ td->td_retval[0] = fd; /* connection has been removed from the listen queue */ KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); if (flags & ACCEPT4_INHERIT) { pgid = fgetown(&head->so_sigio); if (pgid != 0) fsetown(pgid, &so->so_sigio); } else { fflag &= ~(FNONBLOCK | FASYNC); if (flags & SOCK_NONBLOCK) fflag |= FNONBLOCK; } finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); /* Sync socket nonblocking/async state with file flags */ tmp = fflag & FNONBLOCK; (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); tmp = fflag & FASYNC; (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); sa = 0; error = soaccept(so, &sa); if (error != 0) goto noconnection; if (sa == NULL) { if (name) *namelen = 0; goto done; } AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa); if (name) { /* check sa_len before it is destroyed */ if (*namelen > sa->sa_len) *namelen = sa->sa_len; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(sa); #endif *name = sa; sa = NULL; } noconnection: free(sa, M_SONAME); /* * close the new descriptor, assuming someone hasn't ripped it * out from under us. */ if (error != 0) fdclose(td, nfp, fd); /* * Release explicitly held references before returning. We return * a reference on nfp to the caller on success if they request it. */ done: if (fp != NULL) { if (error == 0) { *fp = nfp; nfp = NULL; } else *fp = NULL; } if (nfp != NULL) fdrop(nfp, td); fdrop(headfp, td); return (error); } int sys_accept(td, uap) struct thread *td; struct accept_args *uap; { return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT)); } int sys_accept4(td, uap) struct thread *td; struct accept4_args *uap; { if (uap->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) return (EINVAL); return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags)); } #ifdef COMPAT_OLDSOCK int oaccept(td, uap) struct thread *td; struct accept_args *uap; { return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT | ACCEPT4_COMPAT)); } #endif /* COMPAT_OLDSOCK */ /* ARGSUSED */ int sys_connect(td, uap) struct thread *td; struct connect_args /* { int s; caddr_t name; int namelen; } */ *uap; { struct sockaddr *sa; int error; error = getsockaddr(&sa, uap->name, uap->namelen); if (error == 0) { error = kern_connectat(td, AT_FDCWD, uap->s, sa); free(sa, M_SONAME); } return (error); } int kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) { struct socket *so; struct file *fp; cap_rights_t rights; int error, interrupted = 0; AUDIT_ARG_FD(fd); AUDIT_ARG_SOCKADDR(td, dirfd, sa); error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_CONNECT), &fp, NULL); if (error != 0) return (error); so = fp->f_data; if (so->so_state & SS_ISCONNECTING) { error = EALREADY; goto done1; } #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(sa); #endif #ifdef MAC error = mac_socket_check_connect(td->td_ucred, so, sa); if (error != 0) goto bad; #endif if (dirfd == AT_FDCWD) error = soconnect(so, sa, td); else error = soconnectat(dirfd, so, sa, td); if (error != 0) goto bad; if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { error = EINPROGRESS; goto done1; } SOCK_LOCK(so); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, "connec", 0); if (error != 0) { if (error == EINTR || error == ERESTART) interrupted = 1; break; } } if (error == 0) { error = so->so_error; so->so_error = 0; } SOCK_UNLOCK(so); bad: if (!interrupted) so->so_state &= ~SS_ISCONNECTING; if (error == ERESTART) error = EINTR; done1: fdrop(fp, td); return (error); } /* ARGSUSED */ int sys_connectat(td, uap) struct thread *td; struct connectat_args /* { int fd; int s; caddr_t name; int namelen; } */ *uap; { struct sockaddr *sa; int error; error = getsockaddr(&sa, uap->name, uap->namelen); if (error == 0) { error = kern_connectat(td, uap->fd, uap->s, sa); free(sa, M_SONAME); } return (error); } int kern_socketpair(struct thread *td, int domain, int type, int protocol, int *rsv) { struct file *fp1, *fp2; struct socket *so1, *so2; int fd, error, oflag, fflag; AUDIT_ARG_SOCKET(domain, type, protocol); oflag = 0; fflag = 0; if ((type & SOCK_CLOEXEC) != 0) { type &= ~SOCK_CLOEXEC; oflag |= O_CLOEXEC; } if ((type & SOCK_NONBLOCK) != 0) { type &= ~SOCK_NONBLOCK; fflag |= FNONBLOCK; } #ifdef MAC /* We might want to have a separate check for socket pairs. */ error = mac_socket_check_create(td->td_ucred, domain, type, protocol); if (error != 0) return (error); #endif error = socreate(domain, &so1, type, protocol, td->td_ucred, td); if (error != 0) return (error); error = socreate(domain, &so2, type, protocol, td->td_ucred, td); if (error != 0) goto free1; /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ error = falloc(td, &fp1, &fd, oflag); if (error != 0) goto free2; rsv[0] = fd; fp1->f_data = so1; /* so1 already has ref count */ error = falloc(td, &fp2, &fd, oflag); if (error != 0) goto free3; fp2->f_data = so2; /* so2 already has ref count */ rsv[1] = fd; error = soconnect2(so1, so2); if (error != 0) goto free4; if (type == SOCK_DGRAM) { /* * Datagram socket connection is asymmetric. */ error = soconnect2(so2, so1); if (error != 0) goto free4; } finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data, &socketops); finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data, &socketops); if ((fflag & FNONBLOCK) != 0) { (void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td); (void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td); } fdrop(fp1, td); fdrop(fp2, td); return (0); free4: fdclose(td, fp2, rsv[1]); fdrop(fp2, td); free3: fdclose(td, fp1, rsv[0]); fdrop(fp1, td); free2: if (so2 != NULL) (void)soclose(so2); free1: if (so1 != NULL) (void)soclose(so1); return (error); } int sys_socketpair(struct thread *td, struct socketpair_args *uap) { int error, sv[2]; error = kern_socketpair(td, uap->domain, uap->type, uap->protocol, sv); if (error != 0) return (error); error = copyout(sv, uap->rsv, 2 * sizeof(int)); if (error != 0) { (void)kern_close(td, sv[0]); (void)kern_close(td, sv[1]); } return (error); } static int sendit(td, s, mp, flags) struct thread *td; int s; struct msghdr *mp; int flags; { struct mbuf *control; struct sockaddr *to; int error; #ifdef CAPABILITY_MODE if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL)) return (ECAPMODE); #endif if (mp->msg_name != NULL) { error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); if (error != 0) { to = NULL; goto bad; } mp->msg_name = to; } else { to = NULL; } if (mp->msg_control) { if (mp->msg_controllen < sizeof(struct cmsghdr) #ifdef COMPAT_OLDSOCK && mp->msg_flags != MSG_COMPAT #endif ) { error = EINVAL; goto bad; } error = sockargs(&control, mp->msg_control, mp->msg_controllen, MT_CONTROL); if (error != 0) goto bad; #ifdef COMPAT_OLDSOCK if (mp->msg_flags == MSG_COMPAT) { struct cmsghdr *cm; M_PREPEND(control, sizeof(*cm), M_WAITOK); cm = mtod(control, struct cmsghdr *); cm->cmsg_len = control->m_len; cm->cmsg_level = SOL_SOCKET; cm->cmsg_type = SCM_RIGHTS; } #endif } else { control = NULL; } error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); bad: free(to, M_SONAME); return (error); } int kern_sendit(td, s, mp, flags, control, segflg) struct thread *td; int s; struct msghdr *mp; int flags; struct mbuf *control; enum uio_seg segflg; { struct file *fp; struct uio auio; struct iovec *iov; struct socket *so; cap_rights_t rights; #ifdef KTRACE struct uio *ktruio = NULL; #endif ssize_t len; int i, error; AUDIT_ARG_FD(s); cap_rights_init(&rights, CAP_SEND); if (mp->msg_name != NULL) { AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name); cap_rights_set(&rights, CAP_CONNECT); } error = getsock_cap(td, s, &rights, &fp, NULL); if (error != 0) return (error); so = (struct socket *)fp->f_data; #ifdef KTRACE if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(mp->msg_name); #endif #ifdef MAC if (mp->msg_name != NULL) { error = mac_socket_check_connect(td->td_ucred, so, mp->msg_name); if (error != 0) goto bad; } error = mac_socket_check_send(td->td_ucred, so); if (error != 0) goto bad; #endif auio.uio_iov = mp->msg_iov; auio.uio_iovcnt = mp->msg_iovlen; auio.uio_segflg = segflg; auio.uio_rw = UIO_WRITE; auio.uio_td = td; auio.uio_offset = 0; /* XXX */ auio.uio_resid = 0; iov = mp->msg_iov; for (i = 0; i < mp->msg_iovlen; i++, iov++) { if ((auio.uio_resid += iov->iov_len) < 0) { error = EINVAL; goto bad; } } #ifdef KTRACE if (KTRPOINT(td, KTR_GENIO)) ktruio = cloneuio(&auio); #endif len = auio.uio_resid; error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); if (error != 0) { if (auio.uio_resid != len && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; /* Generation of SIGPIPE can be controlled per socket */ if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && !(flags & MSG_NOSIGNAL)) { PROC_LOCK(td->td_proc); tdsignal(td, SIGPIPE); PROC_UNLOCK(td->td_proc); } } if (error == 0) td->td_retval[0] = len - auio.uio_resid; #ifdef KTRACE if (ktruio != NULL) { ktruio->uio_resid = td->td_retval[0]; ktrgenio(s, UIO_WRITE, ktruio, error); } #endif bad: fdrop(fp, td); return (error); } int sys_sendto(td, uap) struct thread *td; struct sendto_args /* { int s; caddr_t buf; size_t len; int flags; caddr_t to; int tolen; } */ *uap; { struct msghdr msg; struct iovec aiov; msg.msg_name = uap->to; msg.msg_namelen = uap->tolen; msg.msg_iov = &aiov; msg.msg_iovlen = 1; msg.msg_control = 0; #ifdef COMPAT_OLDSOCK msg.msg_flags = 0; #endif aiov.iov_base = uap->buf; aiov.iov_len = uap->len; return (sendit(td, uap->s, &msg, uap->flags)); } #ifdef COMPAT_OLDSOCK int osend(td, uap) struct thread *td; struct osend_args /* { int s; caddr_t buf; int len; int flags; } */ *uap; { struct msghdr msg; struct iovec aiov; msg.msg_name = 0; msg.msg_namelen = 0; msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = uap->buf; aiov.iov_len = uap->len; msg.msg_control = 0; msg.msg_flags = 0; return (sendit(td, uap->s, &msg, uap->flags)); } int osendmsg(td, uap) struct thread *td; struct osendmsg_args /* { int s; caddr_t msg; int flags; } */ *uap; { struct msghdr msg; struct iovec *iov; int error; error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); if (error != 0) return (error); error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); if (error != 0) return (error); msg.msg_iov = iov; msg.msg_flags = MSG_COMPAT; error = sendit(td, uap->s, &msg, uap->flags); free(iov, M_IOV); return (error); } #endif int sys_sendmsg(td, uap) struct thread *td; struct sendmsg_args /* { int s; caddr_t msg; int flags; } */ *uap; { struct msghdr msg; struct iovec *iov; int error; error = copyin(uap->msg, &msg, sizeof (msg)); if (error != 0) return (error); error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); if (error != 0) return (error); msg.msg_iov = iov; #ifdef COMPAT_OLDSOCK msg.msg_flags = 0; #endif error = sendit(td, uap->s, &msg, uap->flags); free(iov, M_IOV); return (error); } int kern_recvit(td, s, mp, fromseg, controlp) struct thread *td; int s; struct msghdr *mp; enum uio_seg fromseg; struct mbuf **controlp; { struct uio auio; struct iovec *iov; struct mbuf *m, *control = NULL; caddr_t ctlbuf; struct file *fp; struct socket *so; struct sockaddr *fromsa = NULL; cap_rights_t rights; #ifdef KTRACE struct uio *ktruio = NULL; #endif ssize_t len; int error, i; if (controlp != NULL) *controlp = NULL; AUDIT_ARG_FD(s); error = getsock_cap(td, s, cap_rights_init(&rights, CAP_RECV), &fp, NULL); if (error != 0) return (error); so = fp->f_data; #ifdef MAC error = mac_socket_check_receive(td->td_ucred, so); if (error != 0) { fdrop(fp, td); return (error); } #endif auio.uio_iov = mp->msg_iov; auio.uio_iovcnt = mp->msg_iovlen; auio.uio_segflg = UIO_USERSPACE; auio.uio_rw = UIO_READ; auio.uio_td = td; auio.uio_offset = 0; /* XXX */ auio.uio_resid = 0; iov = mp->msg_iov; for (i = 0; i < mp->msg_iovlen; i++, iov++) { if ((auio.uio_resid += iov->iov_len) < 0) { fdrop(fp, td); return (EINVAL); } } #ifdef KTRACE if (KTRPOINT(td, KTR_GENIO)) ktruio = cloneuio(&auio); #endif len = auio.uio_resid; error = soreceive(so, &fromsa, &auio, NULL, (mp->msg_control || controlp) ? &control : NULL, &mp->msg_flags); if (error != 0) { if (auio.uio_resid != len && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; } if (fromsa != NULL) AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa); #ifdef KTRACE if (ktruio != NULL) { ktruio->uio_resid = len - auio.uio_resid; ktrgenio(s, UIO_READ, ktruio, error); } #endif if (error != 0) goto out; td->td_retval[0] = len - auio.uio_resid; if (mp->msg_name) { len = mp->msg_namelen; if (len <= 0 || fromsa == NULL) len = 0; else { /* save sa_len before it is destroyed by MSG_COMPAT */ len = MIN(len, fromsa->sa_len); #ifdef COMPAT_OLDSOCK if (mp->msg_flags & MSG_COMPAT) ((struct osockaddr *)fromsa)->sa_family = fromsa->sa_family; #endif if (fromseg == UIO_USERSPACE) { error = copyout(fromsa, mp->msg_name, (unsigned)len); if (error != 0) goto out; } else bcopy(fromsa, mp->msg_name, len); } mp->msg_namelen = len; } if (mp->msg_control && controlp == NULL) { #ifdef COMPAT_OLDSOCK /* * We assume that old recvmsg calls won't receive access * rights and other control info, esp. as control info * is always optional and those options didn't exist in 4.3. * If we receive rights, trim the cmsghdr; anything else * is tossed. */ if (control && mp->msg_flags & MSG_COMPAT) { if (mtod(control, struct cmsghdr *)->cmsg_level != SOL_SOCKET || mtod(control, struct cmsghdr *)->cmsg_type != SCM_RIGHTS) { mp->msg_controllen = 0; goto out; } control->m_len -= sizeof (struct cmsghdr); control->m_data += sizeof (struct cmsghdr); } #endif len = mp->msg_controllen; m = control; mp->msg_controllen = 0; ctlbuf = mp->msg_control; while (m && len > 0) { unsigned int tocopy; if (len >= m->m_len) tocopy = m->m_len; else { mp->msg_flags |= MSG_CTRUNC; tocopy = len; } if ((error = copyout(mtod(m, caddr_t), ctlbuf, tocopy)) != 0) goto out; ctlbuf += tocopy; len -= tocopy; m = m->m_next; } mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; } out: fdrop(fp, td); #ifdef KTRACE if (fromsa && KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(fromsa); #endif free(fromsa, M_SONAME); if (error == 0 && controlp != NULL) *controlp = control; else if (control) m_freem(control); return (error); } static int recvit(td, s, mp, namelenp) struct thread *td; int s; struct msghdr *mp; void *namelenp; { int error; error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL); if (error != 0) return (error); if (namelenp != NULL) { error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t)); #ifdef COMPAT_OLDSOCK if (mp->msg_flags & MSG_COMPAT) error = 0; /* old recvfrom didn't check */ #endif } return (error); } int sys_recvfrom(td, uap) struct thread *td; struct recvfrom_args /* { int s; caddr_t buf; size_t len; int flags; struct sockaddr * __restrict from; socklen_t * __restrict fromlenaddr; } */ *uap; { struct msghdr msg; struct iovec aiov; int error; if (uap->fromlenaddr) { error = copyin(uap->fromlenaddr, &msg.msg_namelen, sizeof (msg.msg_namelen)); if (error != 0) goto done2; } else { msg.msg_namelen = 0; } msg.msg_name = uap->from; msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = uap->buf; aiov.iov_len = uap->len; msg.msg_control = 0; msg.msg_flags = uap->flags; error = recvit(td, uap->s, &msg, uap->fromlenaddr); done2: return (error); } #ifdef COMPAT_OLDSOCK int orecvfrom(td, uap) struct thread *td; struct recvfrom_args *uap; { uap->flags |= MSG_COMPAT; return (sys_recvfrom(td, uap)); } #endif #ifdef COMPAT_OLDSOCK int orecv(td, uap) struct thread *td; struct orecv_args /* { int s; caddr_t buf; int len; int flags; } */ *uap; { struct msghdr msg; struct iovec aiov; msg.msg_name = 0; msg.msg_namelen = 0; msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = uap->buf; aiov.iov_len = uap->len; msg.msg_control = 0; msg.msg_flags = uap->flags; return (recvit(td, uap->s, &msg, NULL)); } /* * Old recvmsg. This code takes advantage of the fact that the old msghdr * overlays the new one, missing only the flags, and with the (old) access * rights where the control fields are now. */ int orecvmsg(td, uap) struct thread *td; struct orecvmsg_args /* { int s; struct omsghdr *msg; int flags; } */ *uap; { struct msghdr msg; struct iovec *iov; int error; error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); if (error != 0) return (error); error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); if (error != 0) return (error); msg.msg_flags = uap->flags | MSG_COMPAT; msg.msg_iov = iov; error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); if (msg.msg_controllen && error == 0) error = copyout(&msg.msg_controllen, &uap->msg->msg_accrightslen, sizeof (int)); free(iov, M_IOV); return (error); } #endif int sys_recvmsg(td, uap) struct thread *td; struct recvmsg_args /* { int s; struct msghdr *msg; int flags; } */ *uap; { struct msghdr msg; struct iovec *uiov, *iov; int error; error = copyin(uap->msg, &msg, sizeof (msg)); if (error != 0) return (error); error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); if (error != 0) return (error); msg.msg_flags = uap->flags; #ifdef COMPAT_OLDSOCK msg.msg_flags &= ~MSG_COMPAT; #endif uiov = msg.msg_iov; msg.msg_iov = iov; error = recvit(td, uap->s, &msg, NULL); if (error == 0) { msg.msg_iov = uiov; error = copyout(&msg, uap->msg, sizeof(msg)); } free(iov, M_IOV); return (error); } /* ARGSUSED */ int sys_shutdown(td, uap) struct thread *td; struct shutdown_args /* { int s; int how; } */ *uap; { struct socket *so; struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(uap->s); error = getsock_cap(td, uap->s, cap_rights_init(&rights, CAP_SHUTDOWN), &fp, NULL); if (error == 0) { so = fp->f_data; error = soshutdown(so, uap->how); /* * Previous versions did not return ENOTCONN, but 0 in * case the socket was not connected. Some important * programs like syslogd up to r279016, 2015-02-19, * still depend on this behavior. */ if (error == ENOTCONN && td->td_proc->p_osrel < P_OSREL_SHUTDOWN_ENOTCONN) error = 0; fdrop(fp, td); } return (error); } /* ARGSUSED */ int sys_setsockopt(td, uap) struct thread *td; struct setsockopt_args /* { int s; int level; int name; caddr_t val; int valsize; } */ *uap; { return (kern_setsockopt(td, uap->s, uap->level, uap->name, uap->val, UIO_USERSPACE, uap->valsize)); } int kern_setsockopt(td, s, level, name, val, valseg, valsize) struct thread *td; int s; int level; int name; void *val; enum uio_seg valseg; socklen_t valsize; { struct socket *so; struct file *fp; struct sockopt sopt; cap_rights_t rights; int error; if (val == NULL && valsize != 0) return (EFAULT); if ((int)valsize < 0) return (EINVAL); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = level; sopt.sopt_name = name; sopt.sopt_val = val; sopt.sopt_valsize = valsize; switch (valseg) { case UIO_USERSPACE: sopt.sopt_td = td; break; case UIO_SYSSPACE: sopt.sopt_td = NULL; break; default: panic("kern_setsockopt called with bad valseg"); } AUDIT_ARG_FD(s); error = getsock_cap(td, s, cap_rights_init(&rights, CAP_SETSOCKOPT), &fp, NULL); if (error == 0) { so = fp->f_data; error = sosetopt(so, &sopt); fdrop(fp, td); } return(error); } /* ARGSUSED */ int sys_getsockopt(td, uap) struct thread *td; struct getsockopt_args /* { int s; int level; int name; void * __restrict val; socklen_t * __restrict avalsize; } */ *uap; { socklen_t valsize; int error; if (uap->val) { error = copyin(uap->avalsize, &valsize, sizeof (valsize)); if (error != 0) return (error); } error = kern_getsockopt(td, uap->s, uap->level, uap->name, uap->val, UIO_USERSPACE, &valsize); if (error == 0) error = copyout(&valsize, uap->avalsize, sizeof (valsize)); return (error); } /* * Kernel version of getsockopt. * optval can be a userland or userspace. optlen is always a kernel pointer. */ int kern_getsockopt(td, s, level, name, val, valseg, valsize) struct thread *td; int s; int level; int name; void *val; enum uio_seg valseg; socklen_t *valsize; { struct socket *so; struct file *fp; struct sockopt sopt; cap_rights_t rights; int error; if (val == NULL) *valsize = 0; if ((int)*valsize < 0) return (EINVAL); sopt.sopt_dir = SOPT_GET; sopt.sopt_level = level; sopt.sopt_name = name; sopt.sopt_val = val; sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ switch (valseg) { case UIO_USERSPACE: sopt.sopt_td = td; break; case UIO_SYSSPACE: sopt.sopt_td = NULL; break; default: panic("kern_getsockopt called with bad valseg"); } AUDIT_ARG_FD(s); error = getsock_cap(td, s, cap_rights_init(&rights, CAP_GETSOCKOPT), &fp, NULL); if (error == 0) { so = fp->f_data; error = sogetopt(so, &sopt); *valsize = sopt.sopt_valsize; fdrop(fp, td); } return (error); } /* * getsockname1() - Get socket name. */ /* ARGSUSED */ static int getsockname1(td, uap, compat) struct thread *td; struct getsockname_args /* { int fdes; struct sockaddr * __restrict asa; socklen_t * __restrict alen; } */ *uap; int compat; { struct sockaddr *sa; socklen_t len; int error; error = copyin(uap->alen, &len, sizeof(len)); if (error != 0) return (error); error = kern_getsockname(td, uap->fdes, &sa, &len); if (error != 0) return (error); if (len != 0) { #ifdef COMPAT_OLDSOCK if (compat) ((struct osockaddr *)sa)->sa_family = sa->sa_family; #endif error = copyout(sa, uap->asa, (u_int)len); } free(sa, M_SONAME); if (error == 0) error = copyout(&len, uap->alen, sizeof(len)); return (error); } int kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, socklen_t *alen) { struct socket *so; struct file *fp; cap_rights_t rights; socklen_t len; int error; AUDIT_ARG_FD(fd); error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_GETSOCKNAME), &fp, NULL); if (error != 0) return (error); so = fp->f_data; *sa = NULL; CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa); CURVNET_RESTORE(); if (error != 0) goto bad; if (*sa == NULL) len = 0; else len = MIN(*alen, (*sa)->sa_len); *alen = len; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(*sa); #endif bad: fdrop(fp, td); if (error != 0 && *sa != NULL) { free(*sa, M_SONAME); *sa = NULL; } return (error); } int sys_getsockname(td, uap) struct thread *td; struct getsockname_args *uap; { return (getsockname1(td, uap, 0)); } #ifdef COMPAT_OLDSOCK int ogetsockname(td, uap) struct thread *td; struct getsockname_args *uap; { return (getsockname1(td, uap, 1)); } #endif /* COMPAT_OLDSOCK */ /* * getpeername1() - Get name of peer for connected socket. */ /* ARGSUSED */ static int getpeername1(td, uap, compat) struct thread *td; struct getpeername_args /* { int fdes; struct sockaddr * __restrict asa; socklen_t * __restrict alen; } */ *uap; int compat; { struct sockaddr *sa; socklen_t len; int error; error = copyin(uap->alen, &len, sizeof (len)); if (error != 0) return (error); error = kern_getpeername(td, uap->fdes, &sa, &len); if (error != 0) return (error); if (len != 0) { #ifdef COMPAT_OLDSOCK if (compat) ((struct osockaddr *)sa)->sa_family = sa->sa_family; #endif error = copyout(sa, uap->asa, (u_int)len); } free(sa, M_SONAME); if (error == 0) error = copyout(&len, uap->alen, sizeof(len)); return (error); } int kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, socklen_t *alen) { struct socket *so; struct file *fp; cap_rights_t rights; socklen_t len; int error; AUDIT_ARG_FD(fd); error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_GETPEERNAME), &fp, NULL); if (error != 0) return (error); so = fp->f_data; if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { error = ENOTCONN; goto done; } *sa = NULL; CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa); CURVNET_RESTORE(); if (error != 0) goto bad; if (*sa == NULL) len = 0; else len = MIN(*alen, (*sa)->sa_len); *alen = len; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(*sa); #endif bad: if (error != 0 && *sa != NULL) { free(*sa, M_SONAME); *sa = NULL; } done: fdrop(fp, td); return (error); } int sys_getpeername(td, uap) struct thread *td; struct getpeername_args *uap; { return (getpeername1(td, uap, 0)); } #ifdef COMPAT_OLDSOCK int ogetpeername(td, uap) struct thread *td; struct ogetpeername_args *uap; { /* XXX uap should have type `getpeername_args *' to begin with. */ return (getpeername1(td, (struct getpeername_args *)uap, 1)); } #endif /* COMPAT_OLDSOCK */ int sockargs(mp, buf, buflen, type) struct mbuf **mp; caddr_t buf; int buflen, type; { struct sockaddr *sa; struct mbuf *m; int error; if (buflen > MLEN) { #ifdef COMPAT_OLDSOCK if (type == MT_SONAME && buflen <= 112) buflen = MLEN; /* unix domain compat. hack */ else #endif if (buflen > MCLBYTES) return (EINVAL); } m = m_get2(buflen, M_WAITOK, type, 0); m->m_len = buflen; error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); if (error != 0) (void) m_free(m); else { *mp = m; if (type == MT_SONAME) { sa = mtod(m, struct sockaddr *); #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN if (sa->sa_family == 0 && sa->sa_len < AF_MAX) sa->sa_family = sa->sa_len; #endif sa->sa_len = buflen; } } return (error); } int getsockaddr(namp, uaddr, len) struct sockaddr **namp; caddr_t uaddr; size_t len; { struct sockaddr *sa; int error; if (len > SOCK_MAXADDRLEN) return (ENAMETOOLONG); if (len < offsetof(struct sockaddr, sa_data[0])) return (EINVAL); sa = malloc(len, M_SONAME, M_WAITOK); error = copyin(uaddr, sa, len); if (error != 0) { free(sa, M_SONAME); } else { #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN if (sa->sa_family == 0 && sa->sa_len < AF_MAX) sa->sa_family = sa->sa_len; #endif sa->sa_len = len; *namp = sa; } return (error); } struct sendfile_sync { struct mtx mtx; struct cv cv; unsigned count; }; /* * Add more references to a vm_page + sf_buf + sendfile_sync. */ void sf_ext_ref(void *arg1, void *arg2) { struct sf_buf *sf = arg1; struct sendfile_sync *sfs = arg2; vm_page_t pg = sf_buf_page(sf); sf_buf_ref(sf); vm_page_lock(pg); vm_page_wire(pg); vm_page_unlock(pg); if (sfs != NULL) { mtx_lock(&sfs->mtx); KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0")); sfs->count++; mtx_unlock(&sfs->mtx); } } /* * Detach mapped page and release resources back to the system. */ void sf_ext_free(void *arg1, void *arg2) { struct sf_buf *sf = arg1; struct sendfile_sync *sfs = arg2; vm_page_t pg = sf_buf_page(sf); sf_buf_free(sf); vm_page_lock(pg); - vm_page_unwire(pg, PQ_INACTIVE); /* * Check for the object going away on us. This can * happen since we don't hold a reference to it. * If so, we're responsible for freeing the page. */ - if (pg->wire_count == 0 && pg->object == NULL) + if (vm_page_unwire(pg, PQ_INACTIVE) && pg->object == NULL) vm_page_free(pg); vm_page_unlock(pg); if (sfs != NULL) { mtx_lock(&sfs->mtx); KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0")); if (--sfs->count == 0) cv_signal(&sfs->cv); mtx_unlock(&sfs->mtx); } } /* + * Same as above, but forces the page to be detached from the object + * and go into free pool. + */ +void +sf_ext_free_nocache(void *arg1, void *arg2) +{ + struct sf_buf *sf = arg1; + struct sendfile_sync *sfs = arg2; + vm_page_t pg = sf_buf_page(sf); + + sf_buf_free(sf); + + vm_page_lock(pg); + if (vm_page_unwire(pg, PQ_NONE)) { + vm_object_t obj; + + /* Try to free the page, but only if it is cheap to. */ + if ((obj = pg->object) == NULL) + vm_page_free(pg); + else if (!vm_page_xbusied(pg) && VM_OBJECT_TRYWLOCK(obj)) { + vm_page_free(pg); + VM_OBJECT_WUNLOCK(obj); + } else + vm_page_deactivate(pg); + } + vm_page_unlock(pg); + + if (sfs != NULL) { + mtx_lock(&sfs->mtx); + KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0")); + if (--sfs->count == 0) + cv_signal(&sfs->cv); + mtx_unlock(&sfs->mtx); + } +} + +/* * sendfile(2) * * int sendfile(int fd, int s, off_t offset, size_t nbytes, * struct sf_hdtr *hdtr, off_t *sbytes, int flags) * * Send a file specified by 'fd' and starting at 'offset' to a socket * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes == * 0. Optionally add a header and/or trailer to the socket output. If * specified, write the total number of bytes sent into *sbytes. */ int sys_sendfile(struct thread *td, struct sendfile_args *uap) { return (do_sendfile(td, uap, 0)); } static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) { struct sf_hdtr hdtr; struct uio *hdr_uio, *trl_uio; struct file *fp; cap_rights_t rights; off_t sbytes; int error; /* * File offset must be positive. If it goes beyond EOF * we send only the header/trailer and no payload data. */ if (uap->offset < 0) return (EINVAL); hdr_uio = trl_uio = NULL; if (uap->hdtr != NULL) { error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); if (error != 0) goto out; if (hdtr.headers != NULL) { error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); if (error != 0) goto out; } if (hdtr.trailers != NULL) { error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); if (error != 0) goto out; } } AUDIT_ARG_FD(uap->fd); /* * sendfile(2) can start at any offset within a file so we require * CAP_READ+CAP_SEEK = CAP_PREAD. */ if ((error = fget_read(td, uap->fd, cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) { goto out; } error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset, uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, td); fdrop(fp, td); if (uap->sbytes != NULL) copyout(&sbytes, uap->sbytes, sizeof(off_t)); out: free(hdr_uio, M_IOV); free(trl_uio, M_IOV); return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) { struct sendfile_args args; args.fd = uap->fd; args.s = uap->s; args.offset = uap->offset; args.nbytes = uap->nbytes; args.hdtr = uap->hdtr; args.sbytes = uap->sbytes; args.flags = uap->flags; return (do_sendfile(td, &args, 1)); } #endif /* COMPAT_FREEBSD4 */ + /* + * How much data to put into page i of n. + * Only first and last pages are special. + */ +static inline off_t +xfsize(int i, int n, off_t off, off_t len) +{ + + if (i == 0) + return (omin(PAGE_SIZE - (off & PAGE_MASK), len)); + + if (i == n - 1 && ((off + len) & PAGE_MASK) > 0) + return ((off + len) & PAGE_MASK); + + return (PAGE_SIZE); +} + +/* + * Offset within object for i page. + */ +static inline vm_offset_t +vmoff(int i, off_t off) +{ + + if (i == 0) + return ((vm_offset_t)off); + + return (trunc_page(off + i * PAGE_SIZE)); +} + +/* + * Pretend as if we don't have enough space, subtract xfsize() of + * all pages that failed. + */ +static inline void +fixspace(int old, int new, off_t off, int *space) +{ + + KASSERT(old > new, ("%s: old %d new %d", __func__, old, new)); + + /* Subtract last one. */ + *space -= xfsize(old - 1, old, off, *space); + old--; + + if (new == old) + /* There was only one page. */ + return; + + /* Subtract first one. */ + if (new == 0) { + *space -= xfsize(0, old, off, *space); + new++; + } + + /* Rest of pages are full sized. */ + *space -= (old - new) * PAGE_SIZE; + + KASSERT(*space >= 0, ("%s: space went backwards", __func__)); +} + +/* + * Structure describing a single sendfile(2) I/O, which may consist of + * several underlying pager I/Os. + * + * The syscall context allocates the structure and initializes 'nios' + * to 1. As sendfile_swapin() runs through pages and starts asynchronous + * paging operations, it increments 'nios'. + * + * Every I/O completion calls sf_iodone(), which decrements the 'nios', and + * the syscall also calls sf_iodone() after allocating all mbufs, linking them + * and sending to socket. Whoever reaches zero 'nios' is responsible to + * call pru_ready on the socket, to notify it of readyness of the data. + */ +struct sf_io { + volatile u_int nios; + u_int error; + int npages; + struct file *sock_fp; + struct mbuf *m; + vm_page_t pa[]; +}; + +static void +sf_iodone(void *arg, vm_page_t *pg, int count, int error) +{ + struct sf_io *sfio = arg; + struct socket *so; + + for (int i = 0; i < count; i++) + vm_page_xunbusy(pg[i]); + + if (error) + sfio->error = error; + + if (!refcount_release(&sfio->nios)) + return; + + so = sfio->sock_fp->f_data; + + if (sfio->error) { + struct mbuf *m; + + /* + * I/O operation failed. The state of data in the socket + * is now inconsistent, and all what we can do is to tear + * it down. Protocol abort method would tear down protocol + * state, free all ready mbufs and detach not ready ones. + * We will free the mbufs corresponding to this I/O manually. + * + * The socket would be marked with EIO and made available + * for read, so that application receives EIO on next + * syscall and eventually closes the socket. + */ + so->so_proto->pr_usrreqs->pru_abort(so); + so->so_error = EIO; + + m = sfio->m; + for (int i = 0; i < sfio->npages; i++) + m = m_free(m); + } else { + CURVNET_SET(so->so_vnet); + (void )(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m, + sfio->npages); + CURVNET_RESTORE(); + } + + /* XXXGL: curthread */ + fdrop(sfio->sock_fp, curthread); + free(sfio, M_TEMP); +} + +/* + * Iterate through pages vector and request paging for non-valid pages. + */ static int -sendfile_readpage(vm_object_t obj, struct vnode *vp, int nd, - off_t off, int xfsize, int bsize, struct thread *td, vm_page_t *res) +sendfile_swapin(vm_object_t obj, struct sf_io *sfio, off_t off, off_t len, + int npages, int rhpages, int flags) { - vm_page_t m; - vm_pindex_t pindex; - ssize_t resid; - int error, readahead, rv; + vm_page_t *pa = sfio->pa; + int nios; - pindex = OFF_TO_IDX(off); - VM_OBJECT_WLOCK(obj); - m = vm_page_grab(obj, pindex, (vp != NULL ? VM_ALLOC_NOBUSY | - VM_ALLOC_IGN_SBUSY : 0) | VM_ALLOC_WIRED | VM_ALLOC_NORMAL); + nios = 0; + flags = (flags & SF_NODISKIO) ? VM_ALLOC_NOWAIT : 0; /* - * Check if page is valid for what we need, otherwise initiate I/O. - * - * The non-zero nd argument prevents disk I/O, instead we - * return the caller what he specified in nd. In particular, - * if we already turned some pages into mbufs, nd == EAGAIN - * and the main function send them the pages before we come - * here again and block. + * First grab all the pages and wire them. Note that we grab + * only required pages. Readahead pages are dealt with later. */ - if (m->valid != 0 && vm_page_is_valid(m, off & PAGE_MASK, xfsize)) { - if (vp == NULL) - vm_page_xunbusy(m); - VM_OBJECT_WUNLOCK(obj); - *res = m; - return (0); - } else if (nd != 0) { - if (vp == NULL) - vm_page_xunbusy(m); - error = nd; - goto free_page; + VM_OBJECT_WLOCK(obj); + for (int i = 0; i < npages; i++) { + pa[i] = vm_page_grab(obj, OFF_TO_IDX(vmoff(i, off)), + VM_ALLOC_WIRED | VM_ALLOC_NORMAL | flags); + if (pa[i] == NULL) { + npages = i; + rhpages = 0; + break; + } } - /* - * Get the page from backing store. - */ - error = 0; - if (vp != NULL) { - VM_OBJECT_WUNLOCK(obj); - readahead = sfreadahead * MAXBSIZE; + for (int i = 0; i < npages;) { + int j, a, count, rv; + /* Skip valid pages. */ + if (vm_page_is_valid(pa[i], vmoff(i, off) & PAGE_MASK, + xfsize(i, npages, off, len))) { + vm_page_xunbusy(pa[i]); + SFSTAT_INC(sf_pages_valid); + i++; + continue; + } + /* - * Use vn_rdwr() instead of the pager interface for - * the vnode, to allow the read-ahead. - * - * XXXMAC: Because we don't have fp->f_cred here, we - * pass in NOCRED. This is probably wrong, but is - * consistent with our original implementation. + * Now 'i' points to first invalid page, iterate further + * to make 'j' point at first valid after a bunch of + * invalid ones. */ - error = vn_rdwr(UIO_READ, vp, NULL, readahead, trunc_page(off), - UIO_NOCOPY, IO_NODELOCKED | IO_VMIO | ((readahead / - bsize) << IO_SEQSHIFT), td->td_ucred, NOCRED, &resid, td); - SFSTAT_INC(sf_iocnt); - VM_OBJECT_WLOCK(obj); - } else { - if (vm_pager_has_page(obj, pindex, NULL, NULL)) { - rv = vm_pager_get_pages(obj, &m, 1, NULL, NULL); - SFSTAT_INC(sf_iocnt); - if (rv != VM_PAGER_OK) { - vm_page_lock(m); - vm_page_free(m); - vm_page_unlock(m); - m = NULL; - error = EIO; + for (j = i + 1; j < npages; j++) + if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK, + xfsize(j, npages, off, len))) { + SFSTAT_INC(sf_pages_valid); + break; } - } else { - pmap_zero_page(m); - m->valid = VM_PAGE_BITS_ALL; - m->dirty = 0; + + /* + * Now we got region of invalid pages between 'i' and 'j'. + * Check that they belong to pager. They may not be there, + * which is a regular situation for shmem pager. For vnode + * pager this happens only in case of sparse file. + * + * Important feature of vm_pager_has_page() is the hint + * stored in 'a', about how many pages we can pagein after + * this page in a single I/O. + */ + while (!vm_pager_has_page(obj, OFF_TO_IDX(vmoff(i, off)), + NULL, &a) && i < j) { + pmap_zero_page(pa[i]); + pa[i]->valid = VM_PAGE_BITS_ALL; + pa[i]->dirty = 0; + vm_page_xunbusy(pa[i]); + i++; } - if (m != NULL) - vm_page_xunbusy(m); - } - if (error == 0) { - *res = m; - } else if (m != NULL) { -free_page: - vm_page_lock(m); - vm_page_unwire(m, PQ_INACTIVE); + if (i == j) + continue; /* - * See if anyone else might know about this page. If - * not and it is not valid, then free it. + * We want to pagein as many pages as possible, limited only + * by the 'a' hint and actual request. + * + * We should not pagein into already valid page, thus if + * 'j' didn't reach last page, trim by that page. + * + * When the pagein fulfils the request, also specify readahead. */ - if (m->wire_count == 0 && m->valid == 0 && !vm_page_busied(m)) - vm_page_free(m); - vm_page_unlock(m); + if (j < npages) + a = min(a, j - i - 1); + count = min(a + 1, npages - i); + + refcount_acquire(&sfio->nios); + rv = vm_pager_get_pages_async(obj, pa + i, count, NULL, + i + count == npages ? &rhpages : NULL, + &sf_iodone, sfio); + KASSERT(rv == VM_PAGER_OK, ("%s: pager fail obj %p page %p", + __func__, obj, pa[i])); + + SFSTAT_INC(sf_iocnt); + SFSTAT_ADD(sf_pages_read, count); + if (i + count == npages) + SFSTAT_ADD(sf_rhpages_read, rhpages); + +#ifdef INVARIANTS + for (j = i; j < i + count && j < npages; j++) + KASSERT(pa[j] == vm_page_lookup(obj, + OFF_TO_IDX(vmoff(j, off))), + ("pa[j] %p lookup %p\n", pa[j], + vm_page_lookup(obj, OFF_TO_IDX(vmoff(j, off))))); +#endif + i += count; + nios++; } - KASSERT(error != 0 || (m->wire_count > 0 && - vm_page_is_valid(m, off & PAGE_MASK, xfsize)), - ("wrong page state m %p off %#jx xfsize %d", m, (uintmax_t)off, - xfsize)); + VM_OBJECT_WUNLOCK(obj); - return (error); + + if (nios == 0 && npages != 0) + SFSTAT_INC(sf_noiocnt); + + return (nios); } static int sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res, struct vnode **vp_res, struct shmfd **shmfd_res, off_t *obj_size, int *bsize) { struct vattr va; vm_object_t obj; struct vnode *vp; struct shmfd *shmfd; int error; vp = *vp_res = NULL; obj = NULL; shmfd = *shmfd_res = NULL; *bsize = 0; /* * The file descriptor must be a regular file and have a * backing VM object. */ if (fp->f_type == DTYPE_VNODE) { vp = fp->f_vnode; vn_lock(vp, LK_SHARED | LK_RETRY); if (vp->v_type != VREG) { error = EINVAL; goto out; } *bsize = vp->v_mount->mnt_stat.f_iosize; error = VOP_GETATTR(vp, &va, td->td_ucred); if (error != 0) goto out; *obj_size = va.va_size; obj = vp->v_object; if (obj == NULL) { error = EINVAL; goto out; } } else if (fp->f_type == DTYPE_SHM) { error = 0; shmfd = fp->f_data; obj = shmfd->shm_object; *obj_size = shmfd->shm_size; } else { error = EINVAL; goto out; } VM_OBJECT_WLOCK(obj); if ((obj->flags & OBJ_DEAD) != 0) { VM_OBJECT_WUNLOCK(obj); error = EBADF; goto out; } /* * Temporarily increase the backing VM object's reference * count so that a forced reclamation of its vnode does not * immediately destroy it. */ vm_object_reference_locked(obj); VM_OBJECT_WUNLOCK(obj); *obj_res = obj; *vp_res = vp; *shmfd_res = shmfd; out: if (vp != NULL) VOP_UNLOCK(vp, 0); return (error); } static int kern_sendfile_getsock(struct thread *td, int s, struct file **sock_fp, struct socket **so) { cap_rights_t rights; int error; *sock_fp = NULL; *so = NULL; /* * The socket must be a stream socket and connected. */ error = getsock_cap(td, s, cap_rights_init(&rights, CAP_SEND), sock_fp, NULL); if (error != 0) return (error); *so = (*sock_fp)->f_data; if ((*so)->so_type != SOCK_STREAM) return (EINVAL); if (((*so)->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); return (0); } int vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, int kflags, struct thread *td) { struct file *sock_fp; struct vnode *vp; struct vm_object *obj; struct socket *so; - struct mbuf *m; + struct mbuf *m, *mh, *mhtail; struct sf_buf *sf; - struct vm_page *pg; struct shmfd *shmfd; struct sendfile_sync *sfs; struct vattr va; - off_t off, xfsize, fsbytes, sbytes, rem, obj_size; - int error, bsize, nd, hdrlen, mnw; + off_t off, sbytes, rem, obj_size; + int error, softerr, bsize, hdrlen; - pg = NULL; obj = NULL; so = NULL; - m = NULL; + m = mh = NULL; sfs = NULL; - fsbytes = sbytes = 0; - hdrlen = mnw = 0; - rem = nbytes; - obj_size = 0; + sbytes = 0; + softerr = 0; error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize); if (error != 0) return (error); - if (rem == 0) - rem = obj_size; error = kern_sendfile_getsock(td, sockfd, &sock_fp, &so); if (error != 0) goto out; - /* - * Do not wait on memory allocations but return ENOMEM for - * caller to retry later. - * XXX: Experimental. - */ - if (flags & SF_MNOWAIT) - mnw = 1; +#ifdef MAC + error = mac_socket_check_send(td->td_ucred, so); + if (error != 0) + goto out; +#endif + SFSTAT_INC(sf_syscalls); + SFSTAT_ADD(sf_rhpages_requested, SF_READAHEAD(flags)); + if (flags & SF_SYNC) { sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); cv_init(&sfs->cv, "sendfile"); } -#ifdef MAC - error = mac_socket_check_send(td->td_ucred, so); - if (error != 0) - goto out; -#endif - /* If headers are specified copy them into mbufs. */ - if (hdr_uio != NULL) { + if (hdr_uio != NULL && hdr_uio->uio_resid > 0) { hdr_uio->uio_td = td; hdr_uio->uio_rw = UIO_WRITE; - if (hdr_uio->uio_resid > 0) { - /* - * In FBSD < 5.0 the nbytes to send also included - * the header. If compat is specified subtract the - * header size from nbytes. - */ - if (kflags & SFK_COMPAT) { - if (nbytes > hdr_uio->uio_resid) - nbytes -= hdr_uio->uio_resid; - else - nbytes = 0; - } - m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK), - 0, 0, 0); - if (m == NULL) { - error = mnw ? EAGAIN : ENOBUFS; - goto out; - } - hdrlen = m_length(m, NULL); + /* + * In FBSD < 5.0 the nbytes to send also included + * the header. If compat is specified subtract the + * header size from nbytes. + */ + if (kflags & SFK_COMPAT) { + if (nbytes > hdr_uio->uio_resid) + nbytes -= hdr_uio->uio_resid; + else + nbytes = 0; } - } + mh = m_uiotombuf(hdr_uio, M_WAITOK, 0, 0, 0); + hdrlen = m_length(mh, &mhtail); + } else + hdrlen = 0; + rem = nbytes ? omin(nbytes, obj_size - offset) : obj_size - offset; + /* * Protect against multiple writers to the socket. * * XXXRW: Historically this has assumed non-interruptibility, so now * we implement that, but possibly shouldn't. */ (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); /* * Loop through the pages of the file, starting with the requested * offset. Get a file page (do I/O if necessary), map the file page * into an sf_buf, attach an mbuf header to the sf_buf, and queue * it on the socket. * This is done in two loops. The inner loop turns as many pages * as it can, up to available socket buffer space, without blocking * into mbufs to have it bulk delivered into the socket send buffer. * The outer loop checks the state and available space of the socket * and takes care of the overall progress. */ - for (off = offset; ; ) { + for (off = offset; rem > 0; ) { + struct sf_io *sfio; + vm_page_t *pa; struct mbuf *mtail; - int loopbytes; - int space; - int done; + int nios, space, npages, rhpages; - if ((nbytes != 0 && nbytes == fsbytes) || - (nbytes == 0 && obj_size == fsbytes)) - break; - mtail = NULL; - loopbytes = 0; - space = 0; - done = 0; - /* * Check the socket state for ongoing connection, * no errors and space in socket buffer. * If space is low allow for the remainder of the * file to be processed if it fits the socket buffer. * Otherwise block in waiting for sufficient space * to proceed, or if the socket is nonblocking, return * to userland with EAGAIN while reporting how far * we've come. * We wait until the socket buffer has significant free * space to do bulk sends. This makes good use of file * system read ahead and allows packet segmentation * offloading hardware to take over lots of work. If * we were not careful here we would send off only one * sfbuf at a time. */ SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; retry_space: if (so->so_snd.sb_state & SBS_CANTSENDMORE) { error = EPIPE; SOCKBUF_UNLOCK(&so->so_snd); goto done; } else if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_snd); goto done; } space = sbspace(&so->so_snd); if (space < rem && (space <= 0 || space < so->so_snd.sb_lowat)) { if (so->so_state & SS_NBIO) { SOCKBUF_UNLOCK(&so->so_snd); error = EAGAIN; goto done; } /* * sbwait drops the lock while sleeping. * When we loop back to retry_space the * state may have changed and we retest * for it. */ error = sbwait(&so->so_snd); /* * An error from sbwait usually indicates that we've * been interrupted by a signal. If we've sent anything * then return bytes sent, otherwise return the error. */ if (error != 0) { SOCKBUF_UNLOCK(&so->so_snd); goto done; } goto retry_space; } SOCKBUF_UNLOCK(&so->so_snd); /* * Reduce space in the socket buffer by the size of * the header mbuf chain. * hdrlen is set to 0 after the first loop. */ space -= hdrlen; if (vp != NULL) { error = vn_lock(vp, LK_SHARED); if (error != 0) goto done; error = VOP_GETATTR(vp, &va, td->td_ucred); if (error != 0 || off >= va.va_size) { VOP_UNLOCK(vp, 0); goto done; } - obj_size = va.va_size; + if (va.va_size != obj_size) { + if (nbytes == 0) + rem += va.va_size - obj_size; + else if (offset + nbytes > va.va_size) + rem -= (offset + nbytes - va.va_size); + obj_size = va.va_size; + } } + if (space > rem) + space = rem; + + npages = howmany(space + (off & PAGE_MASK), PAGE_SIZE); + /* + * Calculate maximum allowed number of pages for readahead + * at this iteration. First, we allow readahead up to "rem". + * If application wants more, let it be, but there is no + * reason to go above MAXPHYS. Also check against "obj_size", + * since vm_pager_has_page() can hint beyond EOF. + */ + rhpages = howmany(rem + (off & PAGE_MASK), PAGE_SIZE) - npages; + rhpages += SF_READAHEAD(flags); + rhpages = min(howmany(MAXPHYS, PAGE_SIZE), rhpages); + rhpages = min(howmany(obj_size - trunc_page(off), PAGE_SIZE) - + npages, rhpages); + + sfio = malloc(sizeof(struct sf_io) + + npages * sizeof(vm_page_t), M_TEMP, M_WAITOK); + refcount_init(&sfio->nios, 1); + sfio->error = 0; + + nios = sendfile_swapin(obj, sfio, off, space, npages, rhpages, + flags); + + /* * Loop and construct maximum sized mbuf chain to be bulk * dumped into socket buffer. */ - while (space > loopbytes) { - vm_offset_t pgoff; + pa = sfio->pa; + for (int i = 0; i < npages; i++) { struct mbuf *m0; /* - * Calculate the amount to transfer. - * Not to exceed a page, the EOF, - * or the passed in nbytes. + * If a page wasn't grabbed successfully, then + * trim the array. Can happen only with SF_NODISKIO. */ - pgoff = (vm_offset_t)(off & PAGE_MASK); - rem = obj_size - offset; - if (nbytes != 0) - rem = omin(rem, nbytes); - rem -= fsbytes + loopbytes; - xfsize = omin(PAGE_SIZE - pgoff, rem); - xfsize = omin(space - loopbytes, xfsize); - if (xfsize <= 0) { - done = 1; /* all data sent */ + if (pa[i] == NULL) { + SFSTAT_INC(sf_busy); + fixspace(npages, i, off, &space); + npages = i; + softerr = EBUSY; break; } /* - * Attempt to look up the page. Allocate - * if not found or wait and loop if busy. - */ - if (m != NULL) - nd = EAGAIN; /* send what we already got */ - else if ((flags & SF_NODISKIO) != 0) - nd = EBUSY; - else - nd = 0; - error = sendfile_readpage(obj, vp, nd, off, - xfsize, bsize, td, &pg); - if (error != 0) { - if (error == EAGAIN) - error = 0; /* not a real error */ - break; - } - - /* * Get a sendfile buf. When allocating the * first buffer for mbuf chain, we usually * wait as long as necessary, but this wait * can be interrupted. For consequent * buffers, do not sleep, since several * threads might exhaust the buffers and then * deadlock. */ - sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT : - SFB_CATCH); + sf = sf_buf_alloc(pa[i], + m != NULL ? SFB_NOWAIT : SFB_CATCH); if (sf == NULL) { SFSTAT_INC(sf_allocfail); - vm_page_lock(pg); - vm_page_unwire(pg, PQ_INACTIVE); - KASSERT(pg->object != NULL, - ("%s: object disappeared", __func__)); - vm_page_unlock(pg); + for (int j = i; j < npages; j++) { + vm_page_lock(pa[j]); + vm_page_unwire(pa[j], PQ_INACTIVE); + vm_page_unlock(pa[j]); + } if (m == NULL) - error = (mnw ? EAGAIN : EINTR); + softerr = ENOBUFS; + fixspace(npages, i, off, &space); + npages = i; break; } - /* - * Get an mbuf and set it up as having - * external storage. - */ - m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); - if (m0 == NULL) { - error = (mnw ? EAGAIN : ENOBUFS); - sf_ext_free(sf, NULL); - break; - } - /* - * Attach EXT_SFBUF external storage. - */ - m0->m_ext.ext_buf = (caddr_t )sf_buf_kva(sf); + m0 = m_get(M_WAITOK, MT_DATA); + m0->m_ext.ext_buf = (char *)sf_buf_kva(sf); m0->m_ext.ext_size = PAGE_SIZE; m0->m_ext.ext_arg1 = sf; m0->m_ext.ext_arg2 = sfs; - m0->m_ext.ext_type = EXT_SFBUF; + /* + * SF_NOCACHE sets the page as being freed upon send. + * However, we ignore it for the last page in 'space', + * if the page is truncated, and we got more data to + * send (rem > space), or if we have readahead + * configured (rhpages > 0). + */ + if ((flags & SF_NOCACHE) == 0 || + (i == npages - 1 && + ((off + space) & PAGE_MASK) && + (rem > space || rhpages > 0))) + m0->m_ext.ext_type = EXT_SFBUF; + else + m0->m_ext.ext_type = EXT_SFBUF_NOCACHE; m0->m_ext.ext_flags = 0; - m0->m_flags |= (M_EXT|M_RDONLY); - m0->m_data = (char *)sf_buf_kva(sf) + pgoff; - m0->m_len = xfsize; + m0->m_flags |= (M_EXT | M_RDONLY); + if (nios) + m0->m_flags |= M_NOTREADY; + m0->m_data = (char *)sf_buf_kva(sf) + + (vmoff(i, off) & PAGE_MASK); + m0->m_len = xfsize(i, npages, off, space); + if (i == 0) + sfio->m = m0; + /* Append to mbuf chain. */ if (mtail != NULL) mtail->m_next = m0; - else if (m != NULL) - m_last(m)->m_next = m0; else m = m0; mtail = m0; - /* Keep track of bits processed. */ - loopbytes += xfsize; - off += xfsize; - if (sfs != NULL) { mtx_lock(&sfs->mtx); sfs->count++; mtx_unlock(&sfs->mtx); } } if (vp != NULL) VOP_UNLOCK(vp, 0); + /* Keep track of bytes processed. */ + off += space; + rem -= space; + + /* Prepend header, if any. */ + if (hdrlen) { + mhtail->m_next = m; + m = mh; + mh = NULL; + } + + if (m == NULL) { + KASSERT(softerr, ("%s: m NULL, no error", __func__)); + error = softerr; + free(sfio, M_TEMP); + goto done; + } + /* Add the buffer chain to the socket buffer. */ - if (m != NULL) { - int mlen, err; + KASSERT(m_length(m, NULL) == space + hdrlen, + ("%s: mlen %u space %d hdrlen %d", + __func__, m_length(m, NULL), space, hdrlen)); - mlen = m_length(m, NULL); - SOCKBUF_LOCK(&so->so_snd); - if (so->so_snd.sb_state & SBS_CANTSENDMORE) { - error = EPIPE; - SOCKBUF_UNLOCK(&so->so_snd); - goto done; - } - SOCKBUF_UNLOCK(&so->so_snd); - CURVNET_SET(so->so_vnet); - /* Avoid error aliasing. */ - err = (*so->so_proto->pr_usrreqs->pru_send) - (so, 0, m, NULL, NULL, td); - CURVNET_RESTORE(); - if (err == 0) { - /* - * We need two counters to get the - * file offset and nbytes to send - * right: - * - sbytes contains the total amount - * of bytes sent, including headers. - * - fsbytes contains the total amount - * of bytes sent from the file. - */ - sbytes += mlen; - fsbytes += mlen; - if (hdrlen) { - fsbytes -= hdrlen; - hdrlen = 0; - } - } else if (error == 0) - error = err; - m = NULL; /* pru_send always consumes */ + CURVNET_SET(so->so_vnet); + if (nios == 0) { + /* + * If sendfile_swapin() didn't initiate any I/Os, + * which happens if all data is cached in VM, then + * we can send data right now without the + * PRUS_NOTREADY flag. + */ + free(sfio, M_TEMP); + error = (*so->so_proto->pr_usrreqs->pru_send) + (so, 0, m, NULL, NULL, td); + } else { + sfio->sock_fp = sock_fp; + sfio->npages = npages; + fhold(sock_fp); + error = (*so->so_proto->pr_usrreqs->pru_send) + (so, PRUS_NOTREADY, m, NULL, NULL, td); + sf_iodone(sfio, NULL, 0, 0); } + CURVNET_RESTORE(); - /* Quit outer loop on error or when we're done. */ - if (done) - break; - if (error != 0) + m = NULL; /* pru_send always consumes */ + if (error) goto done; + sbytes += space + hdrlen; + if (hdrlen) + hdrlen = 0; + if (softerr) { + error = softerr; + goto done; + } } /* * Send trailers. Wimp out and use writev(2). */ if (trl_uio != NULL) { sbunlock(&so->so_snd); error = kern_writev(td, sockfd, trl_uio); if (error == 0) sbytes += td->td_retval[0]; goto out; } done: sbunlock(&so->so_snd); out: /* * If there was no error we have to clear td->td_retval[0] * because it may have been set by writev. */ if (error == 0) { td->td_retval[0] = 0; } if (sent != NULL) { (*sent) = sbytes; } if (obj != NULL) vm_object_deallocate(obj); if (so) fdrop(sock_fp, td); if (m) m_freem(m); + if (mh) + m_freem(mh); if (sfs != NULL) { mtx_lock(&sfs->mtx); if (sfs->count != 0) cv_wait(&sfs->cv, &sfs->mtx); KASSERT(sfs->count == 0, ("sendfile sync still busy")); cv_destroy(&sfs->cv); mtx_destroy(&sfs->mtx); free(sfs, M_TEMP); } if (error == ERESTART) error = EINTR; return (error); } Index: projects/clang380-import/sys/kern/uipc_usrreq.c =================================================================== --- projects/clang380-import/sys/kern/uipc_usrreq.c (revision 293686) +++ projects/clang380-import/sys/kern/uipc_usrreq.c (revision 293687) @@ -1,2554 +1,2554 @@ /*- * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. * Copyright (c) 2004-2009 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 */ /* * UNIX Domain (Local) Sockets * * This is an implementation of UNIX (local) domain sockets. Each socket has * an associated struct unpcb (UNIX protocol control block). Stream sockets * may be connected to 0 or 1 other socket. Datagram sockets may be * connected to 0, 1, or many other sockets. Sockets may be created and * connected in pairs (socketpair(2)), or bound/connected to using the file * system name space. For most purposes, only the receive socket buffer is * used, as sending on one socket delivers directly to the receive socket * buffer of a second socket. * * The implementation is substantially complicated by the fact that * "ancillary data", such as file descriptors or credentials, may be passed * across UNIX domain sockets. The potential for passing UNIX domain sockets * over other UNIX domain sockets requires the implementation of a simple * garbage collector to find and tear down cycles of disconnected sockets. * * TODO: * RDM * rethink name space problems * need a proper out-of-band */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include /* XXX must be before */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include MALLOC_DECLARE(M_FILECAPS); /* * Locking key: * (l) Locked using list lock * (g) Locked using linkage lock */ static uma_zone_t unp_zone; static unp_gen_t unp_gencnt; /* (l) */ static u_int unp_count; /* (l) Count of local sockets. */ static ino_t unp_ino; /* Prototype for fake inode numbers. */ static int unp_rights; /* (g) File descriptors in flight. */ static struct unp_head unp_shead; /* (l) List of stream sockets. */ static struct unp_head unp_dhead; /* (l) List of datagram sockets. */ static struct unp_head unp_sphead; /* (l) List of seqpacket sockets. */ struct unp_defer { SLIST_ENTRY(unp_defer) ud_link; struct file *ud_fp; }; static SLIST_HEAD(, unp_defer) unp_defers; static int unp_defers_count; static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; /* * Garbage collection of cyclic file descriptor/socket references occurs * asynchronously in a taskqueue context in order to avoid recursion and * reentrance in the UNIX domain socket, file descriptor, and socket layer * code. See unp_gc() for a full description. */ static struct timeout_task unp_gc_task; /* * The close of unix domain sockets attached as SCM_RIGHTS is * postponed to the taskqueue, to avoid arbitrary recursion depth. * The attached sockets might have another sockets attached. */ static struct task unp_defer_task; /* * Both send and receive buffers are allocated PIPSIZ bytes of buffering for * stream sockets, although the total for sender and receiver is actually * only PIPSIZ. * * Datagram sockets really use the sendspace as the maximum datagram size, * and don't really want to reserve the sendspace. Their recvspace should be * large enough for at least one max-size datagram plus address. */ #ifndef PIPSIZ #define PIPSIZ 8192 #endif static u_long unpst_sendspace = PIPSIZ; static u_long unpst_recvspace = PIPSIZ; static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ static u_long unpdg_recvspace = 4*1024; static u_long unpsp_sendspace = PIPSIZ; /* really max datagram size */ static u_long unpsp_recvspace = PIPSIZ; static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW, 0, "Local domain"); static SYSCTL_NODE(_net_local, SOCK_STREAM, stream, CTLFLAG_RW, 0, "SOCK_STREAM"); static SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram, CTLFLAG_RW, 0, "SOCK_DGRAM"); static SYSCTL_NODE(_net_local, SOCK_SEQPACKET, seqpacket, CTLFLAG_RW, 0, "SOCK_SEQPACKET"); SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, &unpst_sendspace, 0, "Default stream send space."); SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, &unpst_recvspace, 0, "Default stream receive space."); SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, &unpdg_sendspace, 0, "Default datagram send space."); SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, &unpdg_recvspace, 0, "Default datagram receive space."); SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW, &unpsp_sendspace, 0, "Default seqpacket send space."); SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW, &unpsp_recvspace, 0, "Default seqpacket receive space."); SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "File descriptors in flight."); SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD, &unp_defers_count, 0, "File descriptors deferred to taskqueue for close."); /* * Locking and synchronization: * * Three types of locks exit in the local domain socket implementation: a * global list mutex, a global linkage rwlock, and per-unpcb mutexes. Of the * global locks, the list lock protects the socket count, global generation * number, and stream/datagram global lists. The linkage lock protects the * interconnection of unpcbs, the v_socket and unp_vnode pointers, and can be * held exclusively over the acquisition of multiple unpcb locks to prevent * deadlock. * * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer, * allocated in pru_attach() and freed in pru_detach(). The validity of that * pointer is an invariant, so no lock is required to dereference the so_pcb * pointer if a valid socket reference is held by the caller. In practice, * this is always true during operations performed on a socket. Each unpcb * has a back-pointer to its socket, unp_socket, which will be stable under * the same circumstances. * * This pointer may only be safely dereferenced as long as a valid reference * to the unpcb is held. Typically, this reference will be from the socket, * or from another unpcb when the referring unpcb's lock is held (in order * that the reference not be invalidated during use). For example, to follow * unp->unp_conn->unp_socket, you need unlock the lock on unp, not unp_conn, * as unp_socket remains valid as long as the reference to unp_conn is valid. * * Fields of unpcbss are locked using a per-unpcb lock, unp_mtx. Individual * atomic reads without the lock may be performed "lockless", but more * complex reads and read-modify-writes require the mutex to be held. No * lock order is defined between unpcb locks -- multiple unpcb locks may be * acquired at the same time only when holding the linkage rwlock * exclusively, which prevents deadlocks. * * Blocking with UNIX domain sockets is a tricky issue: unlike most network * protocols, bind() is a non-atomic operation, and connect() requires * potential sleeping in the protocol, due to potentially waiting on local or * distributed file systems. We try to separate "lookup" operations, which * may sleep, and the IPC operations themselves, which typically can occur * with relative atomicity as locks can be held over the entire operation. * * Another tricky issue is simultaneous multi-threaded or multi-process * access to a single UNIX domain socket. These are handled by the flags * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or * binding, both of which involve dropping UNIX domain socket locks in order * to perform namei() and other file system operations. */ static struct rwlock unp_link_rwlock; static struct mtx unp_list_lock; static struct mtx unp_defers_lock; #define UNP_LINK_LOCK_INIT() rw_init(&unp_link_rwlock, \ "unp_link_rwlock") #define UNP_LINK_LOCK_ASSERT() rw_assert(&unp_link_rwlock, \ RA_LOCKED) #define UNP_LINK_UNLOCK_ASSERT() rw_assert(&unp_link_rwlock, \ RA_UNLOCKED) #define UNP_LINK_RLOCK() rw_rlock(&unp_link_rwlock) #define UNP_LINK_RUNLOCK() rw_runlock(&unp_link_rwlock) #define UNP_LINK_WLOCK() rw_wlock(&unp_link_rwlock) #define UNP_LINK_WUNLOCK() rw_wunlock(&unp_link_rwlock) #define UNP_LINK_WLOCK_ASSERT() rw_assert(&unp_link_rwlock, \ RA_WLOCKED) #define UNP_LIST_LOCK_INIT() mtx_init(&unp_list_lock, \ "unp_list_lock", NULL, MTX_DEF) #define UNP_LIST_LOCK() mtx_lock(&unp_list_lock) #define UNP_LIST_UNLOCK() mtx_unlock(&unp_list_lock) #define UNP_DEFERRED_LOCK_INIT() mtx_init(&unp_defers_lock, \ "unp_defer", NULL, MTX_DEF) #define UNP_DEFERRED_LOCK() mtx_lock(&unp_defers_lock) #define UNP_DEFERRED_UNLOCK() mtx_unlock(&unp_defers_lock) #define UNP_PCB_LOCK_INIT(unp) mtx_init(&(unp)->unp_mtx, \ "unp_mtx", "unp_mtx", \ MTX_DUPOK|MTX_DEF|MTX_RECURSE) #define UNP_PCB_LOCK_DESTROY(unp) mtx_destroy(&(unp)->unp_mtx) #define UNP_PCB_LOCK(unp) mtx_lock(&(unp)->unp_mtx) #define UNP_PCB_UNLOCK(unp) mtx_unlock(&(unp)->unp_mtx) #define UNP_PCB_LOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_OWNED) static int uipc_connect2(struct socket *, struct socket *); static int uipc_ctloutput(struct socket *, struct sockopt *); static int unp_connect(struct socket *, struct sockaddr *, struct thread *); static int unp_connectat(int, struct socket *, struct sockaddr *, struct thread *); static int unp_connect2(struct socket *so, struct socket *so2, int); static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2); static void unp_dispose(struct mbuf *); static void unp_dispose_so(struct socket *so); static void unp_shutdown(struct unpcb *); static void unp_drop(struct unpcb *, int); static void unp_gc(__unused void *, int); static void unp_scan(struct mbuf *, void (*)(struct filedescent **, int)); static void unp_discard(struct file *); static void unp_freerights(struct filedescent **, int); static void unp_init(void); static int unp_internalize(struct mbuf **, struct thread *); static void unp_internalize_fp(struct file *); static int unp_externalize(struct mbuf *, struct mbuf **, int); static int unp_externalize_fp(struct file *); static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *); static void unp_process_defers(void * __unused, int); /* * Definitions of protocols supported in the LOCAL domain. */ static struct domain localdomain; static struct pr_usrreqs uipc_usrreqs_dgram, uipc_usrreqs_stream; static struct pr_usrreqs uipc_usrreqs_seqpacket; static struct protosw localsw[] = { { .pr_type = SOCK_STREAM, .pr_domain = &localdomain, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS, .pr_ctloutput = &uipc_ctloutput, .pr_usrreqs = &uipc_usrreqs_stream }, { .pr_type = SOCK_DGRAM, .pr_domain = &localdomain, .pr_flags = PR_ATOMIC|PR_ADDR|PR_RIGHTS, .pr_ctloutput = &uipc_ctloutput, .pr_usrreqs = &uipc_usrreqs_dgram }, { .pr_type = SOCK_SEQPACKET, .pr_domain = &localdomain, /* * XXXRW: For now, PR_ADDR because soreceive will bump into them * due to our use of sbappendaddr. A new sbappend variants is needed * that supports both atomic record writes and control data. */ .pr_flags = PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|PR_WANTRCVD| PR_RIGHTS, .pr_ctloutput = &uipc_ctloutput, .pr_usrreqs = &uipc_usrreqs_seqpacket, }, }; static struct domain localdomain = { .dom_family = AF_LOCAL, .dom_name = "local", .dom_init = unp_init, .dom_externalize = unp_externalize, .dom_dispose = unp_dispose_so, .dom_protosw = localsw, .dom_protoswNPROTOSW = &localsw[sizeof(localsw)/sizeof(localsw[0])] }; DOMAIN_SET(local); static void uipc_abort(struct socket *so) { struct unpcb *unp, *unp2; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_abort: unp == NULL")); UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 != NULL) { UNP_PCB_LOCK(unp2); unp_drop(unp2, ECONNABORTED); UNP_PCB_UNLOCK(unp2); } UNP_PCB_UNLOCK(unp); UNP_LINK_WUNLOCK(); } static int uipc_accept(struct socket *so, struct sockaddr **nam) { struct unpcb *unp, *unp2; const struct sockaddr *sa; /* * Pass back name of connected socket, if it was bound and we are * still connected (our peer may have closed already!). */ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_accept: unp == NULL")); *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); UNP_LINK_RLOCK(); unp2 = unp->unp_conn; if (unp2 != NULL && unp2->unp_addr != NULL) { UNP_PCB_LOCK(unp2); sa = (struct sockaddr *) unp2->unp_addr; bcopy(sa, *nam, sa->sa_len); UNP_PCB_UNLOCK(unp2); } else { sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); } UNP_LINK_RUNLOCK(); return (0); } static int uipc_attach(struct socket *so, int proto, struct thread *td) { u_long sendspace, recvspace; struct unpcb *unp; int error; KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL")); if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { switch (so->so_type) { case SOCK_STREAM: sendspace = unpst_sendspace; recvspace = unpst_recvspace; break; case SOCK_DGRAM: sendspace = unpdg_sendspace; recvspace = unpdg_recvspace; break; case SOCK_SEQPACKET: sendspace = unpsp_sendspace; recvspace = unpsp_recvspace; break; default: panic("uipc_attach"); } error = soreserve(so, sendspace, recvspace); if (error) return (error); } unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO); if (unp == NULL) return (ENOBUFS); LIST_INIT(&unp->unp_refs); UNP_PCB_LOCK_INIT(unp); unp->unp_socket = so; so->so_pcb = unp; unp->unp_refcount = 1; UNP_LIST_LOCK(); unp->unp_gencnt = ++unp_gencnt; unp_count++; switch (so->so_type) { case SOCK_STREAM: LIST_INSERT_HEAD(&unp_shead, unp, unp_link); break; case SOCK_DGRAM: LIST_INSERT_HEAD(&unp_dhead, unp, unp_link); break; case SOCK_SEQPACKET: LIST_INSERT_HEAD(&unp_sphead, unp, unp_link); break; default: panic("uipc_attach"); } UNP_LIST_UNLOCK(); return (0); } static int uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_un *soun = (struct sockaddr_un *)nam; struct vattr vattr; int error, namelen; struct nameidata nd; struct unpcb *unp; struct vnode *vp; struct mount *mp; cap_rights_t rights; char *buf; if (nam->sa_family != AF_UNIX) return (EAFNOSUPPORT); unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_bind: unp == NULL")); if (soun->sun_len > sizeof(struct sockaddr_un)) return (EINVAL); namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); if (namelen <= 0) return (EINVAL); /* * We don't allow simultaneous bind() calls on a single UNIX domain * socket, so flag in-progress operations, and return an error if an * operation is already in progress. * * Historically, we have not allowed a socket to be rebound, so this * also returns an error. Not allowing re-binding simplifies the * implementation and avoids a great many possible failure modes. */ UNP_PCB_LOCK(unp); if (unp->unp_vnode != NULL) { UNP_PCB_UNLOCK(unp); return (EINVAL); } if (unp->unp_flags & UNP_BINDING) { UNP_PCB_UNLOCK(unp); return (EALREADY); } unp->unp_flags |= UNP_BINDING; UNP_PCB_UNLOCK(unp); buf = malloc(namelen + 1, M_TEMP, M_WAITOK); bcopy(soun->sun_path, buf, namelen); buf[namelen] = 0; restart: NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME | NOCACHE, UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_BINDAT), td); /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ error = namei(&nd); if (error) goto error; vp = nd.ni_vp; if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (vp != NULL) { vrele(vp); error = EADDRINUSE; goto error; } error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); if (error) goto error; goto restart; } VATTR_NULL(&vattr); vattr.va_type = VSOCK; vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask); #ifdef MAC error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); #endif if (error == 0) error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (error) { vn_finished_write(mp); goto error; } vp = nd.ni_vp; ASSERT_VOP_ELOCKED(vp, "uipc_bind"); soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); VOP_UNP_BIND(vp, unp->unp_socket); unp->unp_vnode = vp; unp->unp_addr = soun; unp->unp_flags &= ~UNP_BINDING; UNP_PCB_UNLOCK(unp); UNP_LINK_WUNLOCK(); VOP_UNLOCK(vp, 0); vn_finished_write(mp); free(buf, M_TEMP); return (0); error: UNP_PCB_LOCK(unp); unp->unp_flags &= ~UNP_BINDING; UNP_PCB_UNLOCK(unp); free(buf, M_TEMP); return (error); } static int uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { return (uipc_bindat(AT_FDCWD, so, nam, td)); } static int uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { int error; KASSERT(td == curthread, ("uipc_connect: td != curthread")); UNP_LINK_WLOCK(); error = unp_connect(so, nam, td); UNP_LINK_WUNLOCK(); return (error); } static int uipc_connectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { int error; KASSERT(td == curthread, ("uipc_connectat: td != curthread")); UNP_LINK_WLOCK(); error = unp_connectat(fd, so, nam, td); UNP_LINK_WUNLOCK(); return (error); } static void uipc_close(struct socket *so) { struct unpcb *unp, *unp2; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_close: unp == NULL")); UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 != NULL) { UNP_PCB_LOCK(unp2); unp_disconnect(unp, unp2); UNP_PCB_UNLOCK(unp2); } UNP_PCB_UNLOCK(unp); UNP_LINK_WUNLOCK(); } static int uipc_connect2(struct socket *so1, struct socket *so2) { struct unpcb *unp, *unp2; int error; UNP_LINK_WLOCK(); unp = so1->so_pcb; KASSERT(unp != NULL, ("uipc_connect2: unp == NULL")); UNP_PCB_LOCK(unp); unp2 = so2->so_pcb; KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL")); UNP_PCB_LOCK(unp2); error = unp_connect2(so1, so2, PRU_CONNECT2); UNP_PCB_UNLOCK(unp2); UNP_PCB_UNLOCK(unp); UNP_LINK_WUNLOCK(); return (error); } static void uipc_detach(struct socket *so) { struct unpcb *unp, *unp2; struct sockaddr_un *saved_unp_addr; struct vnode *vp; int freeunp, local_unp_rights; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_detach: unp == NULL")); UNP_LINK_WLOCK(); UNP_LIST_LOCK(); UNP_PCB_LOCK(unp); LIST_REMOVE(unp, unp_link); unp->unp_gencnt = ++unp_gencnt; --unp_count; UNP_LIST_UNLOCK(); /* * XXXRW: Should assert vp->v_socket == so. */ if ((vp = unp->unp_vnode) != NULL) { VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; } unp2 = unp->unp_conn; if (unp2 != NULL) { UNP_PCB_LOCK(unp2); unp_disconnect(unp, unp2); UNP_PCB_UNLOCK(unp2); } /* * We hold the linkage lock exclusively, so it's OK to acquire * multiple pcb locks at a time. */ while (!LIST_EMPTY(&unp->unp_refs)) { struct unpcb *ref = LIST_FIRST(&unp->unp_refs); UNP_PCB_LOCK(ref); unp_drop(ref, ECONNRESET); UNP_PCB_UNLOCK(ref); } local_unp_rights = unp_rights; UNP_LINK_WUNLOCK(); unp->unp_socket->so_pcb = NULL; saved_unp_addr = unp->unp_addr; unp->unp_addr = NULL; unp->unp_refcount--; freeunp = (unp->unp_refcount == 0); if (saved_unp_addr != NULL) free(saved_unp_addr, M_SONAME); if (freeunp) { UNP_PCB_LOCK_DESTROY(unp); uma_zfree(unp_zone, unp); } else UNP_PCB_UNLOCK(unp); if (vp) vrele(vp); if (local_unp_rights) taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1); } static int uipc_disconnect(struct socket *so) { struct unpcb *unp, *unp2; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL")); UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 != NULL) { UNP_PCB_LOCK(unp2); unp_disconnect(unp, unp2); UNP_PCB_UNLOCK(unp2); } UNP_PCB_UNLOCK(unp); UNP_LINK_WUNLOCK(); return (0); } static int uipc_listen(struct socket *so, int backlog, struct thread *td) { struct unpcb *unp; int error; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_listen: unp == NULL")); UNP_PCB_LOCK(unp); if (unp->unp_vnode == NULL) { /* Already connected or not bound to an address. */ error = unp->unp_conn != NULL ? EINVAL : EDESTADDRREQ; UNP_PCB_UNLOCK(unp); return (error); } SOCK_LOCK(so); error = solisten_proto_check(so); if (error == 0) { cru2x(td->td_ucred, &unp->unp_peercred); unp->unp_flags |= UNP_HAVEPCCACHED; solisten_proto(so, backlog); } SOCK_UNLOCK(so); UNP_PCB_UNLOCK(unp); return (error); } static int uipc_peeraddr(struct socket *so, struct sockaddr **nam) { struct unpcb *unp, *unp2; const struct sockaddr *sa; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL")); *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); UNP_LINK_RLOCK(); /* * XXX: It seems that this test always fails even when connection is * established. So, this else clause is added as workaround to * return PF_LOCAL sockaddr. */ unp2 = unp->unp_conn; if (unp2 != NULL) { UNP_PCB_LOCK(unp2); if (unp2->unp_addr != NULL) sa = (struct sockaddr *) unp2->unp_addr; else sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); UNP_PCB_UNLOCK(unp2); } else { sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); } UNP_LINK_RUNLOCK(); return (0); } static int uipc_rcvd(struct socket *so, int flags) { struct unpcb *unp, *unp2; struct socket *so2; u_int mbcnt, sbcc; unp = sotounpcb(so); KASSERT(unp != NULL, ("%s: unp == NULL", __func__)); KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET, ("%s: socktype %d", __func__, so->so_type)); /* * Adjust backpressure on sender and wakeup any waiting to write. * * The unp lock is acquired to maintain the validity of the unp_conn * pointer; no lock on unp2 is required as unp2->unp_socket will be * static as long as we don't permit unp2 to disconnect from unp, * which is prevented by the lock on unp. We cache values from * so_rcv to avoid holding the so_rcv lock over the entire * transaction on the remote so_snd. */ SOCKBUF_LOCK(&so->so_rcv); mbcnt = so->so_rcv.sb_mbcnt; sbcc = sbavail(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); /* * There is a benign race condition at this point. If we're planning to * clear SB_STOP, but uipc_send is called on the connected socket at * this instant, it might add data to the sockbuf and set SB_STOP. Then * we would erroneously clear SB_STOP below, even though the sockbuf is * full. The race is benign because the only ill effect is to allow the * sockbuf to exceed its size limit, and the size limits are not * strictly guaranteed anyway. */ UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 == NULL) { UNP_PCB_UNLOCK(unp); return (0); } so2 = unp2->unp_socket; SOCKBUF_LOCK(&so2->so_snd); if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax) so2->so_snd.sb_flags &= ~SB_STOP; sowwakeup_locked(so2); UNP_PCB_UNLOCK(unp); return (0); } static int uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct unpcb *unp, *unp2; struct socket *so2; u_int mbcnt, sbcc; int error = 0; unp = sotounpcb(so); KASSERT(unp != NULL, ("%s: unp == NULL", __func__)); KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM || so->so_type == SOCK_SEQPACKET, ("%s: socktype %d", __func__, so->so_type)); if (flags & PRUS_OOB) { error = EOPNOTSUPP; goto release; } if (control != NULL && (error = unp_internalize(&control, td))) goto release; if ((nam != NULL) || (flags & PRUS_EOF)) UNP_LINK_WLOCK(); else UNP_LINK_RLOCK(); switch (so->so_type) { case SOCK_DGRAM: { const struct sockaddr *from; unp2 = unp->unp_conn; if (nam != NULL) { UNP_LINK_WLOCK_ASSERT(); if (unp2 != NULL) { error = EISCONN; break; } error = unp_connect(so, nam, td); if (error) break; unp2 = unp->unp_conn; } /* * Because connect() and send() are non-atomic in a sendto() * with a target address, it's possible that the socket will * have disconnected before the send() can run. In that case * return the slightly counter-intuitive but otherwise * correct error that the socket is not connected. */ if (unp2 == NULL) { error = ENOTCONN; break; } /* Lockless read. */ if (unp2->unp_flags & UNP_WANTCRED) control = unp_addsockcred(td, control); UNP_PCB_LOCK(unp); if (unp->unp_addr != NULL) from = (struct sockaddr *)unp->unp_addr; else from = &sun_noname; so2 = unp2->unp_socket; SOCKBUF_LOCK(&so2->so_rcv); if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) { sorwakeup_locked(so2); m = NULL; control = NULL; } else { SOCKBUF_UNLOCK(&so2->so_rcv); error = ENOBUFS; } if (nam != NULL) { UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK(unp2); unp_disconnect(unp, unp2); UNP_PCB_UNLOCK(unp2); } UNP_PCB_UNLOCK(unp); break; } case SOCK_SEQPACKET: case SOCK_STREAM: if ((so->so_state & SS_ISCONNECTED) == 0) { if (nam != NULL) { UNP_LINK_WLOCK_ASSERT(); error = unp_connect(so, nam, td); if (error) break; /* XXX */ } else { error = ENOTCONN; break; } } /* Lockless read. */ if (so->so_snd.sb_state & SBS_CANTSENDMORE) { error = EPIPE; break; } /* * Because connect() and send() are non-atomic in a sendto() * with a target address, it's possible that the socket will * have disconnected before the send() can run. In that case * return the slightly counter-intuitive but otherwise * correct error that the socket is not connected. * * Locking here must be done carefully: the linkage lock * prevents interconnections between unpcbs from changing, so * we can traverse from unp to unp2 without acquiring unp's * lock. Socket buffer locks follow unpcb locks, so we can * acquire both remote and lock socket buffer locks. */ unp2 = unp->unp_conn; if (unp2 == NULL) { error = ENOTCONN; break; } so2 = unp2->unp_socket; UNP_PCB_LOCK(unp2); SOCKBUF_LOCK(&so2->so_rcv); if (unp2->unp_flags & UNP_WANTCRED) { /* * Credentials are passed only once on SOCK_STREAM * and SOCK_SEQPACKET. */ unp2->unp_flags &= ~UNP_WANTCRED; control = unp_addsockcred(td, control); } /* * Send to paired receive port, and then reduce send buffer * hiwater marks to maintain backpressure. Wake up readers. */ switch (so->so_type) { case SOCK_STREAM: if (control != NULL) { if (sbappendcontrol_locked(&so2->so_rcv, m, control)) control = NULL; } else - sbappend_locked(&so2->so_rcv, m); + sbappend_locked(&so2->so_rcv, m, flags); break; case SOCK_SEQPACKET: { const struct sockaddr *from; from = &sun_noname; /* * Don't check for space available in so2->so_rcv. * Unix domain sockets only check for space in the * sending sockbuf, and that check is performed one * level up the stack. */ if (sbappendaddr_nospacecheck_locked(&so2->so_rcv, from, m, control)) control = NULL; break; } } mbcnt = so2->so_rcv.sb_mbcnt; sbcc = sbavail(&so2->so_rcv); if (sbcc) sorwakeup_locked(so2); else SOCKBUF_UNLOCK(&so2->so_rcv); /* * The PCB lock on unp2 protects the SB_STOP flag. Without it, * it would be possible for uipc_rcvd to be called at this * point, drain the receiving sockbuf, clear SB_STOP, and then * we would set SB_STOP below. That could lead to an empty * sockbuf having SB_STOP set */ SOCKBUF_LOCK(&so->so_snd); if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax) so->so_snd.sb_flags |= SB_STOP; SOCKBUF_UNLOCK(&so->so_snd); UNP_PCB_UNLOCK(unp2); m = NULL; break; } /* * PRUS_EOF is equivalent to pru_send followed by pru_shutdown. */ if (flags & PRUS_EOF) { UNP_PCB_LOCK(unp); socantsendmore(so); unp_shutdown(unp); UNP_PCB_UNLOCK(unp); } if ((nam != NULL) || (flags & PRUS_EOF)) UNP_LINK_WUNLOCK(); else UNP_LINK_RUNLOCK(); if (control != NULL && error != 0) unp_dispose(control); release: if (control != NULL) m_freem(control); if (m != NULL) m_freem(m); return (error); } static int uipc_ready(struct socket *so, struct mbuf *m, int count) { struct unpcb *unp, *unp2; struct socket *so2; int error; unp = sotounpcb(so); UNP_LINK_RLOCK(); unp2 = unp->unp_conn; UNP_PCB_LOCK(unp2); so2 = unp2->unp_socket; SOCKBUF_LOCK(&so2->so_rcv); if ((error = sbready(&so2->so_rcv, m, count)) == 0) sorwakeup_locked(so2); else SOCKBUF_UNLOCK(&so2->so_rcv); UNP_PCB_UNLOCK(unp2); UNP_LINK_RUNLOCK(); return (error); } static int uipc_sense(struct socket *so, struct stat *sb) { struct unpcb *unp; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_sense: unp == NULL")); sb->st_blksize = so->so_snd.sb_hiwat; UNP_PCB_LOCK(unp); sb->st_dev = NODEV; if (unp->unp_ino == 0) unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino; sb->st_ino = unp->unp_ino; UNP_PCB_UNLOCK(unp); return (0); } static int uipc_shutdown(struct socket *so) { struct unpcb *unp; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL")); UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); socantsendmore(so); unp_shutdown(unp); UNP_PCB_UNLOCK(unp); UNP_LINK_WUNLOCK(); return (0); } static int uipc_sockaddr(struct socket *so, struct sockaddr **nam) { struct unpcb *unp; const struct sockaddr *sa; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL")); *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); UNP_PCB_LOCK(unp); if (unp->unp_addr != NULL) sa = (struct sockaddr *) unp->unp_addr; else sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); UNP_PCB_UNLOCK(unp); return (0); } static struct pr_usrreqs uipc_usrreqs_dgram = { .pru_abort = uipc_abort, .pru_accept = uipc_accept, .pru_attach = uipc_attach, .pru_bind = uipc_bind, .pru_bindat = uipc_bindat, .pru_connect = uipc_connect, .pru_connectat = uipc_connectat, .pru_connect2 = uipc_connect2, .pru_detach = uipc_detach, .pru_disconnect = uipc_disconnect, .pru_listen = uipc_listen, .pru_peeraddr = uipc_peeraddr, .pru_rcvd = uipc_rcvd, .pru_send = uipc_send, .pru_sense = uipc_sense, .pru_shutdown = uipc_shutdown, .pru_sockaddr = uipc_sockaddr, .pru_soreceive = soreceive_dgram, .pru_close = uipc_close, }; static struct pr_usrreqs uipc_usrreqs_seqpacket = { .pru_abort = uipc_abort, .pru_accept = uipc_accept, .pru_attach = uipc_attach, .pru_bind = uipc_bind, .pru_bindat = uipc_bindat, .pru_connect = uipc_connect, .pru_connectat = uipc_connectat, .pru_connect2 = uipc_connect2, .pru_detach = uipc_detach, .pru_disconnect = uipc_disconnect, .pru_listen = uipc_listen, .pru_peeraddr = uipc_peeraddr, .pru_rcvd = uipc_rcvd, .pru_send = uipc_send, .pru_sense = uipc_sense, .pru_shutdown = uipc_shutdown, .pru_sockaddr = uipc_sockaddr, .pru_soreceive = soreceive_generic, /* XXX: or...? */ .pru_close = uipc_close, }; static struct pr_usrreqs uipc_usrreqs_stream = { .pru_abort = uipc_abort, .pru_accept = uipc_accept, .pru_attach = uipc_attach, .pru_bind = uipc_bind, .pru_bindat = uipc_bindat, .pru_connect = uipc_connect, .pru_connectat = uipc_connectat, .pru_connect2 = uipc_connect2, .pru_detach = uipc_detach, .pru_disconnect = uipc_disconnect, .pru_listen = uipc_listen, .pru_peeraddr = uipc_peeraddr, .pru_rcvd = uipc_rcvd, .pru_send = uipc_send, .pru_ready = uipc_ready, .pru_sense = uipc_sense, .pru_shutdown = uipc_shutdown, .pru_sockaddr = uipc_sockaddr, .pru_soreceive = soreceive_generic, .pru_close = uipc_close, }; static int uipc_ctloutput(struct socket *so, struct sockopt *sopt) { struct unpcb *unp; struct xucred xu; int error, optval; if (sopt->sopt_level != 0) return (EINVAL); unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL")); error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case LOCAL_PEERCRED: UNP_PCB_LOCK(unp); if (unp->unp_flags & UNP_HAVEPC) xu = unp->unp_peercred; else { if (so->so_type == SOCK_STREAM) error = ENOTCONN; else error = EINVAL; } UNP_PCB_UNLOCK(unp); if (error == 0) error = sooptcopyout(sopt, &xu, sizeof(xu)); break; case LOCAL_CREDS: /* Unlocked read. */ optval = unp->unp_flags & UNP_WANTCRED ? 1 : 0; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; case LOCAL_CONNWAIT: /* Unlocked read. */ optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: error = EOPNOTSUPP; break; } break; case SOPT_SET: switch (sopt->sopt_name) { case LOCAL_CREDS: case LOCAL_CONNWAIT: error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error) break; #define OPTSET(bit) do { \ UNP_PCB_LOCK(unp); \ if (optval) \ unp->unp_flags |= bit; \ else \ unp->unp_flags &= ~bit; \ UNP_PCB_UNLOCK(unp); \ } while (0) switch (sopt->sopt_name) { case LOCAL_CREDS: OPTSET(UNP_WANTCRED); break; case LOCAL_CONNWAIT: OPTSET(UNP_CONNWAIT); break; default: break; } break; #undef OPTSET default: error = ENOPROTOOPT; break; } break; default: error = EOPNOTSUPP; break; } return (error); } static int unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { return (unp_connectat(AT_FDCWD, so, nam, td)); } static int unp_connectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_un *soun = (struct sockaddr_un *)nam; struct vnode *vp; struct socket *so2, *so3; struct unpcb *unp, *unp2, *unp3; struct nameidata nd; char buf[SOCK_MAXADDRLEN]; struct sockaddr *sa; cap_rights_t rights; int error, len; if (nam->sa_family != AF_UNIX) return (EAFNOSUPPORT); UNP_LINK_WLOCK_ASSERT(); unp = sotounpcb(so); KASSERT(unp != NULL, ("unp_connect: unp == NULL")); if (nam->sa_len > sizeof(struct sockaddr_un)) return (EINVAL); len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); if (len <= 0) return (EINVAL); bcopy(soun->sun_path, buf, len); buf[len] = 0; UNP_PCB_LOCK(unp); if (unp->unp_flags & UNP_CONNECTING) { UNP_PCB_UNLOCK(unp); return (EALREADY); } UNP_LINK_WUNLOCK(); unp->unp_flags |= UNP_CONNECTING; UNP_PCB_UNLOCK(unp); sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_CONNECTAT), td); error = namei(&nd); if (error) vp = NULL; else vp = nd.ni_vp; ASSERT_VOP_LOCKED(vp, "unp_connect"); NDFREE(&nd, NDF_ONLY_PNBUF); if (error) goto bad; if (vp->v_type != VSOCK) { error = ENOTSOCK; goto bad; } #ifdef MAC error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD); if (error) goto bad; #endif error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); if (error) goto bad; unp = sotounpcb(so); KASSERT(unp != NULL, ("unp_connect: unp == NULL")); /* * Lock linkage lock for two reasons: make sure v_socket is stable, * and to protect simultaneous locking of multiple pcbs. */ UNP_LINK_WLOCK(); VOP_UNP_CONNECT(vp, &so2); if (so2 == NULL) { error = ECONNREFUSED; goto bad2; } if (so->so_type != so2->so_type) { error = EPROTOTYPE; goto bad2; } if (so->so_proto->pr_flags & PR_CONNREQUIRED) { if (so2->so_options & SO_ACCEPTCONN) { CURVNET_SET(so2->so_vnet); so3 = sonewconn(so2, 0); CURVNET_RESTORE(); } else so3 = NULL; if (so3 == NULL) { error = ECONNREFUSED; goto bad2; } unp = sotounpcb(so); unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); UNP_PCB_LOCK(unp); UNP_PCB_LOCK(unp2); UNP_PCB_LOCK(unp3); if (unp2->unp_addr != NULL) { bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); unp3->unp_addr = (struct sockaddr_un *) sa; sa = NULL; } /* * The connector's (client's) credentials are copied from its * process structure at the time of connect() (which is now). */ cru2x(td->td_ucred, &unp3->unp_peercred); unp3->unp_flags |= UNP_HAVEPC; /* * The receiver's (server's) credentials are copied from the * unp_peercred member of socket on which the former called * listen(); uipc_listen() cached that process's credentials * at that time so we can use them now. */ KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, ("unp_connect: listener without cached peercred")); memcpy(&unp->unp_peercred, &unp2->unp_peercred, sizeof(unp->unp_peercred)); unp->unp_flags |= UNP_HAVEPC; if (unp2->unp_flags & UNP_WANTCRED) unp3->unp_flags |= UNP_WANTCRED; UNP_PCB_UNLOCK(unp3); UNP_PCB_UNLOCK(unp2); UNP_PCB_UNLOCK(unp); #ifdef MAC mac_socketpeer_set_from_socket(so, so3); mac_socketpeer_set_from_socket(so3, so); #endif so2 = so3; } unp = sotounpcb(so); KASSERT(unp != NULL, ("unp_connect: unp == NULL")); unp2 = sotounpcb(so2); KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL")); UNP_PCB_LOCK(unp); UNP_PCB_LOCK(unp2); error = unp_connect2(so, so2, PRU_CONNECT); UNP_PCB_UNLOCK(unp2); UNP_PCB_UNLOCK(unp); bad2: UNP_LINK_WUNLOCK(); bad: if (vp != NULL) vput(vp); free(sa, M_SONAME); UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); unp->unp_flags &= ~UNP_CONNECTING; UNP_PCB_UNLOCK(unp); return (error); } static int unp_connect2(struct socket *so, struct socket *so2, int req) { struct unpcb *unp; struct unpcb *unp2; unp = sotounpcb(so); KASSERT(unp != NULL, ("unp_connect2: unp == NULL")); unp2 = sotounpcb(so2); KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL")); UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp2); if (so2->so_type != so->so_type) return (EPROTOTYPE); unp->unp_conn = unp2; switch (so->so_type) { case SOCK_DGRAM: LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); soisconnected(so); break; case SOCK_STREAM: case SOCK_SEQPACKET: unp2->unp_conn = unp; if (req == PRU_CONNECT && ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT)) soisconnecting(so); else soisconnected(so); soisconnected(so2); break; default: panic("unp_connect2"); } return (0); } static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2) { struct socket *so; KASSERT(unp2 != NULL, ("unp_disconnect: unp2 == NULL")); UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp2); unp->unp_conn = NULL; switch (unp->unp_socket->so_type) { case SOCK_DGRAM: LIST_REMOVE(unp, unp_reflink); so = unp->unp_socket; SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; SOCK_UNLOCK(so); break; case SOCK_STREAM: case SOCK_SEQPACKET: soisdisconnected(unp->unp_socket); unp2->unp_conn = NULL; soisdisconnected(unp2->unp_socket); break; } } /* * unp_pcblist() walks the global list of struct unpcb's to generate a * pointer list, bumping the refcount on each unpcb. It then copies them out * sequentially, validating the generation number on each to see if it has * been detached. All of this is necessary because copyout() may sleep on * disk I/O. */ static int unp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n; int freeunp; struct unpcb *unp, **unp_list; unp_gen_t gencnt; struct xunpgen *xug; struct unp_head *head; struct xunpcb *xu; switch ((intptr_t)arg1) { case SOCK_STREAM: head = &unp_shead; break; case SOCK_DGRAM: head = &unp_dhead; break; case SOCK_SEQPACKET: head = &unp_sphead; break; default: panic("unp_pcblist: arg1 %d", (int)(intptr_t)arg1); } /* * The process of preparing the PCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == NULL) { n = unp_count; req->oldidx = 2 * (sizeof *xug) + (n + n/8) * sizeof(struct xunpcb); return (0); } if (req->newptr != NULL) return (EPERM); /* * OK, now we're committed to doing something. */ xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK); UNP_LIST_LOCK(); gencnt = unp_gencnt; n = unp_count; UNP_LIST_UNLOCK(); xug->xug_len = sizeof *xug; xug->xug_count = n; xug->xug_gen = gencnt; xug->xug_sogen = so_gencnt; error = SYSCTL_OUT(req, xug, sizeof *xug); if (error) { free(xug, M_TEMP); return (error); } unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); UNP_LIST_LOCK(); for (unp = LIST_FIRST(head), i = 0; unp && i < n; unp = LIST_NEXT(unp, unp_link)) { UNP_PCB_LOCK(unp); if (unp->unp_gencnt <= gencnt) { if (cr_cansee(req->td->td_ucred, unp->unp_socket->so_cred)) { UNP_PCB_UNLOCK(unp); continue; } unp_list[i++] = unp; unp->unp_refcount++; } UNP_PCB_UNLOCK(unp); } UNP_LIST_UNLOCK(); n = i; /* In case we lost some during malloc. */ error = 0; xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO); for (i = 0; i < n; i++) { unp = unp_list[i]; UNP_PCB_LOCK(unp); unp->unp_refcount--; if (unp->unp_refcount != 0 && unp->unp_gencnt <= gencnt) { xu->xu_len = sizeof *xu; xu->xu_unpp = unp; /* * XXX - need more locking here to protect against * connect/disconnect races for SMP. */ if (unp->unp_addr != NULL) bcopy(unp->unp_addr, &xu->xu_addr, unp->unp_addr->sun_len); if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) bcopy(unp->unp_conn->unp_addr, &xu->xu_caddr, unp->unp_conn->unp_addr->sun_len); bcopy(unp, &xu->xu_unp, sizeof *unp); sotoxsocket(unp->unp_socket, &xu->xu_socket); UNP_PCB_UNLOCK(unp); error = SYSCTL_OUT(req, xu, sizeof *xu); } else { freeunp = (unp->unp_refcount == 0); UNP_PCB_UNLOCK(unp); if (freeunp) { UNP_PCB_LOCK_DESTROY(unp); uma_zfree(unp_zone, unp); } } } free(xu, M_TEMP); if (!error) { /* * Give the user an updated idea of our state. If the * generation differs from what we told her before, she knows * that something happened while we were processing this * request, and it might be necessary to retry. */ xug->xug_gen = unp_gencnt; xug->xug_sogen = so_gencnt; xug->xug_count = unp_count; error = SYSCTL_OUT(req, xug, sizeof *xug); } free(unp_list, M_TEMP); free(xug, M_TEMP); return (error); } SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, (void *)(intptr_t)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", "List of active local datagram sockets"); SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, (void *)(intptr_t)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", "List of active local stream sockets"); SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, (void *)(intptr_t)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb", "List of active local seqpacket sockets"); static void unp_shutdown(struct unpcb *unp) { struct unpcb *unp2; struct socket *so; UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK_ASSERT(unp); unp2 = unp->unp_conn; if ((unp->unp_socket->so_type == SOCK_STREAM || (unp->unp_socket->so_type == SOCK_SEQPACKET)) && unp2 != NULL) { so = unp2->unp_socket; if (so != NULL) socantrcvmore(so); } } static void unp_drop(struct unpcb *unp, int errno) { struct socket *so = unp->unp_socket; struct unpcb *unp2; UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK_ASSERT(unp); so->so_error = errno; unp2 = unp->unp_conn; if (unp2 == NULL) return; UNP_PCB_LOCK(unp2); unp_disconnect(unp, unp2); UNP_PCB_UNLOCK(unp2); } static void unp_freerights(struct filedescent **fdep, int fdcount) { struct file *fp; int i; KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount)); for (i = 0; i < fdcount; i++) { fp = fdep[i]->fde_file; filecaps_free(&fdep[i]->fde_caps); unp_discard(fp); } free(fdep[0], M_FILECAPS); } static int unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags) { struct thread *td = curthread; /* XXX */ struct cmsghdr *cm = mtod(control, struct cmsghdr *); int i; int *fdp; struct filedesc *fdesc = td->td_proc->p_fd; struct filedescent **fdep; void *data; socklen_t clen = control->m_len, datalen; int error, newfds; u_int newlen; UNP_LINK_UNLOCK_ASSERT(); error = 0; if (controlp != NULL) /* controlp == NULL => free control messages */ *controlp = NULL; while (cm != NULL) { if (sizeof(*cm) > clen || cm->cmsg_len > clen) { error = EINVAL; break; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { newfds = datalen / sizeof(*fdep); if (newfds == 0) goto next; fdep = data; /* If we're not outputting the descriptors free them. */ if (error || controlp == NULL) { unp_freerights(fdep, newfds); goto next; } FILEDESC_XLOCK(fdesc); /* * Now change each pointer to an fd in the global * table to an integer that is the index to the local * fd table entry that we set up to point to the * global one we are transferring. */ newlen = newfds * sizeof(int); *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { FILEDESC_XUNLOCK(fdesc); error = E2BIG; unp_freerights(fdep, newfds); goto next; } fdp = (int *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); if (fdallocn(td, 0, fdp, newfds) != 0) { FILEDESC_XUNLOCK(fdesc); error = EMSGSIZE; unp_freerights(fdep, newfds); m_freem(*controlp); *controlp = NULL; goto next; } for (i = 0; i < newfds; i++, fdp++) { _finstall(fdesc, fdep[i]->fde_file, *fdp, (flags & MSG_CMSG_CLOEXEC) != 0 ? UF_EXCLOSE : 0, &fdep[i]->fde_caps); unp_externalize_fp(fdep[i]->fde_file); } FILEDESC_XUNLOCK(fdesc); free(fdep[0], M_FILECAPS); } else { /* We can just copy anything else across. */ if (error || controlp == NULL) goto next; *controlp = sbcreatecontrol(NULL, datalen, cm->cmsg_type, cm->cmsg_level); if (*controlp == NULL) { error = ENOBUFS; goto next; } bcopy(data, CMSG_DATA(mtod(*controlp, struct cmsghdr *)), datalen); } controlp = &(*controlp)->m_next; next: if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } m_freem(control); return (error); } static void unp_zone_change(void *tag) { uma_zone_set_max(unp_zone, maxsockets); } static void unp_init(void) { #ifdef VIMAGE if (!IS_DEFAULT_VNET(curvnet)) return; #endif unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); if (unp_zone == NULL) panic("unp_init"); uma_zone_set_max(unp_zone, maxsockets); uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached"); EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change, NULL, EVENTHANDLER_PRI_ANY); LIST_INIT(&unp_dhead); LIST_INIT(&unp_shead); LIST_INIT(&unp_sphead); SLIST_INIT(&unp_defers); TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL); TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL); UNP_LINK_LOCK_INIT(); UNP_LIST_LOCK_INIT(); UNP_DEFERRED_LOCK_INIT(); } static int unp_internalize(struct mbuf **controlp, struct thread *td) { struct mbuf *control = *controlp; struct proc *p = td->td_proc; struct filedesc *fdesc = p->p_fd; struct bintime *bt; struct cmsghdr *cm = mtod(control, struct cmsghdr *); struct cmsgcred *cmcred; struct filedescent *fde, **fdep, *fdev; struct file *fp; struct timeval *tv; int i, *fdp; void *data; socklen_t clen = control->m_len, datalen; int error, oldfds; u_int newlen; UNP_LINK_UNLOCK_ASSERT(); error = 0; *controlp = NULL; while (cm != NULL) { if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET || cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) { error = EINVAL; goto out; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; switch (cm->cmsg_type) { /* * Fill in credential information. */ case SCM_CREDS: *controlp = sbcreatecontrol(NULL, sizeof(*cmcred), SCM_CREDS, SOL_SOCKET); if (*controlp == NULL) { error = ENOBUFS; goto out; } cmcred = (struct cmsgcred *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); cmcred->cmcred_pid = p->p_pid; cmcred->cmcred_uid = td->td_ucred->cr_ruid; cmcred->cmcred_gid = td->td_ucred->cr_rgid; cmcred->cmcred_euid = td->td_ucred->cr_uid; cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX); for (i = 0; i < cmcred->cmcred_ngroups; i++) cmcred->cmcred_groups[i] = td->td_ucred->cr_groups[i]; break; case SCM_RIGHTS: oldfds = datalen / sizeof (int); if (oldfds == 0) break; /* * Check that all the FDs passed in refer to legal * files. If not, reject the entire operation. */ fdp = data; FILEDESC_SLOCK(fdesc); for (i = 0; i < oldfds; i++, fdp++) { fp = fget_locked(fdesc, *fdp); if (fp == NULL) { FILEDESC_SUNLOCK(fdesc); error = EBADF; goto out; } if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { FILEDESC_SUNLOCK(fdesc); error = EOPNOTSUPP; goto out; } } /* * Now replace the integer FDs with pointers to the * file structure and capability rights. */ newlen = oldfds * sizeof(fdep[0]); *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { FILEDESC_SUNLOCK(fdesc); error = E2BIG; goto out; } fdp = data; fdep = (struct filedescent **) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS, M_WAITOK); for (i = 0; i < oldfds; i++, fdev++, fdp++) { fde = &fdesc->fd_ofiles[*fdp]; fdep[i] = fdev; fdep[i]->fde_file = fde->fde_file; filecaps_copy(&fde->fde_caps, &fdep[i]->fde_caps, true); unp_internalize_fp(fdep[i]->fde_file); } FILEDESC_SUNLOCK(fdesc); break; case SCM_TIMESTAMP: *controlp = sbcreatecontrol(NULL, sizeof(*tv), SCM_TIMESTAMP, SOL_SOCKET); if (*controlp == NULL) { error = ENOBUFS; goto out; } tv = (struct timeval *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); microtime(tv); break; case SCM_BINTIME: *controlp = sbcreatecontrol(NULL, sizeof(*bt), SCM_BINTIME, SOL_SOCKET); if (*controlp == NULL) { error = ENOBUFS; goto out; } bt = (struct bintime *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); bintime(bt); break; default: error = EINVAL; goto out; } controlp = &(*controlp)->m_next; if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } out: m_freem(control); return (error); } static struct mbuf * unp_addsockcred(struct thread *td, struct mbuf *control) { struct mbuf *m, *n, *n_prev; struct sockcred *sc; const struct cmsghdr *cm; int ngroups; int i; ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX); m = sbcreatecontrol(NULL, SOCKCREDSIZE(ngroups), SCM_CREDS, SOL_SOCKET); if (m == NULL) return (control); sc = (struct sockcred *) CMSG_DATA(mtod(m, struct cmsghdr *)); sc->sc_uid = td->td_ucred->cr_ruid; sc->sc_euid = td->td_ucred->cr_uid; sc->sc_gid = td->td_ucred->cr_rgid; sc->sc_egid = td->td_ucred->cr_gid; sc->sc_ngroups = ngroups; for (i = 0; i < sc->sc_ngroups; i++) sc->sc_groups[i] = td->td_ucred->cr_groups[i]; /* * Unlink SCM_CREDS control messages (struct cmsgcred), since just * created SCM_CREDS control message (struct sockcred) has another * format. */ if (control != NULL) for (n = control, n_prev = NULL; n != NULL;) { cm = mtod(n, struct cmsghdr *); if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_CREDS) { if (n_prev == NULL) control = n->m_next; else n_prev->m_next = n->m_next; n = m_free(n); } else { n_prev = n; n = n->m_next; } } /* Prepend it to the head. */ m->m_next = control; return (m); } static struct unpcb * fptounp(struct file *fp) { struct socket *so; if (fp->f_type != DTYPE_SOCKET) return (NULL); if ((so = fp->f_data) == NULL) return (NULL); if (so->so_proto->pr_domain != &localdomain) return (NULL); return sotounpcb(so); } static void unp_discard(struct file *fp) { struct unp_defer *dr; if (unp_externalize_fp(fp)) { dr = malloc(sizeof(*dr), M_TEMP, M_WAITOK); dr->ud_fp = fp; UNP_DEFERRED_LOCK(); SLIST_INSERT_HEAD(&unp_defers, dr, ud_link); UNP_DEFERRED_UNLOCK(); atomic_add_int(&unp_defers_count, 1); taskqueue_enqueue(taskqueue_thread, &unp_defer_task); } else (void) closef(fp, (struct thread *)NULL); } static void unp_process_defers(void *arg __unused, int pending) { struct unp_defer *dr; SLIST_HEAD(, unp_defer) drl; int count; SLIST_INIT(&drl); for (;;) { UNP_DEFERRED_LOCK(); if (SLIST_FIRST(&unp_defers) == NULL) { UNP_DEFERRED_UNLOCK(); break; } SLIST_SWAP(&unp_defers, &drl, unp_defer); UNP_DEFERRED_UNLOCK(); count = 0; while ((dr = SLIST_FIRST(&drl)) != NULL) { SLIST_REMOVE_HEAD(&drl, ud_link); closef(dr->ud_fp, NULL); free(dr, M_TEMP); count++; } atomic_add_int(&unp_defers_count, -count); } } static void unp_internalize_fp(struct file *fp) { struct unpcb *unp; UNP_LINK_WLOCK(); if ((unp = fptounp(fp)) != NULL) { unp->unp_file = fp; unp->unp_msgcount++; } fhold(fp); unp_rights++; UNP_LINK_WUNLOCK(); } static int unp_externalize_fp(struct file *fp) { struct unpcb *unp; int ret; UNP_LINK_WLOCK(); if ((unp = fptounp(fp)) != NULL) { unp->unp_msgcount--; ret = 1; } else ret = 0; unp_rights--; UNP_LINK_WUNLOCK(); return (ret); } /* * unp_defer indicates whether additional work has been defered for a future * pass through unp_gc(). It is thread local and does not require explicit * synchronization. */ static int unp_marked; static int unp_unreachable; static void unp_accessable(struct filedescent **fdep, int fdcount) { struct unpcb *unp; struct file *fp; int i; for (i = 0; i < fdcount; i++) { fp = fdep[i]->fde_file; if ((unp = fptounp(fp)) == NULL) continue; if (unp->unp_gcflag & UNPGC_REF) continue; unp->unp_gcflag &= ~UNPGC_DEAD; unp->unp_gcflag |= UNPGC_REF; unp_marked++; } } static void unp_gc_process(struct unpcb *unp) { struct socket *soa; struct socket *so; struct file *fp; /* Already processed. */ if (unp->unp_gcflag & UNPGC_SCANNED) return; fp = unp->unp_file; /* * Check for a socket potentially in a cycle. It must be in a * queue as indicated by msgcount, and this must equal the file * reference count. Note that when msgcount is 0 the file is NULL. */ if ((unp->unp_gcflag & UNPGC_REF) == 0 && fp && unp->unp_msgcount != 0 && fp->f_count == unp->unp_msgcount) { unp->unp_gcflag |= UNPGC_DEAD; unp_unreachable++; return; } /* * Mark all sockets we reference with RIGHTS. */ so = unp->unp_socket; if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) { SOCKBUF_LOCK(&so->so_rcv); unp_scan(so->so_rcv.sb_mb, unp_accessable); SOCKBUF_UNLOCK(&so->so_rcv); } /* * Mark all sockets in our accept queue. */ ACCEPT_LOCK(); TAILQ_FOREACH(soa, &so->so_comp, so_list) { if ((sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) != 0) continue; SOCKBUF_LOCK(&soa->so_rcv); unp_scan(soa->so_rcv.sb_mb, unp_accessable); SOCKBUF_UNLOCK(&soa->so_rcv); } ACCEPT_UNLOCK(); unp->unp_gcflag |= UNPGC_SCANNED; } static int unp_recycled; SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, "Number of unreachable sockets claimed by the garbage collector."); static int unp_taskcount; SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, "Number of times the garbage collector has run."); static void unp_gc(__unused void *arg, int pending) { struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead, NULL }; struct unp_head **head; struct file *f, **unref; struct unpcb *unp; int i, total; unp_taskcount++; UNP_LIST_LOCK(); /* * First clear all gc flags from previous runs, apart from * UNPGC_IGNORE_RIGHTS. */ for (head = heads; *head != NULL; head++) LIST_FOREACH(unp, *head, unp_link) unp->unp_gcflag = (unp->unp_gcflag & UNPGC_IGNORE_RIGHTS); /* * Scan marking all reachable sockets with UNPGC_REF. Once a socket * is reachable all of the sockets it references are reachable. * Stop the scan once we do a complete loop without discovering * a new reachable socket. */ do { unp_unreachable = 0; unp_marked = 0; for (head = heads; *head != NULL; head++) LIST_FOREACH(unp, *head, unp_link) unp_gc_process(unp); } while (unp_marked); UNP_LIST_UNLOCK(); if (unp_unreachable == 0) return; /* * Allocate space for a local list of dead unpcbs. */ unref = malloc(unp_unreachable * sizeof(struct file *), M_TEMP, M_WAITOK); /* * Iterate looking for sockets which have been specifically marked * as as unreachable and store them locally. */ UNP_LINK_RLOCK(); UNP_LIST_LOCK(); for (total = 0, head = heads; *head != NULL; head++) LIST_FOREACH(unp, *head, unp_link) if ((unp->unp_gcflag & UNPGC_DEAD) != 0) { f = unp->unp_file; if (unp->unp_msgcount == 0 || f == NULL || f->f_count != unp->unp_msgcount) continue; unref[total++] = f; fhold(f); KASSERT(total <= unp_unreachable, ("unp_gc: incorrect unreachable count.")); } UNP_LIST_UNLOCK(); UNP_LINK_RUNLOCK(); /* * Now flush all sockets, free'ing rights. This will free the * struct files associated with these sockets but leave each socket * with one remaining ref. */ for (i = 0; i < total; i++) { struct socket *so; so = unref[i]->f_data; CURVNET_SET(so->so_vnet); sorflush(so); CURVNET_RESTORE(); } /* * And finally release the sockets so they can be reclaimed. */ for (i = 0; i < total; i++) fdrop(unref[i], NULL); unp_recycled += total; free(unref, M_TEMP); } static void unp_dispose(struct mbuf *m) { if (m) unp_scan(m, unp_freerights); } /* * Synchronize against unp_gc, which can trip over data as we are freeing it. */ static void unp_dispose_so(struct socket *so) { struct unpcb *unp; unp = sotounpcb(so); UNP_LIST_LOCK(); unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS; UNP_LIST_UNLOCK(); unp_dispose(so->so_rcv.sb_mb); } static void unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int)) { struct mbuf *m; struct cmsghdr *cm; void *data; socklen_t clen, datalen; while (m0 != NULL) { for (m = m0; m; m = m->m_next) { if (m->m_type != MT_CONTROL) continue; cm = mtod(m, struct cmsghdr *); clen = m->m_len; while (cm != NULL) { if (sizeof(*cm) > clen || cm->cmsg_len > clen) break; data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { (*op)(data, datalen / sizeof(struct filedescent *)); } if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } } m0 = m0->m_nextpkt; } } /* * A helper function called by VFS before socket-type vnode reclamation. * For an active vnode it clears unp_vnode pointer and decrements unp_vnode * use count. */ void vfs_unp_reclaim(struct vnode *vp) { struct socket *so; struct unpcb *unp; int active; ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim"); KASSERT(vp->v_type == VSOCK, ("vfs_unp_reclaim: vp->v_type != VSOCK")); active = 0; UNP_LINK_WLOCK(); VOP_UNP_CONNECT(vp, &so); if (so == NULL) goto done; unp = sotounpcb(so); if (unp == NULL) goto done; UNP_PCB_LOCK(unp); if (unp->unp_vnode == vp) { VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; active = 1; } UNP_PCB_UNLOCK(unp); done: UNP_LINK_WUNLOCK(); if (active) vunref(vp); } #ifdef DDB static void db_print_indent(int indent) { int i; for (i = 0; i < indent; i++) db_printf(" "); } static void db_print_unpflags(int unp_flags) { int comma; comma = 0; if (unp_flags & UNP_HAVEPC) { db_printf("%sUNP_HAVEPC", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_HAVEPCCACHED) { db_printf("%sUNP_HAVEPCCACHED", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_WANTCRED) { db_printf("%sUNP_WANTCRED", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_CONNWAIT) { db_printf("%sUNP_CONNWAIT", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_CONNECTING) { db_printf("%sUNP_CONNECTING", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_BINDING) { db_printf("%sUNP_BINDING", comma ? ", " : ""); comma = 1; } } static void db_print_xucred(int indent, struct xucred *xu) { int comma, i; db_print_indent(indent); db_printf("cr_version: %u cr_uid: %u cr_ngroups: %d\n", xu->cr_version, xu->cr_uid, xu->cr_ngroups); db_print_indent(indent); db_printf("cr_groups: "); comma = 0; for (i = 0; i < xu->cr_ngroups; i++) { db_printf("%s%u", comma ? ", " : "", xu->cr_groups[i]); comma = 1; } db_printf("\n"); } static void db_print_unprefs(int indent, struct unp_head *uh) { struct unpcb *unp; int counter; counter = 0; LIST_FOREACH(unp, uh, unp_reflink) { if (counter % 4 == 0) db_print_indent(indent); db_printf("%p ", unp); if (counter % 4 == 3) db_printf("\n"); counter++; } if (counter != 0 && counter % 4 != 0) db_printf("\n"); } DB_SHOW_COMMAND(unpcb, db_show_unpcb) { struct unpcb *unp; if (!have_addr) { db_printf("usage: show unpcb \n"); return; } unp = (struct unpcb *)addr; db_printf("unp_socket: %p unp_vnode: %p\n", unp->unp_socket, unp->unp_vnode); db_printf("unp_ino: %ju unp_conn: %p\n", (uintmax_t)unp->unp_ino, unp->unp_conn); db_printf("unp_refs:\n"); db_print_unprefs(2, &unp->unp_refs); /* XXXRW: Would be nice to print the full address, if any. */ db_printf("unp_addr: %p\n", unp->unp_addr); db_printf("unp_gencnt: %llu\n", (unsigned long long)unp->unp_gencnt); db_printf("unp_flags: %x (", unp->unp_flags); db_print_unpflags(unp->unp_flags); db_printf(")\n"); db_printf("unp_peercred:\n"); db_print_xucred(2, &unp->unp_peercred); db_printf("unp_refcount: %u\n", unp->unp_refcount); } #endif Index: projects/clang380-import/sys/mips/mips/elf_machdep.c =================================================================== --- projects/clang380-import/sys/mips/mips/elf_machdep.c (revision 293686) +++ projects/clang380-import/sys/mips/mips/elf_machdep.c (revision 293687) @@ -1,351 +1,353 @@ /*- * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * from: src/sys/i386/i386/elf_machdep.c,v 1.20 2004/08/11 02:35:05 marcel */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __mips_n64 struct sysentvec elf64_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; static Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_MIPS, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .flags = 0 }; SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info); void elf64_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } #else struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_MIPS, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .flags = 0 }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); void elf32_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } #endif /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf32_Addr *where = (Elf32_Addr *)NULL; Elf_Addr addr; Elf_Addr addend = (Elf_Addr)0; Elf_Word rtype = (Elf_Word)0, symidx; const Elf_Rel *rel = NULL; const Elf_Rela *rela = NULL; int error; /* * Stash R_MIPS_HI16 info so we can use it when processing R_MIPS_LO16 */ static Elf_Addr ahl; static Elf32_Addr *where_hi16; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf32_Addr *) (relocbase + rel->r_offset); rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); switch (rtype) { case R_MIPS_64: addend = *(Elf64_Addr *)where; break; default: addend = *where; break; } break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf32_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } switch (rtype) { case R_MIPS_NONE: /* none */ break; case R_MIPS_32: /* S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; if (*where != addr) *where = (Elf32_Addr)addr; break; case R_MIPS_26: /* ((A << 2) | (P & 0xf0000000) + S) >> 2 */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addend &= 0x03ffffff; /* * Addendum for .rela R_MIPS_26 is not shifted right */ if (rela == NULL) addend <<= 2; addr += ((Elf_Addr)where & 0xf0000000) | addend; addr >>= 2; *where &= ~0x03ffffff; *where |= addr & 0x03ffffff; break; case R_MIPS_64: /* S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; if (*(Elf64_Addr*)where != addr) *(Elf64_Addr*)where = addr; break; case R_MIPS_HI16: /* ((AHL + S) - ((short)(AHL + S)) >> 16 */ if (rela != NULL) { error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; *where &= 0xffff0000; *where |= ((((long long) addr + 0x8000LL) >> 16) & 0xffff); } else { ahl = addend << 16; where_hi16 = where; } break; case R_MIPS_LO16: /* AHL + S */ if (rela != NULL) { error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; *where &= 0xffff0000; *where |= addr & 0xffff; } else { ahl += (int16_t)addend; error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addend &= 0xffff0000; addend |= (uint16_t)(ahl + addr); *where = addend; addend = *where_hi16; addend &= 0xffff0000; addend |= ((ahl + addr) - (int16_t)(ahl + addr)) >> 16; *where_hi16 = addend; } break; case R_MIPS_HIGHER: /* %higher(A+S) */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; *where &= 0xffff0000; *where |= (((long long)addr + 0x80008000LL) >> 32) & 0xffff; break; case R_MIPS_HIGHEST: /* %highest(A+S) */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; *where &= 0xffff0000; *where |= (((long long)addr + 0x800080008000LL) >> 48) & 0xffff; break; default: printf("kldload: unexpected relocation type %d\n", rtype); return (-1); } return(0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf __unused) { /* * Sync the I and D caches to make sure our relocations are visible. */ mips_icache_sync_all(); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: projects/clang380-import/sys/mips/mips/freebsd32_machdep.c =================================================================== --- projects/clang380-import/sys/mips/mips/freebsd32_machdep.c (revision 293686) +++ projects/clang380-import/sys/mips/mips/freebsd32_machdep.c (revision 293687) @@ -1,488 +1,489 @@ /*- * Copyright (c) 2012 Juli Mallett * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Based on nwhitehorn's COMPAT_FREEBSD32 support code for PowerPC64. */ #include "opt_compat.h" #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void freebsd32_exec_setregs(struct thread *, struct image_params *, u_long); static int get_mcontext32(struct thread *, mcontext32_t *, int); static int set_mcontext32(struct thread *, mcontext32_t *); static void freebsd32_sendsig(sig_t, ksiginfo_t *, sigset_t *); extern const char *freebsd32_syscallnames[]; struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = freebsd32_sendsig, .sv_sigcode = sigcode32, .sv_szsigcode = &szsigcode32, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = ((vm_offset_t)0x80000000), .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = freebsd32_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_MIPS, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .flags = 0 }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static void freebsd32_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { exec_setregs(td, imgp, stack); /* * See comment in exec_setregs about running 32-bit binaries with 64-bit * registers. */ td->td_frame->sp -= 65536; /* * Clear extended address space bit for userland. */ td->td_frame->sr &= ~MIPS_SR_UX; } int set_regs32(struct thread *td, struct reg32 *regs) { struct reg r; unsigned i; for (i = 0; i < NUMSAVEREGS; i++) r.r_regs[i] = regs->r_regs[i]; return (set_regs(td, &r)); } int fill_regs32(struct thread *td, struct reg32 *regs) { struct reg r; unsigned i; int error; error = fill_regs(td, &r); if (error != 0) return (error); for (i = 0; i < NUMSAVEREGS; i++) regs->r_regs[i] = r.r_regs[i]; return (0); } int set_fpregs32(struct thread *td, struct fpreg32 *fpregs) { struct fpreg fp; unsigned i; for (i = 0; i < NUMFPREGS; i++) fp.r_regs[i] = fpregs->r_regs[i]; return (set_fpregs(td, &fp)); } int fill_fpregs32(struct thread *td, struct fpreg32 *fpregs) { struct fpreg fp; unsigned i; int error; error = fill_fpregs(td, &fp); if (error != 0) return (error); for (i = 0; i < NUMFPREGS; i++) fpregs->r_regs[i] = fp.r_regs[i]; return (0); } static int get_mcontext32(struct thread *td, mcontext32_t *mcp, int flags) { mcontext_t mcp64; unsigned i; int error; error = get_mcontext(td, &mcp64, flags); if (error != 0) return (error); mcp->mc_onstack = mcp64.mc_onstack; mcp->mc_pc = mcp64.mc_pc; for (i = 0; i < 32; i++) mcp->mc_regs[i] = mcp64.mc_regs[i]; mcp->sr = mcp64.sr; mcp->mullo = mcp64.mullo; mcp->mulhi = mcp64.mulhi; mcp->mc_fpused = mcp64.mc_fpused; for (i = 0; i < 33; i++) mcp->mc_fpregs[i] = mcp64.mc_fpregs[i]; mcp->mc_fpc_eir = mcp64.mc_fpc_eir; mcp->mc_tls = (int32_t)(intptr_t)mcp64.mc_tls; return (0); } static int set_mcontext32(struct thread *td, mcontext32_t *mcp) { mcontext_t mcp64; unsigned i; mcp64.mc_onstack = mcp->mc_onstack; mcp64.mc_pc = mcp->mc_pc; for (i = 0; i < 32; i++) mcp64.mc_regs[i] = mcp->mc_regs[i]; mcp64.sr = mcp->sr; mcp64.mullo = mcp->mullo; mcp64.mulhi = mcp->mulhi; mcp64.mc_fpused = mcp->mc_fpused; for (i = 0; i < 33; i++) mcp64.mc_fpregs[i] = mcp->mc_fpregs[i]; mcp64.mc_fpc_eir = mcp->mc_fpc_eir; mcp64.mc_tls = (void *)(intptr_t)mcp->mc_tls; return (set_mcontext(td, &mcp64)); } int freebsd32_sigreturn(struct thread *td, struct freebsd32_sigreturn_args *uap) { ucontext32_t uc; int error; CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } error = set_mcontext32(td, &uc.uc_mcontext); if (error != 0) return (error); kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); #if 0 CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]); #endif return (EJUSTRETURN); } /* * The first two fields of a ucontext_t are the signal mask and the machine * context. The next field is uc_link; we want to avoid destroying the link * when copying out contexts. */ #define UC32_COPY_SIZE offsetof(ucontext32_t, uc_link) int freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->ucp, UC32_COPY_SIZE); } return (ret); } int freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } return (ret == 0 ? EJUSTRETURN : ret); } int freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap) { ucontext32_t uc; int ret; if (uap->oucp == NULL || uap->ucp == NULL) ret = EINVAL; else { get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->oucp, UC32_COPY_SIZE); if (ret == 0) { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } } return (ret == 0 ? EJUSTRETURN : ret); } #define UCONTEXT_MAGIC 0xACEDBADE /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct proc *p; struct thread *td; struct fpreg32 fpregs; struct reg32 regs; struct sigacts *psp; struct sigframe32 sf, *sfp; int sig; int oonstack; unsigned i; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); fill_regs32(td, ®s); oonstack = sigonstack(td->td_frame->sp); /* save user context */ bzero(&sf, sizeof sf); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack.ss_sp = (int32_t)(intptr_t)td->td_sigstk.ss_sp; sf.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size; sf.sf_uc.uc_stack.ss_flags = td->td_sigstk.ss_flags; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_pc = regs.r_regs[PC]; sf.sf_uc.uc_mcontext.mullo = regs.r_regs[MULLO]; sf.sf_uc.uc_mcontext.mulhi = regs.r_regs[MULHI]; sf.sf_uc.uc_mcontext.mc_tls = (int32_t)(intptr_t)td->td_md.md_tls; sf.sf_uc.uc_mcontext.mc_regs[0] = UCONTEXT_MAGIC; /* magic number */ for (i = 1; i < 32; i++) sf.sf_uc.uc_mcontext.mc_regs[i] = regs.r_regs[i]; sf.sf_uc.uc_mcontext.mc_fpused = td->td_md.md_flags & MDTD_FPUSED; if (sf.sf_uc.uc_mcontext.mc_fpused) { /* if FPU has current state, save it first */ if (td == PCPU_GET(fpcurthread)) MipsSaveCurFPState(td); fill_fpregs32(td, &fpregs); for (i = 0; i < 33; i++) sf.sf_uc.uc_mcontext.mc_fpregs[i] = fpregs.r_regs[i]; } /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe32 *)((vm_offset_t)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe32)) & ~(sizeof(__int64_t) - 1)); } else sfp = (struct sigframe32 *)((vm_offset_t)(td->td_frame->sp - sizeof(struct sigframe32)) & ~(sizeof(__int64_t) - 1)); /* Build the argument list for the signal handler. */ td->td_frame->a0 = sig; td->td_frame->a2 = (register_t)(intptr_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ td->td_frame->a1 = (register_t)(intptr_t)&sfp->sf_si; /* sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; */ /* fill siginfo structure */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = td->td_frame->badvaddr; } else { /* Old FreeBSD-style arguments. */ td->td_frame->a1 = ksi->ksi_code; td->td_frame->a3 = td->td_frame->badvaddr; /* sf.sf_ahu.sf_handler = catcher; */ } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(struct sigframe32)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ PROC_LOCK(p); sigexit(td, SIGILL); } td->td_frame->pc = (register_t)(intptr_t)catcher; td->td_frame->t9 = (register_t)(intptr_t)catcher; td->td_frame->sp = (register_t)(intptr_t)sfp; /* * Signal trampoline code is at base of user stack. */ td->td_frame->ra = (register_t)(intptr_t)FREEBSD32_PS_STRINGS - *(p->p_sysent->sv_szsigcode); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } int freebsd32_sysarch(struct thread *td, struct freebsd32_sysarch_args *uap) { int error; int32_t tlsbase; switch (uap->op) { case MIPS_SET_TLS: td->td_md.md_tls = (void *)(intptr_t)uap->parms; return (0); case MIPS_GET_TLS: tlsbase = (int32_t)(intptr_t)td->td_md.md_tls; error = copyout(&tlsbase, uap->parms, sizeof(tlsbase)); return (error); default: break; } return (EINVAL); } void elf32_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } Index: projects/clang380-import/sys/net/if_arcsubr.c =================================================================== --- projects/clang380-import/sys/net/if_arcsubr.c (revision 293686) +++ projects/clang380-import/sys/net/if_arcsubr.c (revision 293687) @@ -1,833 +1,832 @@ /* $NetBSD: if_arcsubr.c,v 1.36 2001/06/14 05:44:23 itojun Exp $ */ /* $FreeBSD$ */ /*- * Copyright (c) 1994, 1995 Ignatios Souvatzis * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: NetBSD: if_ethersubr.c,v 1.9 1994/06/29 06:36:11 cgd Exp * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93 * */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #endif #ifdef INET6 #include #endif #define ARCNET_ALLOW_BROKEN_ARP static struct mbuf *arc_defrag(struct ifnet *, struct mbuf *); static int arc_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); u_int8_t arcbroadcastaddr = 0; #define ARC_LLADDR(ifp) (*(u_int8_t *)IF_LLADDR(ifp)) #define senderr(e) { error = (e); goto bad;} #define SIN(s) ((const struct sockaddr_in *)(s)) /* * ARCnet output routine. * Encapsulate a packet of type family for the local net. * Assumes that ifp is actually pointer to arccom structure. */ int arc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { struct arc_header *ah; int error; u_int8_t atype, adst; int loop_copy = 0; int isphds; #if defined(INET) || defined(INET6) int is_gw = 0; #endif if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) return(ENETDOWN); /* m, m1 aren't initialized yet */ error = 0; #if defined(INET) || defined(INET6) - if (ro != NULL && ro->ro_rt != NULL && - (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0) - is_gw = 1; + if (ro != NULL) + is_gw = (ro->ro_flags & RT_HAS_GW) != 0; #endif switch (dst->sa_family) { #ifdef INET case AF_INET: /* * For now, use the simple IP addr -> ARCnet addr mapping */ if (m->m_flags & (M_BCAST|M_MCAST)) adst = arcbroadcastaddr; /* ARCnet broadcast address */ else if (ifp->if_flags & IFF_NOARP) adst = ntohl(SIN(dst)->sin_addr.s_addr) & 0xFF; else { error = arpresolve(ifp, is_gw, m, dst, &adst, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); } atype = (ifp->if_flags & IFF_LINK0) ? ARCTYPE_IP_OLD : ARCTYPE_IP; break; case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_ARCNET); loop_copy = -1; /* if this is for us, don't do it */ switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: atype = ARCTYPE_REVARP; break; case ARPOP_REQUEST: case ARPOP_REPLY: default: atype = ARCTYPE_ARP; break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, &adst, ARC_ADDR_LEN); else bcopy(ar_tha(ah), &adst, ARC_ADDR_LEN); } break; #endif #ifdef INET6 case AF_INET6: if ((m->m_flags & M_MCAST) != 0) adst = arcbroadcastaddr; /* ARCnet broadcast address */ else { error = nd6_resolve(ifp, is_gw, m, dst, &adst, NULL); if (error != 0) return (error == EWOULDBLOCK ? 0 : error); } atype = ARCTYPE_INET6; break; #endif case AF_UNSPEC: { const struct arc_header *ah; loop_copy = -1; ah = (const struct arc_header *)dst->sa_data; adst = ah->arc_dhost; atype = ah->arc_type; if (atype == ARCTYPE_ARP) { atype = (ifp->if_flags & IFF_LINK0) ? ARCTYPE_ARP_OLD: ARCTYPE_ARP; #ifdef ARCNET_ALLOW_BROKEN_ARP /* * XXX It's not clear per RFC826 if this is needed, but * "assigned numbers" say this is wrong. * However, e.g., AmiTCP 3.0Beta used it... we make this * switchable for emergency cases. Not perfect, but... */ if (ifp->if_flags & IFF_LINK2) mtod(m, struct arphdr *)->ar_pro = atype - 1; #endif } break; } default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); senderr(EAFNOSUPPORT); } isphds = arc_isphds(atype); M_PREPEND(m, isphds ? ARC_HDRNEWLEN : ARC_HDRLEN, M_NOWAIT); if (m == 0) senderr(ENOBUFS); ah = mtod(m, struct arc_header *); ah->arc_type = atype; ah->arc_dhost = adst; ah->arc_shost = ARC_LLADDR(ifp); if (isphds) { ah->arc_flag = 0; ah->arc_seqid = 0; } if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) { if ((m->m_flags & M_BCAST) || (loop_copy > 0)) { struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); (void) if_simloop(ifp, n, dst->sa_family, ARC_HDRLEN); } else if (ah->arc_dhost == ah->arc_shost) { (void) if_simloop(ifp, m, dst->sa_family, ARC_HDRLEN); return (0); /* XXX */ } } BPF_MTAP(ifp, m); error = ifp->if_transmit(ifp, m); return (error); bad: if (m) m_freem(m); return (error); } void arc_frag_init(struct ifnet *ifp) { struct arccom *ac; ac = (struct arccom *)ifp->if_l2com; ac->curr_frag = 0; } struct mbuf * arc_frag_next(struct ifnet *ifp) { struct arccom *ac; struct mbuf *m; struct arc_header *ah; ac = (struct arccom *)ifp->if_l2com; if ((m = ac->curr_frag) == 0) { int tfrags; /* dequeue new packet */ IF_DEQUEUE(&ifp->if_snd, m); if (m == 0) return 0; ah = mtod(m, struct arc_header *); if (!arc_isphds(ah->arc_type)) return m; ++ac->ac_seqid; /* make the seqid unique */ tfrags = (m->m_pkthdr.len + ARC_MAX_DATA - 1) / ARC_MAX_DATA; ac->fsflag = 2 * tfrags - 3; ac->sflag = 0; ac->rsflag = ac->fsflag; ac->arc_dhost = ah->arc_dhost; ac->arc_shost = ah->arc_shost; ac->arc_type = ah->arc_type; m_adj(m, ARC_HDRNEWLEN); ac->curr_frag = m; } /* split out next fragment and return it */ if (ac->sflag < ac->fsflag) { /* we CAN'T have short packets here */ ac->curr_frag = m_split(m, ARC_MAX_DATA, M_NOWAIT); if (ac->curr_frag == 0) { m_freem(m); return 0; } M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT); if (m == 0) { m_freem(ac->curr_frag); ac->curr_frag = 0; return 0; } ah = mtod(m, struct arc_header *); ah->arc_flag = ac->rsflag; ah->arc_seqid = ac->ac_seqid; ac->sflag += 2; ac->rsflag = ac->sflag; } else if ((m->m_pkthdr.len >= ARC_MIN_FORBID_LEN - ARC_HDRNEWLEN + 2) && (m->m_pkthdr.len <= ARC_MAX_FORBID_LEN - ARC_HDRNEWLEN + 2)) { ac->curr_frag = 0; M_PREPEND(m, ARC_HDRNEWLEN_EXC, M_NOWAIT); if (m == 0) return 0; ah = mtod(m, struct arc_header *); ah->arc_flag = 0xFF; ah->arc_seqid = 0xFFFF; ah->arc_type2 = ac->arc_type; ah->arc_flag2 = ac->sflag; ah->arc_seqid2 = ac->ac_seqid; } else { ac->curr_frag = 0; M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT); if (m == 0) return 0; ah = mtod(m, struct arc_header *); ah->arc_flag = ac->sflag; ah->arc_seqid = ac->ac_seqid; } ah->arc_dhost = ac->arc_dhost; ah->arc_shost = ac->arc_shost; ah->arc_type = ac->arc_type; return m; } /* * Defragmenter. Returns mbuf if last packet found, else * NULL. frees imcoming mbuf as necessary. */ static __inline struct mbuf * arc_defrag(struct ifnet *ifp, struct mbuf *m) { struct arc_header *ah, *ah1; struct arccom *ac; struct ac_frag *af; struct mbuf *m1; char *s; int newflen; u_char src,dst,typ; ac = (struct arccom *)ifp->if_l2com; if (m->m_len < ARC_HDRNEWLEN) { m = m_pullup(m, ARC_HDRNEWLEN); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return NULL; } } ah = mtod(m, struct arc_header *); typ = ah->arc_type; if (!arc_isphds(typ)) return m; src = ah->arc_shost; dst = ah->arc_dhost; if (ah->arc_flag == 0xff) { m_adj(m, 4); if (m->m_len < ARC_HDRNEWLEN) { m = m_pullup(m, ARC_HDRNEWLEN); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return NULL; } } ah = mtod(m, struct arc_header *); } af = &ac->ac_fragtab[src]; m1 = af->af_packet; s = "debug code error"; if (ah->arc_flag & 1) { /* * first fragment. We always initialize, which is * about the right thing to do, as we only want to * accept one fragmented packet per src at a time. */ if (m1 != NULL) m_freem(m1); af->af_packet = m; m1 = m; af->af_maxflag = ah->arc_flag; af->af_lastseen = 0; af->af_seqid = ah->arc_seqid; return NULL; /* notreached */ } else { /* check for unfragmented packet */ if (ah->arc_flag == 0) return m; /* do we have a first packet from that src? */ if (m1 == NULL) { s = "no first frag"; goto outofseq; } ah1 = mtod(m1, struct arc_header *); if (ah->arc_seqid != ah1->arc_seqid) { s = "seqid differs"; goto outofseq; } if (typ != ah1->arc_type) { s = "type differs"; goto outofseq; } if (dst != ah1->arc_dhost) { s = "dest host differs"; goto outofseq; } /* typ, seqid and dst are ok here. */ if (ah->arc_flag == af->af_lastseen) { m_freem(m); return NULL; } if (ah->arc_flag == af->af_lastseen + 2) { /* ok, this is next fragment */ af->af_lastseen = ah->arc_flag; m_adj(m,ARC_HDRNEWLEN); /* * m_cat might free the first mbuf (with pkthdr) * in 2nd chain; therefore: */ newflen = m->m_pkthdr.len; m_cat(m1,m); m1->m_pkthdr.len += newflen; /* is it the last one? */ if (af->af_lastseen > af->af_maxflag) { af->af_packet = NULL; return(m1); } else return NULL; } s = "other reason"; /* if all else fails, it is out of sequence, too */ } outofseq: if (m1) { m_freem(m1); af->af_packet = NULL; } if (m) m_freem(m); log(LOG_INFO,"%s: got out of seq. packet: %s\n", ifp->if_xname, s); return NULL; } /* * return 1 if Packet Header Definition Standard, else 0. * For now: old IP, old ARP aren't obviously. Lacking correct information, * we guess that besides new IP and new ARP also IPX and APPLETALK are PHDS. * (Apple and Novell corporations were involved, among others, in PHDS work). * Easiest is to assume that everybody else uses that, too. */ int arc_isphds(u_int8_t type) { return (type != ARCTYPE_IP_OLD && type != ARCTYPE_ARP_OLD && type != ARCTYPE_DIAGNOSE); } /* * Process a received Arcnet packet; * the packet is in the mbuf chain m with * the ARCnet header. */ void arc_input(struct ifnet *ifp, struct mbuf *m) { struct arc_header *ah; int isr; u_int8_t atype; if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); return; } /* possibly defragment: */ m = arc_defrag(ifp, m); if (m == NULL) return; BPF_MTAP(ifp, m); ah = mtod(m, struct arc_header *); /* does this belong to us? */ if ((ifp->if_flags & IFF_PROMISC) == 0 && ah->arc_dhost != arcbroadcastaddr && ah->arc_dhost != ARC_LLADDR(ifp)) { m_freem(m); return; } if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if (ah->arc_dhost == arcbroadcastaddr) { m->m_flags |= M_BCAST|M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } atype = ah->arc_type; switch (atype) { #ifdef INET case ARCTYPE_IP: m_adj(m, ARC_HDRNEWLEN); isr = NETISR_IP; break; case ARCTYPE_IP_OLD: m_adj(m, ARC_HDRLEN); isr = NETISR_IP; break; case ARCTYPE_ARP: if (ifp->if_flags & IFF_NOARP) { /* Discard packet if ARP is disabled on interface */ m_freem(m); return; } m_adj(m, ARC_HDRNEWLEN); isr = NETISR_ARP; #ifdef ARCNET_ALLOW_BROKEN_ARP mtod(m, struct arphdr *)->ar_pro = htons(ETHERTYPE_IP); #endif break; case ARCTYPE_ARP_OLD: if (ifp->if_flags & IFF_NOARP) { /* Discard packet if ARP is disabled on interface */ m_freem(m); return; } m_adj(m, ARC_HDRLEN); isr = NETISR_ARP; #ifdef ARCNET_ALLOW_BROKEN_ARP mtod(m, struct arphdr *)->ar_pro = htons(ETHERTYPE_IP); #endif break; #endif #ifdef INET6 case ARCTYPE_INET6: m_adj(m, ARC_HDRNEWLEN); isr = NETISR_IPV6; break; #endif default: m_freem(m); return; } M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); } /* * Register (new) link level address. */ void arc_storelladdr(struct ifnet *ifp, u_int8_t lla) { ARC_LLADDR(ifp) = lla; } /* * Perform common duties while attaching to interface list */ void arc_ifattach(struct ifnet *ifp, u_int8_t lla) { struct ifaddr *ifa; struct sockaddr_dl *sdl; struct arccom *ac; if_attach(ifp); ifp->if_addrlen = 1; ifp->if_hdrlen = ARC_HDRLEN; ifp->if_mtu = 1500; ifp->if_resolvemulti = arc_resolvemulti; if (ifp->if_baudrate == 0) ifp->if_baudrate = 2500000; ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_ARCNET; sdl->sdl_alen = ifp->if_addrlen; if (ifp->if_flags & IFF_BROADCAST) ifp->if_flags |= IFF_MULTICAST|IFF_ALLMULTI; ac = (struct arccom *)ifp->if_l2com; ac->ac_seqid = (time_second) & 0xFFFF; /* try to make seqid unique */ if (lla == 0) { /* XXX this message isn't entirely clear, to me -- cgd */ log(LOG_ERR,"%s: link address 0 reserved for broadcasts. Please change it and ifconfig %s down up\n", ifp->if_xname, ifp->if_xname); } arc_storelladdr(ifp, lla); ifp->if_broadcastaddr = &arcbroadcastaddr; bpfattach(ifp, DLT_ARCNET, ARC_HDRLEN); } void arc_ifdetach(struct ifnet *ifp) { bpfdetach(ifp); if_detach(ifp); } int arc_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: { struct sockaddr *sa; sa = (struct sockaddr *) &ifr->ifr_data; *(u_int8_t *)sa->sa_data = ARC_LLADDR(ifp); } break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifr == NULL) error = EAFNOSUPPORT; else { switch (ifr->ifr_addr.sa_family) { case AF_INET: case AF_INET6: error = 0; break; default: error = EAFNOSUPPORT; break; } } break; case SIOCSIFMTU: /* * Set the interface MTU. * mtu can't be larger than ARCMTU for RFC1051 * and can't be larger than ARC_PHDS_MTU */ if (((ifp->if_flags & IFF_LINK0) && ifr->ifr_mtu > ARCMTU) || ifr->ifr_mtu > ARC_PHDS_MAXMTU) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; break; } return (error); } /* based on ether_resolvemulti() */ int arc_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa) { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; if (*LLADDR(sdl) != arcbroadcastaddr) return EADDRNOTAVAIL; *llsa = 0; return 0; #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ARC_ADDR_LEN; *LLADDR(sdl) = 0; *llsa = (struct sockaddr *)sdl; return 0; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.) */ ifp->if_flags |= IFF_ALLMULTI; *llsa = 0; return 0; } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ARC_ADDR_LEN; *LLADDR(sdl) = 0; *llsa = (struct sockaddr *)sdl; return 0; #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return EAFNOSUPPORT; } } static MALLOC_DEFINE(M_ARCCOM, "arccom", "ARCNET interface internals"); static void* arc_alloc(u_char type, struct ifnet *ifp) { struct arccom *ac; ac = malloc(sizeof(struct arccom), M_ARCCOM, M_WAITOK | M_ZERO); ac->ac_ifp = ifp; return (ac); } static void arc_free(void *com, u_char type) { free(com, M_ARCCOM); } static int arc_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: if_register_com_alloc(IFT_ARCNET, arc_alloc, arc_free); break; case MOD_UNLOAD: if_deregister_com_alloc(IFT_ARCNET); break; default: return EOPNOTSUPP; } return (0); } static moduledata_t arc_mod = { "arcnet", arc_modevent, 0 }; DECLARE_MODULE(arcnet, arc_mod, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(arcnet, 1); Index: projects/clang380-import/sys/net/if_ethersubr.c =================================================================== --- projects/clang380-import/sys/net/if_ethersubr.c (revision 293686) +++ projects/clang380-import/sys/net/if_ethersubr.c (revision 293687) @@ -1,1200 +1,1198 @@ /*- * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_netgraph.h" #include "opt_mbuf_profiling.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #endif #ifdef INET6 #include #endif #include #ifdef CTASSERT CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2); CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN); #endif VNET_DEFINE(struct pfil_head, link_pfil_hook); /* Packet filter hooks */ /* netgraph node hooks for ng_ether(4) */ void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp); void (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m); int (*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp); void (*ng_ether_attach_p)(struct ifnet *ifp); void (*ng_ether_detach_p)(struct ifnet *ifp); void (*vlan_input_p)(struct ifnet *, struct mbuf *); /* if_bridge(4) support */ struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *); int (*bridge_output_p)(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); void (*bridge_dn_p)(struct mbuf *, struct ifnet *); /* if_lagg(4) support */ struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static int ether_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); #ifdef VIMAGE static void ether_reassign(struct ifnet *, struct vnet *, char *); #endif static int ether_requestencap(struct ifnet *, struct if_encap_req *); #define ETHER_IS_BROADCAST(addr) \ (bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0) #define senderr(e) do { error = (e); goto bad;} while (0) static void update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst) { int csum_flags = 0; if (src->m_pkthdr.csum_flags & CSUM_IP) csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID); if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA) csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR); if (src->m_pkthdr.csum_flags & CSUM_SCTP) csum_flags |= CSUM_SCTP_VALID; dst->m_pkthdr.csum_flags |= csum_flags; if (csum_flags & CSUM_DATA_VALID) dst->m_pkthdr.csum_data = 0xffff; } /* * Handle link-layer encapsulation requests. */ static int ether_requestencap(struct ifnet *ifp, struct if_encap_req *req) { struct ether_header *eh; struct arphdr *ah; uint16_t etype; const u_char *lladdr; if (req->rtype != IFENCAP_LL) return (EOPNOTSUPP); if (req->bufsize < ETHER_HDR_LEN) return (ENOMEM); eh = (struct ether_header *)req->buf; lladdr = req->lladdr; req->lladdr_off = 0; switch (req->family) { case AF_INET: etype = htons(ETHERTYPE_IP); break; case AF_INET6: etype = htons(ETHERTYPE_IPV6); break; case AF_ARP: ah = (struct arphdr *)req->hdata; ah->ar_hrd = htons(ARPHRD_ETHER); switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: etype = htons(ETHERTYPE_REVARP); break; case ARPOP_REQUEST: case ARPOP_REPLY: default: etype = htons(ETHERTYPE_ARP); break; } if (req->flags & IFENCAP_FLAG_BROADCAST) lladdr = ifp->if_broadcastaddr; break; default: return (EAFNOSUPPORT); } memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type)); memcpy(eh->ether_dhost, lladdr, ETHER_ADDR_LEN); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); req->bufsize = sizeof(struct ether_header); return (0); } static int ether_resolve_addr(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro, u_char *phdr, uint32_t *pflags) { struct ether_header *eh; - struct rtentry *rt; uint32_t lleflags = 0; int error = 0; #if defined(INET) || defined(INET6) uint16_t etype; #endif eh = (struct ether_header *)phdr; switch (dst->sa_family) { #ifdef INET case AF_INET: if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) error = arpresolve(ifp, 0, m, dst, phdr, &lleflags); else { if (m->m_flags & M_BCAST) memcpy(eh->ether_dhost, ifp->if_broadcastaddr, ETHER_ADDR_LEN); else { const struct in_addr *a; a = &(((const struct sockaddr_in *)dst)->sin_addr); ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost); } etype = htons(ETHERTYPE_IP); memcpy(&eh->ether_type, &etype, sizeof(etype)); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); } break; #endif #ifdef INET6 case AF_INET6: if ((m->m_flags & M_MCAST) == 0) error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags); else { const struct in6_addr *a6; a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr); ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost); etype = htons(ETHERTYPE_IPV6); memcpy(&eh->ether_type, &etype, sizeof(etype)); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); } break; #endif default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); if (m != NULL) m_freem(m); return (EAFNOSUPPORT); } if (error == EHOSTDOWN) { - rt = (ro != NULL) ? ro->ro_rt : NULL; - if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) != 0) + if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0) error = EHOSTUNREACH; } if (error != 0) return (error); *pflags = RT_MAY_LOOP; if (lleflags & LLE_IFADDR) *pflags |= RT_L2_ME; return (0); } /* * Ethernet output routine. * Encapsulate a packet of type family for the local net. * Use trailer local net encapsulation if enough data in first * packet leaves a multiple of 512 bytes of data in remainder. */ int ether_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { int error = 0; char linkhdr[ETHER_HDR_LEN], *phdr; struct ether_header *eh; struct pf_mtag *t; int loop_copy = 1; int hlen; /* link layer header length */ uint32_t pflags; phdr = NULL; pflags = 0; if (ro != NULL) { phdr = ro->ro_prepend; hlen = ro->ro_plen; pflags = ro->ro_flags; } #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) senderr(error); #endif M_PROFILE(m); if (ifp->if_flags & IFF_MONITOR) senderr(ENETDOWN); if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); if (phdr == NULL) { /* No prepend data supplied. Try to calculate ourselves. */ phdr = linkhdr; hlen = ETHER_HDR_LEN; error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags); if (error != 0) return (error == EWOULDBLOCK ? 0 : error); } if ((pflags & RT_L2_ME) != 0) { update_mbuf_csumflags(m, m); return (if_simloop(ifp, m, dst->sa_family, 0)); } loop_copy = pflags & RT_MAY_LOOP; /* * Add local net header. If no space in first mbuf, * allocate another. * * Note that we do prepend regardless of RT_HAS_HEADER flag. * This is done because BPF code shifts m_data pointer * to the end of ethernet header prior to calling if_output(). */ M_PREPEND(m, hlen, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); if ((pflags & RT_HAS_HEADER) == 0) { eh = mtod(m, struct ether_header *); memcpy(eh, phdr, hlen); } /* * If a simplex interface, and the packet is being sent to our * Ethernet address or a broadcast address, loopback a copy. * XXX To make a simplex device behave exactly like a duplex * device, we should copy in the case of sending to our own * ethernet address (thus letting the original actually appear * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) && ((t = pf_find_mtag(m)) == NULL || !t->routed)) { struct mbuf *n; /* * Because if_simloop() modifies the packet, we need a * writable copy through m_dup() instead of a readonly * one as m_copy[m] would give us. The alternative would * be to modify if_simloop() to handle the readonly mbuf, * but performancewise it is mostly equivalent (trading * extra data copying vs. extra locking). * * XXX This is a local workaround. A number of less * often used kernel parts suffer from the same bug. * See PR kern/105943 for a proposed general solution. */ if ((n = m_dup(m, M_NOWAIT)) != NULL) { update_mbuf_csumflags(m, n); (void)if_simloop(ifp, n, dst->sa_family, hlen); } else if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); } /* * Bridges require special output handling. */ if (ifp->if_bridge) { BRIDGE_OUTPUT(ifp, m, error); return (error); } #if defined(INET) || defined(INET6) if (ifp->if_carp && (error = (*carp_output_p)(ifp, m, dst))) goto bad; #endif /* Handle ng_ether(4) processing, if any */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_output_p != NULL, ("ng_ether_output_p is NULL")); if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) { bad: if (m != NULL) m_freem(m); return (error); } if (m == NULL) return (0); } /* Continue with link-layer output */ return ether_output_frame(ifp, m); } /* * Ethernet link layer output routine to send a raw frame to the device. * * This assumes that the 14 byte Ethernet header is present and contiguous * in the first mbuf (if BRIDGE'ing). */ int ether_output_frame(struct ifnet *ifp, struct mbuf *m) { int i; if (PFIL_HOOKED(&V_link_pfil_hook)) { i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_OUT, NULL); if (i != 0) return (EACCES); if (m == NULL) return (0); } /* * Queue message on interface, update output statistics if * successful, and start output if interface not yet active. */ return ((ifp->if_transmit)(ifp, m)); } #if defined(INET) || defined(INET6) #endif /* * Process a received Ethernet packet; the packet is in the * mbuf chain m with the ethernet header at the front. */ static void ether_input_internal(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; u_short etype; if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); return; } #ifdef DIAGNOSTIC if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n"); m_freem(m); return; } #endif if (m->m_len < ETHER_HDR_LEN) { /* XXX maybe should pullup? */ if_printf(ifp, "discard frame w/o leading ethernet " "header (len %u pkt len %u)\n", m->m_len, m->m_pkthdr.len); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); random_harvest_queue(m, sizeof(*m), 2, RANDOM_NET_ETHER); CURVNET_SET_QUIET(ifp->if_vnet); if (ETHER_IS_MULTICAST(eh->ether_dhost)) { if (ETHER_IS_BROADCAST(eh->ether_dhost)) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } #ifdef MAC /* * Tag the mbuf with an appropriate MAC label before any other * consumers can get to it. */ mac_ifnet_create_mbuf(ifp, m); #endif /* * Give bpf a chance at the packet. */ ETHER_BPF_MTAP(ifp, m); /* * If the CRC is still on the packet, trim it off. We do this once * and once only in case we are re-entered. Nothing else on the * Ethernet receive path expects to see the FCS. */ if (m->m_flags & M_HASFCS) { m_adj(m, -ETHER_CRC_LEN); m->m_flags &= ~M_HASFCS; } if (!(ifp->if_capenable & IFCAP_HWSTATS)) if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* Allow monitor mode to claim this frame, after stats are updated. */ if (ifp->if_flags & IFF_MONITOR) { m_freem(m); CURVNET_RESTORE(); return; } /* Handle input from a lagg(4) port */ if (ifp->if_type == IFT_IEEE8023ADLAG) { KASSERT(lagg_input_p != NULL, ("%s: if_lagg not loaded!", __func__)); m = (*lagg_input_p)(ifp, m); if (m != NULL) ifp = m->m_pkthdr.rcvif; else { CURVNET_RESTORE(); return; } } /* * If the hardware did not process an 802.1Q tag, do this now, * to allow 802.1P priority frames to be passed to the main input * path correctly. * TODO: Deal with Q-in-Q frames, but not arbitrary nesting levels. */ if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_VLAN) { struct ether_vlan_header *evl; if (m->m_len < sizeof(*evl) && (m = m_pullup(m, sizeof(*evl))) == NULL) { #ifdef DIAGNOSTIC if_printf(ifp, "cannot pullup VLAN header\n"); #endif if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); CURVNET_RESTORE(); return; } evl = mtod(m, struct ether_vlan_header *); m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); m->m_flags |= M_VLANTAG; bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); eh = mtod(m, struct ether_header *); } M_SETFIB(m, ifp->if_fib); /* Allow ng_ether(4) to claim this frame. */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_input_p != NULL, ("%s: ng_ether_input_p is NULL", __func__)); m->m_flags &= ~M_PROMISC; (*ng_ether_input_p)(ifp, &m); if (m == NULL) { CURVNET_RESTORE(); return; } eh = mtod(m, struct ether_header *); } /* * Allow if_bridge(4) to claim this frame. * The BRIDGE_INPUT() macro will update ifp if the bridge changed it * and the frame should be delivered locally. */ if (ifp->if_bridge != NULL) { m->m_flags &= ~M_PROMISC; BRIDGE_INPUT(ifp, m); if (m == NULL) { CURVNET_RESTORE(); return; } eh = mtod(m, struct ether_header *); } #if defined(INET) || defined(INET6) /* * Clear M_PROMISC on frame so that carp(4) will see it when the * mbuf flows up to Layer 3. * FreeBSD's implementation of carp(4) uses the inprotosw * to dispatch IPPROTO_CARP. carp(4) also allocates its own * Ethernet addresses of the form 00:00:5e:00:01:xx, which * is outside the scope of the M_PROMISC test below. * TODO: Maintain a hash table of ethernet addresses other than * ether_dhost which may be active on this ifp. */ if (ifp->if_carp && (*carp_forus_p)(ifp, eh->ether_dhost)) { m->m_flags &= ~M_PROMISC; } else #endif { /* * If the frame received was not for our MAC address, set the * M_PROMISC flag on the mbuf chain. The frame may need to * be seen by the rest of the Ethernet input path in case of * re-entry (e.g. bridge, vlan, netgraph) but should not be * seen by upper protocol layers. */ if (!ETHER_IS_MULTICAST(eh->ether_dhost) && bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0) m->m_flags |= M_PROMISC; } ether_demux(ifp, m); CURVNET_RESTORE(); } /* * Ethernet input dispatch; by default, direct dispatch here regardless of * global configuration. However, if RSS is enabled, hook up RSS affinity * so that when deferred or hybrid dispatch is enabled, we can redistribute * load based on RSS. * * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or * not it had already done work distribution via multi-queue. Then we could * direct dispatch in the event load balancing was already complete and * handle the case of interfaces with different capabilities better. * * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions * at multiple layers? * * XXXRW: For now, enable all this only if RSS is compiled in, although it * works fine without RSS. Need to characterise the performance overhead * of the detour through the netisr code in the event the result is always * direct dispatch. */ static void ether_nh_input(struct mbuf *m) { M_ASSERTPKTHDR(m); KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: NULL interface pointer", __func__)); ether_input_internal(m->m_pkthdr.rcvif, m); } static struct netisr_handler ether_nh = { .nh_name = "ether", .nh_handler = ether_nh_input, .nh_proto = NETISR_ETHER, #ifdef RSS .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_DIRECT, .nh_m2cpuid = rss_m2cpuid, #else .nh_policy = NETISR_POLICY_SOURCE, .nh_dispatch = NETISR_DISPATCH_DIRECT, #endif }; static void ether_init(__unused void *arg) { netisr_register(ðer_nh); } SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL); static void vnet_ether_init(__unused void *arg) { int i; /* Initialize packet filter hooks. */ V_link_pfil_hook.ph_type = PFIL_TYPE_AF; V_link_pfil_hook.ph_af = AF_LINK; if ((i = pfil_head_register(&V_link_pfil_hook)) != 0) printf("%s: WARNING: unable to register pfil link hook, " "error %d\n", __func__, i); } VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_ether_init, NULL); static void vnet_ether_destroy(__unused void *arg) { int i; if ((i = pfil_head_unregister(&V_link_pfil_hook)) != 0) printf("%s: WARNING: unable to unregister pfil link hook, " "error %d\n", __func__, i); } VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_ether_destroy, NULL); static void ether_input(struct ifnet *ifp, struct mbuf *m) { struct mbuf *mn; /* * The drivers are allowed to pass in a chain of packets linked with * m_nextpkt. We split them up into separate packets here and pass * them up. This allows the drivers to amortize the receive lock. */ while (m) { mn = m->m_nextpkt; m->m_nextpkt = NULL; /* * We will rely on rcvif being set properly in the deferred context, * so assert it is correct here. */ KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__)); netisr_dispatch(NETISR_ETHER, m); m = mn; } } /* * Upper layer processing for a received Ethernet packet. */ void ether_demux(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; int i, isr; u_short ether_type; KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__)); /* Do not grab PROMISC frames in case we are re-entered. */ if (PFIL_HOOKED(&V_link_pfil_hook) && !(m->m_flags & M_PROMISC)) { i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_IN, NULL); if (i != 0 || m == NULL) return; } eh = mtod(m, struct ether_header *); ether_type = ntohs(eh->ether_type); /* * If this frame has a VLAN tag other than 0, call vlan_input() * if its module is loaded. Otherwise, drop. */ if ((m->m_flags & M_VLANTAG) && EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) { if (ifp->if_vlantrunk == NULL) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); m_freem(m); return; } KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!", __func__)); /* Clear before possibly re-entering ether_input(). */ m->m_flags &= ~M_PROMISC; (*vlan_input_p)(ifp, m); return; } /* * Pass promiscuously received frames to the upper layer if the user * requested this by setting IFF_PPROMISC. Otherwise, drop them. */ if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) { m_freem(m); return; } /* * Reset layer specific mbuf flags to avoid confusing upper layers. * Strip off Ethernet header. */ m->m_flags &= ~M_VLANTAG; m_clrprotoflags(m); m_adj(m, ETHER_HDR_LEN); /* * Dispatch frame to upper layer. */ switch (ether_type) { #ifdef INET case ETHERTYPE_IP: isr = NETISR_IP; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) { /* Discard packet if ARP is disabled on interface */ m_freem(m); return; } isr = NETISR_ARP; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; break; #endif default: goto discard; } netisr_dispatch(isr, m); return; discard: /* * Packet is to be discarded. If netgraph is present, * hand the packet to it for last chance processing; * otherwise dispose of it. */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_input_orphan_p != NULL, ("ng_ether_input_orphan_p is NULL")); /* * Put back the ethernet header so netgraph has a * consistent view of inbound packets. */ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); (*ng_ether_input_orphan_p)(ifp, m); return; } m_freem(m); } /* * Convert Ethernet address to printable (loggable) representation. * This routine is for compatibility; it's better to just use * * printf("%6D", , ":"); * * since there's no static buffer involved. */ char * ether_sprintf(const u_char *ap) { static char etherbuf[18]; snprintf(etherbuf, sizeof (etherbuf), "%6D", ap, ":"); return (etherbuf); } /* * Perform common duties while attaching to interface list */ void ether_ifattach(struct ifnet *ifp, const u_int8_t *lla) { int i; struct ifaddr *ifa; struct sockaddr_dl *sdl; ifp->if_addrlen = ETHER_ADDR_LEN; ifp->if_hdrlen = ETHER_HDR_LEN; if_attach(ifp); ifp->if_mtu = ETHERMTU; ifp->if_output = ether_output; ifp->if_input = ether_input; ifp->if_resolvemulti = ether_resolvemulti; ifp->if_requestencap = ether_requestencap; #ifdef VIMAGE ifp->if_reassign = ether_reassign; #endif if (ifp->if_baudrate == 0) ifp->if_baudrate = IF_Mbps(10); /* just a default */ ifp->if_broadcastaddr = etherbroadcastaddr; ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_ETHER; sdl->sdl_alen = ifp->if_addrlen; bcopy(lla, LLADDR(sdl), ifp->if_addrlen); bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN); if (ng_ether_attach_p != NULL) (*ng_ether_attach_p)(ifp); /* Announce Ethernet MAC address if non-zero. */ for (i = 0; i < ifp->if_addrlen; i++) if (lla[i] != 0) break; if (i != ifp->if_addrlen) if_printf(ifp, "Ethernet address: %6D\n", lla, ":"); uuid_ether_add(LLADDR(sdl)); } /* * Perform common duties while detaching an Ethernet interface */ void ether_ifdetach(struct ifnet *ifp) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr); uuid_ether_del(LLADDR(sdl)); if (ifp->if_l2com != NULL) { KASSERT(ng_ether_detach_p != NULL, ("ng_ether_detach_p is NULL")); (*ng_ether_detach_p)(ifp); } bpfdetach(ifp); if_detach(ifp); } #ifdef VIMAGE void ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused) { if (ifp->if_l2com != NULL) { KASSERT(ng_ether_detach_p != NULL, ("ng_ether_detach_p is NULL")); (*ng_ether_detach_p)(ifp); } if (ng_ether_attach_p != NULL) { CURVNET_SET_QUIET(new_vnet); (*ng_ether_attach_p)(ifp); CURVNET_RESTORE(); } } #endif SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet"); #if 0 /* * This is for reference. We have a table-driven version * of the little-endian crc32 generator, which is faster * than the double-loop. */ uint32_t ether_crc32_le(const uint8_t *buf, size_t len) { size_t i; uint32_t crc; int bit; uint8_t data; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) { carry = (crc ^ data) & 1; crc >>= 1; if (carry) crc = (crc ^ ETHER_CRC_POLY_LE); } } return (crc); } #else uint32_t ether_crc32_le(const uint8_t *buf, size_t len) { static const uint32_t crctab[] = { 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; size_t i; uint32_t crc; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { crc ^= buf[i]; crc = (crc >> 4) ^ crctab[crc & 0xf]; crc = (crc >> 4) ^ crctab[crc & 0xf]; } return (crc); } #endif uint32_t ether_crc32_be(const uint8_t *buf, size_t len) { size_t i; uint32_t crc, carry; int bit; uint8_t data; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) { carry = ((crc & 0x80000000) ? 1 : 0) ^ (data & 0x01); crc <<= 1; if (carry) crc = (crc ^ ETHER_CRC_POLY_BE) | carry; } } return (crc); } int ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: { struct sockaddr *sa; sa = (struct sockaddr *) & ifr->ifr_data; bcopy(IF_LLADDR(ifp), (caddr_t) sa->sa_data, ETHER_ADDR_LEN); } break; case SIOCSIFMTU: /* * Set the interface MTU. */ if (ifr->ifr_mtu > ETHERMTU) { error = EINVAL; } else { ifp->if_mtu = ifr->ifr_mtu; } break; default: error = EINVAL; /* XXX netbsd has ENOTTY??? */ break; } return (error); } static int ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa) { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif u_char *e_addr; switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; e_addr = LLADDR(sdl); if (!ETHER_IS_MULTICAST(e_addr)) return EADDRNOTAVAIL; *llsa = 0; return 0; #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ETHER_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.) */ ifp->if_flags |= IFF_ALLMULTI; *llsa = 0; return 0; } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ETHER_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return EAFNOSUPPORT; } } static moduledata_t ether_mod = { .name = "ether", }; void ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen) { struct ether_vlan_header vlan; struct mbuf mv, mb; KASSERT((m->m_flags & M_VLANTAG) != 0, ("%s: vlan information not present", __func__)); KASSERT(m->m_len >= sizeof(struct ether_header), ("%s: mbuf not large enough for header", __func__)); bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header)); vlan.evl_proto = vlan.evl_encap_proto; vlan.evl_encap_proto = htons(ETHERTYPE_VLAN); vlan.evl_tag = htons(m->m_pkthdr.ether_vtag); m->m_len -= sizeof(struct ether_header); m->m_data += sizeof(struct ether_header); /* * If a data link has been supplied by the caller, then we will need to * re-create a stack allocated mbuf chain with the following structure: * * (1) mbuf #1 will contain the supplied data link * (2) mbuf #2 will contain the vlan header * (3) mbuf #3 will contain the original mbuf's packet data * * Otherwise, submit the packet and vlan header via bpf_mtap2(). */ if (data != NULL) { mv.m_next = m; mv.m_data = (caddr_t)&vlan; mv.m_len = sizeof(vlan); mb.m_next = &mv; mb.m_data = data; mb.m_len = dlen; bpf_mtap(bp, &mb); } else bpf_mtap2(bp, &vlan, sizeof(vlan), m); m->m_len += sizeof(struct ether_header); m->m_data -= sizeof(struct ether_header); } struct mbuf * ether_vlanencap(struct mbuf *m, uint16_t tag) { struct ether_vlan_header *evl; M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); if (m == NULL) return (NULL); /* M_PREPEND takes care of m_len, m_pkthdr.len for us */ if (m->m_len < sizeof(*evl)) { m = m_pullup(m, sizeof(*evl)); if (m == NULL) return (NULL); } /* * Transform the Ethernet header into an Ethernet header * with 802.1Q encapsulation. */ evl = mtod(m, struct ether_vlan_header *); bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); evl->evl_encap_proto = htons(ETHERTYPE_VLAN); evl->evl_tag = htons(tag); return (m); } DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(ether, 1); Index: projects/clang380-import/sys/net/if_fddisubr.c =================================================================== --- projects/clang380-import/sys/net/if_fddisubr.c (revision 293686) +++ projects/clang380-import/sys/net/if_fddisubr.c (revision 293687) @@ -1,671 +1,670 @@ /*- * Copyright (c) 1995, 1996 * Matt Thomas . All rights reserved. * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: if_ethersubr.c,v 1.5 1994/12/13 22:31:45 wollman Exp * $FreeBSD$ */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #endif #ifdef INET6 #include #endif #ifdef DECNET #include #endif #include static const u_char fddibroadcastaddr[FDDI_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static int fddi_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); static int fddi_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static void fddi_input(struct ifnet *ifp, struct mbuf *m); #define senderr(e) do { error = (e); goto bad; } while (0) /* * FDDI output routine. * Encapsulate a packet of type family for the local net. * Use trailer local net encapsulation if enough data in first * packet leaves a multiple of 512 bytes of data in remainder. */ static int fddi_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_int16_t type; int loop_copy = 0, error = 0, hdrcmplt = 0; u_char esrc[FDDI_ADDR_LEN], edst[FDDI_ADDR_LEN]; struct fddi_header *fh; #if defined(INET) || defined(INET6) int is_gw = 0; #endif #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) senderr(error); #endif if (ifp->if_flags & IFF_MONITOR) senderr(ENETDOWN); if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); getmicrotime(&ifp->if_lastchange); #if defined(INET) || defined(INET6) - if (ro != NULL && ro->ro_rt != NULL && - (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0) - is_gw = 1; + if (ro != NULL) + is_gw = (ro->ro_flags & RT_HAS_GW) != 0; #endif switch (dst->sa_family) { #ifdef INET case AF_INET: { error = arpresolve(ifp, is_gw, m, dst, edst, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IP); break; } case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_ETHER); loop_copy = -1; /* if this is for us, don't do it */ switch (ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: type = htons(ETHERTYPE_REVARP); break; case ARPOP_REQUEST: case ARPOP_REPLY: default: type = htons(ETHERTYPE_ARP); break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, edst, FDDI_ADDR_LEN); else bcopy(ar_tha(ah), edst, FDDI_ADDR_LEN); } break; #endif /* INET */ #ifdef INET6 case AF_INET6: error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IPV6); break; #endif /* INET6 */ case pseudo_AF_HDRCMPLT: { const struct ether_header *eh; hdrcmplt = 1; eh = (const struct ether_header *)dst->sa_data; bcopy(eh->ether_shost, esrc, FDDI_ADDR_LEN); /* FALLTHROUGH */ } case AF_UNSPEC: { const struct ether_header *eh; loop_copy = -1; eh = (const struct ether_header *)dst->sa_data; bcopy(eh->ether_dhost, edst, FDDI_ADDR_LEN); if (*edst & 1) m->m_flags |= (M_BCAST|M_MCAST); type = eh->ether_type; break; } case AF_IMPLINK: { fh = mtod(m, struct fddi_header *); error = EPROTONOSUPPORT; switch (fh->fddi_fc & (FDDIFC_C|FDDIFC_L|FDDIFC_F)) { case FDDIFC_LLC_ASYNC: { /* legal priorities are 0 through 7 */ if ((fh->fddi_fc & FDDIFC_Z) > 7) goto bad; break; } case FDDIFC_LLC_SYNC: { /* FDDIFC_Z bits reserved, must be zero */ if (fh->fddi_fc & FDDIFC_Z) goto bad; break; } case FDDIFC_SMT: { /* FDDIFC_Z bits must be non zero */ if ((fh->fddi_fc & FDDIFC_Z) == 0) goto bad; break; } default: { /* anything else is too dangerous */ goto bad; } } error = 0; if (fh->fddi_dhost[0] & 1) m->m_flags |= (M_BCAST|M_MCAST); goto queue_it; } default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); senderr(EAFNOSUPPORT); } /* * Add LLC header. */ if (type != 0) { struct llc *l; M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT); if (m == 0) senderr(ENOBUFS); l = mtod(m, struct llc *); l->llc_control = LLC_UI; l->llc_dsap = l->llc_ssap = LLC_SNAP_LSAP; l->llc_snap.org_code[0] = l->llc_snap.org_code[1] = l->llc_snap.org_code[2] = 0; l->llc_snap.ether_type = htons(type); } /* * Add local net header. If no space in first mbuf, * allocate another. */ M_PREPEND(m, FDDI_HDR_LEN, M_NOWAIT); if (m == 0) senderr(ENOBUFS); fh = mtod(m, struct fddi_header *); fh->fddi_fc = FDDIFC_LLC_ASYNC|FDDIFC_LLC_PRIO4; bcopy((caddr_t)edst, (caddr_t)fh->fddi_dhost, FDDI_ADDR_LEN); queue_it: if (hdrcmplt) bcopy((caddr_t)esrc, (caddr_t)fh->fddi_shost, FDDI_ADDR_LEN); else bcopy(IF_LLADDR(ifp), (caddr_t)fh->fddi_shost, FDDI_ADDR_LEN); /* * If a simplex interface, and the packet is being sent to our * Ethernet address or a broadcast address, loopback a copy. * XXX To make a simplex device behave exactly like a duplex * device, we should copy in the case of sending to our own * ethernet address (thus letting the original actually appear * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) { if ((m->m_flags & M_BCAST) || (loop_copy > 0)) { struct mbuf *n; n = m_copy(m, 0, (int)M_COPYALL); (void) if_simloop(ifp, n, dst->sa_family, FDDI_HDR_LEN); } else if (bcmp(fh->fddi_dhost, fh->fddi_shost, FDDI_ADDR_LEN) == 0) { (void) if_simloop(ifp, m, dst->sa_family, FDDI_HDR_LEN); return (0); /* XXX */ } } error = (ifp->if_transmit)(ifp, m); if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); bad: if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (m) m_freem(m); return (error); } /* * Process a received FDDI packet. */ static void fddi_input(ifp, m) struct ifnet *ifp; struct mbuf *m; { int isr; struct llc *l; struct fddi_header *fh; /* * Do consistency checks to verify assumptions * made by code past this point. */ if ((m->m_flags & M_PKTHDR) == 0) { if_printf(ifp, "discard frame w/o packet header\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } if (m->m_pkthdr.rcvif == NULL) { if_printf(ifp, "discard frame w/o interface pointer\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } m = m_pullup(m, FDDI_HDR_LEN); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } fh = mtod(m, struct fddi_header *); /* * Discard packet if interface is not up. */ if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) goto dropanyway; /* * Give bpf a chance at the packet. */ BPF_MTAP(ifp, m); /* * Interface marked for monitoring; discard packet. */ if (ifp->if_flags & IFF_MONITOR) { m_freem(m); return; } #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif /* * Update interface statistics. */ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); getmicrotime(&ifp->if_lastchange); /* * Discard non local unicast packets when interface * is in promiscuous mode. */ if ((ifp->if_flags & IFF_PROMISC) && ((fh->fddi_dhost[0] & 1) == 0) && (bcmp(IF_LLADDR(ifp), (caddr_t)fh->fddi_dhost, FDDI_ADDR_LEN) != 0)) goto dropanyway; /* * Set mbuf flags for bcast/mcast. */ if (fh->fddi_dhost[0] & 1) { if (bcmp(ifp->if_broadcastaddr, fh->fddi_dhost, FDDI_ADDR_LEN) == 0) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } #ifdef M_LINK0 /* * If this has a LLC priority of 0, then mark it so upper * layers have a hint that it really came via a FDDI/Ethernet * bridge. */ if ((fh->fddi_fc & FDDIFC_LLC_PRIO7) == FDDIFC_LLC_PRIO0) m->m_flags |= M_LINK0; #endif /* Strip off FDDI header. */ m_adj(m, FDDI_HDR_LEN); m = m_pullup(m, LLC_SNAPFRAMELEN); if (m == 0) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } l = mtod(m, struct llc *); switch (l->llc_dsap) { case LLC_SNAP_LSAP: { u_int16_t type; if ((l->llc_control != LLC_UI) || (l->llc_ssap != LLC_SNAP_LSAP)) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } if (l->llc_snap.org_code[0] != 0 || l->llc_snap.org_code[1] != 0 || l->llc_snap.org_code[2] != 0) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } type = ntohs(l->llc_snap.ether_type); m_adj(m, LLC_SNAPFRAMELEN); switch (type) { #ifdef INET case ETHERTYPE_IP: isr = NETISR_IP; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) goto dropanyway; isr = NETISR_ARP; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; break; #endif #ifdef DECNET case ETHERTYPE_DECNET: isr = NETISR_DECNET; break; #endif default: /* printf("fddi_input: unknown protocol 0x%x\n", type); */ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } break; } default: /* printf("fddi_input: unknown dsap 0x%x\n", l->llc_dsap); */ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); return; dropanyway: if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if (m) m_freem(m); return; } /* * Perform common duties while attaching to interface list */ void fddi_ifattach(ifp, lla, bpf) struct ifnet *ifp; const u_int8_t *lla; int bpf; { struct ifaddr *ifa; struct sockaddr_dl *sdl; ifp->if_type = IFT_FDDI; ifp->if_addrlen = FDDI_ADDR_LEN; ifp->if_hdrlen = 21; if_attach(ifp); /* Must be called before additional assignments */ ifp->if_mtu = FDDIMTU; ifp->if_output = fddi_output; ifp->if_input = fddi_input; ifp->if_resolvemulti = fddi_resolvemulti; ifp->if_broadcastaddr = fddibroadcastaddr; ifp->if_baudrate = 100000000; #ifdef IFF_NOTRAILERS ifp->if_flags |= IFF_NOTRAILERS; #endif ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_FDDI; sdl->sdl_alen = ifp->if_addrlen; bcopy(lla, LLADDR(sdl), ifp->if_addrlen); if (bpf) bpfattach(ifp, DLT_FDDI, FDDI_HDR_LEN); return; } void fddi_ifdetach(ifp, bpf) struct ifnet *ifp; int bpf; { if (bpf) bpfdetach(ifp); if_detach(ifp); return; } int fddi_ioctl (ifp, command, data) struct ifnet *ifp; u_long command; caddr_t data; { struct ifaddr *ifa; struct ifreq *ifr; int error; ifa = (struct ifaddr *) data; ifr = (struct ifreq *) data; error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: /* before arpwhohas */ ifp->if_init(ifp->if_softc); arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: { struct sockaddr *sa; sa = (struct sockaddr *) & ifr->ifr_data; bcopy(IF_LLADDR(ifp), (caddr_t) sa->sa_data, FDDI_ADDR_LEN); } break; case SIOCSIFMTU: /* * Set the interface MTU. */ if (ifr->ifr_mtu > FDDIMTU) { error = EINVAL; } else { ifp->if_mtu = ifr->ifr_mtu; } break; default: error = EINVAL; break; } return (error); } static int fddi_resolvemulti(ifp, llsa, sa) struct ifnet *ifp; struct sockaddr **llsa; struct sockaddr *sa; { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif u_char *e_addr; switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; e_addr = LLADDR(sdl); if ((e_addr[0] & 1) != 1) return (EADDRNOTAVAIL); *llsa = 0; return (0); #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return (EADDRNOTAVAIL); sdl = link_init_sdl(ifp, *llsa, IFT_FDDI); sdl->sdl_nlen = 0; sdl->sdl_alen = FDDI_ADDR_LEN; sdl->sdl_slen = 0; e_addr = LLADDR(sdl); ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr); *llsa = (struct sockaddr *)sdl; return (0); #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.) */ ifp->if_flags |= IFF_ALLMULTI; *llsa = 0; return (0); } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return (EADDRNOTAVAIL); sdl = link_init_sdl(ifp, *llsa, IFT_FDDI); sdl->sdl_nlen = 0; sdl->sdl_alen = FDDI_ADDR_LEN; sdl->sdl_slen = 0; e_addr = LLADDR(sdl); ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr); *llsa = (struct sockaddr *)sdl; return (0); #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return (EAFNOSUPPORT); } return (0); } static moduledata_t fddi_mod = { "fddi", /* module name */ NULL, /* event handler */ 0 /* extra data */ }; DECLARE_MODULE(fddi, fddi_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(fddi, 1); Index: projects/clang380-import/sys/net/if_fwsubr.c =================================================================== --- projects/clang380-import/sys/net/if_fwsubr.c (revision 293686) +++ projects/clang380-import/sys/net/if_fwsubr.c (revision 293687) @@ -1,860 +1,855 @@ /*- * Copyright (c) 2004 Doug Rabson * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #endif #ifdef INET6 #include #endif #include static MALLOC_DEFINE(M_FWCOM, "fw_com", "firewire interface internals"); struct fw_hwaddr firewire_broadcastaddr = { 0xffffffff, 0xffffffff, 0xff, 0xff, 0xffff, 0xffffffff }; static int firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { struct fw_com *fc = IFP2FWC(ifp); int error, type; struct m_tag *mtag; union fw_encap *enc; struct fw_hwaddr *destfw; uint8_t speed; uint16_t psize, fsize, dsize; struct mbuf *mtail; int unicast, dgl, foff; static int next_dgl; #if defined(INET) || defined(INET6) int is_gw = 0; #endif #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) goto bad; #endif if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) { error = ENETDOWN; goto bad; } #if defined(INET) || defined(INET6) - if (ro != NULL && ro->ro_rt != NULL && - (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0) - is_gw = 1; + if (ro != NULL) + is_gw = (ro->ro_flags & RT_HAS_GW) != 0; #endif /* * For unicast, we make a tag to store the lladdr of the * destination. This might not be the first time we have seen * the packet (for instance, the arp code might be trying to * re-send it after receiving an arp reply) so we only * allocate a tag if there isn't one there already. For * multicast, we will eventually use a different tag to store * the channel number. */ unicast = !(m->m_flags & (M_BCAST | M_MCAST)); if (unicast) { mtag = m_tag_locate(m, MTAG_FIREWIRE, MTAG_FIREWIRE_HWADDR, NULL); if (!mtag) { mtag = m_tag_alloc(MTAG_FIREWIRE, MTAG_FIREWIRE_HWADDR, sizeof (struct fw_hwaddr), M_NOWAIT); if (!mtag) { error = ENOMEM; goto bad; } m_tag_prepend(m, mtag); } destfw = (struct fw_hwaddr *)(mtag + 1); } else { destfw = 0; } switch (dst->sa_family) { #ifdef INET case AF_INET: /* * Only bother with arp for unicast. Allocation of * channels etc. for firewire is quite different and * doesn't fit into the arp model. */ if (unicast) { - is_gw = 0; - if (ro != NULL && ro->ro_rt != NULL && - (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0) - is_gw = 1; error = arpresolve(ifp, is_gw, m, dst, (u_char *) destfw, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); } type = ETHERTYPE_IP; break; case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_IEEE1394); type = ETHERTYPE_ARP; if (unicast) *destfw = *(struct fw_hwaddr *) ar_tha(ah); /* * The standard arp code leaves a hole for the target * hardware address which we need to close up. */ bcopy(ar_tpa(ah), ar_tha(ah), ah->ar_pln); m_adj(m, -ah->ar_hln); break; } #endif #ifdef INET6 case AF_INET6: if (unicast) { error = nd6_resolve(fc->fc_ifp, is_gw, m, dst, (u_char *) destfw, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); } type = ETHERTYPE_IPV6; break; #endif default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); error = EAFNOSUPPORT; goto bad; } /* * Let BPF tap off a copy before we encapsulate. */ if (bpf_peers_present(ifp->if_bpf)) { struct fw_bpfhdr h; if (unicast) bcopy(destfw, h.firewire_dhost, 8); else bcopy(&firewire_broadcastaddr, h.firewire_dhost, 8); bcopy(&fc->fc_hwaddr, h.firewire_shost, 8); h.firewire_type = htons(type); bpf_mtap2(ifp->if_bpf, &h, sizeof(h), m); } /* * Punt on MCAP for now and send all multicast packets on the * broadcast channel. */ if (m->m_flags & M_MCAST) m->m_flags |= M_BCAST; /* * Figure out what speed to use and what the largest supported * packet size is. For unicast, this is the minimum of what we * can speak and what they can hear. For broadcast, lets be * conservative and use S100. We could possibly improve that * by examining the bus manager's speed map or similar. We * also reduce the packet size for broadcast to account for * the GASP header. */ if (unicast) { speed = min(fc->fc_speed, destfw->sspd); psize = min(512 << speed, 2 << destfw->sender_max_rec); } else { speed = 0; psize = 512 - 2*sizeof(uint32_t); } /* * Next, we encapsulate, possibly fragmenting the original * datagram if it won't fit into a single packet. */ if (m->m_pkthdr.len <= psize - sizeof(uint32_t)) { /* * No fragmentation is necessary. */ M_PREPEND(m, sizeof(uint32_t), M_NOWAIT); if (!m) { error = ENOBUFS; goto bad; } enc = mtod(m, union fw_encap *); enc->unfrag.ether_type = type; enc->unfrag.lf = FW_ENCAP_UNFRAG; enc->unfrag.reserved = 0; /* * Byte swap the encapsulation header manually. */ enc->ul[0] = htonl(enc->ul[0]); error = (ifp->if_transmit)(ifp, m); return (error); } else { /* * Fragment the datagram, making sure to leave enough * space for the encapsulation header in each packet. */ fsize = psize - 2*sizeof(uint32_t); dgl = next_dgl++; dsize = m->m_pkthdr.len; foff = 0; while (m) { if (m->m_pkthdr.len > fsize) { /* * Split off the tail segment from the * datagram, copying our tags over. */ mtail = m_split(m, fsize, M_NOWAIT); m_tag_copy_chain(mtail, m, M_NOWAIT); } else { mtail = 0; } /* * Add our encapsulation header to this * fragment and hand it off to the link. */ M_PREPEND(m, 2*sizeof(uint32_t), M_NOWAIT); if (!m) { error = ENOBUFS; goto bad; } enc = mtod(m, union fw_encap *); if (foff == 0) { enc->firstfrag.lf = FW_ENCAP_FIRST; enc->firstfrag.reserved1 = 0; enc->firstfrag.reserved2 = 0; enc->firstfrag.datagram_size = dsize - 1; enc->firstfrag.ether_type = type; enc->firstfrag.dgl = dgl; } else { if (mtail) enc->nextfrag.lf = FW_ENCAP_NEXT; else enc->nextfrag.lf = FW_ENCAP_LAST; enc->nextfrag.reserved1 = 0; enc->nextfrag.reserved2 = 0; enc->nextfrag.reserved3 = 0; enc->nextfrag.datagram_size = dsize - 1; enc->nextfrag.fragment_offset = foff; enc->nextfrag.dgl = dgl; } foff += m->m_pkthdr.len - 2*sizeof(uint32_t); /* * Byte swap the encapsulation header manually. */ enc->ul[0] = htonl(enc->ul[0]); enc->ul[1] = htonl(enc->ul[1]); error = (ifp->if_transmit)(ifp, m); if (error) { if (mtail) m_freem(mtail); return (ENOBUFS); } m = mtail; } return (0); } bad: if (m) m_freem(m); return (error); } static struct mbuf * firewire_input_fragment(struct fw_com *fc, struct mbuf *m, int src) { union fw_encap *enc; struct fw_reass *r; struct mbuf *mf, *mprev; int dsize; int fstart, fend, start, end, islast; uint32_t id; /* * Find an existing reassembly buffer or create a new one. */ enc = mtod(m, union fw_encap *); id = enc->firstfrag.dgl | (src << 16); STAILQ_FOREACH(r, &fc->fc_frags, fr_link) if (r->fr_id == id) break; if (!r) { r = malloc(sizeof(struct fw_reass), M_TEMP, M_NOWAIT); if (!r) { m_freem(m); return 0; } r->fr_id = id; r->fr_frags = 0; STAILQ_INSERT_HEAD(&fc->fc_frags, r, fr_link); } /* * If this fragment overlaps any other fragment, we must discard * the partial reassembly and start again. */ if (enc->firstfrag.lf == FW_ENCAP_FIRST) fstart = 0; else fstart = enc->nextfrag.fragment_offset; fend = fstart + m->m_pkthdr.len - 2*sizeof(uint32_t); dsize = enc->nextfrag.datagram_size; islast = (enc->nextfrag.lf == FW_ENCAP_LAST); for (mf = r->fr_frags; mf; mf = mf->m_nextpkt) { enc = mtod(mf, union fw_encap *); if (enc->nextfrag.datagram_size != dsize) { /* * This fragment must be from a different * packet. */ goto bad; } if (enc->firstfrag.lf == FW_ENCAP_FIRST) start = 0; else start = enc->nextfrag.fragment_offset; end = start + mf->m_pkthdr.len - 2*sizeof(uint32_t); if ((fstart < end && fend > start) || (islast && enc->nextfrag.lf == FW_ENCAP_LAST)) { /* * Overlap - discard reassembly buffer and start * again with this fragment. */ goto bad; } } /* * Find where to put this fragment in the list. */ for (mf = r->fr_frags, mprev = NULL; mf; mprev = mf, mf = mf->m_nextpkt) { enc = mtod(mf, union fw_encap *); if (enc->firstfrag.lf == FW_ENCAP_FIRST) start = 0; else start = enc->nextfrag.fragment_offset; if (start >= fend) break; } /* * If this is a last fragment and we are not adding at the end * of the list, discard the buffer. */ if (islast && mprev && mprev->m_nextpkt) goto bad; if (mprev) { m->m_nextpkt = mprev->m_nextpkt; mprev->m_nextpkt = m; /* * Coalesce forwards and see if we can make a whole * datagram. */ enc = mtod(mprev, union fw_encap *); if (enc->firstfrag.lf == FW_ENCAP_FIRST) start = 0; else start = enc->nextfrag.fragment_offset; end = start + mprev->m_pkthdr.len - 2*sizeof(uint32_t); while (end == fstart) { /* * Strip off the encap header from m and * append it to mprev, freeing m. */ m_adj(m, 2*sizeof(uint32_t)); mprev->m_nextpkt = m->m_nextpkt; mprev->m_pkthdr.len += m->m_pkthdr.len; m_cat(mprev, m); if (mprev->m_pkthdr.len == dsize + 1 + 2*sizeof(uint32_t)) { /* * We have assembled a complete packet * we must be finished. Make sure we have * merged the whole chain. */ STAILQ_REMOVE(&fc->fc_frags, r, fw_reass, fr_link); free(r, M_TEMP); m = mprev->m_nextpkt; while (m) { mf = m->m_nextpkt; m_freem(m); m = mf; } mprev->m_nextpkt = NULL; return (mprev); } /* * See if we can continue merging forwards. */ end = fend; m = mprev->m_nextpkt; if (m) { enc = mtod(m, union fw_encap *); if (enc->firstfrag.lf == FW_ENCAP_FIRST) fstart = 0; else fstart = enc->nextfrag.fragment_offset; fend = fstart + m->m_pkthdr.len - 2*sizeof(uint32_t); } else { break; } } } else { m->m_nextpkt = 0; r->fr_frags = m; } return (0); bad: while (r->fr_frags) { mf = r->fr_frags; r->fr_frags = mf->m_nextpkt; m_freem(mf); } m->m_nextpkt = 0; r->fr_frags = m; return (0); } void firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src) { struct fw_com *fc = IFP2FWC(ifp); union fw_encap *enc; int type, isr; /* * The caller has already stripped off the packet header * (stream or wreqb) and marked the mbuf's M_BCAST flag * appropriately. We de-encapsulate the IP packet and pass it * up the line after handling link-level fragmentation. */ if (m->m_pkthdr.len < sizeof(uint32_t)) { if_printf(ifp, "discarding frame without " "encapsulation header (len %u pkt len %u)\n", m->m_len, m->m_pkthdr.len); } m = m_pullup(m, sizeof(uint32_t)); if (m == NULL) return; enc = mtod(m, union fw_encap *); /* * Byte swap the encapsulation header manually. */ enc->ul[0] = ntohl(enc->ul[0]); if (enc->unfrag.lf != 0) { m = m_pullup(m, 2*sizeof(uint32_t)); if (!m) return; enc = mtod(m, union fw_encap *); enc->ul[1] = ntohl(enc->ul[1]); m = firewire_input_fragment(fc, m, src); if (!m) return; enc = mtod(m, union fw_encap *); type = enc->firstfrag.ether_type; m_adj(m, 2*sizeof(uint32_t)); } else { type = enc->unfrag.ether_type; m_adj(m, sizeof(uint32_t)); } if (m->m_pkthdr.rcvif == NULL) { if_printf(ifp, "discard frame w/o interface pointer\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } #ifdef DIAGNOSTIC if (m->m_pkthdr.rcvif != ifp) { if_printf(ifp, "Warning, frame marked as received on %s\n", m->m_pkthdr.rcvif->if_xname); } #endif #ifdef MAC /* * Tag the mbuf with an appropriate MAC label before any other * consumers can get to it. */ mac_ifnet_create_mbuf(ifp, m); #endif /* * Give bpf a chance at the packet. The link-level driver * should have left us a tag with the EUID of the sender. */ if (bpf_peers_present(ifp->if_bpf)) { struct fw_bpfhdr h; struct m_tag *mtag; mtag = m_tag_locate(m, MTAG_FIREWIRE, MTAG_FIREWIRE_SENDER_EUID, 0); if (mtag) bcopy(mtag + 1, h.firewire_shost, 8); else bcopy(&firewire_broadcastaddr, h.firewire_dhost, 8); bcopy(&fc->fc_hwaddr, h.firewire_dhost, 8); h.firewire_type = htons(type); bpf_mtap2(ifp->if_bpf, &h, sizeof(h), m); } if (ifp->if_flags & IFF_MONITOR) { /* * Interface marked for monitoring; discard packet. */ m_freem(m); return; } if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* Discard packet if interface is not up */ if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); return; } if (m->m_flags & (M_BCAST|M_MCAST)) if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); switch (type) { #ifdef INET case ETHERTYPE_IP: isr = NETISR_IP; break; case ETHERTYPE_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); /* * Adjust the arp packet to insert an empty tha slot. */ m->m_len += ah->ar_hln; m->m_pkthdr.len += ah->ar_hln; bcopy(ar_tha(ah), ar_tpa(ah), ah->ar_pln); isr = NETISR_ARP; break; } #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; break; #endif default: m_freem(m); return; } M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); } int firewire_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: { struct sockaddr *sa; sa = (struct sockaddr *) & ifr->ifr_data; bcopy(&IFP2FWC(ifp)->fc_hwaddr, (caddr_t) sa->sa_data, sizeof(struct fw_hwaddr)); } break; case SIOCSIFMTU: /* * Set the interface MTU. */ if (ifr->ifr_mtu > 1500) { error = EINVAL; } else { ifp->if_mtu = ifr->ifr_mtu; } break; default: error = EINVAL; /* XXX netbsd has ENOTTY??? */ break; } return (error); } static int firewire_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa) { #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. */ *llsa = 0; return 0; #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; *llsa = 0; return 0; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.) */ ifp->if_flags |= IFF_ALLMULTI; *llsa = 0; return 0; } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; *llsa = 0; return 0; #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return EAFNOSUPPORT; } } void firewire_ifattach(struct ifnet *ifp, struct fw_hwaddr *llc) { struct fw_com *fc = IFP2FWC(ifp); struct ifaddr *ifa; struct sockaddr_dl *sdl; static const char* speeds[] = { "S100", "S200", "S400", "S800", "S1600", "S3200" }; fc->fc_speed = llc->sspd; STAILQ_INIT(&fc->fc_frags); ifp->if_addrlen = sizeof(struct fw_hwaddr); ifp->if_hdrlen = 0; if_attach(ifp); ifp->if_mtu = 1500; /* XXX */ ifp->if_output = firewire_output; ifp->if_resolvemulti = firewire_resolvemulti; ifp->if_broadcastaddr = (u_char *) &firewire_broadcastaddr; ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_IEEE1394; sdl->sdl_alen = ifp->if_addrlen; bcopy(llc, LLADDR(sdl), ifp->if_addrlen); bpfattach(ifp, DLT_APPLE_IP_OVER_IEEE1394, sizeof(struct fw_hwaddr)); if_printf(ifp, "Firewire address: %8D @ 0x%04x%08x, %s, maxrec %d\n", (uint8_t *) &llc->sender_unique_ID_hi, ":", ntohs(llc->sender_unicast_FIFO_hi), ntohl(llc->sender_unicast_FIFO_lo), speeds[llc->sspd], (2 << llc->sender_max_rec)); } void firewire_ifdetach(struct ifnet *ifp) { bpfdetach(ifp); if_detach(ifp); } void firewire_busreset(struct ifnet *ifp) { struct fw_com *fc = IFP2FWC(ifp); struct fw_reass *r; struct mbuf *m; /* * Discard any partial datagrams since the host ids may have changed. */ while ((r = STAILQ_FIRST(&fc->fc_frags))) { STAILQ_REMOVE_HEAD(&fc->fc_frags, fr_link); while (r->fr_frags) { m = r->fr_frags; r->fr_frags = m->m_nextpkt; m_freem(m); } free(r, M_TEMP); } } static void * firewire_alloc(u_char type, struct ifnet *ifp) { struct fw_com *fc; fc = malloc(sizeof(struct fw_com), M_FWCOM, M_WAITOK | M_ZERO); fc->fc_ifp = ifp; return (fc); } static void firewire_free(void *com, u_char type) { free(com, M_FWCOM); } static int firewire_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: if_register_com_alloc(IFT_IEEE1394, firewire_alloc, firewire_free); break; case MOD_UNLOAD: if_deregister_com_alloc(IFT_IEEE1394); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t firewire_mod = { "if_firewire", firewire_modevent, 0 }; DECLARE_MODULE(if_firewire, firewire_mod, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(if_firewire, 1); Index: projects/clang380-import/sys/net/if_iso88025subr.c =================================================================== --- projects/clang380-import/sys/net/if_iso88025subr.c (revision 293686) +++ projects/clang380-import/sys/net/if_iso88025subr.c (revision 293687) @@ -1,701 +1,697 @@ /*- * Copyright (c) 1998, Larry Lile * All rights reserved. * * For latest sources and information on this driver, please * go to http://anarchy.stdio.com. * * Questions, comments or suggestions should be directed to * Larry Lile . * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ /* * * General ISO 802.5 (Token Ring) support routines * */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #endif #ifdef INET6 #include #endif #include static const u_char iso88025_broadcastaddr[ISO88025_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static int iso88025_resolvemulti (struct ifnet *, struct sockaddr **, struct sockaddr *); #define senderr(e) do { error = (e); goto bad; } while (0) /* * Perform common duties while attaching to interface list */ void iso88025_ifattach(struct ifnet *ifp, const u_int8_t *lla, int bpf) { struct ifaddr *ifa; struct sockaddr_dl *sdl; ifa = NULL; ifp->if_type = IFT_ISO88025; ifp->if_addrlen = ISO88025_ADDR_LEN; ifp->if_hdrlen = ISO88025_HDR_LEN; if_attach(ifp); /* Must be called before additional assignments */ ifp->if_output = iso88025_output; ifp->if_input = iso88025_input; ifp->if_resolvemulti = iso88025_resolvemulti; ifp->if_broadcastaddr = iso88025_broadcastaddr; if (ifp->if_baudrate == 0) ifp->if_baudrate = TR_16MBPS; /* 16Mbit should be a safe default */ if (ifp->if_mtu == 0) ifp->if_mtu = ISO88025_DEFAULT_MTU; ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_ISO88025; sdl->sdl_alen = ifp->if_addrlen; bcopy(lla, LLADDR(sdl), ifp->if_addrlen); if (bpf) bpfattach(ifp, DLT_IEEE802, ISO88025_HDR_LEN); return; } /* * Perform common duties while detaching a Token Ring interface */ void iso88025_ifdetach(ifp, bpf) struct ifnet *ifp; int bpf; { if (bpf) bpfdetach(ifp); if_detach(ifp); return; } int iso88025_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifaddr *ifa; struct ifreq *ifr; int error; ifa = (struct ifaddr *) data; ifr = (struct ifreq *) data; error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif /* INET */ default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: { struct sockaddr *sa; sa = (struct sockaddr *) & ifr->ifr_data; bcopy(IF_LLADDR(ifp), (caddr_t) sa->sa_data, ISO88025_ADDR_LEN); } break; case SIOCSIFMTU: /* * Set the interface MTU. */ if (ifr->ifr_mtu > ISO88025_MAX_MTU) { error = EINVAL; } else { ifp->if_mtu = ifr->ifr_mtu; } break; default: error = EINVAL; /* XXX netbsd has ENOTTY??? */ break; } return (error); } /* * ISO88025 encapsulation */ int iso88025_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_int16_t snap_type = 0; int loop_copy = 0, error = 0, rif_len = 0; u_char edst[ISO88025_ADDR_LEN]; struct iso88025_header *th; struct iso88025_header gen_th; struct sockaddr_dl *sdl = NULL; struct rtentry *rt0 = NULL; int is_gw = 0; - if (ro != NULL) { - rt0 = ro->ro_rt; - if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0) - is_gw = 1; - } - + if (ro != NULL) + is_gw = (ro->ro_flags & RT_HAS_GW) != 0; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) senderr(error); #endif if (ifp->if_flags & IFF_MONITOR) senderr(ENETDOWN); if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); getmicrotime(&ifp->if_lastchange); /* Calculate routing info length based on arp table entry */ /* XXX any better way to do this ? */ if (rt0 && (sdl = (struct sockaddr_dl *)rt0->rt_gateway)) if (SDL_ISO88025(sdl)->trld_rcf != 0) rif_len = TR_RCF_RIFLEN(SDL_ISO88025(sdl)->trld_rcf); /* Generate a generic 802.5 header for the packet */ gen_th.ac = TR_AC; gen_th.fc = TR_LLC_FRAME; (void)memcpy((caddr_t)gen_th.iso88025_shost, IF_LLADDR(ifp), ISO88025_ADDR_LEN); if (rif_len) { gen_th.iso88025_shost[0] |= TR_RII; if (rif_len > 2) { gen_th.rcf = SDL_ISO88025(sdl)->trld_rcf; (void)memcpy((caddr_t)gen_th.rd, (caddr_t)SDL_ISO88025(sdl)->trld_route, rif_len - 2); } } switch (dst->sa_family) { #ifdef INET case AF_INET: error = arpresolve(ifp, is_gw, m, dst, edst, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); snap_type = ETHERTYPE_IP; break; case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_IEEE802); loop_copy = -1; /* if this is for us, don't do it */ switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: snap_type = ETHERTYPE_REVARP; break; case ARPOP_REQUEST: case ARPOP_REPLY: default: snap_type = ETHERTYPE_ARP; break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, edst, ISO88025_ADDR_LEN); else bcopy(ar_tha(ah), edst, ISO88025_ADDR_LEN); } break; #endif /* INET */ #ifdef INET6 case AF_INET6: error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); snap_type = ETHERTYPE_IPV6; break; #endif /* INET6 */ case AF_UNSPEC: { const struct iso88025_sockaddr_data *sd; /* * For AF_UNSPEC sockaddr.sa_data must contain all of the * mac information needed to send the packet. This allows * full mac, llc, and source routing function to be controlled. * llc and source routing information must already be in the * mbuf provided, ac/fc are set in sa_data. sockaddr.sa_data * should be an iso88025_sockaddr_data structure see iso88025.h */ loop_copy = -1; sd = (const struct iso88025_sockaddr_data *)dst->sa_data; gen_th.ac = sd->ac; gen_th.fc = sd->fc; (void)memcpy(edst, sd->ether_dhost, ISO88025_ADDR_LEN); (void)memcpy(gen_th.iso88025_shost, sd->ether_shost, ISO88025_ADDR_LEN); rif_len = 0; break; } default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); senderr(EAFNOSUPPORT); break; } /* * Add LLC header. */ if (snap_type != 0) { struct llc *l; M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT); if (m == 0) senderr(ENOBUFS); l = mtod(m, struct llc *); l->llc_control = LLC_UI; l->llc_dsap = l->llc_ssap = LLC_SNAP_LSAP; l->llc_snap.org_code[0] = l->llc_snap.org_code[1] = l->llc_snap.org_code[2] = 0; l->llc_snap.ether_type = htons(snap_type); } /* * Add local net header. If no space in first mbuf, * allocate another. */ M_PREPEND(m, ISO88025_HDR_LEN + rif_len, M_NOWAIT); if (m == 0) senderr(ENOBUFS); th = mtod(m, struct iso88025_header *); bcopy((caddr_t)edst, (caddr_t)&gen_th.iso88025_dhost, ISO88025_ADDR_LEN); /* Copy as much of the generic header as is needed into the mbuf */ memcpy(th, &gen_th, ISO88025_HDR_LEN + rif_len); /* * If a simplex interface, and the packet is being sent to our * Ethernet address or a broadcast address, loopback a copy. * XXX To make a simplex device behave exactly like a duplex * device, we should copy in the case of sending to our own * ethernet address (thus letting the original actually appear * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) { if ((m->m_flags & M_BCAST) || (loop_copy > 0)) { struct mbuf *n; n = m_copy(m, 0, (int)M_COPYALL); (void) if_simloop(ifp, n, dst->sa_family, ISO88025_HDR_LEN); } else if (bcmp(th->iso88025_dhost, th->iso88025_shost, ETHER_ADDR_LEN) == 0) { (void) if_simloop(ifp, m, dst->sa_family, ISO88025_HDR_LEN); return(0); /* XXX */ } } IFQ_HANDOFF_ADJ(ifp, m, ISO88025_HDR_LEN + LLC_SNAPFRAMELEN, error); if (error) { printf("iso88025_output: packet dropped QFULL.\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } return (error); bad: if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (m) m_freem(m); return (error); } /* * ISO 88025 de-encapsulation */ void iso88025_input(ifp, m) struct ifnet *ifp; struct mbuf *m; { struct iso88025_header *th; struct llc *l; int isr; int mac_hdr_len; /* * Do consistency checks to verify assumptions * made by code past this point. */ if ((m->m_flags & M_PKTHDR) == 0) { if_printf(ifp, "discard frame w/o packet header\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } if (m->m_pkthdr.rcvif == NULL) { if_printf(ifp, "discard frame w/o interface pointer\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } m = m_pullup(m, ISO88025_HDR_LEN); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } th = mtod(m, struct iso88025_header *); /* * Discard packet if interface is not up. */ if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) goto dropanyway; /* * Give bpf a chance at the packet. */ BPF_MTAP(ifp, m); /* * Interface marked for monitoring; discard packet. */ if (ifp->if_flags & IFF_MONITOR) { m_freem(m); return; } #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif /* * Update interface statistics. */ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); getmicrotime(&ifp->if_lastchange); /* * Discard non local unicast packets when interface * is in promiscuous mode. */ if ((ifp->if_flags & IFF_PROMISC) && ((th->iso88025_dhost[0] & 1) == 0) && (bcmp(IF_LLADDR(ifp), (caddr_t) th->iso88025_dhost, ISO88025_ADDR_LEN) != 0)) goto dropanyway; /* * Set mbuf flags for bcast/mcast. */ if (th->iso88025_dhost[0] & 1) { if (bcmp(iso88025_broadcastaddr, th->iso88025_dhost, ISO88025_ADDR_LEN) == 0) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } mac_hdr_len = ISO88025_HDR_LEN; /* Check for source routing info */ if (th->iso88025_shost[0] & TR_RII) mac_hdr_len += TR_RCF_RIFLEN(th->rcf); /* Strip off ISO88025 header. */ m_adj(m, mac_hdr_len); m = m_pullup(m, LLC_SNAPFRAMELEN); if (m == 0) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } l = mtod(m, struct llc *); switch (l->llc_dsap) { case LLC_SNAP_LSAP: { u_int16_t type; if ((l->llc_control != LLC_UI) || (l->llc_ssap != LLC_SNAP_LSAP)) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } if (l->llc_snap.org_code[0] != 0 || l->llc_snap.org_code[1] != 0 || l->llc_snap.org_code[2] != 0) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } type = ntohs(l->llc_snap.ether_type); m_adj(m, LLC_SNAPFRAMELEN); switch (type) { #ifdef INET case ETHERTYPE_IP: th->iso88025_shost[0] &= ~(TR_RII); isr = NETISR_IP; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) goto dropanyway; isr = NETISR_ARP; break; #endif /* INET */ #ifdef INET6 case ETHERTYPE_IPV6: th->iso88025_shost[0] &= ~(TR_RII); isr = NETISR_IPV6; break; #endif /* INET6 */ default: printf("iso88025_input: unexpected llc_snap ether_type 0x%02x\n", type); if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } break; } #ifdef ISO case LLC_ISO_LSAP: switch (l->llc_control) { case LLC_UI: if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; break; case LLC_XID: case LLC_XID_P: if(m->m_len < ISO88025_ADDR_LEN) goto dropanyway; l->llc_window = 0; l->llc_fid = 9; l->llc_class = 1; l->llc_dsap = l->llc_ssap = 0; /* Fall through to */ case LLC_TEST: case LLC_TEST_P: { struct sockaddr sa; struct iso88025_sockaddr_data *th2; int i; u_char c; c = l->llc_dsap; if (th->iso88025_shost[0] & TR_RII) { /* XXX */ printf("iso88025_input: dropping source routed LLC_TEST\n"); goto dropanyway; } l->llc_dsap = l->llc_ssap; l->llc_ssap = c; if (m->m_flags & (M_BCAST | M_MCAST)) bcopy((caddr_t)IF_LLADDR(ifp), (caddr_t)th->iso88025_dhost, ISO88025_ADDR_LEN); sa.sa_family = AF_UNSPEC; sa.sa_len = sizeof(sa); th2 = (struct iso88025_sockaddr_data *)sa.sa_data; for (i = 0; i < ISO88025_ADDR_LEN; i++) { th2->ether_shost[i] = c = th->iso88025_dhost[i]; th2->ether_dhost[i] = th->iso88025_dhost[i] = th->iso88025_shost[i]; th->iso88025_shost[i] = c; } th2->ac = TR_AC; th2->fc = TR_LLC_FRAME; ifp->if_output(ifp, m, &sa, NULL); return; } default: printf("iso88025_input: unexpected llc control 0x%02x\n", l->llc_control); if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; break; } break; #endif /* ISO */ default: printf("iso88025_input: unknown dsap 0x%x\n", l->llc_dsap); if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; break; } M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); return; dropanyway: if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if (m) m_freem(m); return; } static int iso88025_resolvemulti (ifp, llsa, sa) struct ifnet *ifp; struct sockaddr **llsa; struct sockaddr *sa; { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif u_char *e_addr; switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; e_addr = LLADDR(sdl); if ((e_addr[0] & 1) != 1) { return (EADDRNOTAVAIL); } *llsa = 0; return (0); #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { return (EADDRNOTAVAIL); } sdl = link_init_sdl(ifp, *llsa, IFT_ISO88025); sdl->sdl_alen = ISO88025_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr); *llsa = (struct sockaddr *)sdl; return (0); #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.) */ ifp->if_flags |= IFF_ALLMULTI; *llsa = 0; return (0); } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { return (EADDRNOTAVAIL); } sdl = link_init_sdl(ifp, *llsa, IFT_ISO88025); sdl->sdl_alen = ISO88025_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr); *llsa = (struct sockaddr *)sdl; return (0); #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return (EAFNOSUPPORT); } return (0); } static moduledata_t iso88025_mod = { .name = "iso88025", }; DECLARE_MODULE(iso88025, iso88025_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(iso88025, 1); Index: projects/clang380-import/sys/net/if_loop.c =================================================================== --- projects/clang380-import/sys/net/if_loop.c (revision 293686) +++ projects/clang380-import/sys/net/if_loop.c (revision 293687) @@ -1,450 +1,446 @@ /*- * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if_loop.c 8.2 (Berkeley) 1/9/95 * $FreeBSD$ */ /* * Loopback interface driver for protocol testing and timing. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #ifdef INET6 #ifndef INET #include #endif #include #include #endif #include #ifdef TINY_LOMTU #define LOMTU (1024+512) #elif defined(LARGE_LOMTU) #define LOMTU 131072 #else #define LOMTU 16384 #endif #define LO_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP) #define LO_CSUM_FEATURES6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6) #define LO_CSUM_SET (CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \ CSUM_PSEUDO_HDR | \ CSUM_IP_CHECKED | CSUM_IP_VALID | \ CSUM_SCTP_VALID) int loioctl(struct ifnet *, u_long, caddr_t); int looutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro); static int lo_clone_create(struct if_clone *, int, caddr_t); static void lo_clone_destroy(struct ifnet *); VNET_DEFINE(struct ifnet *, loif); /* Used externally */ #ifdef VIMAGE static VNET_DEFINE(struct if_clone *, lo_cloner); #define V_lo_cloner VNET(lo_cloner) #endif static struct if_clone *lo_cloner; static const char loname[] = "lo"; static void lo_clone_destroy(struct ifnet *ifp) { #ifndef VIMAGE /* XXX: destroying lo0 will lead to panics. */ KASSERT(V_loif != ifp, ("%s: destroying lo0", __func__)); #endif bpfdetach(ifp); if_detach(ifp); if_free(ifp); } static int lo_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct ifnet *ifp; ifp = if_alloc(IFT_LOOP); if (ifp == NULL) return (ENOSPC); if_initname(ifp, loname, unit); ifp->if_mtu = LOMTU; ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST; ifp->if_ioctl = loioctl; ifp->if_output = looutput; ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_capabilities = ifp->if_capenable = IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6; ifp->if_hwassist = LO_CSUM_FEATURES | LO_CSUM_FEATURES6; if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); if (V_loif == NULL) V_loif = ifp; return (0); } static void vnet_loif_init(const void *unused __unused) { #ifdef VIMAGE lo_cloner = if_clone_simple(loname, lo_clone_create, lo_clone_destroy, 1); V_lo_cloner = lo_cloner; #else lo_cloner = if_clone_simple(loname, lo_clone_create, lo_clone_destroy, 1); #endif } VNET_SYSINIT(vnet_loif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_loif_init, NULL); #ifdef VIMAGE static void vnet_loif_uninit(const void *unused __unused) { if_clone_detach(V_lo_cloner); V_loif = NULL; } VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_loif_uninit, NULL); #endif static int loop_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: break; case MOD_UNLOAD: printf("loop module unload - not possible for this module type\n"); return (EINVAL); default: return (EOPNOTSUPP); } return (0); } static moduledata_t loop_mod = { "if_lo", loop_modevent, 0 }; DECLARE_MODULE(if_lo, loop_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); int looutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_int32_t af; - struct rtentry *rt = NULL; #ifdef MAC int error; #endif M_ASSERTPKTHDR(m); /* check if we have the packet header */ - if (ro != NULL) - rt = ro->ro_rt; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { m_freem(m); return (error); } #endif - if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { + if (ro != NULL && ro->ro_flags & (RT_REJECT|RT_BLACKHOLE)) { m_freem(m); - return (rt->rt_flags & RTF_BLACKHOLE ? 0 : - rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); + return (ro->ro_flags & RT_BLACKHOLE ? 0 : EHOSTUNREACH); } if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); /* BPF writes need to be handled specially. */ if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT) bcopy(dst->sa_data, &af, sizeof(af)); else af = dst->sa_family; #if 1 /* XXX */ switch (af) { case AF_INET: if (ifp->if_capenable & IFCAP_RXCSUM) { m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = LO_CSUM_SET; } m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES; break; case AF_INET6: #if 0 /* * XXX-BZ for now always claim the checksum is good despite * any interface flags. This is a workaround for 9.1-R and * a proper solution ought to be sought later. */ if (ifp->if_capenable & IFCAP_RXCSUM_IPV6) { m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = LO_CSUM_SET; } #else m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = LO_CSUM_SET; #endif m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES6; break; default: printf("looutput: af=%d unexpected\n", af); m_freem(m); return (EAFNOSUPPORT); } #endif return (if_simloop(ifp, m, af, 0)); } /* * if_simloop() * * This function is to support software emulation of hardware loopback, * i.e., for interfaces with the IFF_SIMPLEX attribute. Since they can't * hear their own broadcasts, we create a copy of the packet that we * would normally receive via a hardware loopback. * * This function expects the packet to include the media header of length hlen. */ int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen) { int isr; M_ASSERTPKTHDR(m); m_tag_delete_nonpersistent(m); m->m_pkthdr.rcvif = ifp; #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif /* * Let BPF see incoming packet in the following manner: * - Emulated packet loopback for a simplex interface * (net/if_ethersubr.c) * -> passes it to ifp's BPF * - IPv4/v6 multicast packet loopback (netinet(6)/ip(6)_output.c) * -> not passes it to any BPF * - Normal packet loopback from myself to myself (net/if_loop.c) * -> passes to lo0's BPF (even in case of IPv6, where ifp!=lo0) */ if (hlen > 0) { if (bpf_peers_present(ifp->if_bpf)) { bpf_mtap(ifp->if_bpf, m); } } else { if (bpf_peers_present(V_loif->if_bpf)) { if ((m->m_flags & M_MCAST) == 0 || V_loif == ifp) { /* XXX beware sizeof(af) != 4 */ u_int32_t af1 = af; /* * We need to prepend the address family. */ bpf_mtap2(V_loif->if_bpf, &af1, sizeof(af1), m); } } } /* Strip away media header */ if (hlen > 0) { m_adj(m, hlen); #ifndef __NO_STRICT_ALIGNMENT /* * Some archs do not like unaligned data, so * we move data down in the first mbuf. */ if (mtod(m, vm_offset_t) & 3) { KASSERT(hlen >= 3, ("if_simloop: hlen too small")); bcopy(m->m_data, (char *)(mtod(m, vm_offset_t) - (mtod(m, vm_offset_t) & 3)), m->m_len); m->m_data -= (mtod(m,vm_offset_t) & 3); } #endif } /* Deliver to upper layer protocol */ switch (af) { #ifdef INET case AF_INET: isr = NETISR_IP; break; #endif #ifdef INET6 case AF_INET6: m->m_flags |= M_LOOP; isr = NETISR_IPV6; break; #endif default: printf("if_simloop: can't handle af=%d\n", af); m_freem(m); return (EAFNOSUPPORT); } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); netisr_queue(isr, m); /* mbuf is free'd on failure. */ return (0); } /* * Process an ioctl request. */ /* ARGSUSED */ int loioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; int error = 0, mask; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; ifp->if_drv_flags |= IFF_DRV_RUNNING; /* * Everything else is done at a higher level. */ break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifr == 0) { error = EAFNOSUPPORT; /* XXX */ break; } switch (ifr->ifr_addr.sa_family) { #ifdef INET case AF_INET: break; #endif #ifdef INET6 case AF_INET6: break; #endif default: error = EAFNOSUPPORT; break; } break; case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; break; case SIOCSIFFLAGS: break; case SIOCSIFCAP: mask = ifp->if_capenable ^ ifr->ifr_reqcap; if ((mask & IFCAP_RXCSUM) != 0) ifp->if_capenable ^= IFCAP_RXCSUM; if ((mask & IFCAP_TXCSUM) != 0) ifp->if_capenable ^= IFCAP_TXCSUM; if ((mask & IFCAP_RXCSUM_IPV6) != 0) { #if 0 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; #else error = EOPNOTSUPP; break; #endif } if ((mask & IFCAP_TXCSUM_IPV6) != 0) { #if 0 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; #else error = EOPNOTSUPP; break; #endif } ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist = LO_CSUM_FEATURES; #if 0 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= LO_CSUM_FEATURES6; #endif break; default: error = EINVAL; } return (error); } Index: projects/clang380-import/sys/net/radix_mpath.c =================================================================== --- projects/clang380-import/sys/net/radix_mpath.c (revision 293686) +++ projects/clang380-import/sys/net/radix_mpath.c (revision 293687) @@ -1,294 +1,313 @@ /* $KAME: radix_mpath.c,v 1.17 2004/11/08 10:29:39 itojun Exp $ */ /* * Copyright (C) 2001 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL * PROPERTIES. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include /* * give some jitter to hash, to avoid synchronization between routers */ static uint32_t hashjitter; int rn_mpath_capable(struct radix_node_head *rnh) { return rnh->rnh_multipath; } struct radix_node * rn_mpath_next(struct radix_node *rn) { struct radix_node *next; if (!rn->rn_dupedkey) return NULL; next = rn->rn_dupedkey; if (rn->rn_mask == next->rn_mask) return next; else return NULL; } uint32_t rn_mpath_count(struct radix_node *rn) { uint32_t i = 0; struct rtentry *rt; while (rn != NULL) { rt = (struct rtentry *)rn; i += rt->rt_weight; rn = rn_mpath_next(rn); } return (i); } struct rtentry * rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate) { struct radix_node *rn; if (!gate || !rt->rt_gateway) return NULL; /* beyond here, we use rn as the master copy */ rn = (struct radix_node *)rt; do { rt = (struct rtentry *)rn; /* * we are removing an address alias that has * the same prefix as another address * we need to compare the interface address because * rt_gateway is a special sockadd_dl structure */ if (rt->rt_gateway->sa_family == AF_LINK) { if (!memcmp(rt->rt_ifa->ifa_addr, gate, gate->sa_len)) break; } /* * Check for other options: * 1) Routes with 'real' IPv4/IPv6 gateway * 2) Loopback host routes (another AF_LINK/sockadd_dl check) * */ if (rt->rt_gateway->sa_len == gate->sa_len && !memcmp(rt->rt_gateway, gate, gate->sa_len)) break; } while ((rn = rn_mpath_next(rn)) != NULL); return (struct rtentry *)rn; } /* * go through the chain and unlink "rt" from the list * the caller will free "rt" */ int rt_mpath_deldup(struct rtentry *headrt, struct rtentry *rt) { struct radix_node *t, *tt; if (!headrt || !rt) return (0); t = (struct radix_node *)headrt; tt = rn_mpath_next(t); while (tt) { if (tt == (struct radix_node *)rt) { t->rn_dupedkey = tt->rn_dupedkey; tt->rn_dupedkey = NULL; tt->rn_flags &= ~RNF_ACTIVE; tt[1].rn_flags &= ~RNF_ACTIVE; return (1); } t = tt; tt = rn_mpath_next((struct radix_node *)t); } return (0); } /* * check if we have the same key/mask/gateway on the table already. * Assume @rt rt_key host bits are cleared according to @netmask */ int rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt, struct sockaddr *netmask) { struct radix_node *rn, *rn1; struct rtentry *rt1; rn = (struct radix_node *)rt; rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh); if (!rn1 || rn1->rn_flags & RNF_ROOT) return (0); /* key/mask are the same. compare gateway for all multipaths */ do { rt1 = (struct rtentry *)rn1; /* sanity: no use in comparing the same thing */ if (rn1 == rn) continue; if (rt1->rt_gateway->sa_family == AF_LINK) { if (rt1->rt_ifa->ifa_addr->sa_len != rt->rt_ifa->ifa_addr->sa_len || bcmp(rt1->rt_ifa->ifa_addr, rt->rt_ifa->ifa_addr, rt1->rt_ifa->ifa_addr->sa_len)) continue; } else { if (rt1->rt_gateway->sa_len != rt->rt_gateway->sa_len || bcmp(rt1->rt_gateway, rt->rt_gateway, rt1->rt_gateway->sa_len)) continue; } /* all key/mask/gateway are the same. conflicting entry. */ return (EEXIST); } while ((rn1 = rn_mpath_next(rn1)) != NULL); return (0); } -void -rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum) +static struct rtentry * +rt_mpath_selectrte(struct rtentry *rte, uint32_t hash) { struct radix_node *rn0, *rn; u_int32_t n; struct rtentry *rt; int64_t weight; + /* beyond here, we use rn as the master copy */ + rn0 = rn = (struct radix_node *)rte; + n = rn_mpath_count(rn0); + + /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */ + hash += hashjitter; + hash %= n; + for (weight = abs((int32_t)hash), rt = rte; + weight >= rt->rt_weight && rn; + weight -= rt->rt_weight) { + + /* stay within the multipath routes */ + if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask) + break; + rn = rn->rn_dupedkey; + rt = (struct rtentry *)rn; + } + + return (rt); +} + +struct rtentry * +rt_mpath_select(struct rtentry *rte, uint32_t hash) +{ + if (rn_mpath_next((struct radix_node *)rte) == NULL) + return (rte); + + return (rt_mpath_selectrte(rte, hash)); +} + +void +rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum) +{ + struct rtentry *rt; + /* * XXX we don't attempt to lookup cached route again; what should * be done for sendto(3) case? */ if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP) && RT_LINK_IS_UP(ro->ro_rt->rt_ifp)) return; ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum); /* if the route does not exist or it is not multipath, don't care */ if (ro->ro_rt == NULL) return; if (rn_mpath_next((struct radix_node *)ro->ro_rt) == NULL) { RT_UNLOCK(ro->ro_rt); return; } - /* beyond here, we use rn as the master copy */ - rn0 = rn = (struct radix_node *)ro->ro_rt; - n = rn_mpath_count(rn0); - - /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */ - hash += hashjitter; - hash %= n; - for (weight = abs((int32_t)hash), rt = ro->ro_rt; - weight >= rt->rt_weight && rn; - weight -= rt->rt_weight) { - - /* stay within the multipath routes */ - if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask) - break; - rn = rn->rn_dupedkey; - rt = (struct rtentry *)rn; - } + rt = rt_mpath_selectrte(ro->ro_rt, hash); /* XXX try filling rt_gwroute and avoid unreachable gw */ /* gw selection has failed - there must be only zero weight routes */ - if (!rn) { + if (!rt) { RT_UNLOCK(ro->ro_rt); ro->ro_rt = NULL; return; } if (ro->ro_rt != rt) { RTFREE_LOCKED(ro->ro_rt); - ro->ro_rt = (struct rtentry *)rn; + ro->ro_rt = rt; RT_LOCK(ro->ro_rt); RT_ADDREF(ro->ro_rt); } RT_UNLOCK(ro->ro_rt); } extern int in6_inithead(void **head, int off); extern int in_inithead(void **head, int off); #ifdef INET int rn4_mpath_inithead(void **head, int off) { struct radix_node_head *rnh; hashjitter = arc4random(); if (in_inithead(head, off) == 1) { rnh = (struct radix_node_head *)*head; rnh->rnh_multipath = 1; return 1; } else return 0; } #endif #ifdef INET6 int rn6_mpath_inithead(void **head, int off) { struct radix_node_head *rnh; hashjitter = arc4random(); if (in6_inithead(head, off) == 1) { rnh = (struct radix_node_head *)*head; rnh->rnh_multipath = 1; return 1; } else return 0; } #endif Index: projects/clang380-import/sys/net/radix_mpath.h =================================================================== --- projects/clang380-import/sys/net/radix_mpath.h (revision 293686) +++ projects/clang380-import/sys/net/radix_mpath.h (revision 293687) @@ -1,63 +1,64 @@ /* $KAME: radix_mpath.h,v 1.10 2004/11/06 15:44:28 itojun Exp $ */ /* * Copyright (C) 2001 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL * PROPERTIES. */ /* $FreeBSD$ */ #ifndef _NET_RADIX_MPATH_H_ #define _NET_RADIX_MPATH_H_ #ifdef _KERNEL /* * Radix tree API with multipath support */ struct route; struct rtentry; struct sockaddr; int rn_mpath_capable(struct radix_node_head *); struct radix_node *rn_mpath_next(struct radix_node *); u_int32_t rn_mpath_count(struct radix_node *); struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *); int rt_mpath_conflict(struct radix_node_head *, struct rtentry *, struct sockaddr *); void rtalloc_mpath_fib(struct route *, u_int32_t, u_int); #define rtalloc_mpath(_route, _hash) rtalloc_mpath_fib((_route), (_hash), 0) +struct rtentry *rt_mpath_select(struct rtentry *, uint32_t); struct radix_node *rn_mpath_lookup(void *, void *, struct radix_node_head *); int rt_mpath_deldup(struct rtentry *, struct rtentry *); int rn4_mpath_inithead(void **, int); int rn6_mpath_inithead(void **, int); #endif #endif /* _NET_RADIX_MPATH_H_ */ Index: projects/clang380-import/sys/net/route.c =================================================================== --- projects/clang380-import/sys/net/route.c (revision 293686) +++ projects/clang380-import/sys/net/route.c (revision 293687) @@ -1,2323 +1,2312 @@ /*- * Copyright (c) 1980, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 * $FreeBSD$ */ /************************************************************************ * Note: In this file a 'fib' is a "forwarding information base" * * Which is the new name for an in kernel routing (next hop) table. * ***********************************************************************/ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_route.h" #include "opt_sctp.h" #include "opt_mrouting.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #define RT_MAXFIBS UINT16_MAX /* Kernel config default option. */ #ifdef ROUTETABLES #if ROUTETABLES <= 0 #error "ROUTETABLES defined too low" #endif #if ROUTETABLES > RT_MAXFIBS #error "ROUTETABLES defined too big" #endif #define RT_NUMFIBS ROUTETABLES #endif /* ROUTETABLES */ /* Initialize to default if not otherwise set. */ #ifndef RT_NUMFIBS #define RT_NUMFIBS 1 #endif #if defined(INET) || defined(INET6) #ifdef SCTP extern void sctp_addr_change(struct ifaddr *ifa, int cmd); #endif /* SCTP */ #endif /* This is read-only.. */ u_int rt_numfibs = RT_NUMFIBS; SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RDTUN, &rt_numfibs, 0, ""); /* * By default add routes to all fibs for new interfaces. * Once this is set to 0 then only allocate routes on interface * changes for the FIB of the caller when adding a new set of addresses * to an interface. XXX this is a shotgun aproach to a problem that needs * a more fine grained solution.. that will come. * XXX also has the problems getting the FIB from curthread which will not * always work given the fib can be overridden and prefixes can be added * from the network stack context. */ VNET_DEFINE(u_int, rt_add_addr_allfibs) = 1; SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(rt_add_addr_allfibs), 0, ""); VNET_DEFINE(struct rtstat, rtstat); #define V_rtstat VNET(rtstat) VNET_DEFINE(struct radix_node_head *, rt_tables); #define V_rt_tables VNET(rt_tables) VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ #define V_rttrash VNET(rttrash) /* * Convert a 'struct radix_node *' to a 'struct rtentry *'. * The operation can be done safely (in this code) because a * 'struct rtentry' starts with two 'struct radix_node''s, the first * one representing leaf nodes in the routing tree, which is * what the code in radix.c passes us as a 'struct radix_node'. * * But because there are a lot of assumptions in this conversion, * do not cast explicitly, but always use the macro below. */ #define RNTORT(p) ((struct rtentry *)(p)) static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */ #define V_rtzone VNET(rtzone) static int rtrequest1_fib_change(struct radix_node_head *, struct rt_addrinfo *, struct rtentry **, u_int); static void rt_setmetrics(const struct rt_addrinfo *, struct rtentry *); static int rt_ifdelroute(const struct rtentry *rt, void *arg); static struct rtentry *rt_unlinkrte(struct radix_node_head *rnh, struct rt_addrinfo *info, int *perror); static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info); #ifdef RADIX_MPATH static struct radix_node *rt_mpath_unlink(struct radix_node_head *rnh, struct rt_addrinfo *info, struct rtentry *rto, int *perror); #endif static int rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags); struct if_mtuinfo { struct ifnet *ifp; int mtu; }; static int if_updatemtu_cb(struct radix_node *, void *); /* * handler for net.my_fibnum */ static int sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) { int fibnum; int error; fibnum = curthread->td_proc->p_fibnum; error = sysctl_handle_int(oidp, &fibnum, 0, req); return (error); } SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD, NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller"); static __inline struct radix_node_head ** rt_tables_get_rnh_ptr(int table, int fam) { struct radix_node_head **rnh; KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.", __func__)); KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.", __func__)); /* rnh is [fib=0][af=0]. */ rnh = (struct radix_node_head **)V_rt_tables; /* Get the offset to the requested table and fam. */ rnh += table * (AF_MAX+1) + fam; return (rnh); } struct radix_node_head * rt_tables_get_rnh(int table, int fam) { return (*rt_tables_get_rnh_ptr(table, fam)); } /* * route initialization must occur before ip6_init2(), which happenas at * SI_ORDER_MIDDLE. */ static void route_init(void) { /* whack the tunable ints into line. */ if (rt_numfibs > RT_MAXFIBS) rt_numfibs = RT_MAXFIBS; if (rt_numfibs == 0) rt_numfibs = 1; } SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0); static int rtentry_zinit(void *mem, int size, int how) { struct rtentry *rt = mem; rt->rt_pksent = counter_u64_alloc(how); if (rt->rt_pksent == NULL) return (ENOMEM); RT_LOCK_INIT(rt); return (0); } static void rtentry_zfini(void *mem, int size) { struct rtentry *rt = mem; RT_LOCK_DESTROY(rt); counter_u64_free(rt->rt_pksent); } static int rtentry_ctor(void *mem, int size, void *arg, int how) { struct rtentry *rt = mem; bzero(rt, offsetof(struct rtentry, rt_endzero)); counter_u64_zero(rt->rt_pksent); rt->rt_chain = NULL; return (0); } static void rtentry_dtor(void *mem, int size, void *arg) { struct rtentry *rt = mem; RT_UNLOCK_COND(rt); } static void vnet_route_init(const void *unused __unused) { struct domain *dom; struct radix_node_head **rnh; int table; int fam; V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) * sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO); V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), rtentry_ctor, rtentry_dtor, rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0); for (dom = domains; dom; dom = dom->dom_next) { if (dom->dom_rtattach == NULL) continue; for (table = 0; table < rt_numfibs; table++) { fam = dom->dom_family; if (table != 0 && fam != AF_INET6 && fam != AF_INET) break; rnh = rt_tables_get_rnh_ptr(table, fam); if (rnh == NULL) panic("%s: rnh NULL", __func__); dom->dom_rtattach((void **)rnh, 0); } } } VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, vnet_route_init, 0); #ifdef VIMAGE static void vnet_route_uninit(const void *unused __unused) { int table; int fam; struct domain *dom; struct radix_node_head **rnh; for (dom = domains; dom; dom = dom->dom_next) { if (dom->dom_rtdetach == NULL) continue; for (table = 0; table < rt_numfibs; table++) { fam = dom->dom_family; if (table != 0 && fam != AF_INET6 && fam != AF_INET) break; rnh = rt_tables_get_rnh_ptr(table, fam); if (rnh == NULL) panic("%s: rnh NULL", __func__); dom->dom_rtdetach((void **)rnh, 0); } } free(V_rt_tables, M_RTABLE); uma_zdestroy(V_rtzone); } VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, vnet_route_uninit, 0); #endif #ifndef _SYS_SYSPROTO_H_ struct setfib_args { int fibnum; }; #endif int sys_setfib(struct thread *td, struct setfib_args *uap) { if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) return EINVAL; td->td_proc->p_fibnum = uap->fibnum; return (0); } /* * Packet routing routines. */ void rtalloc(struct route *ro) { rtalloc_ign_fib(ro, 0UL, RT_DEFAULT_FIB); } void rtalloc_fib(struct route *ro, u_int fibnum) { rtalloc_ign_fib(ro, 0UL, fibnum); } void rtalloc_ign(struct route *ro, u_long ignore) { struct rtentry *rt; if ((rt = ro->ro_rt) != NULL) { if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) return; RTFREE(rt); ro->ro_rt = NULL; } ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); } void rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum) { struct rtentry *rt; if ((rt = ro->ro_rt) != NULL) { if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) return; RTFREE(rt); ro->ro_rt = NULL; } ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); } /* * Look up the route that matches the address given * Or, at least try.. Create a cloned route if needed. * * The returned route, if any, is locked. */ struct rtentry * rtalloc1(struct sockaddr *dst, int report, u_long ignflags) { return (rtalloc1_fib(dst, report, ignflags, RT_DEFAULT_FIB)); } struct rtentry * rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum) { struct radix_node_head *rnh; struct radix_node *rn; struct rtentry *newrt; struct rt_addrinfo info; int err = 0, msgtype = RTM_MISS; int needlock; KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); rnh = rt_tables_get_rnh(fibnum, dst->sa_family); newrt = NULL; if (rnh == NULL) goto miss; /* * Look up the address in the table for that Address Family */ needlock = !(ignflags & RTF_RNH_LOCKED); if (needlock) RADIX_NODE_HEAD_RLOCK(rnh); #ifdef INVARIANTS else RADIX_NODE_HEAD_LOCK_ASSERT(rnh); #endif rn = rnh->rnh_matchaddr(dst, rnh); if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { newrt = RNTORT(rn); RT_LOCK(newrt); RT_ADDREF(newrt); if (needlock) RADIX_NODE_HEAD_RUNLOCK(rnh); goto done; } else if (needlock) RADIX_NODE_HEAD_RUNLOCK(rnh); /* * Either we hit the root or couldn't find any match, * Which basically means * "caint get there frm here" */ miss: V_rtstat.rts_unreach++; if (report) { /* * If required, report the failure to the supervising * Authorities. * For a delete, this is not an error. (report == 0) */ bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; rt_missmsg_fib(msgtype, &info, 0, err, fibnum); } done: if (newrt) RT_LOCK_ASSERT(newrt); return (newrt); } /* * Remove a reference count from an rtentry. * If the count gets low enough, take it out of the routing table */ void rtfree(struct rtentry *rt) { struct radix_node_head *rnh; KASSERT(rt != NULL,("%s: NULL rt", __func__)); rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); KASSERT(rnh != NULL,("%s: NULL rnh", __func__)); RT_LOCK_ASSERT(rt); /* * The callers should use RTFREE_LOCKED() or RTFREE(), so * we should come here exactly with the last reference. */ RT_REMREF(rt); if (rt->rt_refcnt > 0) { log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt); goto done; } /* * On last reference give the "close method" a chance * to cleanup private state. This also permits (for * IPv4 and IPv6) a chance to decide if the routing table * entry should be purged immediately or at a later time. * When an immediate purge is to happen the close routine * typically calls rtexpunge which clears the RTF_UP flag * on the entry so that the code below reclaims the storage. */ if (rt->rt_refcnt == 0 && rnh->rnh_close) rnh->rnh_close((struct radix_node *)rt, rnh); /* * If we are no longer "up" (and ref == 0) * then we can free the resources associated * with the route. */ if ((rt->rt_flags & RTF_UP) == 0) { if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) panic("rtfree 2"); /* * the rtentry must have been removed from the routing table * so it is represented in rttrash.. remove that now. */ V_rttrash--; #ifdef DIAGNOSTIC if (rt->rt_refcnt < 0) { printf("rtfree: %p not freed (neg refs)\n", rt); goto done; } #endif /* * release references on items we hold them on.. * e.g other routes and ifaddrs. */ if (rt->rt_ifa) ifa_free(rt->rt_ifa); /* * The key is separatly alloc'd so free it (see rt_setgate()). * This also frees the gateway, as they are always malloc'd * together. */ R_Free(rt_key(rt)); /* * and the rtentry itself of course */ uma_zfree(V_rtzone, rt); return; } done: RT_UNLOCK(rt); } /* * Force a routing table entry to the specified * destination to go through the given gateway. * Normally called as a result of a routing redirect * message from the network layer. */ void rtredirect(struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct sockaddr *src) { rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB); } void rtredirect_fib(struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct sockaddr *src, u_int fibnum) { - struct rtentry *rt, *rt0 = NULL; + struct rtentry *rt; int error = 0; short *stat = NULL; struct rt_addrinfo info; struct ifaddr *ifa; struct radix_node_head *rnh; ifa = NULL; rnh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rnh == NULL) { error = EAFNOSUPPORT; goto out; } /* verify the gateway is directly reachable */ if ((ifa = ifa_ifwithnet(gateway, 0, fibnum)) == NULL) { error = ENETUNREACH; goto out; } rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */ /* * If the redirect isn't from our current router for this dst, * it's either old or wrong. If it redirects us to ourselves, * we have a routing loop, perhaps as a result of an interface * going down recently. */ if (!(flags & RTF_DONE) && rt) { if (!sa_equal(src, rt->rt_gateway)) { error = EINVAL; goto done; } if (rt->rt_ifa != ifa && ifa->ifa_addr->sa_family != AF_LINK) { error = EINVAL; goto done; } } if ((flags & RTF_GATEWAY) && ifa_ifwithaddr_check(gateway)) { error = EHOSTUNREACH; goto done; } /* * Create a new entry if we just got back a wildcard entry * or the lookup failed. This is necessary for hosts * which use routing redirects generated by smart gateways * to dynamically build the routing tables. */ if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) goto create; /* * Don't listen to the redirect if it's * for a route to an interface. */ if (rt->rt_flags & RTF_GATEWAY) { if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { /* * Changing from route to net => route to host. * Create new route, rather than smashing route to net. */ create: - rt0 = rt; + RTFREE(rt); rt = NULL; flags |= RTF_DYNAMIC; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; info.rti_ifa = ifa; info.rti_flags = flags; - if (rt0 != NULL) - RT_UNLOCK(rt0); /* drop lock to avoid LOR with RNH */ error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); if (rt != NULL) { RT_LOCK(rt); - if (rt0 != NULL) - EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst); flags = rt->rt_flags; } - if (rt0 != NULL) - RTFREE(rt0); stat = &V_rtstat.rts_dynamic; } else { - struct rtentry *gwrt; /* * Smash the current notion of the gateway to * this destination. Should check about netmask!!! */ if ((flags & RTF_GATEWAY) == 0) rt->rt_flags &= ~RTF_GATEWAY; rt->rt_flags |= RTF_MODIFIED; flags |= RTF_MODIFIED; stat = &V_rtstat.rts_newgateway; /* * add the key and gateway (in one malloc'd chunk). */ RT_UNLOCK(rt); RADIX_NODE_HEAD_LOCK(rnh); RT_LOCK(rt); rt_setgate(rt, rt_key(rt), gateway); - gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED); RADIX_NODE_HEAD_UNLOCK(rnh); - EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst); - if (gwrt) - RTFREE_LOCKED(gwrt); } } else error = EHOSTUNREACH; done: if (rt) RTFREE_LOCKED(rt); out: if (error) V_rtstat.rts_badredirect++; else if (stat != NULL) (*stat)++; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; info.rti_info[RTAX_AUTHOR] = src; rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum); if (ifa != NULL) ifa_free(ifa); } int rtioctl(u_long req, caddr_t data) { return (rtioctl_fib(req, data, RT_DEFAULT_FIB)); } /* * Routing table ioctl interface. */ int rtioctl_fib(u_long req, caddr_t data, u_int fibnum) { /* * If more ioctl commands are added here, make sure the proper * super-user checks are being performed because it is possible for * prison-root to make it this far if raw sockets have been enabled * in jails. */ #ifdef INET /* Multicast goop, grrr... */ return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP; #else /* INET */ return ENXIO; #endif /* INET */ } struct ifaddr * ifa_ifwithroute(int flags, const struct sockaddr *dst, struct sockaddr *gateway, u_int fibnum) { struct ifaddr *ifa; int not_found = 0; if ((flags & RTF_GATEWAY) == 0) { /* * If we are adding a route to an interface, * and the interface is a pt to pt link * we should search for the destination * as our clue to the interface. Otherwise * we can use the local address. */ ifa = NULL; if (flags & RTF_HOST) ifa = ifa_ifwithdstaddr(dst, fibnum); if (ifa == NULL) ifa = ifa_ifwithaddr(gateway); } else { /* * If we are adding a route to a remote net * or host, the gateway may still be on the * other end of a pt to pt link. */ ifa = ifa_ifwithdstaddr(gateway, fibnum); } if (ifa == NULL) ifa = ifa_ifwithnet(gateway, 0, fibnum); if (ifa == NULL) { struct rtentry *rt = rtalloc1_fib(gateway, 0, 0, fibnum); if (rt == NULL) return (NULL); /* * dismiss a gateway that is reachable only * through the default router */ switch (gateway->sa_family) { case AF_INET: if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY) not_found = 1; break; case AF_INET6: if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr)) not_found = 1; break; default: break; } if (!not_found && rt->rt_ifa != NULL) { ifa = rt->rt_ifa; ifa_ref(ifa); } RT_REMREF(rt); RT_UNLOCK(rt); if (not_found || ifa == NULL) return (NULL); } if (ifa->ifa_addr->sa_family != dst->sa_family) { struct ifaddr *oifa = ifa; ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); if (ifa == NULL) ifa = oifa; else ifa_free(oifa); } return (ifa); } /* * Do appropriate manipulations of a routing tree given * all the bits of info needed */ int rtrequest(int req, struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct rtentry **ret_nrt) { return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, RT_DEFAULT_FIB)); } int rtrequest_fib(int req, struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct rtentry **ret_nrt, u_int fibnum) { struct rt_addrinfo info; if (dst->sa_len == 0) return(EINVAL); bzero((caddr_t)&info, sizeof(info)); info.rti_flags = flags; info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; return rtrequest1_fib(req, &info, ret_nrt, fibnum); } /* * Copy most of @rt data into @info. * * If @flags contains NHR_COPY, copies dst,netmask and gw to the * pointers specified by @info structure. Assume such pointers * are zeroed sockaddr-like structures with sa_len field initialized * to reflect size of the provided buffer. if no NHR_COPY is specified, * point dst,netmask and gw @info fields to appropriate @rt values. * * if @flags contains NHR_REF, do refcouting on rt_ifp. * * Returns 0 on success. */ int rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags) { struct rt_metrics *rmx; struct sockaddr *src, *dst; int sa_len; if (flags & NHR_COPY) { /* Copy destination if dst is non-zero */ src = rt_key(rt); dst = info->rti_info[RTAX_DST]; sa_len = src->sa_len; - if (src != NULL && dst != NULL) { + if (dst != NULL) { if (src->sa_len > dst->sa_len) return (ENOMEM); memcpy(dst, src, src->sa_len); info->rti_addrs |= RTA_DST; } /* Copy mask if set && dst is non-zero */ src = rt_mask(rt); dst = info->rti_info[RTAX_NETMASK]; if (src != NULL && dst != NULL) { /* * Radix stores different value in sa_len, * assume rt_mask() to have the same length * as rt_key() */ if (sa_len > dst->sa_len) return (ENOMEM); memcpy(dst, src, src->sa_len); info->rti_addrs |= RTA_NETMASK; } /* Copy gateway is set && dst is non-zero */ src = rt->rt_gateway; dst = info->rti_info[RTAX_GATEWAY]; if ((rt->rt_flags & RTF_GATEWAY) && src != NULL && dst != NULL){ if (src->sa_len > dst->sa_len) return (ENOMEM); memcpy(dst, src, src->sa_len); info->rti_addrs |= RTA_GATEWAY; } } else { info->rti_info[RTAX_DST] = rt_key(rt); info->rti_addrs |= RTA_DST; if (rt_mask(rt) != NULL) { info->rti_info[RTAX_NETMASK] = rt_mask(rt); info->rti_addrs |= RTA_NETMASK; } if (rt->rt_flags & RTF_GATEWAY) { info->rti_info[RTAX_GATEWAY] = rt->rt_gateway; info->rti_addrs |= RTA_GATEWAY; } } rmx = info->rti_rmx; if (rmx != NULL) { info->rti_mflags |= RTV_MTU; rmx->rmx_mtu = rt->rt_mtu; } info->rti_flags = rt->rt_flags; info->rti_ifp = rt->rt_ifp; info->rti_ifa = rt->rt_ifa; if (flags & NHR_REF) { /* Do 'traditional' refcouting */ if_ref(info->rti_ifp); } return (0); } /* * Lookups up route entry for @dst in RIB database for fib @fibnum. * Exports entry data to @info using rt_exportinfo(). * * if @flags contains NHR_REF, refcouting is performed on rt_ifp. * All references can be released later by calling rib_free_info() * * Returns 0 on success. * Returns ENOENT for lookup failure, ENOMEM for export failure. */ int rib_lookup_info(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags, uint32_t flowid, struct rt_addrinfo *info) { struct radix_node_head *rh; struct radix_node *rn; struct rtentry *rt; int error; KASSERT((fibnum < rt_numfibs), ("rib_lookup_rte: bad fibnum")); rh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rh == NULL) return (ENOENT); RADIX_NODE_HEAD_RLOCK(rh); rn = rh->rnh_matchaddr(__DECONST(void *, dst), rh); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rt = RNTORT(rn); /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(rt->rt_ifp)) { flags = (flags & NHR_REF) | NHR_COPY; error = rt_exportinfo(rt, info, flags); RADIX_NODE_HEAD_RUNLOCK(rh); return (error); } } RADIX_NODE_HEAD_RUNLOCK(rh); return (ENOENT); } /* * Releases all references acquired by rib_lookup_info() when * called with NHR_REF flags. */ void rib_free_info(struct rt_addrinfo *info) { if_rele(info->rti_ifp); } /* * Iterates over all existing fibs in system calling * @setwa_f function prior to traversing each fib. * Calls @wa_f function for each element in current fib. * If af is not AF_UNSPEC, iterates over fibs in particular * address family. */ void rt_foreach_fib_walk(int af, rt_setwarg_t *setwa_f, rt_walktree_f_t *wa_f, void *arg) { struct radix_node_head *rnh; uint32_t fibnum; int i; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { /* Do we want some specific family? */ if (af != AF_UNSPEC) { rnh = rt_tables_get_rnh(fibnum, af); if (rnh == NULL) continue; if (setwa_f != NULL) setwa_f(rnh, fibnum, af, arg); RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, (walktree_f_t *)wa_f, arg); RADIX_NODE_HEAD_UNLOCK(rnh); continue; } for (i = 1; i <= AF_MAX; i++) { rnh = rt_tables_get_rnh(fibnum, i); if (rnh == NULL) continue; if (setwa_f != NULL) setwa_f(rnh, fibnum, i, arg); RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, (walktree_f_t *)wa_f, arg); RADIX_NODE_HEAD_UNLOCK(rnh); } } } struct rt_delinfo { struct rt_addrinfo info; struct radix_node_head *rnh; struct rtentry *head; }; /* * Conditionally unlinks @rn from radix tree based * on info data passed in @arg. */ static int rt_checkdelroute(struct radix_node *rn, void *arg) { struct rt_delinfo *di; struct rt_addrinfo *info; struct rtentry *rt; int error; di = (struct rt_delinfo *)arg; rt = (struct rtentry *)rn; info = &di->info; error = 0; info->rti_info[RTAX_DST] = rt_key(rt); info->rti_info[RTAX_NETMASK] = rt_mask(rt); info->rti_info[RTAX_GATEWAY] = rt->rt_gateway; rt = rt_unlinkrte(di->rnh, info, &error); if (rt == NULL) { /* Either not allowed or not matched. Skip entry */ return (0); } /* Entry was unlinked. Add to the list and return */ rt->rt_chain = di->head; di->head = rt; return (0); } /* * Iterates over all existing fibs in system. * Deletes each element for which @filter_f function returned * non-zero value. * If @af is not AF_UNSPEC, iterates over fibs in particular * address family. */ void rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg) { struct radix_node_head *rnh; struct rt_delinfo di; struct rtentry *rt; uint32_t fibnum; int i, start, end; bzero(&di, sizeof(di)); di.info.rti_filter = filter_f; di.info.rti_filterdata = arg; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { /* Do we want some specific family? */ if (af != AF_UNSPEC) { start = af; end = af; } else { start = 1; end = AF_MAX; } for (i = start; i <= end; i++) { rnh = rt_tables_get_rnh(fibnum, i); if (rnh == NULL) continue; di.rnh = rnh; RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, rt_checkdelroute, &di); RADIX_NODE_HEAD_UNLOCK(rnh); if (di.head == NULL) continue; /* We might have something to reclaim */ while (di.head != NULL) { rt = di.head; di.head = rt->rt_chain; rt->rt_chain = NULL; /* TODO std rt -> rt_addrinfo export */ di.info.rti_info[RTAX_DST] = rt_key(rt); di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); rt_notifydelete(rt, &di.info); RTFREE_LOCKED(rt); } } } } /* * Delete Routes for a Network Interface * * Called for each routing entry via the rnh->rnh_walktree() call above * to delete all route entries referencing a detaching network interface. * * Arguments: * rt pointer to rtentry * arg argument passed to rnh->rnh_walktree() - detaching interface * * Returns: * 0 successful * errno failed - reason indicated */ static int rt_ifdelroute(const struct rtentry *rt, void *arg) { struct ifnet *ifp = arg; if (rt->rt_ifp != ifp) return (0); /* * Protect (sorta) against walktree recursion problems * with cloned routes */ if ((rt->rt_flags & RTF_UP) == 0) return (0); return (1); } /* * Delete all remaining routes using this interface * Unfortuneatly the only way to do this is to slog through * the entire routing table looking for routes which point * to this interface...oh well... */ void rt_flushifroutes(struct ifnet *ifp) { rt_foreach_fib_walk_del(AF_UNSPEC, rt_ifdelroute, ifp); } /* * Conditionally unlinks rtentry matching data inside @info from @rnh. * Returns unlinked, locked and referenced @rtentry on success, * Returns NULL and sets @perror to: * ESRCH - if prefix was not found, * EADDRINUSE - if trying to delete PINNED route without appropriate flag. * ENOENT - if supplied filter function returned 0 (not matched). */ static struct rtentry * rt_unlinkrte(struct radix_node_head *rnh, struct rt_addrinfo *info, int *perror) { struct sockaddr *dst, *netmask; struct rtentry *rt; struct radix_node *rn; dst = info->rti_info[RTAX_DST]; netmask = info->rti_info[RTAX_NETMASK]; rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, rnh); if (rt == NULL) { *perror = ESRCH; return (NULL); } if ((info->rti_flags & RTF_PINNED) == 0) { /* Check if target route can be deleted */ if (rt->rt_flags & RTF_PINNED) { *perror = EADDRINUSE; return (NULL); } } if (info->rti_filter != NULL) { if (info->rti_filter(rt, info->rti_filterdata) == 0) { /* Not matched */ *perror = ENOENT; return (NULL); } /* * Filter function requested rte deletion. * Ease the caller work by filling in remaining info * from that particular entry. */ info->rti_info[RTAX_GATEWAY] = rt->rt_gateway; } /* * Remove the item from the tree and return it. * Complain if it is not there and do no more processing. */ *perror = ESRCH; #ifdef RADIX_MPATH if (rn_mpath_capable(rnh)) rn = rt_mpath_unlink(rnh, info, rt, perror); else #endif rn = rnh->rnh_deladdr(dst, netmask, rnh); if (rn == NULL) return (NULL); if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) panic ("rtrequest delete"); rt = RNTORT(rn); RT_LOCK(rt); RT_ADDREF(rt); rt->rt_flags &= ~RTF_UP; *perror = 0; return (rt); } static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info) { struct ifaddr *ifa; /* * give the protocol a chance to keep things in sync. */ ifa = rt->rt_ifa; if (ifa != NULL && ifa->ifa_rtrequest != NULL) ifa->ifa_rtrequest(RTM_DELETE, rt, info); /* * One more rtentry floating around that is not * linked to the routing table. rttrash will be decremented * when RTFREE(rt) is eventually called. */ V_rttrash++; } /* * These (questionable) definitions of apparent local variables apply * to the next two functions. XXXXXX!!! */ #define dst info->rti_info[RTAX_DST] #define gateway info->rti_info[RTAX_GATEWAY] #define netmask info->rti_info[RTAX_NETMASK] #define ifaaddr info->rti_info[RTAX_IFA] #define ifpaddr info->rti_info[RTAX_IFP] #define flags info->rti_flags /* * Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined, * it will be referenced so the caller must free it. */ int rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) { struct ifaddr *ifa; int error = 0; /* * ifp may be specified by sockaddr_dl * when protocol address is ambiguous. */ if (info->rti_ifp == NULL && ifpaddr != NULL && ifpaddr->sa_family == AF_LINK && (ifa = ifa_ifwithnet(ifpaddr, 0, fibnum)) != NULL) { info->rti_ifp = ifa->ifa_ifp; ifa_free(ifa); } if (info->rti_ifa == NULL && ifaaddr != NULL) info->rti_ifa = ifa_ifwithaddr(ifaaddr); if (info->rti_ifa == NULL) { struct sockaddr *sa; sa = ifaaddr != NULL ? ifaaddr : (gateway != NULL ? gateway : dst); if (sa != NULL && info->rti_ifp != NULL) info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); else if (dst != NULL && gateway != NULL) info->rti_ifa = ifa_ifwithroute(flags, dst, gateway, fibnum); else if (sa != NULL) info->rti_ifa = ifa_ifwithroute(flags, sa, sa, fibnum); } if ((ifa = info->rti_ifa) != NULL) { if (info->rti_ifp == NULL) info->rti_ifp = ifa->ifa_ifp; } else error = ENETUNREACH; return (error); } static int if_updatemtu_cb(struct radix_node *rn, void *arg) { struct rtentry *rt; struct if_mtuinfo *ifmtu; rt = (struct rtentry *)rn; ifmtu = (struct if_mtuinfo *)arg; if (rt->rt_ifp != ifmtu->ifp) return (0); if (rt->rt_mtu >= ifmtu->mtu) { /* We have to decrease mtu regardless of flags */ rt->rt_mtu = ifmtu->mtu; return (0); } /* * New MTU is bigger. Check if are allowed to alter it */ if ((rt->rt_flags & (RTF_FIXEDMTU | RTF_GATEWAY | RTF_HOST)) != 0) { /* * Skip routes with user-supplied MTU and * non-interface routes */ return (0); } /* We are safe to update route MTU */ rt->rt_mtu = ifmtu->mtu; return (0); } void rt_updatemtu(struct ifnet *ifp) { struct if_mtuinfo ifmtu; struct radix_node_head *rnh; int i, j; ifmtu.ifp = ifp; /* * Try to update rt_mtu for all routes using this interface * Unfortunately the only way to do this is to traverse all * routing tables in all fibs/domains. */ for (i = 1; i <= AF_MAX; i++) { ifmtu.mtu = if_getmtu_family(ifp, i); for (j = 0; j < rt_numfibs; j++) { rnh = rt_tables_get_rnh(j, i); if (rnh == NULL) continue; RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, if_updatemtu_cb, &ifmtu); RADIX_NODE_HEAD_UNLOCK(rnh); } } } #if 0 int p_sockaddr(char *buf, int buflen, struct sockaddr *s); int rt_print(char *buf, int buflen, struct rtentry *rt); int p_sockaddr(char *buf, int buflen, struct sockaddr *s) { void *paddr = NULL; switch (s->sa_family) { case AF_INET: paddr = &((struct sockaddr_in *)s)->sin_addr; break; case AF_INET6: paddr = &((struct sockaddr_in6 *)s)->sin6_addr; break; } if (paddr == NULL) return (0); if (inet_ntop(s->sa_family, paddr, buf, buflen) == NULL) return (0); return (strlen(buf)); } int rt_print(char *buf, int buflen, struct rtentry *rt) { struct sockaddr *addr, *mask; int i = 0; addr = rt_key(rt); mask = rt_mask(rt); i = p_sockaddr(buf, buflen, addr); if (!(rt->rt_flags & RTF_HOST)) { buf[i++] = '/'; i += p_sockaddr(buf + i, buflen - i, mask); } if (rt->rt_flags & RTF_GATEWAY) { buf[i++] = '>'; i += p_sockaddr(buf + i, buflen - i, rt->rt_gateway); } return (i); } #endif #ifdef RADIX_MPATH /* * Deletes key for single-path routes, unlinks rtentry with * gateway specified in @info from multi-path routes. * * Returnes unlinked entry. In case of failure, returns NULL * and sets @perror to ESRCH. */ static struct radix_node * rt_mpath_unlink(struct radix_node_head *rnh, struct rt_addrinfo *info, struct rtentry *rto, int *perror) { /* * if we got multipath routes, we require users to specify * a matching RTAX_GATEWAY. */ struct rtentry *rt; // *rto = NULL; struct radix_node *rn; struct sockaddr *gw; gw = info->rti_info[RTAX_GATEWAY]; rt = rt_mpath_matchgate(rto, gw); if (rt == NULL) { *perror = ESRCH; return (NULL); } /* * this is the first entry in the chain */ if (rto == rt) { rn = rn_mpath_next((struct radix_node *)rt); /* * there is another entry, now it's active */ if (rn) { rto = RNTORT(rn); RT_LOCK(rto); rto->rt_flags |= RTF_UP; RT_UNLOCK(rto); } else if (rt->rt_flags & RTF_GATEWAY) { /* * For gateway routes, we need to * make sure that we we are deleting * the correct gateway. * rt_mpath_matchgate() does not * check the case when there is only * one route in the chain. */ if (gw && (rt->rt_gateway->sa_len != gw->sa_len || memcmp(rt->rt_gateway, gw, gw->sa_len))) { *perror = ESRCH; return (NULL); } } /* * use the normal delete code to remove * the first entry */ rn = rnh->rnh_deladdr(dst, netmask, rnh); *perror = 0; return (rn); } /* * if the entry is 2nd and on up */ if (rt_mpath_deldup(rto, rt) == 0) panic ("rtrequest1: rt_mpath_deldup"); *perror = 0; rn = (struct radix_node *)rt; return (rn); } #endif #ifdef FLOWTABLE static struct rtentry * rt_flowtable_check_route(struct radix_node_head *rnh, struct rt_addrinfo *info) { #if defined(INET6) || defined(INET) struct radix_node *rn; #endif struct rtentry *rt0; rt0 = NULL; /* "flow-table" only supports IPv6 and IPv4 at the moment. */ switch (dst->sa_family) { #ifdef INET6 case AF_INET6: #endif #ifdef INET case AF_INET: #endif #if defined(INET6) || defined(INET) rn = rnh->rnh_matchaddr(dst, rnh); if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { struct sockaddr *mask; u_char *m, *n; int len; /* * compare mask to see if the new route is * more specific than the existing one */ rt0 = RNTORT(rn); RT_LOCK(rt0); RT_ADDREF(rt0); RT_UNLOCK(rt0); /* * A host route is already present, so * leave the flow-table entries as is. */ if (rt0->rt_flags & RTF_HOST) { RTFREE(rt0); rt0 = NULL; } else if (!(flags & RTF_HOST) && netmask) { mask = rt_mask(rt0); len = mask->sa_len; m = (u_char *)mask; n = (u_char *)netmask; while (len-- > 0) { if (*n != *m) break; n++; m++; } if (len == 0 || (*n < *m)) { RTFREE(rt0); rt0 = NULL; } } } #endif/* INET6 || INET */ } return (rt0); } #endif int rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, u_int fibnum) { int error = 0; struct rtentry *rt, *rt_old; #ifdef FLOWTABLE struct rtentry *rt0; #endif struct radix_node *rn; struct radix_node_head *rnh; struct ifaddr *ifa; struct sockaddr *ndst; struct sockaddr_storage mdst; KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); KASSERT((flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked")); switch (dst->sa_family) { case AF_INET6: case AF_INET: /* We support multiple FIBs. */ break; default: fibnum = RT_DEFAULT_FIB; break; } /* * Find the correct routing tree to use for this Address Family */ rnh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rnh == NULL) return (EAFNOSUPPORT); /* * If we are adding a host route then we don't want to put * a netmask in the tree, nor do we want to clone it. */ if (flags & RTF_HOST) netmask = NULL; switch (req) { case RTM_DELETE: if (netmask) { rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); dst = (struct sockaddr *)&mdst; } RADIX_NODE_HEAD_LOCK(rnh); rt = rt_unlinkrte(rnh, info, &error); RADIX_NODE_HEAD_UNLOCK(rnh); if (error != 0) return (error); rt_notifydelete(rt, info); /* * If the caller wants it, then it can have it, * but it's up to it to free the rtentry as we won't be * doing it. */ if (ret_nrt) { *ret_nrt = rt; RT_UNLOCK(rt); } else RTFREE_LOCKED(rt); break; case RTM_RESOLVE: /* * resolve was only used for route cloning * here for compat */ break; case RTM_ADD: if ((flags & RTF_GATEWAY) && !gateway) return (EINVAL); if (dst && gateway && (dst->sa_family != gateway->sa_family) && (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) return (EINVAL); if (info->rti_ifa == NULL) { error = rt_getifa_fib(info, fibnum); if (error) return (error); } else ifa_ref(info->rti_ifa); ifa = info->rti_ifa; rt = uma_zalloc(V_rtzone, M_NOWAIT); if (rt == NULL) { ifa_free(ifa); return (ENOBUFS); } rt->rt_flags = RTF_UP | flags; rt->rt_fibnum = fibnum; /* * Add the gateway. Possibly re-malloc-ing the storage for it. */ if ((error = rt_setgate(rt, dst, gateway)) != 0) { ifa_free(ifa); uma_zfree(V_rtzone, rt); return (error); } /* * point to the (possibly newly malloc'd) dest address. */ ndst = (struct sockaddr *)rt_key(rt); /* * make sure it contains the value we want (masked if needed). */ if (netmask) { rt_maskedcopy(dst, ndst, netmask); } else bcopy(dst, ndst, dst->sa_len); /* * We use the ifa reference returned by rt_getifa_fib(). * This moved from below so that rnh->rnh_addaddr() can * examine the ifa and ifa->ifa_ifp if it so desires. */ rt->rt_ifa = ifa; rt->rt_ifp = ifa->ifa_ifp; rt->rt_weight = 1; rt_setmetrics(info, rt); RADIX_NODE_HEAD_LOCK(rnh); RT_LOCK(rt); #ifdef RADIX_MPATH /* do not permit exactly the same dst/mask/gw pair */ if (rn_mpath_capable(rnh) && rt_mpath_conflict(rnh, rt, netmask)) { RADIX_NODE_HEAD_UNLOCK(rnh); ifa_free(rt->rt_ifa); R_Free(rt_key(rt)); uma_zfree(V_rtzone, rt); return (EEXIST); } #endif #ifdef FLOWTABLE rt0 = rt_flowtable_check_route(rnh, info); #endif /* FLOWTABLE */ /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); rt_old = NULL; if (rn == NULL && (info->rti_flags & RTF_PINNED) != 0) { /* * Force removal and re-try addition * TODO: better multipath&pinned support */ struct sockaddr *info_dst = info->rti_info[RTAX_DST]; info->rti_info[RTAX_DST] = ndst; /* Do not delete existing PINNED(interface) routes */ info->rti_flags &= ~RTF_PINNED; rt_old = rt_unlinkrte(rnh, info, &error); info->rti_flags |= RTF_PINNED; info->rti_info[RTAX_DST] = info_dst; if (rt_old != NULL) rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); } RADIX_NODE_HEAD_UNLOCK(rnh); if (rt_old != NULL) RT_UNLOCK(rt_old); /* * If it still failed to go into the tree, * then un-make it (this should be a function) */ if (rn == NULL) { ifa_free(rt->rt_ifa); R_Free(rt_key(rt)); uma_zfree(V_rtzone, rt); #ifdef FLOWTABLE if (rt0 != NULL) RTFREE(rt0); #endif return (EEXIST); } #ifdef FLOWTABLE else if (rt0 != NULL) { flowtable_route_flush(dst->sa_family, rt0); RTFREE(rt0); } #endif if (rt_old != NULL) { rt_notifydelete(rt_old, info); RTFREE(rt_old); } /* * If this protocol has something to add to this then * allow it to do that as well. */ if (ifa->ifa_rtrequest) ifa->ifa_rtrequest(req, rt, info); /* * actually return a resultant rtentry and * give the caller a single reference. */ if (ret_nrt) { *ret_nrt = rt; RT_ADDREF(rt); } RT_UNLOCK(rt); break; case RTM_CHANGE: RADIX_NODE_HEAD_LOCK(rnh); error = rtrequest1_fib_change(rnh, info, ret_nrt, fibnum); RADIX_NODE_HEAD_UNLOCK(rnh); break; default: error = EOPNOTSUPP; } return (error); } #undef dst #undef gateway #undef netmask #undef ifaaddr #undef ifpaddr #undef flags static int rtrequest1_fib_change(struct radix_node_head *rnh, struct rt_addrinfo *info, struct rtentry **ret_nrt, u_int fibnum) { struct rtentry *rt = NULL; int error = 0; int free_ifa = 0; int family, mtu; struct if_mtuinfo ifmtu; rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], rnh); if (rt == NULL) return (ESRCH); #ifdef RADIX_MPATH /* * If we got multipath routes, * we require users to specify a matching RTAX_GATEWAY. */ if (rn_mpath_capable(rnh)) { rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]); if (rt == NULL) return (ESRCH); } #endif RT_LOCK(rt); rt_setmetrics(info, rt); /* * New gateway could require new ifaddr, ifp; * flags may also be different; ifp may be specified * by ll sockaddr when protocol address is ambiguous */ if (((rt->rt_flags & RTF_GATEWAY) && info->rti_info[RTAX_GATEWAY] != NULL) || info->rti_info[RTAX_IFP] != NULL || (info->rti_info[RTAX_IFA] != NULL && !sa_equal(info->rti_info[RTAX_IFA], rt->rt_ifa->ifa_addr))) { error = rt_getifa_fib(info, fibnum); if (info->rti_ifa != NULL) free_ifa = 1; if (error != 0) goto bad; } /* Check if outgoing interface has changed */ if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa && rt->rt_ifa != NULL && rt->rt_ifa->ifa_rtrequest != NULL) { rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, info); ifa_free(rt->rt_ifa); } /* Update gateway address */ if (info->rti_info[RTAX_GATEWAY] != NULL) { error = rt_setgate(rt, rt_key(rt), info->rti_info[RTAX_GATEWAY]); if (error != 0) goto bad; rt->rt_flags &= ~RTF_GATEWAY; rt->rt_flags |= (RTF_GATEWAY & info->rti_flags); } if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa) { ifa_ref(info->rti_ifa); rt->rt_ifa = info->rti_ifa; rt->rt_ifp = info->rti_ifp; } /* Allow some flags to be toggled on change. */ rt->rt_flags &= ~RTF_FMASK; rt->rt_flags |= info->rti_flags & RTF_FMASK; if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest != NULL) rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info); /* Alter route MTU if necessary */ if (rt->rt_ifp != NULL) { family = info->rti_info[RTAX_DST]->sa_family; mtu = if_getmtu_family(rt->rt_ifp, family); /* Set default MTU */ if (rt->rt_mtu == 0) rt->rt_mtu = mtu; if (rt->rt_mtu != mtu) { /* Check if we really need to update */ ifmtu.ifp = rt->rt_ifp; ifmtu.mtu = mtu; if_updatemtu_cb(rt->rt_nodes, &ifmtu); } } if (ret_nrt) { *ret_nrt = rt; RT_ADDREF(rt); } bad: RT_UNLOCK(rt); if (free_ifa != 0) ifa_free(info->rti_ifa); return (error); } static void rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt) { if (info->rti_mflags & RTV_MTU) { if (info->rti_rmx->rmx_mtu != 0) { /* * MTU was explicitly provided by user. * Keep it. */ rt->rt_flags |= RTF_FIXEDMTU; } else { /* * User explicitly sets MTU to 0. * Assume rollback to default. */ rt->rt_flags &= ~RTF_FIXEDMTU; } rt->rt_mtu = info->rti_rmx->rmx_mtu; } if (info->rti_mflags & RTV_WEIGHT) rt->rt_weight = info->rti_rmx->rmx_weight; /* Kernel -> userland timebase conversion. */ if (info->rti_mflags & RTV_EXPIRE) rt->rt_expire = info->rti_rmx->rmx_expire ? info->rti_rmx->rmx_expire - time_second + time_uptime : 0; } int rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) { /* XXX dst may be overwritten, can we move this to below */ int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); /* * Prepare to store the gateway in rt->rt_gateway. * Both dst and gateway are stored one after the other in the same * malloc'd chunk. If we have room, we can reuse the old buffer, * rt_gateway already points to the right place. * Otherwise, malloc a new block and update the 'dst' address. */ if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) { caddr_t new; R_Malloc(new, caddr_t, dlen + glen); if (new == NULL) return ENOBUFS; /* * XXX note, we copy from *dst and not *rt_key(rt) because * rt_setgate() can be called to initialize a newly * allocated route entry, in which case rt_key(rt) == NULL * (and also rt->rt_gateway == NULL). * Free()/free() handle a NULL argument just fine. */ bcopy(dst, new, dlen); R_Free(rt_key(rt)); /* free old block, if any */ rt_key(rt) = (struct sockaddr *)new; rt->rt_gateway = (struct sockaddr *)(new + dlen); } /* * Copy the new gateway value into the memory chunk. */ bcopy(gate, rt->rt_gateway, glen); return (0); } void rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) { u_char *cp1 = (u_char *)src; u_char *cp2 = (u_char *)dst; u_char *cp3 = (u_char *)netmask; u_char *cplim = cp2 + *cp3; u_char *cplim2 = cp2 + *cp1; *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ cp3 += 2; if (cplim > cplim2) cplim = cplim2; while (cp2 < cplim) *cp2++ = *cp1++ & *cp3++; if (cp2 < cplim2) bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); } /* * Set up a routing table entry, normally * for an interface. */ #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */ static inline int rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) { struct sockaddr *dst; struct sockaddr *netmask; struct rtentry *rt = NULL; struct rt_addrinfo info; int error = 0; int startfib, endfib; char tempbuf[_SOCKADDR_TMPSIZE]; int didwork = 0; int a_failure = 0; static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; struct radix_node_head *rnh; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; netmask = NULL; } else { dst = ifa->ifa_addr; netmask = ifa->ifa_netmask; } if (dst->sa_len == 0) return(EINVAL); switch (dst->sa_family) { case AF_INET6: case AF_INET: /* We support multiple FIBs. */ break; default: fibnum = RT_DEFAULT_FIB; break; } if (fibnum == RT_ALL_FIBS) { if (V_rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) startfib = endfib = ifa->ifa_ifp->if_fib; else { startfib = 0; endfib = rt_numfibs - 1; } } else { KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum")); startfib = fibnum; endfib = fibnum; } /* * If it's a delete, check that if it exists, * it's on the correct interface or we might scrub * a route to another ifa which would * be confusing at best and possibly worse. */ if (cmd == RTM_DELETE) { /* * It's a delete, so it should already exist.. * If it's a net, mask off the host bits * (Assuming we have a mask) * XXX this is kinda inet specific.. */ if (netmask != NULL) { rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask); dst = (struct sockaddr *)tempbuf; } } /* * Now go through all the requested tables (fibs) and do the * requested action. Realistically, this will either be fib 0 * for protocols that don't do multiple tables or all the * tables for those that do. */ for ( fibnum = startfib; fibnum <= endfib; fibnum++) { if (cmd == RTM_DELETE) { struct radix_node *rn; /* * Look up an rtentry that is in the routing tree and * contains the correct info. */ rnh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rnh == NULL) /* this table doesn't exist but others might */ continue; RADIX_NODE_HEAD_RLOCK(rnh); rn = rnh->rnh_lookup(dst, netmask, rnh); #ifdef RADIX_MPATH if (rn_mpath_capable(rnh)) { if (rn == NULL) error = ESRCH; else { rt = RNTORT(rn); /* * for interface route the * rt->rt_gateway is sockaddr_intf * for cloning ARP entries, so * rt_mpath_matchgate must use the * interface address */ rt = rt_mpath_matchgate(rt, ifa->ifa_addr); if (rt == NULL) error = ESRCH; } } #endif error = (rn == NULL || (rn->rn_flags & RNF_ROOT) || RNTORT(rn)->rt_ifa != ifa); RADIX_NODE_HEAD_RUNLOCK(rnh); if (error) { /* this is only an error if bad on ALL tables */ continue; } } /* * Do the actual request */ bzero((caddr_t)&info, sizeof(info)); info.rti_ifa = ifa; info.rti_flags = flags | (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; info.rti_info[RTAX_DST] = dst; /* * doing this for compatibility reasons */ if (cmd == RTM_ADD) info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl; else info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = netmask; error = rtrequest1_fib(cmd, &info, &rt, fibnum); if (error == 0 && rt != NULL) { /* * notify any listening routing agents of the change */ RT_LOCK(rt); #ifdef RADIX_MPATH /* * in case address alias finds the first address * e.g. ifconfig bge0 192.0.2.246/24 * e.g. ifconfig bge0 192.0.2.247/24 * the address set in the route is 192.0.2.246 * so we need to replace it with 192.0.2.247 */ if (memcmp(rt->rt_ifa->ifa_addr, ifa->ifa_addr, ifa->ifa_addr->sa_len)) { ifa_free(rt->rt_ifa); ifa_ref(ifa); rt->rt_ifp = ifa->ifa_ifp; rt->rt_ifa = ifa; } #endif /* * doing this for compatibility reasons */ if (cmd == RTM_ADD) { ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = rt->rt_ifp->if_type; ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = rt->rt_ifp->if_index; } RT_ADDREF(rt); RT_UNLOCK(rt); rt_newaddrmsg_fib(cmd, ifa, error, rt, fibnum); RT_LOCK(rt); RT_REMREF(rt); if (cmd == RTM_DELETE) { /* * If we are deleting, and we found an entry, * then it's been removed from the tree.. * now throw it away. */ RTFREE_LOCKED(rt); } else { if (cmd == RTM_ADD) { /* * We just wanted to add it.. * we don't actually need a reference. */ RT_REMREF(rt); } RT_UNLOCK(rt); } didwork = 1; } if (error) a_failure = error; } if (cmd == RTM_DELETE) { if (didwork) { error = 0; } else { /* we only give an error if it wasn't in any table */ error = ((flags & RTF_HOST) ? EHOSTUNREACH : ENETUNREACH); } } else { if (a_failure) { /* return an error if any of them failed */ error = a_failure; } } return (error); } /* * Set up a routing table entry, normally * for an interface. */ int rtinit(struct ifaddr *ifa, int cmd, int flags) { struct sockaddr *dst; int fib = RT_DEFAULT_FIB; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; } else { dst = ifa->ifa_addr; } switch (dst->sa_family) { case AF_INET6: case AF_INET: /* We do support multiple FIBs. */ fib = RT_ALL_FIBS; break; } return (rtinit1(ifa, cmd, flags, fib)); } /* * Announce interface address arrival/withdraw * Returns 0 on success. */ int rt_addrmsg(int cmd, struct ifaddr *ifa, int fibnum) { KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, ("unexpected cmd %d", cmd)); KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); #if defined(INET) || defined(INET6) #ifdef SCTP /* * notify the SCTP stack * this will only get called when an address is added/deleted * XXX pass the ifaddr struct instead if ifa->ifa_addr... */ sctp_addr_change(ifa, cmd); #endif /* SCTP */ #endif return (rtsock_addrmsg(cmd, ifa, fibnum)); } /* * Announce route addition/removal. * Users of this function MUST validate input data BEFORE calling. * However we have to be able to handle invalid data: * if some userland app sends us "invalid" route message (invalid mask, * no dst, wrong address families, etc...) we need to pass it back * to app (and any other rtsock consumers) with rtm_errno field set to * non-zero value. * Returns 0 on success. */ int rt_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt, int fibnum) { KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, ("unexpected cmd %d", cmd)); KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); KASSERT(rt_key(rt) != NULL, (":%s: rt_key must be supplied", __func__)); return (rtsock_routemsg(cmd, ifp, error, rt, fibnum)); } void rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) { rt_newaddrmsg_fib(cmd, ifa, error, rt, RT_ALL_FIBS); } /* * This is called to generate messages from the routing socket * indicating a network interface has had addresses associated with it. */ void rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt, int fibnum) { KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, ("unexpected cmd %u", cmd)); KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); if (cmd == RTM_ADD) { rt_addrmsg(cmd, ifa, fibnum); if (rt != NULL) rt_routemsg(cmd, ifa->ifa_ifp, error, rt, fibnum); } else { if (rt != NULL) rt_routemsg(cmd, ifa->ifa_ifp, error, rt, fibnum); rt_addrmsg(cmd, ifa, fibnum); } } Index: projects/clang380-import/sys/net/route.h =================================================================== --- projects/clang380-import/sys/net/route.h (revision 293686) +++ projects/clang380-import/sys/net/route.h (revision 293687) @@ -1,475 +1,491 @@ /*- * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)route.h 8.4 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _NET_ROUTE_H_ #define _NET_ROUTE_H_ #include #include /* * Kernel resident routing tables. * * The routing tables are initialized when interface addresses * are set by making entries for all directly connected interfaces. */ /* * Struct route consiste of a destination address, * a route entry pointer, link-layer prepend data pointer along * with its length. */ struct route { struct rtentry *ro_rt; char *ro_prepend; uint16_t ro_plen; uint16_t ro_flags; uint16_t ro_mtu; /* saved ro_rt mtu */ uint16_t spare; struct sockaddr ro_dst; }; #define RT_L2_ME_BIT 2 /* dst L2 addr is our address */ #define RT_MAY_LOOP_BIT 3 /* dst may require loop copy */ #define RT_HAS_HEADER_BIT 4 /* mbuf already have its header prepended */ #define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */ #define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */ -#define RT_L2_ME (1 << RT_L2_ME_BIT) -#define RT_MAY_LOOP (1 << RT_MAY_LOOP_BIT) -#define RT_HAS_HEADER (1 << RT_HAS_HEADER_BIT) +#define RT_L2_ME (1 << RT_L2_ME_BIT) /* 0x0004 */ +#define RT_MAY_LOOP (1 << RT_MAY_LOOP_BIT) /* 0x0008 */ +#define RT_HAS_HEADER (1 << RT_HAS_HEADER_BIT) /* 0x0010 */ +#define RT_REJECT 0x0020 /* Destination is reject */ +#define RT_BLACKHOLE 0x0040 /* Destination is blackhole */ +#define RT_HAS_GW 0x0080 /* Destination has GW */ + struct rt_metrics { u_long rmx_locks; /* Kernel must leave these values alone */ u_long rmx_mtu; /* MTU for this path */ u_long rmx_hopcount; /* max hops expected */ u_long rmx_expire; /* lifetime for route, e.g. redirect */ u_long rmx_recvpipe; /* inbound delay-bandwidth product */ u_long rmx_sendpipe; /* outbound delay-bandwidth product */ u_long rmx_ssthresh; /* outbound gateway buffer limit */ u_long rmx_rtt; /* estimated round trip time */ u_long rmx_rttvar; /* estimated rtt variance */ u_long rmx_pksent; /* packets sent using this route */ u_long rmx_weight; /* route weight */ u_long rmx_filler[3]; /* will be used for T/TCP later */ }; /* * rmx_rtt and rmx_rttvar are stored as microseconds; * RTTTOPRHZ(rtt) converts to a value suitable for use * by a protocol slowtimo counter. */ #define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */ #define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ)) /* lle state is exported in rmx_state rt_metrics field */ #define rmx_state rmx_weight #define RT_DEFAULT_FIB 0 /* Explicitly mark fib=0 restricted cases */ #define RT_ALL_FIBS -1 /* Announce event for every fib */ #ifdef _KERNEL extern u_int rt_numfibs; /* number of usable routing tables */ VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */ #define V_rt_add_addr_allfibs VNET(rt_add_addr_allfibs) #endif /* * We distinguish between routes to hosts and routes to networks, * preferring the former if available. For each route we infer * the interface to use from the gateway address supplied when * the route was entered. Routes that forward packets through * gateways are marked so that the output routines know to address the * gateway rather than the ultimate destination. */ #ifndef RNF_NORMAL #include #ifdef RADIX_MPATH #include #endif #endif #if defined(_KERNEL) || defined(_WANT_RTENTRY) struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ /* * XXX struct rtentry must begin with a struct radix_node (or two!) * because the code does some casts of a 'struct radix_node *' * to a 'struct rtentry *' */ #define rt_key(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_key))) #define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask))) struct sockaddr *rt_gateway; /* value */ struct ifnet *rt_ifp; /* the answer: interface to use */ struct ifaddr *rt_ifa; /* the answer: interface address to use */ int rt_flags; /* up/down?, host/net */ int rt_refcnt; /* # held references */ u_int rt_fibnum; /* which FIB */ u_long rt_mtu; /* MTU for this path */ u_long rt_weight; /* absolute weight */ u_long rt_expire; /* lifetime for route, e.g. redirect */ #define rt_endzero rt_pksent counter_u64_t rt_pksent; /* packets sent using this route */ struct mtx rt_mtx; /* mutex for routing entry */ struct rtentry *rt_chain; /* pointer to next rtentry to delete */ }; #endif /* _KERNEL || _WANT_RTENTRY */ #define RTF_UP 0x1 /* route usable */ #define RTF_GATEWAY 0x2 /* destination is a gateway */ #define RTF_HOST 0x4 /* host entry (net otherwise) */ #define RTF_REJECT 0x8 /* host or net unreachable */ #define RTF_DYNAMIC 0x10 /* created dynamically (by redirect) */ #define RTF_MODIFIED 0x20 /* modified dynamically (by redirect) */ #define RTF_DONE 0x40 /* message confirmed */ /* 0x80 unused, was RTF_DELCLONE */ /* 0x100 unused, was RTF_CLONING */ #define RTF_XRESOLVE 0x200 /* external daemon resolves name */ #define RTF_LLINFO 0x400 /* DEPRECATED - exists ONLY for backward compatibility */ #define RTF_LLDATA 0x400 /* used by apps to add/del L2 entries */ #define RTF_STATIC 0x800 /* manually added */ #define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */ #define RTF_PROTO2 0x4000 /* protocol specific routing flag */ #define RTF_PROTO1 0x8000 /* protocol specific routing flag */ /* 0x10000 unused, was RTF_PRCLONING */ /* 0x20000 unused, was RTF_WASCLONED */ #define RTF_PROTO3 0x40000 /* protocol specific routing flag */ #define RTF_FIXEDMTU 0x80000 /* MTU was explicitly specified */ #define RTF_PINNED 0x100000 /* route is immutable */ #define RTF_LOCAL 0x200000 /* route represents a local address */ #define RTF_BROADCAST 0x400000 /* route represents a bcast address */ #define RTF_MULTICAST 0x800000 /* route represents a mcast address */ /* 0x8000000 and up unassigned */ #define RTF_STICKY 0x10000000 /* always route dst->src */ #define RTF_RNH_LOCKED 0x40000000 /* radix node head is locked */ #define RTF_GWFLAG_COMPAT 0x80000000 /* a compatibility bit for interacting with existing routing apps */ /* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */ #define RTF_FMASK \ (RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \ RTF_REJECT | RTF_STATIC | RTF_STICKY) /* * fib_ nexthop API flags. */ /* Consumer-visible nexthop info flags */ #define NHF_REJECT 0x0010 /* RTF_REJECT */ #define NHF_BLACKHOLE 0x0020 /* RTF_BLACKHOLE */ #define NHF_REDIRECT 0x0040 /* RTF_DYNAMIC|RTF_MODIFIED */ #define NHF_DEFAULT 0x0080 /* Default route */ #define NHF_BROADCAST 0x0100 /* RTF_BROADCAST */ #define NHF_GATEWAY 0x0200 /* RTF_GATEWAY */ /* Nexthop request flags */ #define NHR_IFAIF 0x01 /* Return ifa_ifp interface */ #define NHR_REF 0x02 /* For future use */ /* Control plane route request flags */ #define NHR_COPY 0x100 /* Copy rte data */ /* rte<>nhop translation */ static inline uint16_t fib_rte_to_nh_flags(int rt_flags) { uint16_t res; res = (rt_flags & RTF_REJECT) ? NHF_REJECT : 0; res |= (rt_flags & RTF_BLACKHOLE) ? NHF_BLACKHOLE : 0; res |= (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) ? NHF_REDIRECT : 0; res |= (rt_flags & RTF_BROADCAST) ? NHF_BROADCAST : 0; res |= (rt_flags & RTF_GATEWAY) ? NHF_GATEWAY : 0; return (res); } +#ifdef _KERNEL +/* rte<>ro_flags translation */ +static inline void +rt_update_ro_flags(struct route *ro) +{ + int rt_flags = ro->ro_rt->rt_flags; + + ro->ro_flags &= ~ (RT_REJECT|RT_BLACKHOLE|RT_HAS_GW); + + ro->ro_flags |= (rt_flags & RTF_REJECT) ? RT_REJECT : 0; + ro->ro_flags |= (rt_flags & RTF_BLACKHOLE) ? RT_BLACKHOLE : 0; + ro->ro_flags |= (rt_flags & RTF_GATEWAY) ? RT_HAS_GW : 0; +} +#endif + /* * Routing statistics. */ struct rtstat { short rts_badredirect; /* bogus redirect calls */ short rts_dynamic; /* routes created by redirects */ short rts_newgateway; /* routes modified by redirects */ short rts_unreach; /* lookups which failed */ short rts_wildcard; /* lookups satisfied by a wildcard */ }; /* * Structures for routing messages. */ struct rt_msghdr { u_short rtm_msglen; /* to skip over non-understood messages */ u_char rtm_version; /* future binary compatibility */ u_char rtm_type; /* message type */ u_short rtm_index; /* index for associated ifp */ int rtm_flags; /* flags, incl. kern & message, e.g. DONE */ int rtm_addrs; /* bitmask identifying sockaddrs in msg */ pid_t rtm_pid; /* identify sender */ int rtm_seq; /* for sender to identify action */ int rtm_errno; /* why failed */ int rtm_fmask; /* bitmask used in RTM_CHANGE message */ u_long rtm_inits; /* which metrics we are initializing */ struct rt_metrics rtm_rmx; /* metrics themselves */ }; #define RTM_VERSION 5 /* Up the ante and ignore older versions */ /* * Message types. */ #define RTM_ADD 0x1 /* Add Route */ #define RTM_DELETE 0x2 /* Delete Route */ #define RTM_CHANGE 0x3 /* Change Metrics or flags */ #define RTM_GET 0x4 /* Report Metrics */ #define RTM_LOSING 0x5 /* Kernel Suspects Partitioning */ #define RTM_REDIRECT 0x6 /* Told to use different route */ #define RTM_MISS 0x7 /* Lookup failed on this address */ #define RTM_LOCK 0x8 /* fix specified metrics */ /* 0x9 */ /* 0xa */ #define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */ #define RTM_NEWADDR 0xc /* address being added to iface */ #define RTM_DELADDR 0xd /* address being removed from iface */ #define RTM_IFINFO 0xe /* iface going up/down etc. */ #define RTM_NEWMADDR 0xf /* mcast group membership being added to if */ #define RTM_DELMADDR 0x10 /* mcast group membership being deleted */ #define RTM_IFANNOUNCE 0x11 /* iface arrival/departure */ #define RTM_IEEE80211 0x12 /* IEEE80211 wireless event */ /* * Bitmask values for rtm_inits and rmx_locks. */ #define RTV_MTU 0x1 /* init or lock _mtu */ #define RTV_HOPCOUNT 0x2 /* init or lock _hopcount */ #define RTV_EXPIRE 0x4 /* init or lock _expire */ #define RTV_RPIPE 0x8 /* init or lock _recvpipe */ #define RTV_SPIPE 0x10 /* init or lock _sendpipe */ #define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */ #define RTV_RTT 0x40 /* init or lock _rtt */ #define RTV_RTTVAR 0x80 /* init or lock _rttvar */ #define RTV_WEIGHT 0x100 /* init or lock _weight */ /* * Bitmask values for rtm_addrs. */ #define RTA_DST 0x1 /* destination sockaddr present */ #define RTA_GATEWAY 0x2 /* gateway sockaddr present */ #define RTA_NETMASK 0x4 /* netmask sockaddr present */ #define RTA_GENMASK 0x8 /* cloning mask sockaddr present */ #define RTA_IFP 0x10 /* interface name sockaddr present */ #define RTA_IFA 0x20 /* interface addr sockaddr present */ #define RTA_AUTHOR 0x40 /* sockaddr for author of redirect */ #define RTA_BRD 0x80 /* for NEWADDR, broadcast or p-p dest addr */ /* * Index offsets for sockaddr array for alternate internal encoding. */ #define RTAX_DST 0 /* destination sockaddr present */ #define RTAX_GATEWAY 1 /* gateway sockaddr present */ #define RTAX_NETMASK 2 /* netmask sockaddr present */ #define RTAX_GENMASK 3 /* cloning mask sockaddr present */ #define RTAX_IFP 4 /* interface name sockaddr present */ #define RTAX_IFA 5 /* interface addr sockaddr present */ #define RTAX_AUTHOR 6 /* sockaddr for author of redirect */ #define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */ #define RTAX_MAX 8 /* size of array to allocate */ typedef int rt_filter_f_t(const struct rtentry *, void *); struct rt_addrinfo { int rti_addrs; /* Route RTF_ flags */ int rti_flags; /* Route RTF_ flags */ struct sockaddr *rti_info[RTAX_MAX]; /* Sockaddr data */ struct ifaddr *rti_ifa; /* value of rt_ifa addr */ struct ifnet *rti_ifp; /* route interface */ rt_filter_f_t *rti_filter; /* filter function */ void *rti_filterdata; /* filter paramenters */ u_long rti_mflags; /* metrics RTV_ flags */ u_long rti_spare; /* Will be used for fib */ struct rt_metrics *rti_rmx; /* Pointer to route metrics */ }; /* * This macro returns the size of a struct sockaddr when passed * through a routing socket. Basically we round up sa_len to * a multiple of sizeof(long), with a minimum of sizeof(long). * The check for a NULL pointer is just a convenience, probably never used. * The case sa_len == 0 should only apply to empty structures. */ #define SA_SIZE(sa) \ ( (!(sa) || ((struct sockaddr *)(sa))->sa_len == 0) ? \ sizeof(long) : \ 1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(long) - 1) ) ) #define sa_equal(a, b) ( \ (((const struct sockaddr *)(a))->sa_len == ((const struct sockaddr *)(b))->sa_len) && \ (bcmp((a), (b), ((const struct sockaddr *)(b))->sa_len) == 0)) #ifdef _KERNEL #define RT_LINK_IS_UP(ifp) (!((ifp)->if_capabilities & IFCAP_LINKSTATE) \ || (ifp)->if_link_state == LINK_STATE_UP) #define RT_LOCK_INIT(_rt) \ mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK) #define RT_LOCK(_rt) mtx_lock(&(_rt)->rt_mtx) #define RT_UNLOCK(_rt) mtx_unlock(&(_rt)->rt_mtx) #define RT_LOCK_DESTROY(_rt) mtx_destroy(&(_rt)->rt_mtx) #define RT_LOCK_ASSERT(_rt) mtx_assert(&(_rt)->rt_mtx, MA_OWNED) #define RT_UNLOCK_COND(_rt) do { \ if (mtx_owned(&(_rt)->rt_mtx)) \ mtx_unlock(&(_rt)->rt_mtx); \ } while (0) #define RT_ADDREF(_rt) do { \ RT_LOCK_ASSERT(_rt); \ KASSERT((_rt)->rt_refcnt >= 0, \ ("negative refcnt %d", (_rt)->rt_refcnt)); \ (_rt)->rt_refcnt++; \ } while (0) #define RT_REMREF(_rt) do { \ RT_LOCK_ASSERT(_rt); \ KASSERT((_rt)->rt_refcnt > 0, \ ("bogus refcnt %d", (_rt)->rt_refcnt)); \ (_rt)->rt_refcnt--; \ } while (0) #define RTFREE_LOCKED(_rt) do { \ if ((_rt)->rt_refcnt <= 1) \ rtfree(_rt); \ else { \ RT_REMREF(_rt); \ RT_UNLOCK(_rt); \ } \ /* guard against invalid refs */ \ _rt = 0; \ } while (0) #define RTFREE(_rt) do { \ RT_LOCK(_rt); \ RTFREE_LOCKED(_rt); \ } while (0) #define RO_RTFREE(_ro) do { \ if ((_ro)->ro_rt) { \ if ((_ro)->ro_flags & RT_NORTREF) { \ (_ro)->ro_flags &= ~RT_NORTREF; \ (_ro)->ro_rt = NULL; \ } else { \ RT_LOCK((_ro)->ro_rt); \ RTFREE_LOCKED((_ro)->ro_rt); \ } \ } \ } while (0) struct radix_node_head *rt_tables_get_rnh(int, int); struct ifmultiaddr; void rt_ieee80211msg(struct ifnet *, int, void *, size_t); void rt_ifannouncemsg(struct ifnet *, int); void rt_ifmsg(struct ifnet *); void rt_missmsg(int, struct rt_addrinfo *, int, int); void rt_missmsg_fib(int, struct rt_addrinfo *, int, int, int); void rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *); void rt_newaddrmsg_fib(int, struct ifaddr *, int, struct rtentry *, int); int rt_addrmsg(int, struct ifaddr *, int); int rt_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int); void rt_newmaddrmsg(int, struct ifmultiaddr *); int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *); void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *); int rtsock_addrmsg(int, struct ifaddr *, int); int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int); /* * Note the following locking behavior: * * rtalloc_ign() and rtalloc() return ro->ro_rt unlocked * * rtalloc1() returns a locked rtentry * * rtfree() and RTFREE_LOCKED() require a locked rtentry * * RTFREE() uses an unlocked entry. */ int rt_expunge(struct radix_node_head *, struct rtentry *); void rtfree(struct rtentry *); int rt_check(struct rtentry **, struct rtentry **, struct sockaddr *); void rt_updatemtu(struct ifnet *); typedef int rt_walktree_f_t(struct rtentry *, void *); typedef void rt_setwarg_t(struct radix_node_head *, uint32_t, int, void *); void rt_foreach_fib_walk(int af, rt_setwarg_t *, rt_walktree_f_t *, void *); void rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg); void rt_flushifroutes(struct ifnet *ifp); /* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */ /* Thes are used by old code not yet converted to use multiple FIBS */ void rtalloc_ign(struct route *ro, u_long ignflags); void rtalloc(struct route *ro); /* XXX deprecated, use rtalloc_ign(ro, 0) */ struct rtentry *rtalloc1(struct sockaddr *, int, u_long); int rtinit(struct ifaddr *, int, int); int rtioctl(u_long, caddr_t); void rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *); int rtrequest(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **); /* XXX MRT NEW VERSIONS THAT USE FIBs * For now the protocol indepedent versions are the same as the AF_INET ones * but this will change.. */ int rt_getifa_fib(struct rt_addrinfo *, u_int fibnum); void rtalloc_ign_fib(struct route *ro, u_long ignflags, u_int fibnum); void rtalloc_fib(struct route *ro, u_int fibnum); struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int); int rtioctl_fib(u_long, caddr_t, u_int); void rtredirect_fib(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *, u_int); int rtrequest_fib(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int); int rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int); int rib_lookup_info(uint32_t, const struct sockaddr *, uint32_t, uint32_t, struct rt_addrinfo *); void rib_free_info(struct rt_addrinfo *info); -#include -typedef void (*rtevent_redirect_fn)(void *, struct rtentry *, struct rtentry *, struct sockaddr *); -EVENTHANDLER_DECLARE(route_redirect_event, rtevent_redirect_fn); #endif #endif Index: projects/clang380-import/sys/net80211/ieee80211_freebsd.c =================================================================== --- projects/clang380-import/sys/net80211/ieee80211_freebsd.c (revision 293686) +++ projects/clang380-import/sys/net80211/ieee80211_freebsd.c (revision 293687) @@ -1,915 +1,916 @@ /*- * Copyright (c) 2003-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * IEEE 802.11 support (FreeBSD-specific code) */ #include "opt_wlan.h" #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include SYSCTL_NODE(_net, OID_AUTO, wlan, CTLFLAG_RD, 0, "IEEE 80211 parameters"); #ifdef IEEE80211_DEBUG int ieee80211_debug = 0; SYSCTL_INT(_net_wlan, OID_AUTO, debug, CTLFLAG_RW, &ieee80211_debug, 0, "debugging printfs"); #endif static MALLOC_DEFINE(M_80211_COM, "80211com", "802.11 com state"); static const char wlanname[] = "wlan"; static struct if_clone *wlan_cloner; static int wlan_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct ieee80211_clone_params cp; struct ieee80211vap *vap; struct ieee80211com *ic; int error; error = copyin(params, &cp, sizeof(cp)); if (error) return error; ic = ieee80211_find_com(cp.icp_parent); if (ic == NULL) return ENXIO; if (cp.icp_opmode >= IEEE80211_OPMODE_MAX) { ic_printf(ic, "%s: invalid opmode %d\n", __func__, cp.icp_opmode); return EINVAL; } if ((ic->ic_caps & ieee80211_opcap[cp.icp_opmode]) == 0) { ic_printf(ic, "%s mode not supported\n", ieee80211_opmode_name[cp.icp_opmode]); return EOPNOTSUPP; } if ((cp.icp_flags & IEEE80211_CLONE_TDMA) && #ifdef IEEE80211_SUPPORT_TDMA (ic->ic_caps & IEEE80211_C_TDMA) == 0 #else (1) #endif ) { ic_printf(ic, "TDMA not supported\n"); return EOPNOTSUPP; } vap = ic->ic_vap_create(ic, wlanname, unit, cp.icp_opmode, cp.icp_flags, cp.icp_bssid, cp.icp_flags & IEEE80211_CLONE_MACADDR ? cp.icp_macaddr : ic->ic_macaddr); return (vap == NULL ? EIO : 0); } static void wlan_clone_destroy(struct ifnet *ifp) { struct ieee80211vap *vap = ifp->if_softc; struct ieee80211com *ic = vap->iv_ic; ic->ic_vap_delete(vap); } void ieee80211_vap_destroy(struct ieee80211vap *vap) { CURVNET_SET(vap->iv_ifp->if_vnet); if_clone_destroyif(wlan_cloner, vap->iv_ifp); CURVNET_RESTORE(); } int ieee80211_sysctl_msecs_ticks(SYSCTL_HANDLER_ARGS) { int msecs = ticks_to_msecs(*(int *)arg1); int error, t; error = sysctl_handle_int(oidp, &msecs, 0, req); if (error || !req->newptr) return error; t = msecs_to_ticks(msecs); *(int *)arg1 = (t < 1) ? 1 : t; return 0; } static int ieee80211_sysctl_inact(SYSCTL_HANDLER_ARGS) { int inact = (*(int *)arg1) * IEEE80211_INACT_WAIT; int error; error = sysctl_handle_int(oidp, &inact, 0, req); if (error || !req->newptr) return error; *(int *)arg1 = inact / IEEE80211_INACT_WAIT; return 0; } static int ieee80211_sysctl_parent(SYSCTL_HANDLER_ARGS) { struct ieee80211com *ic = arg1; return SYSCTL_OUT_STR(req, ic->ic_name); } static int ieee80211_sysctl_radar(SYSCTL_HANDLER_ARGS) { struct ieee80211com *ic = arg1; int t = 0, error; error = sysctl_handle_int(oidp, &t, 0, req); if (error || !req->newptr) return error; IEEE80211_LOCK(ic); ieee80211_dfs_notify_radar(ic, ic->ic_curchan); IEEE80211_UNLOCK(ic); return 0; } void ieee80211_sysctl_attach(struct ieee80211com *ic) { } void ieee80211_sysctl_detach(struct ieee80211com *ic) { } void ieee80211_sysctl_vattach(struct ieee80211vap *vap) { struct ifnet *ifp = vap->iv_ifp; struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; char num[14]; /* sufficient for 32 bits */ ctx = (struct sysctl_ctx_list *) IEEE80211_MALLOC(sizeof(struct sysctl_ctx_list), M_DEVBUF, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (ctx == NULL) { if_printf(ifp, "%s: cannot allocate sysctl context!\n", __func__); return; } sysctl_ctx_init(ctx); snprintf(num, sizeof(num), "%u", ifp->if_dunit); oid = SYSCTL_ADD_NODE(ctx, &SYSCTL_NODE_CHILDREN(_net, wlan), OID_AUTO, num, CTLFLAG_RD, NULL, ""); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "%parent", CTLTYPE_STRING | CTLFLAG_RD, vap->iv_ic, 0, ieee80211_sysctl_parent, "A", "parent device"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "driver_caps", CTLFLAG_RW, &vap->iv_caps, 0, "driver capabilities"); #ifdef IEEE80211_DEBUG vap->iv_debug = ieee80211_debug; SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "debug", CTLFLAG_RW, &vap->iv_debug, 0, "control debugging printfs"); #endif SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "bmiss_max", CTLFLAG_RW, &vap->iv_bmiss_max, 0, "consecutive beacon misses before scanning"); /* XXX inherit from tunables */ SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "inact_run", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_run, 0, ieee80211_sysctl_inact, "I", "station inactivity timeout (sec)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "inact_probe", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_probe, 0, ieee80211_sysctl_inact, "I", "station inactivity probe timeout (sec)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "inact_auth", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_auth, 0, ieee80211_sysctl_inact, "I", "station authentication timeout (sec)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "inact_init", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_init, 0, ieee80211_sysctl_inact, "I", "station initial state timeout (sec)"); if (vap->iv_htcaps & IEEE80211_HTC_HT) { SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "ampdu_mintraffic_bk", CTLFLAG_RW, &vap->iv_ampdu_mintraffic[WME_AC_BK], 0, "BK traffic tx aggr threshold (pps)"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "ampdu_mintraffic_be", CTLFLAG_RW, &vap->iv_ampdu_mintraffic[WME_AC_BE], 0, "BE traffic tx aggr threshold (pps)"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "ampdu_mintraffic_vo", CTLFLAG_RW, &vap->iv_ampdu_mintraffic[WME_AC_VO], 0, "VO traffic tx aggr threshold (pps)"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "ampdu_mintraffic_vi", CTLFLAG_RW, &vap->iv_ampdu_mintraffic[WME_AC_VI], 0, "VI traffic tx aggr threshold (pps)"); } if (vap->iv_caps & IEEE80211_C_DFS) { SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "radar", CTLTYPE_INT | CTLFLAG_RW, vap->iv_ic, 0, ieee80211_sysctl_radar, "I", "simulate radar event"); } vap->iv_sysctl = ctx; vap->iv_oid = oid; } void ieee80211_sysctl_vdetach(struct ieee80211vap *vap) { if (vap->iv_sysctl != NULL) { sysctl_ctx_free(vap->iv_sysctl); IEEE80211_FREE(vap->iv_sysctl, M_DEVBUF); vap->iv_sysctl = NULL; } } int ieee80211_node_dectestref(struct ieee80211_node *ni) { /* XXX need equivalent of atomic_dec_and_test */ atomic_subtract_int(&ni->ni_refcnt, 1); return atomic_cmpset_int(&ni->ni_refcnt, 0, 1); } void ieee80211_drain_ifq(struct ifqueue *ifq) { struct ieee80211_node *ni; struct mbuf *m; for (;;) { IF_DEQUEUE(ifq, m); if (m == NULL) break; ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; KASSERT(ni != NULL, ("frame w/o node")); ieee80211_free_node(ni); m->m_pkthdr.rcvif = NULL; m_freem(m); } } void ieee80211_flush_ifq(struct ifqueue *ifq, struct ieee80211vap *vap) { struct ieee80211_node *ni; struct mbuf *m, **mprev; IF_LOCK(ifq); mprev = &ifq->ifq_head; while ((m = *mprev) != NULL) { ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; if (ni != NULL && ni->ni_vap == vap) { *mprev = m->m_nextpkt; /* remove from list */ ifq->ifq_len--; m_freem(m); ieee80211_free_node(ni); /* reclaim ref */ } else mprev = &m->m_nextpkt; } /* recalculate tail ptr */ m = ifq->ifq_head; for (; m != NULL && m->m_nextpkt != NULL; m = m->m_nextpkt) ; ifq->ifq_tail = m; IF_UNLOCK(ifq); } /* * As above, for mbufs allocated with m_gethdr/MGETHDR * or initialized by M_COPY_PKTHDR. */ #define MC_ALIGN(m, len) \ do { \ (m)->m_data += (MCLBYTES - (len)) &~ (sizeof(long) - 1); \ } while (/* CONSTCOND */ 0) /* * Allocate and setup a management frame of the specified * size. We return the mbuf and a pointer to the start * of the contiguous data area that's been reserved based * on the packet length. The data area is forced to 32-bit * alignment and the buffer length to a multiple of 4 bytes. * This is done mainly so beacon frames (that require this) * can use this interface too. */ struct mbuf * ieee80211_getmgtframe(uint8_t **frm, int headroom, int pktlen) { struct mbuf *m; u_int len; /* * NB: we know the mbuf routines will align the data area * so we don't need to do anything special. */ len = roundup2(headroom + pktlen, 4); KASSERT(len <= MCLBYTES, ("802.11 mgt frame too large: %u", len)); if (len < MINCLSIZE) { m = m_gethdr(M_NOWAIT, MT_DATA); /* * Align the data in case additional headers are added. * This should only happen when a WEP header is added * which only happens for shared key authentication mgt * frames which all fit in MHLEN. */ if (m != NULL) M_ALIGN(m, len); } else { m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m != NULL) MC_ALIGN(m, len); } if (m != NULL) { m->m_data += headroom; *frm = m->m_data; } return m; } #ifndef __NO_STRICT_ALIGNMENT /* * Re-align the payload in the mbuf. This is mainly used (right now) * to handle IP header alignment requirements on certain architectures. */ struct mbuf * ieee80211_realign(struct ieee80211vap *vap, struct mbuf *m, size_t align) { int pktlen, space; struct mbuf *n; pktlen = m->m_pkthdr.len; space = pktlen + align; if (space < MINCLSIZE) n = m_gethdr(M_NOWAIT, MT_DATA); else { n = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, space <= MCLBYTES ? MCLBYTES : #if MJUMPAGESIZE != MCLBYTES space <= MJUMPAGESIZE ? MJUMPAGESIZE : #endif space <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES); } if (__predict_true(n != NULL)) { m_move_pkthdr(n, m); n->m_data = (caddr_t)(ALIGN(n->m_data + align) - align); m_copydata(m, 0, pktlen, mtod(n, caddr_t)); n->m_len = pktlen; } else { IEEE80211_DISCARD(vap, IEEE80211_MSG_ANY, mtod(m, const struct ieee80211_frame *), NULL, "%s", "no mbuf to realign"); vap->iv_stats.is_rx_badalign++; } m_freem(m); return n; } #endif /* !__NO_STRICT_ALIGNMENT */ int ieee80211_add_callback(struct mbuf *m, void (*func)(struct ieee80211_node *, void *, int), void *arg) { struct m_tag *mtag; struct ieee80211_cb *cb; mtag = m_tag_alloc(MTAG_ABI_NET80211, NET80211_TAG_CALLBACK, sizeof(struct ieee80211_cb), M_NOWAIT); if (mtag == NULL) return 0; cb = (struct ieee80211_cb *)(mtag+1); cb->func = func; cb->arg = arg; m_tag_prepend(m, mtag); m->m_flags |= M_TXCB; return 1; } int ieee80211_add_xmit_params(struct mbuf *m, const struct ieee80211_bpf_params *params) { struct m_tag *mtag; struct ieee80211_tx_params *tx; mtag = m_tag_alloc(MTAG_ABI_NET80211, NET80211_TAG_XMIT_PARAMS, sizeof(struct ieee80211_tx_params), M_NOWAIT); if (mtag == NULL) return (0); tx = (struct ieee80211_tx_params *)(mtag+1); memcpy(&tx->params, params, sizeof(struct ieee80211_bpf_params)); m_tag_prepend(m, mtag); return (1); } int ieee80211_get_xmit_params(struct mbuf *m, struct ieee80211_bpf_params *params) { struct m_tag *mtag; struct ieee80211_tx_params *tx; mtag = m_tag_locate(m, MTAG_ABI_NET80211, NET80211_TAG_XMIT_PARAMS, NULL); if (mtag == NULL) return (-1); tx = (struct ieee80211_tx_params *)(mtag + 1); memcpy(params, &tx->params, sizeof(struct ieee80211_bpf_params)); return (0); } void ieee80211_process_callback(struct ieee80211_node *ni, struct mbuf *m, int status) { struct m_tag *mtag; mtag = m_tag_locate(m, MTAG_ABI_NET80211, NET80211_TAG_CALLBACK, NULL); if (mtag != NULL) { struct ieee80211_cb *cb = (struct ieee80211_cb *)(mtag+1); cb->func(ni, cb->arg, status); } } /* * Add RX parameters to the given mbuf. * * Returns 1 if OK, 0 on error. */ int ieee80211_add_rx_params(struct mbuf *m, const struct ieee80211_rx_stats *rxs) { struct m_tag *mtag; struct ieee80211_rx_params *rx; mtag = m_tag_alloc(MTAG_ABI_NET80211, NET80211_TAG_RECV_PARAMS, sizeof(struct ieee80211_rx_stats), M_NOWAIT); if (mtag == NULL) return (0); rx = (struct ieee80211_rx_params *)(mtag + 1); memcpy(&rx->params, rxs, sizeof(*rxs)); m_tag_prepend(m, mtag); return (1); } int ieee80211_get_rx_params(struct mbuf *m, struct ieee80211_rx_stats *rxs) { struct m_tag *mtag; struct ieee80211_rx_params *rx; mtag = m_tag_locate(m, MTAG_ABI_NET80211, NET80211_TAG_RECV_PARAMS, NULL); if (mtag == NULL) return (-1); rx = (struct ieee80211_rx_params *)(mtag + 1); memcpy(rxs, &rx->params, sizeof(*rxs)); return (0); } /* * Transmit a frame to the parent interface. */ int ieee80211_parent_xmitpkt(struct ieee80211com *ic, struct mbuf *m) { int error; /* * Assert the IC TX lock is held - this enforces the * processing -> queuing order is maintained */ IEEE80211_TX_LOCK_ASSERT(ic); error = ic->ic_transmit(ic, m); if (error) { struct ieee80211_node *ni; ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; /* XXX number of fragments */ if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); ieee80211_free_node(ni); ieee80211_free_mbuf(m); } return (error); } /* * Transmit a frame to the VAP interface. */ int ieee80211_vap_xmitpkt(struct ieee80211vap *vap, struct mbuf *m) { struct ifnet *ifp = vap->iv_ifp; /* * When transmitting via the VAP, we shouldn't hold * any IC TX lock as the VAP TX path will acquire it. */ IEEE80211_TX_UNLOCK_ASSERT(vap->iv_ic); return (ifp->if_transmit(ifp, m)); } #include void get_random_bytes(void *p, size_t n) { uint8_t *dp = p; while (n > 0) { uint32_t v = arc4random(); size_t nb = n > sizeof(uint32_t) ? sizeof(uint32_t) : n; bcopy(&v, dp, n > sizeof(uint32_t) ? sizeof(uint32_t) : n); dp += sizeof(uint32_t), n -= nb; } } /* * Helper function for events that pass just a single mac address. */ static void notify_macaddr(struct ifnet *ifp, int op, const uint8_t mac[IEEE80211_ADDR_LEN]) { struct ieee80211_join_event iev; CURVNET_SET(ifp->if_vnet); memset(&iev, 0, sizeof(iev)); IEEE80211_ADDR_COPY(iev.iev_addr, mac); rt_ieee80211msg(ifp, op, &iev, sizeof(iev)); CURVNET_RESTORE(); } void ieee80211_notify_node_join(struct ieee80211_node *ni, int newassoc) { struct ieee80211vap *vap = ni->ni_vap; struct ifnet *ifp = vap->iv_ifp; CURVNET_SET_QUIET(ifp->if_vnet); IEEE80211_NOTE(vap, IEEE80211_MSG_NODE, ni, "%snode join", (ni == vap->iv_bss) ? "bss " : ""); if (ni == vap->iv_bss) { notify_macaddr(ifp, newassoc ? RTM_IEEE80211_ASSOC : RTM_IEEE80211_REASSOC, ni->ni_bssid); if_link_state_change(ifp, LINK_STATE_UP); } else { notify_macaddr(ifp, newassoc ? RTM_IEEE80211_JOIN : RTM_IEEE80211_REJOIN, ni->ni_macaddr); } CURVNET_RESTORE(); } void ieee80211_notify_node_leave(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ifnet *ifp = vap->iv_ifp; CURVNET_SET_QUIET(ifp->if_vnet); IEEE80211_NOTE(vap, IEEE80211_MSG_NODE, ni, "%snode leave", (ni == vap->iv_bss) ? "bss " : ""); if (ni == vap->iv_bss) { rt_ieee80211msg(ifp, RTM_IEEE80211_DISASSOC, NULL, 0); if_link_state_change(ifp, LINK_STATE_DOWN); } else { /* fire off wireless event station leaving */ notify_macaddr(ifp, RTM_IEEE80211_LEAVE, ni->ni_macaddr); } CURVNET_RESTORE(); } void ieee80211_notify_scan_done(struct ieee80211vap *vap) { struct ifnet *ifp = vap->iv_ifp; IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s\n", "notify scan done"); /* dispatch wireless event indicating scan completed */ CURVNET_SET(ifp->if_vnet); rt_ieee80211msg(ifp, RTM_IEEE80211_SCAN, NULL, 0); CURVNET_RESTORE(); } void ieee80211_notify_replay_failure(struct ieee80211vap *vap, const struct ieee80211_frame *wh, const struct ieee80211_key *k, u_int64_t rsc, int tid) { struct ifnet *ifp = vap->iv_ifp; IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_CRYPTO, wh->i_addr2, "%s replay detected tid %d ", k->wk_cipher->ic_name, tid, (intmax_t) rsc, (intmax_t) k->wk_keyrsc[tid], k->wk_keyix, k->wk_rxkeyix); if (ifp != NULL) { /* NB: for cipher test modules */ struct ieee80211_replay_event iev; IEEE80211_ADDR_COPY(iev.iev_dst, wh->i_addr1); IEEE80211_ADDR_COPY(iev.iev_src, wh->i_addr2); iev.iev_cipher = k->wk_cipher->ic_cipher; if (k->wk_rxkeyix != IEEE80211_KEYIX_NONE) iev.iev_keyix = k->wk_rxkeyix; else iev.iev_keyix = k->wk_keyix; iev.iev_keyrsc = k->wk_keyrsc[tid]; iev.iev_rsc = rsc; CURVNET_SET(ifp->if_vnet); rt_ieee80211msg(ifp, RTM_IEEE80211_REPLAY, &iev, sizeof(iev)); CURVNET_RESTORE(); } } void ieee80211_notify_michael_failure(struct ieee80211vap *vap, const struct ieee80211_frame *wh, u_int keyix) { struct ifnet *ifp = vap->iv_ifp; IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_CRYPTO, wh->i_addr2, "michael MIC verification failed ", keyix); vap->iv_stats.is_rx_tkipmic++; if (ifp != NULL) { /* NB: for cipher test modules */ struct ieee80211_michael_event iev; IEEE80211_ADDR_COPY(iev.iev_dst, wh->i_addr1); IEEE80211_ADDR_COPY(iev.iev_src, wh->i_addr2); iev.iev_cipher = IEEE80211_CIPHER_TKIP; iev.iev_keyix = keyix; CURVNET_SET(ifp->if_vnet); rt_ieee80211msg(ifp, RTM_IEEE80211_MICHAEL, &iev, sizeof(iev)); CURVNET_RESTORE(); } } void ieee80211_notify_wds_discover(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ifnet *ifp = vap->iv_ifp; notify_macaddr(ifp, RTM_IEEE80211_WDS, ni->ni_macaddr); } void ieee80211_notify_csa(struct ieee80211com *ic, const struct ieee80211_channel *c, int mode, int count) { struct ieee80211_csa_event iev; struct ieee80211vap *vap; struct ifnet *ifp; memset(&iev, 0, sizeof(iev)); iev.iev_flags = c->ic_flags; iev.iev_freq = c->ic_freq; iev.iev_ieee = c->ic_ieee; iev.iev_mode = mode; iev.iev_count = count; TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { ifp = vap->iv_ifp; CURVNET_SET(ifp->if_vnet); rt_ieee80211msg(ifp, RTM_IEEE80211_CSA, &iev, sizeof(iev)); CURVNET_RESTORE(); } } void ieee80211_notify_radar(struct ieee80211com *ic, const struct ieee80211_channel *c) { struct ieee80211_radar_event iev; struct ieee80211vap *vap; struct ifnet *ifp; memset(&iev, 0, sizeof(iev)); iev.iev_flags = c->ic_flags; iev.iev_freq = c->ic_freq; iev.iev_ieee = c->ic_ieee; TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { ifp = vap->iv_ifp; CURVNET_SET(ifp->if_vnet); rt_ieee80211msg(ifp, RTM_IEEE80211_RADAR, &iev, sizeof(iev)); CURVNET_RESTORE(); } } void ieee80211_notify_cac(struct ieee80211com *ic, const struct ieee80211_channel *c, enum ieee80211_notify_cac_event type) { struct ieee80211_cac_event iev; struct ieee80211vap *vap; struct ifnet *ifp; memset(&iev, 0, sizeof(iev)); iev.iev_flags = c->ic_flags; iev.iev_freq = c->ic_freq; iev.iev_ieee = c->ic_ieee; iev.iev_type = type; TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { ifp = vap->iv_ifp; CURVNET_SET(ifp->if_vnet); rt_ieee80211msg(ifp, RTM_IEEE80211_CAC, &iev, sizeof(iev)); CURVNET_RESTORE(); } } void ieee80211_notify_node_deauth(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ifnet *ifp = vap->iv_ifp; IEEE80211_NOTE(vap, IEEE80211_MSG_NODE, ni, "%s", "node deauth"); notify_macaddr(ifp, RTM_IEEE80211_DEAUTH, ni->ni_macaddr); } void ieee80211_notify_node_auth(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ifnet *ifp = vap->iv_ifp; IEEE80211_NOTE(vap, IEEE80211_MSG_NODE, ni, "%s", "node auth"); notify_macaddr(ifp, RTM_IEEE80211_AUTH, ni->ni_macaddr); } void ieee80211_notify_country(struct ieee80211vap *vap, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t cc[2]) { struct ifnet *ifp = vap->iv_ifp; struct ieee80211_country_event iev; memset(&iev, 0, sizeof(iev)); IEEE80211_ADDR_COPY(iev.iev_addr, bssid); iev.iev_cc[0] = cc[0]; iev.iev_cc[1] = cc[1]; CURVNET_SET(ifp->if_vnet); rt_ieee80211msg(ifp, RTM_IEEE80211_COUNTRY, &iev, sizeof(iev)); CURVNET_RESTORE(); } void ieee80211_notify_radio(struct ieee80211com *ic, int state) { struct ieee80211_radio_event iev; struct ieee80211vap *vap; struct ifnet *ifp; memset(&iev, 0, sizeof(iev)); iev.iev_state = state; TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { ifp = vap->iv_ifp; CURVNET_SET(ifp->if_vnet); rt_ieee80211msg(ifp, RTM_IEEE80211_RADIO, &iev, sizeof(iev)); CURVNET_RESTORE(); } } void ieee80211_load_module(const char *modname) { #ifdef notyet (void)kern_kldload(curthread, modname, NULL); #else printf("%s: load the %s module by hand for now.\n", __func__, modname); #endif } static eventhandler_tag wlan_bpfevent; static void bpf_track(void *arg, struct ifnet *ifp, int dlt, int attach) { /* NB: identify vap's by if_init */ if (dlt == DLT_IEEE802_11_RADIO && ifp->if_init == ieee80211_init) { struct ieee80211vap *vap = ifp->if_softc; /* * Track bpf radiotap listener state. We mark the vap * to indicate if any listener is present and the com * to indicate if any listener exists on any associated * vap. This flag is used by drivers to prepare radiotap * state only when needed. */ if (attach) { ieee80211_syncflag_ext(vap, IEEE80211_FEXT_BPF); if (vap->iv_opmode == IEEE80211_M_MONITOR) atomic_add_int(&vap->iv_ic->ic_montaps, 1); } else if (!bpf_peers_present(vap->iv_rawbpf)) { ieee80211_syncflag_ext(vap, -IEEE80211_FEXT_BPF); if (vap->iv_opmode == IEEE80211_M_MONITOR) atomic_subtract_int(&vap->iv_ic->ic_montaps, 1); } } } /* * Module glue. * * NB: the module name is "wlan" for compatibility with NetBSD. */ static int wlan_modevent(module_t mod, int type, void *unused) { switch (type) { case MOD_LOAD: if (bootverbose) printf("wlan: <802.11 Link Layer>\n"); wlan_bpfevent = EVENTHANDLER_REGISTER(bpf_track, bpf_track, 0, EVENTHANDLER_PRI_ANY); wlan_cloner = if_clone_simple(wlanname, wlan_clone_create, wlan_clone_destroy, 0); return 0; case MOD_UNLOAD: if_clone_detach(wlan_cloner); EVENTHANDLER_DEREGISTER(bpf_track, wlan_bpfevent); return 0; } return EINVAL; } static moduledata_t wlan_mod = { wlanname, wlan_modevent, 0 }; DECLARE_MODULE(wlan, wlan_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST); MODULE_VERSION(wlan, 1); MODULE_DEPEND(wlan, ether, 1, 1, 1); #ifdef IEEE80211_ALQ MODULE_DEPEND(wlan, alq, 1, 1, 1); #endif /* IEEE80211_ALQ */ Index: projects/clang380-import/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c =================================================================== --- projects/clang380-import/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c (revision 293686) +++ projects/clang380-import/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c (revision 293687) @@ -1,3582 +1,3582 @@ /* * ng_btsocket_rfcomm.c */ /*- * Copyright (c) 2001-2003 Maksim Yevmenkin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $Id: ng_btsocket_rfcomm.c,v 1.28 2003/09/14 23:29:06 max Exp $ * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* MALLOC define */ #ifdef NG_SEPARATE_MALLOC static MALLOC_DEFINE(M_NETGRAPH_BTSOCKET_RFCOMM, "netgraph_btsocks_rfcomm", "Netgraph Bluetooth RFCOMM sockets"); #else #define M_NETGRAPH_BTSOCKET_RFCOMM M_NETGRAPH #endif /* NG_SEPARATE_MALLOC */ /* Debug */ #define NG_BTSOCKET_RFCOMM_INFO \ if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_INFO_LEVEL && \ ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \ printf #define NG_BTSOCKET_RFCOMM_WARN \ if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_WARN_LEVEL && \ ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \ printf #define NG_BTSOCKET_RFCOMM_ERR \ if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_ERR_LEVEL && \ ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \ printf #define NG_BTSOCKET_RFCOMM_ALERT \ if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_ALERT_LEVEL && \ ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \ printf #define ALOT 0x7fff /* Local prototypes */ static int ng_btsocket_rfcomm_upcall (struct socket *so, void *arg, int waitflag); static void ng_btsocket_rfcomm_sessions_task (void *ctx, int pending); static void ng_btsocket_rfcomm_session_task (ng_btsocket_rfcomm_session_p s); #define ng_btsocket_rfcomm_task_wakeup() \ taskqueue_enqueue(taskqueue_swi_giant, &ng_btsocket_rfcomm_task) static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_connect_ind (ng_btsocket_rfcomm_session_p s, int channel); static void ng_btsocket_rfcomm_connect_cfm (ng_btsocket_rfcomm_session_p s); static int ng_btsocket_rfcomm_session_create (ng_btsocket_rfcomm_session_p *sp, struct socket *l2so, bdaddr_p src, bdaddr_p dst, struct thread *td); static int ng_btsocket_rfcomm_session_accept (ng_btsocket_rfcomm_session_p s0); static int ng_btsocket_rfcomm_session_connect (ng_btsocket_rfcomm_session_p s); static int ng_btsocket_rfcomm_session_receive (ng_btsocket_rfcomm_session_p s); static int ng_btsocket_rfcomm_session_send (ng_btsocket_rfcomm_session_p s); static void ng_btsocket_rfcomm_session_clean (ng_btsocket_rfcomm_session_p s); static void ng_btsocket_rfcomm_session_process_pcb (ng_btsocket_rfcomm_session_p s); static ng_btsocket_rfcomm_session_p ng_btsocket_rfcomm_session_by_addr (bdaddr_p src, bdaddr_p dst); static int ng_btsocket_rfcomm_receive_frame (ng_btsocket_rfcomm_session_p s, struct mbuf *m0); static int ng_btsocket_rfcomm_receive_sabm (ng_btsocket_rfcomm_session_p s, int dlci); static int ng_btsocket_rfcomm_receive_disc (ng_btsocket_rfcomm_session_p s, int dlci); static int ng_btsocket_rfcomm_receive_ua (ng_btsocket_rfcomm_session_p s, int dlci); static int ng_btsocket_rfcomm_receive_dm (ng_btsocket_rfcomm_session_p s, int dlci); static int ng_btsocket_rfcomm_receive_uih (ng_btsocket_rfcomm_session_p s, int dlci, int pf, struct mbuf *m0); static int ng_btsocket_rfcomm_receive_mcc (ng_btsocket_rfcomm_session_p s, struct mbuf *m0); static int ng_btsocket_rfcomm_receive_test (ng_btsocket_rfcomm_session_p s, struct mbuf *m0); static int ng_btsocket_rfcomm_receive_fc (ng_btsocket_rfcomm_session_p s, struct mbuf *m0); static int ng_btsocket_rfcomm_receive_msc (ng_btsocket_rfcomm_session_p s, struct mbuf *m0); static int ng_btsocket_rfcomm_receive_rpn (ng_btsocket_rfcomm_session_p s, struct mbuf *m0); static int ng_btsocket_rfcomm_receive_rls (ng_btsocket_rfcomm_session_p s, struct mbuf *m0); static int ng_btsocket_rfcomm_receive_pn (ng_btsocket_rfcomm_session_p s, struct mbuf *m0); static void ng_btsocket_rfcomm_set_pn (ng_btsocket_rfcomm_pcb_p pcb, u_int8_t cr, u_int8_t flow_control, u_int8_t credits, u_int16_t mtu); static int ng_btsocket_rfcomm_send_command (ng_btsocket_rfcomm_session_p s, u_int8_t type, u_int8_t dlci); static int ng_btsocket_rfcomm_send_uih (ng_btsocket_rfcomm_session_p s, u_int8_t address, u_int8_t pf, u_int8_t credits, struct mbuf *data); static int ng_btsocket_rfcomm_send_msc (ng_btsocket_rfcomm_pcb_p pcb); static int ng_btsocket_rfcomm_send_pn (ng_btsocket_rfcomm_pcb_p pcb); static int ng_btsocket_rfcomm_send_credits (ng_btsocket_rfcomm_pcb_p pcb); static int ng_btsocket_rfcomm_pcb_send (ng_btsocket_rfcomm_pcb_p pcb, int limit); static void ng_btsocket_rfcomm_pcb_kill (ng_btsocket_rfcomm_pcb_p pcb, int error); static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_pcb_by_dlci (ng_btsocket_rfcomm_session_p s, int dlci); static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_pcb_listener (bdaddr_p src, int channel); static void ng_btsocket_rfcomm_timeout (ng_btsocket_rfcomm_pcb_p pcb); static void ng_btsocket_rfcomm_untimeout (ng_btsocket_rfcomm_pcb_p pcb); static void ng_btsocket_rfcomm_process_timeout (void *xpcb); static struct mbuf * ng_btsocket_rfcomm_prepare_packet (struct sockbuf *sb, int length); /* Globals */ extern int ifqmaxlen; static u_int32_t ng_btsocket_rfcomm_debug_level; static u_int32_t ng_btsocket_rfcomm_timo; struct task ng_btsocket_rfcomm_task; static LIST_HEAD(, ng_btsocket_rfcomm_session) ng_btsocket_rfcomm_sessions; static struct mtx ng_btsocket_rfcomm_sessions_mtx; static LIST_HEAD(, ng_btsocket_rfcomm_pcb) ng_btsocket_rfcomm_sockets; static struct mtx ng_btsocket_rfcomm_sockets_mtx; static struct timeval ng_btsocket_rfcomm_lasttime; static int ng_btsocket_rfcomm_curpps; /* Sysctl tree */ SYSCTL_DECL(_net_bluetooth_rfcomm_sockets); static SYSCTL_NODE(_net_bluetooth_rfcomm_sockets, OID_AUTO, stream, CTLFLAG_RW, 0, "Bluetooth STREAM RFCOMM sockets family"); SYSCTL_UINT(_net_bluetooth_rfcomm_sockets_stream, OID_AUTO, debug_level, CTLFLAG_RW, &ng_btsocket_rfcomm_debug_level, NG_BTSOCKET_INFO_LEVEL, "Bluetooth STREAM RFCOMM sockets debug level"); SYSCTL_UINT(_net_bluetooth_rfcomm_sockets_stream, OID_AUTO, timeout, CTLFLAG_RW, &ng_btsocket_rfcomm_timo, 60, "Bluetooth STREAM RFCOMM sockets timeout"); /***************************************************************************** ***************************************************************************** ** RFCOMM CRC ***************************************************************************** *****************************************************************************/ static u_int8_t ng_btsocket_rfcomm_crc_table[256] = { 0x00, 0x91, 0xe3, 0x72, 0x07, 0x96, 0xe4, 0x75, 0x0e, 0x9f, 0xed, 0x7c, 0x09, 0x98, 0xea, 0x7b, 0x1c, 0x8d, 0xff, 0x6e, 0x1b, 0x8a, 0xf8, 0x69, 0x12, 0x83, 0xf1, 0x60, 0x15, 0x84, 0xf6, 0x67, 0x38, 0xa9, 0xdb, 0x4a, 0x3f, 0xae, 0xdc, 0x4d, 0x36, 0xa7, 0xd5, 0x44, 0x31, 0xa0, 0xd2, 0x43, 0x24, 0xb5, 0xc7, 0x56, 0x23, 0xb2, 0xc0, 0x51, 0x2a, 0xbb, 0xc9, 0x58, 0x2d, 0xbc, 0xce, 0x5f, 0x70, 0xe1, 0x93, 0x02, 0x77, 0xe6, 0x94, 0x05, 0x7e, 0xef, 0x9d, 0x0c, 0x79, 0xe8, 0x9a, 0x0b, 0x6c, 0xfd, 0x8f, 0x1e, 0x6b, 0xfa, 0x88, 0x19, 0x62, 0xf3, 0x81, 0x10, 0x65, 0xf4, 0x86, 0x17, 0x48, 0xd9, 0xab, 0x3a, 0x4f, 0xde, 0xac, 0x3d, 0x46, 0xd7, 0xa5, 0x34, 0x41, 0xd0, 0xa2, 0x33, 0x54, 0xc5, 0xb7, 0x26, 0x53, 0xc2, 0xb0, 0x21, 0x5a, 0xcb, 0xb9, 0x28, 0x5d, 0xcc, 0xbe, 0x2f, 0xe0, 0x71, 0x03, 0x92, 0xe7, 0x76, 0x04, 0x95, 0xee, 0x7f, 0x0d, 0x9c, 0xe9, 0x78, 0x0a, 0x9b, 0xfc, 0x6d, 0x1f, 0x8e, 0xfb, 0x6a, 0x18, 0x89, 0xf2, 0x63, 0x11, 0x80, 0xf5, 0x64, 0x16, 0x87, 0xd8, 0x49, 0x3b, 0xaa, 0xdf, 0x4e, 0x3c, 0xad, 0xd6, 0x47, 0x35, 0xa4, 0xd1, 0x40, 0x32, 0xa3, 0xc4, 0x55, 0x27, 0xb6, 0xc3, 0x52, 0x20, 0xb1, 0xca, 0x5b, 0x29, 0xb8, 0xcd, 0x5c, 0x2e, 0xbf, 0x90, 0x01, 0x73, 0xe2, 0x97, 0x06, 0x74, 0xe5, 0x9e, 0x0f, 0x7d, 0xec, 0x99, 0x08, 0x7a, 0xeb, 0x8c, 0x1d, 0x6f, 0xfe, 0x8b, 0x1a, 0x68, 0xf9, 0x82, 0x13, 0x61, 0xf0, 0x85, 0x14, 0x66, 0xf7, 0xa8, 0x39, 0x4b, 0xda, 0xaf, 0x3e, 0x4c, 0xdd, 0xa6, 0x37, 0x45, 0xd4, 0xa1, 0x30, 0x42, 0xd3, 0xb4, 0x25, 0x57, 0xc6, 0xb3, 0x22, 0x50, 0xc1, 0xba, 0x2b, 0x59, 0xc8, 0xbd, 0x2c, 0x5e, 0xcf }; /* CRC */ static u_int8_t ng_btsocket_rfcomm_crc(u_int8_t *data, int length) { u_int8_t crc = 0xff; while (length --) crc = ng_btsocket_rfcomm_crc_table[crc ^ *data++]; return (crc); } /* ng_btsocket_rfcomm_crc */ /* FCS on 2 bytes */ static u_int8_t ng_btsocket_rfcomm_fcs2(u_int8_t *data) { return (0xff - ng_btsocket_rfcomm_crc(data, 2)); } /* ng_btsocket_rfcomm_fcs2 */ /* FCS on 3 bytes */ static u_int8_t ng_btsocket_rfcomm_fcs3(u_int8_t *data) { return (0xff - ng_btsocket_rfcomm_crc(data, 3)); } /* ng_btsocket_rfcomm_fcs3 */ /* * Check FCS * * From Bluetooth spec * * "... In 07.10, the frame check sequence (FCS) is calculated on different * sets of fields for different frame types. These are the fields that the * FCS are calculated on: * * For SABM, DISC, UA, DM frames: on Address, Control and length field. * For UIH frames: on Address and Control field. * * (This is stated here for clarification, and to set the standard for RFCOMM; * the fields included in FCS calculation have actually changed in version * 7.0.0 of TS 07.10, but RFCOMM will not change the FCS calculation scheme * from the one above.) ..." */ static int ng_btsocket_rfcomm_check_fcs(u_int8_t *data, int type, u_int8_t fcs) { if (type != RFCOMM_FRAME_UIH) return (ng_btsocket_rfcomm_fcs3(data) != fcs); return (ng_btsocket_rfcomm_fcs2(data) != fcs); } /* ng_btsocket_rfcomm_check_fcs */ /***************************************************************************** ***************************************************************************** ** Socket interface ***************************************************************************** *****************************************************************************/ /* * Initialize everything */ void ng_btsocket_rfcomm_init(void) { /* Skip initialization of globals for non-default instances. */ if (!IS_DEFAULT_VNET(curvnet)) return; ng_btsocket_rfcomm_debug_level = NG_BTSOCKET_WARN_LEVEL; ng_btsocket_rfcomm_timo = 60; /* RFCOMM task */ TASK_INIT(&ng_btsocket_rfcomm_task, 0, ng_btsocket_rfcomm_sessions_task, NULL); /* RFCOMM sessions list */ LIST_INIT(&ng_btsocket_rfcomm_sessions); mtx_init(&ng_btsocket_rfcomm_sessions_mtx, "btsocks_rfcomm_sessions_mtx", NULL, MTX_DEF); /* RFCOMM sockets list */ LIST_INIT(&ng_btsocket_rfcomm_sockets); mtx_init(&ng_btsocket_rfcomm_sockets_mtx, "btsocks_rfcomm_sockets_mtx", NULL, MTX_DEF); } /* ng_btsocket_rfcomm_init */ /* * Abort connection on socket */ void ng_btsocket_rfcomm_abort(struct socket *so) { so->so_error = ECONNABORTED; (void)ng_btsocket_rfcomm_disconnect(so); } /* ng_btsocket_rfcomm_abort */ void ng_btsocket_rfcomm_close(struct socket *so) { (void)ng_btsocket_rfcomm_disconnect(so); } /* ng_btsocket_rfcomm_close */ /* * Accept connection on socket. Nothing to do here, socket must be connected * and ready, so just return peer address and be done with it. */ int ng_btsocket_rfcomm_accept(struct socket *so, struct sockaddr **nam) { return (ng_btsocket_rfcomm_peeraddr(so, nam)); } /* ng_btsocket_rfcomm_accept */ /* * Create and attach new socket */ int ng_btsocket_rfcomm_attach(struct socket *so, int proto, struct thread *td) { ng_btsocket_rfcomm_pcb_p pcb = so2rfcomm_pcb(so); int error; /* Check socket and protocol */ if (so->so_type != SOCK_STREAM) return (ESOCKTNOSUPPORT); #if 0 /* XXX sonewconn() calls "pru_attach" with proto == 0 */ if (proto != 0) if (proto != BLUETOOTH_PROTO_RFCOMM) return (EPROTONOSUPPORT); #endif /* XXX */ if (pcb != NULL) return (EISCONN); /* Reserve send and receive space if it is not reserved yet */ if ((so->so_snd.sb_hiwat == 0) || (so->so_rcv.sb_hiwat == 0)) { error = soreserve(so, NG_BTSOCKET_RFCOMM_SENDSPACE, NG_BTSOCKET_RFCOMM_RECVSPACE); if (error != 0) return (error); } /* Allocate the PCB */ pcb = malloc(sizeof(*pcb), M_NETGRAPH_BTSOCKET_RFCOMM, M_NOWAIT | M_ZERO); if (pcb == NULL) return (ENOMEM); /* Link the PCB and the socket */ so->so_pcb = (caddr_t) pcb; pcb->so = so; /* Initialize PCB */ pcb->state = NG_BTSOCKET_RFCOMM_DLC_CLOSED; pcb->flags = NG_BTSOCKET_RFCOMM_DLC_CFC; pcb->lmodem = pcb->rmodem = (RFCOMM_MODEM_RTC | RFCOMM_MODEM_RTR | RFCOMM_MODEM_DV); pcb->mtu = RFCOMM_DEFAULT_MTU; pcb->tx_cred = 0; pcb->rx_cred = RFCOMM_DEFAULT_CREDITS; mtx_init(&pcb->pcb_mtx, "btsocks_rfcomm_pcb_mtx", NULL, MTX_DEF); callout_init_mtx(&pcb->timo, &pcb->pcb_mtx, 0); /* Add the PCB to the list */ mtx_lock(&ng_btsocket_rfcomm_sockets_mtx); LIST_INSERT_HEAD(&ng_btsocket_rfcomm_sockets, pcb, next); mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx); return (0); } /* ng_btsocket_rfcomm_attach */ /* * Bind socket */ int ng_btsocket_rfcomm_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { ng_btsocket_rfcomm_pcb_t *pcb = so2rfcomm_pcb(so), *pcb1; struct sockaddr_rfcomm *sa = (struct sockaddr_rfcomm *) nam; if (pcb == NULL) return (EINVAL); /* Verify address */ if (sa == NULL) return (EINVAL); if (sa->rfcomm_family != AF_BLUETOOTH) return (EAFNOSUPPORT); if (sa->rfcomm_len != sizeof(*sa)) return (EINVAL); if (sa->rfcomm_channel > 30) return (EINVAL); mtx_lock(&pcb->pcb_mtx); if (sa->rfcomm_channel != 0) { mtx_lock(&ng_btsocket_rfcomm_sockets_mtx); LIST_FOREACH(pcb1, &ng_btsocket_rfcomm_sockets, next) { if (pcb1->channel == sa->rfcomm_channel && bcmp(&pcb1->src, &sa->rfcomm_bdaddr, sizeof(pcb1->src)) == 0) { mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx); mtx_unlock(&pcb->pcb_mtx); return (EADDRINUSE); } } mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx); } bcopy(&sa->rfcomm_bdaddr, &pcb->src, sizeof(pcb->src)); pcb->channel = sa->rfcomm_channel; mtx_unlock(&pcb->pcb_mtx); return (0); } /* ng_btsocket_rfcomm_bind */ /* * Connect socket */ int ng_btsocket_rfcomm_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { ng_btsocket_rfcomm_pcb_t *pcb = so2rfcomm_pcb(so); struct sockaddr_rfcomm *sa = (struct sockaddr_rfcomm *) nam; ng_btsocket_rfcomm_session_t *s = NULL; struct socket *l2so = NULL; int dlci, error = 0; if (pcb == NULL) return (EINVAL); /* Verify address */ if (sa == NULL) return (EINVAL); if (sa->rfcomm_family != AF_BLUETOOTH) return (EAFNOSUPPORT); if (sa->rfcomm_len != sizeof(*sa)) return (EINVAL); if (sa->rfcomm_channel > 30) return (EINVAL); if (sa->rfcomm_channel == 0 || bcmp(&sa->rfcomm_bdaddr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0) return (EDESTADDRREQ); /* * Note that we will not check for errors in socreate() because * if we failed to create L2CAP socket at this point we still * might have already open session. */ error = socreate(PF_BLUETOOTH, &l2so, SOCK_SEQPACKET, BLUETOOTH_PROTO_L2CAP, td->td_ucred, td); /* * Look for session between "pcb->src" and "sa->rfcomm_bdaddr" (dst) */ mtx_lock(&ng_btsocket_rfcomm_sessions_mtx); s = ng_btsocket_rfcomm_session_by_addr(&pcb->src, &sa->rfcomm_bdaddr); if (s == NULL) { /* * We need to create new RFCOMM session. Check if we have L2CAP * socket. If l2so == NULL then error has the error code from * socreate() */ if (l2so == NULL) { mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx); return (error); } error = ng_btsocket_rfcomm_session_create(&s, l2so, &pcb->src, &sa->rfcomm_bdaddr, td); if (error != 0) { mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx); soclose(l2so); return (error); } } else if (l2so != NULL) soclose(l2so); /* we don't need new L2CAP socket */ /* * Check if we already have the same DLCI the same session */ mtx_lock(&s->session_mtx); mtx_lock(&pcb->pcb_mtx); dlci = RFCOMM_MKDLCI(!INITIATOR(s), sa->rfcomm_channel); if (ng_btsocket_rfcomm_pcb_by_dlci(s, dlci) != NULL) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&s->session_mtx); mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx); return (EBUSY); } /* * Check session state and if its not acceptable then refuse connection */ switch (s->state) { case NG_BTSOCKET_RFCOMM_SESSION_CONNECTING: case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED: case NG_BTSOCKET_RFCOMM_SESSION_OPEN: /* * Update destination address and channel and attach * DLC to the session */ bcopy(&sa->rfcomm_bdaddr, &pcb->dst, sizeof(pcb->dst)); pcb->channel = sa->rfcomm_channel; pcb->dlci = dlci; LIST_INSERT_HEAD(&s->dlcs, pcb, session_next); pcb->session = s; ng_btsocket_rfcomm_timeout(pcb); soisconnecting(pcb->so); if (s->state == NG_BTSOCKET_RFCOMM_SESSION_OPEN) { pcb->mtu = s->mtu; bcopy(&so2l2cap_pcb(s->l2so)->src, &pcb->src, sizeof(pcb->src)); pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONFIGURING; error = ng_btsocket_rfcomm_send_pn(pcb); if (error == 0) error = ng_btsocket_rfcomm_task_wakeup(); } else pcb->state = NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT; break; default: error = ECONNRESET; break; } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&s->session_mtx); mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx); return (error); } /* ng_btsocket_rfcomm_connect */ /* * Process ioctl's calls on socket. * XXX FIXME this should provide interface to the RFCOMM multiplexor channel */ int ng_btsocket_rfcomm_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { return (EINVAL); } /* ng_btsocket_rfcomm_control */ /* * Process getsockopt/setsockopt system calls */ int ng_btsocket_rfcomm_ctloutput(struct socket *so, struct sockopt *sopt) { ng_btsocket_rfcomm_pcb_p pcb = so2rfcomm_pcb(so); struct ng_btsocket_rfcomm_fc_info fcinfo; int error = 0; if (pcb == NULL) return (EINVAL); if (sopt->sopt_level != SOL_RFCOMM) return (0); mtx_lock(&pcb->pcb_mtx); switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case SO_RFCOMM_MTU: error = sooptcopyout(sopt, &pcb->mtu, sizeof(pcb->mtu)); break; case SO_RFCOMM_FC_INFO: fcinfo.lmodem = pcb->lmodem; fcinfo.rmodem = pcb->rmodem; fcinfo.tx_cred = pcb->tx_cred; fcinfo.rx_cred = pcb->rx_cred; fcinfo.cfc = (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC)? 1 : 0; fcinfo.reserved = 0; error = sooptcopyout(sopt, &fcinfo, sizeof(fcinfo)); break; default: error = ENOPROTOOPT; break; } break; case SOPT_SET: switch (sopt->sopt_name) { default: error = ENOPROTOOPT; break; } break; default: error = EINVAL; break; } mtx_unlock(&pcb->pcb_mtx); return (error); } /* ng_btsocket_rfcomm_ctloutput */ /* * Detach and destroy socket */ void ng_btsocket_rfcomm_detach(struct socket *so) { ng_btsocket_rfcomm_pcb_p pcb = so2rfcomm_pcb(so); KASSERT(pcb != NULL, ("ng_btsocket_rfcomm_detach: pcb == NULL")); mtx_lock(&pcb->pcb_mtx); switch (pcb->state) { case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT: case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING: case NG_BTSOCKET_RFCOMM_DLC_CONNECTING: case NG_BTSOCKET_RFCOMM_DLC_CONNECTED: /* XXX What to do with pending request? */ if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO) ng_btsocket_rfcomm_untimeout(pcb); if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT) pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_DETACHED; else pcb->state = NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING; ng_btsocket_rfcomm_task_wakeup(); break; case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING: ng_btsocket_rfcomm_task_wakeup(); break; } while (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CLOSED) msleep(&pcb->state, &pcb->pcb_mtx, PZERO, "rf_det", 0); if (pcb->session != NULL) panic("%s: pcb->session != NULL\n", __func__); if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO) panic("%s: timeout on closed DLC, flags=%#x\n", __func__, pcb->flags); mtx_lock(&ng_btsocket_rfcomm_sockets_mtx); LIST_REMOVE(pcb, next); mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx); mtx_unlock(&pcb->pcb_mtx); mtx_destroy(&pcb->pcb_mtx); bzero(pcb, sizeof(*pcb)); free(pcb, M_NETGRAPH_BTSOCKET_RFCOMM); soisdisconnected(so); so->so_pcb = NULL; } /* ng_btsocket_rfcomm_detach */ /* * Disconnect socket */ int ng_btsocket_rfcomm_disconnect(struct socket *so) { ng_btsocket_rfcomm_pcb_p pcb = so2rfcomm_pcb(so); if (pcb == NULL) return (EINVAL); mtx_lock(&pcb->pcb_mtx); if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING) { mtx_unlock(&pcb->pcb_mtx); return (EINPROGRESS); } /* XXX What to do with pending request? */ if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO) ng_btsocket_rfcomm_untimeout(pcb); switch (pcb->state) { case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING: /* XXX can we get here? */ case NG_BTSOCKET_RFCOMM_DLC_CONNECTING: /* XXX can we get here? */ case NG_BTSOCKET_RFCOMM_DLC_CONNECTED: /* * Just change DLC state and enqueue RFCOMM task. It will * queue and send DISC on the DLC. */ pcb->state = NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING; soisdisconnecting(so); ng_btsocket_rfcomm_task_wakeup(); break; case NG_BTSOCKET_RFCOMM_DLC_CLOSED: case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT: break; default: panic("%s: Invalid DLC state=%d, flags=%#x\n", __func__, pcb->state, pcb->flags); break; } mtx_unlock(&pcb->pcb_mtx); return (0); } /* ng_btsocket_rfcomm_disconnect */ /* * Listen on socket. First call to listen() will create listening RFCOMM session */ int ng_btsocket_rfcomm_listen(struct socket *so, int backlog, struct thread *td) { ng_btsocket_rfcomm_pcb_p pcb = so2rfcomm_pcb(so), pcb1; ng_btsocket_rfcomm_session_p s = NULL; struct socket *l2so = NULL; int error, socreate_error, usedchannels; if (pcb == NULL) return (EINVAL); if (pcb->channel > 30) return (EADDRNOTAVAIL); usedchannels = 0; mtx_lock(&pcb->pcb_mtx); if (pcb->channel == 0) { mtx_lock(&ng_btsocket_rfcomm_sockets_mtx); LIST_FOREACH(pcb1, &ng_btsocket_rfcomm_sockets, next) if (pcb1->channel != 0 && bcmp(&pcb1->src, &pcb->src, sizeof(pcb->src)) == 0) usedchannels |= (1 << (pcb1->channel - 1)); for (pcb->channel = 30; pcb->channel > 0; pcb->channel --) if (!(usedchannels & (1 << (pcb->channel - 1)))) break; if (pcb->channel == 0) { mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx); mtx_unlock(&pcb->pcb_mtx); return (EADDRNOTAVAIL); } mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx); } mtx_unlock(&pcb->pcb_mtx); /* * Note that we will not check for errors in socreate() because * if we failed to create L2CAP socket at this point we still * might have already open session. */ socreate_error = socreate(PF_BLUETOOTH, &l2so, SOCK_SEQPACKET, BLUETOOTH_PROTO_L2CAP, td->td_ucred, td); /* * Transition the socket and session into the LISTENING state. Check * for collisions first, as there can only be one. */ mtx_lock(&ng_btsocket_rfcomm_sessions_mtx); SOCK_LOCK(so); error = solisten_proto_check(so); SOCK_UNLOCK(so); if (error != 0) goto out; LIST_FOREACH(s, &ng_btsocket_rfcomm_sessions, next) if (s->state == NG_BTSOCKET_RFCOMM_SESSION_LISTENING) break; if (s == NULL) { /* * We need to create default RFCOMM session. Check if we have * L2CAP socket. If l2so == NULL then error has the error code * from socreate() */ if (l2so == NULL) { error = socreate_error; goto out; } /* * Create default listen RFCOMM session. The default RFCOMM * session will listen on ANY address. * * XXX FIXME Note that currently there is no way to adjust MTU * for the default session. */ error = ng_btsocket_rfcomm_session_create(&s, l2so, NG_HCI_BDADDR_ANY, NULL, td); if (error != 0) goto out; l2so = NULL; } SOCK_LOCK(so); solisten_proto(so, backlog); SOCK_UNLOCK(so); out: mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx); /* * If we still have an l2so reference here, it's unneeded, so release * it. */ if (l2so != NULL) soclose(l2so); return (error); } /* ng_btsocket_listen */ /* * Get peer address */ int ng_btsocket_rfcomm_peeraddr(struct socket *so, struct sockaddr **nam) { ng_btsocket_rfcomm_pcb_p pcb = so2rfcomm_pcb(so); struct sockaddr_rfcomm sa; if (pcb == NULL) return (EINVAL); bcopy(&pcb->dst, &sa.rfcomm_bdaddr, sizeof(sa.rfcomm_bdaddr)); sa.rfcomm_channel = pcb->channel; sa.rfcomm_len = sizeof(sa); sa.rfcomm_family = AF_BLUETOOTH; *nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT); return ((*nam == NULL)? ENOMEM : 0); } /* ng_btsocket_rfcomm_peeraddr */ /* * Send data to socket */ int ng_btsocket_rfcomm_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { ng_btsocket_rfcomm_pcb_t *pcb = so2rfcomm_pcb(so); int error = 0; /* Check socket and input */ if (pcb == NULL || m == NULL || control != NULL) { error = EINVAL; goto drop; } mtx_lock(&pcb->pcb_mtx); /* Make sure DLC is connected */ if (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTED) { mtx_unlock(&pcb->pcb_mtx); error = ENOTCONN; goto drop; } /* Put the packet on the socket's send queue and wakeup RFCOMM task */ - sbappend(&pcb->so->so_snd, m); + sbappend(&pcb->so->so_snd, m, flags); m = NULL; if (!(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_SENDING)) { pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_SENDING; error = ng_btsocket_rfcomm_task_wakeup(); } mtx_unlock(&pcb->pcb_mtx); drop: NG_FREE_M(m); /* checks for != NULL */ NG_FREE_M(control); return (error); } /* ng_btsocket_rfcomm_send */ /* * Get socket address */ int ng_btsocket_rfcomm_sockaddr(struct socket *so, struct sockaddr **nam) { ng_btsocket_rfcomm_pcb_p pcb = so2rfcomm_pcb(so); struct sockaddr_rfcomm sa; if (pcb == NULL) return (EINVAL); bcopy(&pcb->src, &sa.rfcomm_bdaddr, sizeof(sa.rfcomm_bdaddr)); sa.rfcomm_channel = pcb->channel; sa.rfcomm_len = sizeof(sa); sa.rfcomm_family = AF_BLUETOOTH; *nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT); return ((*nam == NULL)? ENOMEM : 0); } /* ng_btsocket_rfcomm_sockaddr */ /* * Upcall function for L2CAP sockets. Enqueue RFCOMM task. */ static int ng_btsocket_rfcomm_upcall(struct socket *so, void *arg, int waitflag) { int error; if (so == NULL) panic("%s: so == NULL\n", __func__); if ((error = ng_btsocket_rfcomm_task_wakeup()) != 0) NG_BTSOCKET_RFCOMM_ALERT( "%s: Could not enqueue RFCOMM task, error=%d\n", __func__, error); return (SU_OK); } /* ng_btsocket_rfcomm_upcall */ /* * RFCOMM task. Will handle all RFCOMM sessions in one pass. * XXX FIXME does not scale very well */ static void ng_btsocket_rfcomm_sessions_task(void *ctx, int pending) { ng_btsocket_rfcomm_session_p s = NULL, s_next = NULL; mtx_lock(&ng_btsocket_rfcomm_sessions_mtx); for (s = LIST_FIRST(&ng_btsocket_rfcomm_sessions); s != NULL; ) { mtx_lock(&s->session_mtx); s_next = LIST_NEXT(s, next); ng_btsocket_rfcomm_session_task(s); if (s->state == NG_BTSOCKET_RFCOMM_SESSION_CLOSED) { /* Unlink and clean the session */ LIST_REMOVE(s, next); NG_BT_MBUFQ_DRAIN(&s->outq); if (!LIST_EMPTY(&s->dlcs)) panic("%s: DLC list is not empty\n", __func__); /* Close L2CAP socket */ SOCKBUF_LOCK(&s->l2so->so_rcv); soupcall_clear(s->l2so, SO_RCV); SOCKBUF_UNLOCK(&s->l2so->so_rcv); SOCKBUF_LOCK(&s->l2so->so_snd); soupcall_clear(s->l2so, SO_SND); SOCKBUF_UNLOCK(&s->l2so->so_snd); soclose(s->l2so); mtx_unlock(&s->session_mtx); mtx_destroy(&s->session_mtx); bzero(s, sizeof(*s)); free(s, M_NETGRAPH_BTSOCKET_RFCOMM); } else mtx_unlock(&s->session_mtx); s = s_next; } mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx); } /* ng_btsocket_rfcomm_sessions_task */ /* * Process RFCOMM session. Will handle all RFCOMM sockets in one pass. */ static void ng_btsocket_rfcomm_session_task(ng_btsocket_rfcomm_session_p s) { mtx_assert(&s->session_mtx, MA_OWNED); if (s->l2so->so_rcv.sb_state & SBS_CANTRCVMORE) { NG_BTSOCKET_RFCOMM_INFO( "%s: L2CAP connection has been terminated, so=%p, so_state=%#x, so_count=%d, " \ "state=%d, flags=%#x\n", __func__, s->l2so, s->l2so->so_state, s->l2so->so_count, s->state, s->flags); s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; ng_btsocket_rfcomm_session_clean(s); } /* Now process upcall */ switch (s->state) { /* Try to accept new L2CAP connection(s) */ case NG_BTSOCKET_RFCOMM_SESSION_LISTENING: while (ng_btsocket_rfcomm_session_accept(s) == 0) ; break; /* Process the results of the L2CAP connect */ case NG_BTSOCKET_RFCOMM_SESSION_CONNECTING: ng_btsocket_rfcomm_session_process_pcb(s); if (ng_btsocket_rfcomm_session_connect(s) != 0) { s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; ng_btsocket_rfcomm_session_clean(s); } break; /* Try to receive/send more data */ case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED: case NG_BTSOCKET_RFCOMM_SESSION_OPEN: case NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING: ng_btsocket_rfcomm_session_process_pcb(s); if (ng_btsocket_rfcomm_session_receive(s) != 0) { s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; ng_btsocket_rfcomm_session_clean(s); } else if (ng_btsocket_rfcomm_session_send(s) != 0) { s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; ng_btsocket_rfcomm_session_clean(s); } break; case NG_BTSOCKET_RFCOMM_SESSION_CLOSED: break; default: panic("%s: Invalid session state=%d, flags=%#x\n", __func__, s->state, s->flags); break; } } /* ng_btsocket_rfcomm_session_task */ /* * Process RFCOMM connection indicator. Caller must hold s->session_mtx */ static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_connect_ind(ng_btsocket_rfcomm_session_p s, int channel) { ng_btsocket_rfcomm_pcb_p pcb = NULL, pcb1 = NULL; ng_btsocket_l2cap_pcb_p l2pcb = NULL; struct socket *so1 = NULL; mtx_assert(&s->session_mtx, MA_OWNED); /* * Try to find RFCOMM socket that listens on given source address * and channel. This will return the best possible match. */ l2pcb = so2l2cap_pcb(s->l2so); pcb = ng_btsocket_rfcomm_pcb_listener(&l2pcb->src, channel); if (pcb == NULL) return (NULL); /* * Check the pending connections queue and if we have space then * create new socket and set proper source and destination address, * and channel. */ mtx_lock(&pcb->pcb_mtx); if (pcb->so->so_qlen <= pcb->so->so_qlimit) { CURVNET_SET(pcb->so->so_vnet); so1 = sonewconn(pcb->so, 0); CURVNET_RESTORE(); } mtx_unlock(&pcb->pcb_mtx); if (so1 == NULL) return (NULL); /* * If we got here than we have created new socket. So complete the * connection. Set source and destination address from the session. */ pcb1 = so2rfcomm_pcb(so1); if (pcb1 == NULL) panic("%s: pcb1 == NULL\n", __func__); mtx_lock(&pcb1->pcb_mtx); bcopy(&l2pcb->src, &pcb1->src, sizeof(pcb1->src)); bcopy(&l2pcb->dst, &pcb1->dst, sizeof(pcb1->dst)); pcb1->channel = channel; /* Link new DLC to the session. We already hold s->session_mtx */ LIST_INSERT_HEAD(&s->dlcs, pcb1, session_next); pcb1->session = s; mtx_unlock(&pcb1->pcb_mtx); return (pcb1); } /* ng_btsocket_rfcomm_connect_ind */ /* * Process RFCOMM connect confirmation. Caller must hold s->session_mtx. */ static void ng_btsocket_rfcomm_connect_cfm(ng_btsocket_rfcomm_session_p s) { ng_btsocket_rfcomm_pcb_p pcb = NULL, pcb_next = NULL; int error; mtx_assert(&s->session_mtx, MA_OWNED); /* * Wake up all waiting sockets and send PN request for each of them. * Note that timeout already been set in ng_btsocket_rfcomm_connect() * * Note: cannot use LIST_FOREACH because ng_btsocket_rfcomm_pcb_kill * will unlink DLC from the session */ for (pcb = LIST_FIRST(&s->dlcs); pcb != NULL; ) { mtx_lock(&pcb->pcb_mtx); pcb_next = LIST_NEXT(pcb, session_next); if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT) { pcb->mtu = s->mtu; bcopy(&so2l2cap_pcb(s->l2so)->src, &pcb->src, sizeof(pcb->src)); error = ng_btsocket_rfcomm_send_pn(pcb); if (error == 0) pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONFIGURING; else ng_btsocket_rfcomm_pcb_kill(pcb, error); } mtx_unlock(&pcb->pcb_mtx); pcb = pcb_next; } } /* ng_btsocket_rfcomm_connect_cfm */ /***************************************************************************** ***************************************************************************** ** RFCOMM sessions ***************************************************************************** *****************************************************************************/ /* * Create new RFCOMM session. That function WILL NOT take ownership over l2so. * Caller MUST free l2so if function failed. */ static int ng_btsocket_rfcomm_session_create(ng_btsocket_rfcomm_session_p *sp, struct socket *l2so, bdaddr_p src, bdaddr_p dst, struct thread *td) { ng_btsocket_rfcomm_session_p s = NULL; struct sockaddr_l2cap l2sa; struct sockopt l2sopt; int error; u_int16_t mtu; mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED); /* Allocate the RFCOMM session */ s = malloc(sizeof(*s), M_NETGRAPH_BTSOCKET_RFCOMM, M_NOWAIT | M_ZERO); if (s == NULL) return (ENOMEM); /* Set defaults */ s->mtu = RFCOMM_DEFAULT_MTU; s->flags = 0; s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; NG_BT_MBUFQ_INIT(&s->outq, ifqmaxlen); /* * XXX Mark session mutex as DUPOK to prevent "duplicated lock of * the same type" message. When accepting new L2CAP connection * ng_btsocket_rfcomm_session_accept() holds both session mutexes * for "old" (accepting) session and "new" (created) session. */ mtx_init(&s->session_mtx, "btsocks_rfcomm_session_mtx", NULL, MTX_DEF|MTX_DUPOK); LIST_INIT(&s->dlcs); /* Prepare L2CAP socket */ SOCKBUF_LOCK(&l2so->so_rcv); soupcall_set(l2so, SO_RCV, ng_btsocket_rfcomm_upcall, NULL); SOCKBUF_UNLOCK(&l2so->so_rcv); SOCKBUF_LOCK(&l2so->so_snd); soupcall_set(l2so, SO_SND, ng_btsocket_rfcomm_upcall, NULL); SOCKBUF_UNLOCK(&l2so->so_snd); l2so->so_state |= SS_NBIO; s->l2so = l2so; mtx_lock(&s->session_mtx); /* * "src" == NULL and "dst" == NULL means just create session. * caller must do the rest */ if (src == NULL && dst == NULL) goto done; /* * Set incoming MTU on L2CAP socket. It is RFCOMM session default MTU * plus 5 bytes: RFCOMM frame header, one extra byte for length and one * extra byte for credits. */ mtu = s->mtu + sizeof(struct rfcomm_frame_hdr) + 1 + 1; l2sopt.sopt_dir = SOPT_SET; l2sopt.sopt_level = SOL_L2CAP; l2sopt.sopt_name = SO_L2CAP_IMTU; l2sopt.sopt_val = (void *) &mtu; l2sopt.sopt_valsize = sizeof(mtu); l2sopt.sopt_td = NULL; error = sosetopt(s->l2so, &l2sopt); if (error != 0) goto bad; /* Bind socket to "src" address */ l2sa.l2cap_len = sizeof(l2sa); l2sa.l2cap_family = AF_BLUETOOTH; l2sa.l2cap_psm = (dst == NULL)? htole16(NG_L2CAP_PSM_RFCOMM) : 0; bcopy(src, &l2sa.l2cap_bdaddr, sizeof(l2sa.l2cap_bdaddr)); l2sa.l2cap_cid = 0; l2sa.l2cap_bdaddr_type = BDADDR_BREDR; error = sobind(s->l2so, (struct sockaddr *) &l2sa, td); if (error != 0) goto bad; /* If "dst" is not NULL then initiate connect(), otherwise listen() */ if (dst == NULL) { s->flags = 0; s->state = NG_BTSOCKET_RFCOMM_SESSION_LISTENING; error = solisten(s->l2so, 10, td); if (error != 0) goto bad; } else { s->flags = NG_BTSOCKET_RFCOMM_SESSION_INITIATOR; s->state = NG_BTSOCKET_RFCOMM_SESSION_CONNECTING; l2sa.l2cap_len = sizeof(l2sa); l2sa.l2cap_family = AF_BLUETOOTH; l2sa.l2cap_psm = htole16(NG_L2CAP_PSM_RFCOMM); bcopy(dst, &l2sa.l2cap_bdaddr, sizeof(l2sa.l2cap_bdaddr)); l2sa.l2cap_cid = 0; l2sa.l2cap_bdaddr_type = BDADDR_BREDR; error = soconnect(s->l2so, (struct sockaddr *) &l2sa, td); if (error != 0) goto bad; } done: LIST_INSERT_HEAD(&ng_btsocket_rfcomm_sessions, s, next); *sp = s; mtx_unlock(&s->session_mtx); return (0); bad: mtx_unlock(&s->session_mtx); /* Return L2CAP socket back to its original state */ SOCKBUF_LOCK(&l2so->so_rcv); soupcall_clear(s->l2so, SO_RCV); SOCKBUF_UNLOCK(&l2so->so_rcv); SOCKBUF_LOCK(&l2so->so_snd); soupcall_clear(s->l2so, SO_SND); SOCKBUF_UNLOCK(&l2so->so_snd); l2so->so_state &= ~SS_NBIO; mtx_destroy(&s->session_mtx); bzero(s, sizeof(*s)); free(s, M_NETGRAPH_BTSOCKET_RFCOMM); return (error); } /* ng_btsocket_rfcomm_session_create */ /* * Process accept() on RFCOMM session * XXX FIXME locking for "l2so"? */ static int ng_btsocket_rfcomm_session_accept(ng_btsocket_rfcomm_session_p s0) { struct socket *l2so = NULL; struct sockaddr_l2cap *l2sa = NULL; ng_btsocket_l2cap_pcb_t *l2pcb = NULL; ng_btsocket_rfcomm_session_p s = NULL; int error = 0; mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED); mtx_assert(&s0->session_mtx, MA_OWNED); /* Check if there is a complete L2CAP connection in the queue */ if ((error = s0->l2so->so_error) != 0) { NG_BTSOCKET_RFCOMM_ERR( "%s: Could not accept connection on L2CAP socket, error=%d\n", __func__, error); s0->l2so->so_error = 0; return (error); } ACCEPT_LOCK(); if (TAILQ_EMPTY(&s0->l2so->so_comp)) { ACCEPT_UNLOCK(); if (s0->l2so->so_rcv.sb_state & SBS_CANTRCVMORE) return (ECONNABORTED); return (EWOULDBLOCK); } /* Accept incoming L2CAP connection */ l2so = TAILQ_FIRST(&s0->l2so->so_comp); if (l2so == NULL) panic("%s: l2so == NULL\n", __func__); TAILQ_REMOVE(&s0->l2so->so_comp, l2so, so_list); s0->l2so->so_qlen --; l2so->so_qstate &= ~SQ_COMP; l2so->so_head = NULL; SOCK_LOCK(l2so); soref(l2so); l2so->so_state |= SS_NBIO; SOCK_UNLOCK(l2so); ACCEPT_UNLOCK(); error = soaccept(l2so, (struct sockaddr **) &l2sa); if (error != 0) { NG_BTSOCKET_RFCOMM_ERR( "%s: soaccept() on L2CAP socket failed, error=%d\n", __func__, error); soclose(l2so); return (error); } /* * Check if there is already active RFCOMM session between two devices. * If so then close L2CAP connection. We only support one RFCOMM session * between each pair of devices. Note that here we assume session in any * state. The session even could be in the middle of disconnecting. */ l2pcb = so2l2cap_pcb(l2so); s = ng_btsocket_rfcomm_session_by_addr(&l2pcb->src, &l2pcb->dst); if (s == NULL) { /* Create a new RFCOMM session */ error = ng_btsocket_rfcomm_session_create(&s, l2so, NULL, NULL, curthread /* XXX */); if (error == 0) { mtx_lock(&s->session_mtx); s->flags = 0; s->state = NG_BTSOCKET_RFCOMM_SESSION_CONNECTED; /* * Adjust MTU on incomming connection. Reserve 5 bytes: * RFCOMM frame header, one extra byte for length and * one extra byte for credits. */ s->mtu = min(l2pcb->imtu, l2pcb->omtu) - sizeof(struct rfcomm_frame_hdr) - 1 - 1; mtx_unlock(&s->session_mtx); } else { NG_BTSOCKET_RFCOMM_ALERT( "%s: Failed to create new RFCOMM session, error=%d\n", __func__, error); soclose(l2so); } } else { NG_BTSOCKET_RFCOMM_WARN( "%s: Rejecting duplicating RFCOMM session between src=%x:%x:%x:%x:%x:%x and " \ "dst=%x:%x:%x:%x:%x:%x, state=%d, flags=%#x\n", __func__, l2pcb->src.b[5], l2pcb->src.b[4], l2pcb->src.b[3], l2pcb->src.b[2], l2pcb->src.b[1], l2pcb->src.b[0], l2pcb->dst.b[5], l2pcb->dst.b[4], l2pcb->dst.b[3], l2pcb->dst.b[2], l2pcb->dst.b[1], l2pcb->dst.b[0], s->state, s->flags); error = EBUSY; soclose(l2so); } return (error); } /* ng_btsocket_rfcomm_session_accept */ /* * Process connect() on RFCOMM session * XXX FIXME locking for "l2so"? */ static int ng_btsocket_rfcomm_session_connect(ng_btsocket_rfcomm_session_p s) { ng_btsocket_l2cap_pcb_p l2pcb = so2l2cap_pcb(s->l2so); int error; mtx_assert(&s->session_mtx, MA_OWNED); /* First check if connection has failed */ if ((error = s->l2so->so_error) != 0) { s->l2so->so_error = 0; NG_BTSOCKET_RFCOMM_ERR( "%s: Could not connect RFCOMM session, error=%d, state=%d, flags=%#x\n", __func__, error, s->state, s->flags); return (error); } /* Is connection still in progress? */ if (s->l2so->so_state & SS_ISCONNECTING) return (0); /* * If we got here then we are connected. Send SABM on DLCI 0 to * open multiplexor channel. */ if (error == 0) { s->state = NG_BTSOCKET_RFCOMM_SESSION_CONNECTED; /* * Adjust MTU on outgoing connection. Reserve 5 bytes: RFCOMM * frame header, one extra byte for length and one extra byte * for credits. */ s->mtu = min(l2pcb->imtu, l2pcb->omtu) - sizeof(struct rfcomm_frame_hdr) - 1 - 1; error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_SABM,0); if (error == 0) error = ng_btsocket_rfcomm_task_wakeup(); } return (error); }/* ng_btsocket_rfcomm_session_connect */ /* * Receive data on RFCOMM session * XXX FIXME locking for "l2so"? */ static int ng_btsocket_rfcomm_session_receive(ng_btsocket_rfcomm_session_p s) { struct mbuf *m = NULL; struct uio uio; int more, flags, error; mtx_assert(&s->session_mtx, MA_OWNED); /* Can we read from the L2CAP socket? */ if (!soreadable(s->l2so)) return (0); /* First check for error on L2CAP socket */ if ((error = s->l2so->so_error) != 0) { s->l2so->so_error = 0; NG_BTSOCKET_RFCOMM_ERR( "%s: Could not receive data from L2CAP socket, error=%d, state=%d, flags=%#x\n", __func__, error, s->state, s->flags); return (error); } /* * Read all packets from the L2CAP socket. * XXX FIXME/VERIFY is that correct? For now use m->m_nextpkt as * indication that there is more packets on the socket's buffer. * Also what should we use in uio.uio_resid? * May be s->mtu + sizeof(struct rfcomm_frame_hdr) + 1 + 1? */ for (more = 1; more; ) { /* Try to get next packet from socket */ bzero(&uio, sizeof(uio)); /* uio.uio_td = NULL; */ uio.uio_resid = 1000000000; flags = MSG_DONTWAIT; m = NULL; error = soreceive(s->l2so, NULL, &uio, &m, (struct mbuf **) NULL, &flags); if (error != 0) { if (error == EWOULDBLOCK) return (0); /* XXX can happen? */ NG_BTSOCKET_RFCOMM_ERR( "%s: Could not receive data from L2CAP socket, error=%d\n", __func__, error); return (error); } more = (m->m_nextpkt != NULL); m->m_nextpkt = NULL; ng_btsocket_rfcomm_receive_frame(s, m); } return (0); } /* ng_btsocket_rfcomm_session_receive */ /* * Send data on RFCOMM session * XXX FIXME locking for "l2so"? */ static int ng_btsocket_rfcomm_session_send(ng_btsocket_rfcomm_session_p s) { struct mbuf *m = NULL; int error; mtx_assert(&s->session_mtx, MA_OWNED); /* Send as much as we can from the session queue */ while (sowriteable(s->l2so)) { /* Check if socket still OK */ if ((error = s->l2so->so_error) != 0) { s->l2so->so_error = 0; NG_BTSOCKET_RFCOMM_ERR( "%s: Detected error=%d on L2CAP socket, state=%d, flags=%#x\n", __func__, error, s->state, s->flags); return (error); } NG_BT_MBUFQ_DEQUEUE(&s->outq, m); if (m == NULL) return (0); /* we are done */ /* Call send function on the L2CAP socket */ error = (*s->l2so->so_proto->pr_usrreqs->pru_send)(s->l2so, 0, m, NULL, NULL, curthread /* XXX */); if (error != 0) { NG_BTSOCKET_RFCOMM_ERR( "%s: Could not send data to L2CAP socket, error=%d\n", __func__, error); return (error); } } return (0); } /* ng_btsocket_rfcomm_session_send */ /* * Close and disconnect all DLCs for the given session. Caller must hold * s->sesson_mtx. Will wakeup session. */ static void ng_btsocket_rfcomm_session_clean(ng_btsocket_rfcomm_session_p s) { ng_btsocket_rfcomm_pcb_p pcb = NULL, pcb_next = NULL; int error; mtx_assert(&s->session_mtx, MA_OWNED); /* * Note: cannot use LIST_FOREACH because ng_btsocket_rfcomm_pcb_kill * will unlink DLC from the session */ for (pcb = LIST_FIRST(&s->dlcs); pcb != NULL; ) { mtx_lock(&pcb->pcb_mtx); pcb_next = LIST_NEXT(pcb, session_next); NG_BTSOCKET_RFCOMM_INFO( "%s: Disconnecting dlci=%d, state=%d, flags=%#x\n", __func__, pcb->dlci, pcb->state, pcb->flags); if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED) error = ECONNRESET; else error = ECONNREFUSED; ng_btsocket_rfcomm_pcb_kill(pcb, error); mtx_unlock(&pcb->pcb_mtx); pcb = pcb_next; } } /* ng_btsocket_rfcomm_session_clean */ /* * Process all DLCs on the session. Caller MUST hold s->session_mtx. */ static void ng_btsocket_rfcomm_session_process_pcb(ng_btsocket_rfcomm_session_p s) { ng_btsocket_rfcomm_pcb_p pcb = NULL, pcb_next = NULL; int error; mtx_assert(&s->session_mtx, MA_OWNED); /* * Note: cannot use LIST_FOREACH because ng_btsocket_rfcomm_pcb_kill * will unlink DLC from the session */ for (pcb = LIST_FIRST(&s->dlcs); pcb != NULL; ) { mtx_lock(&pcb->pcb_mtx); pcb_next = LIST_NEXT(pcb, session_next); switch (pcb->state) { /* * If DLC in W4_CONNECT state then we should check for both * timeout and detach. */ case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT: if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_DETACHED) ng_btsocket_rfcomm_pcb_kill(pcb, 0); else if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT) ng_btsocket_rfcomm_pcb_kill(pcb, ETIMEDOUT); break; /* * If DLC in CONFIGURING or CONNECTING state then we only * should check for timeout. If detach() was called then * DLC will be moved into DISCONNECTING state. */ case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING: case NG_BTSOCKET_RFCOMM_DLC_CONNECTING: if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT) ng_btsocket_rfcomm_pcb_kill(pcb, ETIMEDOUT); break; /* * If DLC in CONNECTED state then we need to send data (if any) * from the socket's send queue. Note that we will send data * from either all sockets or none. This may overload session's * outgoing queue (but we do not check for that). * * XXX FIXME need scheduler for RFCOMM sockets */ case NG_BTSOCKET_RFCOMM_DLC_CONNECTED: error = ng_btsocket_rfcomm_pcb_send(pcb, ALOT); if (error != 0) ng_btsocket_rfcomm_pcb_kill(pcb, error); break; /* * If DLC in DISCONNECTING state then we must send DISC frame. * Note that if DLC has timeout set then we do not need to * resend DISC frame. * * XXX FIXME need to drain all data from the socket's queue * if LINGER option was set */ case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING: if (!(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)) { error = ng_btsocket_rfcomm_send_command( pcb->session, RFCOMM_FRAME_DISC, pcb->dlci); if (error == 0) ng_btsocket_rfcomm_timeout(pcb); else ng_btsocket_rfcomm_pcb_kill(pcb, error); } else if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT) ng_btsocket_rfcomm_pcb_kill(pcb, ETIMEDOUT); break; /* case NG_BTSOCKET_RFCOMM_DLC_CLOSED: */ default: panic("%s: Invalid DLC state=%d, flags=%#x\n", __func__, pcb->state, pcb->flags); break; } mtx_unlock(&pcb->pcb_mtx); pcb = pcb_next; } } /* ng_btsocket_rfcomm_session_process_pcb */ /* * Find RFCOMM session between "src" and "dst". * Caller MUST hold ng_btsocket_rfcomm_sessions_mtx. */ static ng_btsocket_rfcomm_session_p ng_btsocket_rfcomm_session_by_addr(bdaddr_p src, bdaddr_p dst) { ng_btsocket_rfcomm_session_p s = NULL; ng_btsocket_l2cap_pcb_p l2pcb = NULL; int any_src; mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED); any_src = (bcmp(src, NG_HCI_BDADDR_ANY, sizeof(*src)) == 0); LIST_FOREACH(s, &ng_btsocket_rfcomm_sessions, next) { l2pcb = so2l2cap_pcb(s->l2so); if ((any_src || bcmp(&l2pcb->src, src, sizeof(*src)) == 0) && bcmp(&l2pcb->dst, dst, sizeof(*dst)) == 0) break; } return (s); } /* ng_btsocket_rfcomm_session_by_addr */ /***************************************************************************** ***************************************************************************** ** RFCOMM ***************************************************************************** *****************************************************************************/ /* * Process incoming RFCOMM frame. Caller must hold s->session_mtx. * XXX FIXME check frame length */ static int ng_btsocket_rfcomm_receive_frame(ng_btsocket_rfcomm_session_p s, struct mbuf *m0) { struct rfcomm_frame_hdr *hdr = NULL; struct mbuf *m = NULL; u_int16_t length; u_int8_t dlci, type; int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); /* Pullup as much as we can into first mbuf (for direct access) */ length = min(m0->m_pkthdr.len, MHLEN); if (m0->m_len < length) { if ((m0 = m_pullup(m0, length)) == NULL) { NG_BTSOCKET_RFCOMM_ALERT( "%s: m_pullup(%d) failed\n", __func__, length); return (ENOBUFS); } } hdr = mtod(m0, struct rfcomm_frame_hdr *); dlci = RFCOMM_DLCI(hdr->address); type = RFCOMM_TYPE(hdr->control); /* Test EA bit in length. If not set then we have 2 bytes of length */ if (!RFCOMM_EA(hdr->length)) { bcopy(&hdr->length, &length, sizeof(length)); length = le16toh(length) >> 1; m_adj(m0, sizeof(*hdr) + 1); } else { length = hdr->length >> 1; m_adj(m0, sizeof(*hdr)); } NG_BTSOCKET_RFCOMM_INFO( "%s: Got frame type=%#x, dlci=%d, length=%d, cr=%d, pf=%d, len=%d\n", __func__, type, dlci, length, RFCOMM_CR(hdr->address), RFCOMM_PF(hdr->control), m0->m_pkthdr.len); /* * Get FCS (the last byte in the frame) * XXX this will not work if mbuf chain ends with empty mbuf. * XXX let's hope it never happens :) */ for (m = m0; m->m_next != NULL; m = m->m_next) ; if (m->m_len <= 0) panic("%s: Empty mbuf at the end of the chain, len=%d\n", __func__, m->m_len); /* * Check FCS. We only need to calculate FCS on first 2 or 3 bytes * and already m_pullup'ed mbuf chain, so it should be safe. */ if (ng_btsocket_rfcomm_check_fcs((u_int8_t *) hdr, type, m->m_data[m->m_len - 1])) { NG_BTSOCKET_RFCOMM_ERR( "%s: Invalid RFCOMM packet. Bad checksum\n", __func__); NG_FREE_M(m0); return (EINVAL); } m_adj(m0, -1); /* Trim FCS byte */ /* * Process RFCOMM frame. * * From TS 07.10 spec * * "... In the case where a SABM or DISC command with the P bit set * to 0 is received then the received frame shall be discarded..." * * "... If a unsolicited DM response is received then the frame shall * be processed irrespective of the P/F setting... " * * "... The station may transmit response frames with the F bit set * to 0 at any opportunity on an asynchronous basis. However, in the * case where a UA response is received with the F bit set to 0 then * the received frame shall be discarded..." * * From Bluetooth spec * * "... When credit based flow control is being used, the meaning of * the P/F bit in the control field of the RFCOMM header is redefined * for UIH frames..." */ switch (type) { case RFCOMM_FRAME_SABM: if (RFCOMM_PF(hdr->control)) error = ng_btsocket_rfcomm_receive_sabm(s, dlci); break; case RFCOMM_FRAME_DISC: if (RFCOMM_PF(hdr->control)) error = ng_btsocket_rfcomm_receive_disc(s, dlci); break; case RFCOMM_FRAME_UA: if (RFCOMM_PF(hdr->control)) error = ng_btsocket_rfcomm_receive_ua(s, dlci); break; case RFCOMM_FRAME_DM: error = ng_btsocket_rfcomm_receive_dm(s, dlci); break; case RFCOMM_FRAME_UIH: if (dlci == 0) error = ng_btsocket_rfcomm_receive_mcc(s, m0); else error = ng_btsocket_rfcomm_receive_uih(s, dlci, RFCOMM_PF(hdr->control), m0); return (error); /* NOT REACHED */ default: NG_BTSOCKET_RFCOMM_ERR( "%s: Invalid RFCOMM packet. Unknown type=%#x\n", __func__, type); error = EINVAL; break; } NG_FREE_M(m0); return (error); } /* ng_btsocket_rfcomm_receive_frame */ /* * Process RFCOMM SABM frame */ static int ng_btsocket_rfcomm_receive_sabm(ng_btsocket_rfcomm_session_p s, int dlci) { ng_btsocket_rfcomm_pcb_p pcb = NULL; int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got SABM, session state=%d, flags=%#x, mtu=%d, dlci=%d\n", __func__, s->state, s->flags, s->mtu, dlci); /* DLCI == 0 means open multiplexor channel */ if (dlci == 0) { switch (s->state) { case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED: case NG_BTSOCKET_RFCOMM_SESSION_OPEN: error = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_UA, dlci); if (error == 0) { s->state = NG_BTSOCKET_RFCOMM_SESSION_OPEN; ng_btsocket_rfcomm_connect_cfm(s); } else { s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; ng_btsocket_rfcomm_session_clean(s); } break; default: NG_BTSOCKET_RFCOMM_WARN( "%s: Got SABM for session in invalid state state=%d, flags=%#x\n", __func__, s->state, s->flags); error = EINVAL; break; } return (error); } /* Make sure multiplexor channel is open */ if (s->state != NG_BTSOCKET_RFCOMM_SESSION_OPEN) { NG_BTSOCKET_RFCOMM_ERR( "%s: Got SABM for dlci=%d with mulitplexor channel closed, state=%d, " \ "flags=%#x\n", __func__, dlci, s->state, s->flags); return (EINVAL); } /* * Check if we have this DLCI. This might happen when remote * peer uses PN command before actual open (SABM) happens. */ pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci); if (pcb != NULL) { mtx_lock(&pcb->pcb_mtx); if (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTING) { NG_BTSOCKET_RFCOMM_ERR( "%s: Got SABM for dlci=%d in invalid state=%d, flags=%#x\n", __func__, dlci, pcb->state, pcb->flags); mtx_unlock(&pcb->pcb_mtx); return (ENOENT); } ng_btsocket_rfcomm_untimeout(pcb); error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_UA,dlci); if (error == 0) error = ng_btsocket_rfcomm_send_msc(pcb); if (error == 0) { pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTED; soisconnected(pcb->so); } else ng_btsocket_rfcomm_pcb_kill(pcb, error); mtx_unlock(&pcb->pcb_mtx); return (error); } /* * We do not have requested DLCI, so it must be an incoming connection * with default parameters. Try to accept it. */ pcb = ng_btsocket_rfcomm_connect_ind(s, RFCOMM_SRVCHANNEL(dlci)); if (pcb != NULL) { mtx_lock(&pcb->pcb_mtx); pcb->dlci = dlci; error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_UA,dlci); if (error == 0) error = ng_btsocket_rfcomm_send_msc(pcb); if (error == 0) { pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTED; soisconnected(pcb->so); } else ng_btsocket_rfcomm_pcb_kill(pcb, error); mtx_unlock(&pcb->pcb_mtx); } else /* Nobody is listen()ing on the requested DLCI */ error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_DM,dlci); return (error); } /* ng_btsocket_rfcomm_receive_sabm */ /* * Process RFCOMM DISC frame */ static int ng_btsocket_rfcomm_receive_disc(ng_btsocket_rfcomm_session_p s, int dlci) { ng_btsocket_rfcomm_pcb_p pcb = NULL; int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got DISC, session state=%d, flags=%#x, mtu=%d, dlci=%d\n", __func__, s->state, s->flags, s->mtu, dlci); /* DLCI == 0 means close multiplexor channel */ if (dlci == 0) { /* XXX FIXME assume that remote side will close the socket */ error = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_UA, 0); if (error == 0) { if (s->state == NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING) s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; /* XXX */ else s->state = NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING; } else s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; /* XXX */ ng_btsocket_rfcomm_session_clean(s); } else { pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci); if (pcb != NULL) { int err; mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_RFCOMM_INFO( "%s: Got DISC for dlci=%d, state=%d, flags=%#x\n", __func__, dlci, pcb->state, pcb->flags); error = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_UA, dlci); if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED) err = 0; else err = ECONNREFUSED; ng_btsocket_rfcomm_pcb_kill(pcb, err); mtx_unlock(&pcb->pcb_mtx); } else { NG_BTSOCKET_RFCOMM_WARN( "%s: Got DISC for non-existing dlci=%d\n", __func__, dlci); error = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_DM, dlci); } } return (error); } /* ng_btsocket_rfcomm_receive_disc */ /* * Process RFCOMM UA frame */ static int ng_btsocket_rfcomm_receive_ua(ng_btsocket_rfcomm_session_p s, int dlci) { ng_btsocket_rfcomm_pcb_p pcb = NULL; int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got UA, session state=%d, flags=%#x, mtu=%d, dlci=%d\n", __func__, s->state, s->flags, s->mtu, dlci); /* dlci == 0 means multiplexor channel */ if (dlci == 0) { switch (s->state) { case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED: s->state = NG_BTSOCKET_RFCOMM_SESSION_OPEN; ng_btsocket_rfcomm_connect_cfm(s); break; case NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING: s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; ng_btsocket_rfcomm_session_clean(s); break; default: NG_BTSOCKET_RFCOMM_WARN( "%s: Got UA for session in invalid state=%d(%d), flags=%#x, mtu=%d\n", __func__, s->state, INITIATOR(s), s->flags, s->mtu); error = ENOENT; break; } return (error); } /* Check if we have this DLCI */ pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci); if (pcb != NULL) { mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_RFCOMM_INFO( "%s: Got UA for dlci=%d, state=%d, flags=%#x\n", __func__, dlci, pcb->state, pcb->flags); switch (pcb->state) { case NG_BTSOCKET_RFCOMM_DLC_CONNECTING: ng_btsocket_rfcomm_untimeout(pcb); error = ng_btsocket_rfcomm_send_msc(pcb); if (error == 0) { pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTED; soisconnected(pcb->so); } break; case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING: ng_btsocket_rfcomm_pcb_kill(pcb, 0); break; default: NG_BTSOCKET_RFCOMM_WARN( "%s: Got UA for dlci=%d in invalid state=%d, flags=%#x\n", __func__, dlci, pcb->state, pcb->flags); error = ENOENT; break; } mtx_unlock(&pcb->pcb_mtx); } else { NG_BTSOCKET_RFCOMM_WARN( "%s: Got UA for non-existing dlci=%d\n", __func__, dlci); error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_DM,dlci); } return (error); } /* ng_btsocket_rfcomm_receive_ua */ /* * Process RFCOMM DM frame */ static int ng_btsocket_rfcomm_receive_dm(ng_btsocket_rfcomm_session_p s, int dlci) { ng_btsocket_rfcomm_pcb_p pcb = NULL; int error; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got DM, session state=%d, flags=%#x, mtu=%d, dlci=%d\n", __func__, s->state, s->flags, s->mtu, dlci); /* DLCI == 0 means multiplexor channel */ if (dlci == 0) { /* Disconnect all dlc's on the session */ s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; ng_btsocket_rfcomm_session_clean(s); } else { pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci); if (pcb != NULL) { mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_RFCOMM_INFO( "%s: Got DM for dlci=%d, state=%d, flags=%#x\n", __func__, dlci, pcb->state, pcb->flags); if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED) error = ECONNRESET; else error = ECONNREFUSED; ng_btsocket_rfcomm_pcb_kill(pcb, error); mtx_unlock(&pcb->pcb_mtx); } else NG_BTSOCKET_RFCOMM_WARN( "%s: Got DM for non-existing dlci=%d\n", __func__, dlci); } return (0); } /* ng_btsocket_rfcomm_receive_dm */ /* * Process RFCOMM UIH frame (data) */ static int ng_btsocket_rfcomm_receive_uih(ng_btsocket_rfcomm_session_p s, int dlci, int pf, struct mbuf *m0) { ng_btsocket_rfcomm_pcb_p pcb = NULL; int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got UIH, session state=%d, flags=%#x, mtu=%d, dlci=%d, pf=%d, len=%d\n", __func__, s->state, s->flags, s->mtu, dlci, pf, m0->m_pkthdr.len); /* XXX should we do it here? Check for session flow control */ if (s->flags & NG_BTSOCKET_RFCOMM_SESSION_LFC) { NG_BTSOCKET_RFCOMM_WARN( "%s: Got UIH with session flow control asserted, state=%d, flags=%#x\n", __func__, s->state, s->flags); goto drop; } /* Check if we have this dlci */ pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci); if (pcb == NULL) { NG_BTSOCKET_RFCOMM_WARN( "%s: Got UIH for non-existing dlci=%d\n", __func__, dlci); error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_DM,dlci); goto drop; } mtx_lock(&pcb->pcb_mtx); /* Check dlci state */ if (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTED) { NG_BTSOCKET_RFCOMM_WARN( "%s: Got UIH for dlci=%d in invalid state=%d, flags=%#x\n", __func__, dlci, pcb->state, pcb->flags); error = EINVAL; goto drop1; } /* Check dlci flow control */ if (((pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) && pcb->rx_cred <= 0) || (pcb->lmodem & RFCOMM_MODEM_FC)) { NG_BTSOCKET_RFCOMM_ERR( "%s: Got UIH for dlci=%d with asserted flow control, state=%d, " \ "flags=%#x, rx_cred=%d, lmodem=%#x\n", __func__, dlci, pcb->state, pcb->flags, pcb->rx_cred, pcb->lmodem); goto drop1; } /* Did we get any credits? */ if ((pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) && pf) { NG_BTSOCKET_RFCOMM_INFO( "%s: Got %d more credits for dlci=%d, state=%d, flags=%#x, " \ "rx_cred=%d, tx_cred=%d\n", __func__, *mtod(m0, u_int8_t *), dlci, pcb->state, pcb->flags, pcb->rx_cred, pcb->tx_cred); pcb->tx_cred += *mtod(m0, u_int8_t *); m_adj(m0, 1); /* Send more from the DLC. XXX check for errors? */ ng_btsocket_rfcomm_pcb_send(pcb, ALOT); } /* OK the of the rest of the mbuf is the data */ if (m0->m_pkthdr.len > 0) { /* If we are using credit flow control decrease rx_cred here */ if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) { /* Give remote peer more credits (if needed) */ if (-- pcb->rx_cred <= RFCOMM_MAX_CREDITS / 2) ng_btsocket_rfcomm_send_credits(pcb); else NG_BTSOCKET_RFCOMM_INFO( "%s: Remote side still has credits, dlci=%d, state=%d, flags=%#x, " \ "rx_cred=%d, tx_cred=%d\n", __func__, dlci, pcb->state, pcb->flags, pcb->rx_cred, pcb->tx_cred); } /* Check packet against mtu on dlci */ if (m0->m_pkthdr.len > pcb->mtu) { NG_BTSOCKET_RFCOMM_ERR( "%s: Got oversized UIH for dlci=%d, state=%d, flags=%#x, mtu=%d, len=%d\n", __func__, dlci, pcb->state, pcb->flags, pcb->mtu, m0->m_pkthdr.len); error = EMSGSIZE; } else if (m0->m_pkthdr.len > sbspace(&pcb->so->so_rcv)) { /* * This is really bad. Receive queue on socket does * not have enough space for the packet. We do not * have any other choice but drop the packet. */ NG_BTSOCKET_RFCOMM_ERR( "%s: Not enough space in socket receive queue. Dropping UIH for dlci=%d, " \ "state=%d, flags=%#x, len=%d, space=%ld\n", __func__, dlci, pcb->state, pcb->flags, m0->m_pkthdr.len, sbspace(&pcb->so->so_rcv)); error = ENOBUFS; } else { /* Append packet to the socket receive queue */ - sbappend(&pcb->so->so_rcv, m0); + sbappend(&pcb->so->so_rcv, m0, 0); m0 = NULL; sorwakeup(pcb->so); } } drop1: mtx_unlock(&pcb->pcb_mtx); drop: NG_FREE_M(m0); /* checks for != NULL */ return (error); } /* ng_btsocket_rfcomm_receive_uih */ /* * Process RFCOMM MCC command (Multiplexor) * * From TS 07.10 spec * * "5.4.3.1 Information Data * * ...The frames (UIH) sent by the initiating station have the C/R bit set * to 1 and those sent by the responding station have the C/R bit set to 0..." * * "5.4.6.2 Operating procedures * * Messages always exist in pairs; a command message and a corresponding * response message. If the C/R bit is set to 1 the message is a command, * if it is set to 0 the message is a response... * * ... * * NOTE: Notice that when UIH frames are used to convey information on DLCI 0 * there are at least two different fields that contain a C/R bit, and the * bits are set of different form. The C/R bit in the Type field shall be set * as it is stated above, while the C/R bit in the Address field (see subclause * 5.2.1.2) shall be set as it is described in subclause 5.4.3.1." */ static int ng_btsocket_rfcomm_receive_mcc(ng_btsocket_rfcomm_session_p s, struct mbuf *m0) { struct rfcomm_mcc_hdr *hdr = NULL; u_int8_t cr, type, length; mtx_assert(&s->session_mtx, MA_OWNED); /* * We can access data directly in the first mbuf, because we have * m_pullup()'ed mbuf chain in ng_btsocket_rfcomm_receive_frame(). * All MCC commands should fit into single mbuf (except probably TEST). */ hdr = mtod(m0, struct rfcomm_mcc_hdr *); cr = RFCOMM_CR(hdr->type); type = RFCOMM_MCC_TYPE(hdr->type); length = RFCOMM_MCC_LENGTH(hdr->length); /* Check MCC frame length */ if (sizeof(*hdr) + length != m0->m_pkthdr.len) { NG_BTSOCKET_RFCOMM_ERR( "%s: Invalid MCC frame length=%d, len=%d\n", __func__, length, m0->m_pkthdr.len); NG_FREE_M(m0); return (EMSGSIZE); } switch (type) { case RFCOMM_MCC_TEST: return (ng_btsocket_rfcomm_receive_test(s, m0)); /* NOT REACHED */ case RFCOMM_MCC_FCON: case RFCOMM_MCC_FCOFF: return (ng_btsocket_rfcomm_receive_fc(s, m0)); /* NOT REACHED */ case RFCOMM_MCC_MSC: return (ng_btsocket_rfcomm_receive_msc(s, m0)); /* NOT REACHED */ case RFCOMM_MCC_RPN: return (ng_btsocket_rfcomm_receive_rpn(s, m0)); /* NOT REACHED */ case RFCOMM_MCC_RLS: return (ng_btsocket_rfcomm_receive_rls(s, m0)); /* NOT REACHED */ case RFCOMM_MCC_PN: return (ng_btsocket_rfcomm_receive_pn(s, m0)); /* NOT REACHED */ case RFCOMM_MCC_NSC: NG_BTSOCKET_RFCOMM_ERR( "%s: Got MCC NSC, type=%#x, cr=%d, length=%d, session state=%d, flags=%#x, " \ "mtu=%d, len=%d\n", __func__, RFCOMM_MCC_TYPE(*((u_int8_t *)(hdr + 1))), cr, length, s->state, s->flags, s->mtu, m0->m_pkthdr.len); NG_FREE_M(m0); break; default: NG_BTSOCKET_RFCOMM_ERR( "%s: Got unknown MCC, type=%#x, cr=%d, length=%d, session state=%d, " \ "flags=%#x, mtu=%d, len=%d\n", __func__, type, cr, length, s->state, s->flags, s->mtu, m0->m_pkthdr.len); /* Reuse mbuf to send NSC */ hdr = mtod(m0, struct rfcomm_mcc_hdr *); m0->m_pkthdr.len = m0->m_len = sizeof(*hdr); /* Create MCC NSC header */ hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_NSC); hdr->length = RFCOMM_MKLEN8(1); /* Put back MCC command type we did not like */ m0->m_data[m0->m_len] = RFCOMM_MKMCC_TYPE(cr, type); m0->m_pkthdr.len ++; m0->m_len ++; /* Send UIH frame */ return (ng_btsocket_rfcomm_send_uih(s, RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0)); /* NOT REACHED */ } return (0); } /* ng_btsocket_rfcomm_receive_mcc */ /* * Receive RFCOMM TEST MCC command */ static int ng_btsocket_rfcomm_receive_test(ng_btsocket_rfcomm_session_p s, struct mbuf *m0) { struct rfcomm_mcc_hdr *hdr = mtod(m0, struct rfcomm_mcc_hdr *); int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got MCC TEST, cr=%d, length=%d, session state=%d, flags=%#x, mtu=%d, " \ "len=%d\n", __func__, RFCOMM_CR(hdr->type), RFCOMM_MCC_LENGTH(hdr->length), s->state, s->flags, s->mtu, m0->m_pkthdr.len); if (RFCOMM_CR(hdr->type)) { hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_TEST); error = ng_btsocket_rfcomm_send_uih(s, RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0); } else NG_FREE_M(m0); /* XXX ignore response */ return (error); } /* ng_btsocket_rfcomm_receive_test */ /* * Receive RFCOMM FCON/FCOFF MCC command */ static int ng_btsocket_rfcomm_receive_fc(ng_btsocket_rfcomm_session_p s, struct mbuf *m0) { struct rfcomm_mcc_hdr *hdr = mtod(m0, struct rfcomm_mcc_hdr *); u_int8_t type = RFCOMM_MCC_TYPE(hdr->type); int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); /* * Turn ON/OFF aggregate flow on the entire session. When remote peer * asserted flow control no transmission shall occur except on dlci 0 * (control channel). */ NG_BTSOCKET_RFCOMM_INFO( "%s: Got MCC FC%s, cr=%d, length=%d, session state=%d, flags=%#x, mtu=%d, " \ "len=%d\n", __func__, (type == RFCOMM_MCC_FCON)? "ON" : "OFF", RFCOMM_CR(hdr->type), RFCOMM_MCC_LENGTH(hdr->length), s->state, s->flags, s->mtu, m0->m_pkthdr.len); if (RFCOMM_CR(hdr->type)) { if (type == RFCOMM_MCC_FCON) s->flags &= ~NG_BTSOCKET_RFCOMM_SESSION_RFC; else s->flags |= NG_BTSOCKET_RFCOMM_SESSION_RFC; hdr->type = RFCOMM_MKMCC_TYPE(0, type); error = ng_btsocket_rfcomm_send_uih(s, RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0); } else NG_FREE_M(m0); /* XXX ignore response */ return (error); } /* ng_btsocket_rfcomm_receive_fc */ /* * Receive RFCOMM MSC MCC command */ static int ng_btsocket_rfcomm_receive_msc(ng_btsocket_rfcomm_session_p s, struct mbuf *m0) { struct rfcomm_mcc_hdr *hdr = mtod(m0, struct rfcomm_mcc_hdr*); struct rfcomm_mcc_msc *msc = (struct rfcomm_mcc_msc *)(hdr+1); ng_btsocket_rfcomm_pcb_t *pcb = NULL; int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got MCC MSC, dlci=%d, cr=%d, length=%d, session state=%d, flags=%#x, " \ "mtu=%d, len=%d\n", __func__, RFCOMM_DLCI(msc->address), RFCOMM_CR(hdr->type), RFCOMM_MCC_LENGTH(hdr->length), s->state, s->flags, s->mtu, m0->m_pkthdr.len); if (RFCOMM_CR(hdr->type)) { pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, RFCOMM_DLCI(msc->address)); if (pcb == NULL) { NG_BTSOCKET_RFCOMM_WARN( "%s: Got MSC command for non-existing dlci=%d\n", __func__, RFCOMM_DLCI(msc->address)); NG_FREE_M(m0); return (ENOENT); } mtx_lock(&pcb->pcb_mtx); if (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTING && pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTED) { NG_BTSOCKET_RFCOMM_WARN( "%s: Got MSC on dlci=%d in invalid state=%d\n", __func__, RFCOMM_DLCI(msc->address), pcb->state); mtx_unlock(&pcb->pcb_mtx); NG_FREE_M(m0); return (EINVAL); } pcb->rmodem = msc->modem; /* Update remote port signals */ hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_MSC); error = ng_btsocket_rfcomm_send_uih(s, RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0); #if 0 /* YYY */ /* Send more data from DLC. XXX check for errors? */ if (!(pcb->rmodem & RFCOMM_MODEM_FC) && !(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC)) ng_btsocket_rfcomm_pcb_send(pcb, ALOT); #endif /* YYY */ mtx_unlock(&pcb->pcb_mtx); } else NG_FREE_M(m0); /* XXX ignore response */ return (error); } /* ng_btsocket_rfcomm_receive_msc */ /* * Receive RFCOMM RPN MCC command * XXX FIXME do we need htole16/le16toh for RPN param_mask? */ static int ng_btsocket_rfcomm_receive_rpn(ng_btsocket_rfcomm_session_p s, struct mbuf *m0) { struct rfcomm_mcc_hdr *hdr = mtod(m0, struct rfcomm_mcc_hdr *); struct rfcomm_mcc_rpn *rpn = (struct rfcomm_mcc_rpn *)(hdr + 1); int error = 0; u_int16_t param_mask; u_int8_t bit_rate, data_bits, stop_bits, parity, flow_control, xon_char, xoff_char; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got MCC RPN, dlci=%d, cr=%d, length=%d, session state=%d, flags=%#x, " \ "mtu=%d, len=%d\n", __func__, RFCOMM_DLCI(rpn->dlci), RFCOMM_CR(hdr->type), RFCOMM_MCC_LENGTH(hdr->length), s->state, s->flags, s->mtu, m0->m_pkthdr.len); if (RFCOMM_CR(hdr->type)) { param_mask = RFCOMM_RPN_PM_ALL; if (RFCOMM_MCC_LENGTH(hdr->length) == 1) { /* Request - return default setting */ bit_rate = RFCOMM_RPN_BR_115200; data_bits = RFCOMM_RPN_DATA_8; stop_bits = RFCOMM_RPN_STOP_1; parity = RFCOMM_RPN_PARITY_NONE; flow_control = RFCOMM_RPN_FLOW_NONE; xon_char = RFCOMM_RPN_XON_CHAR; xoff_char = RFCOMM_RPN_XOFF_CHAR; } else { /* * Ignore/accept bit_rate, 8 bits, 1 stop bit, no * parity, no flow control lines, default XON/XOFF * chars. */ bit_rate = rpn->bit_rate; rpn->param_mask = le16toh(rpn->param_mask); /* XXX */ data_bits = RFCOMM_RPN_DATA_BITS(rpn->line_settings); if (rpn->param_mask & RFCOMM_RPN_PM_DATA && data_bits != RFCOMM_RPN_DATA_8) { data_bits = RFCOMM_RPN_DATA_8; param_mask ^= RFCOMM_RPN_PM_DATA; } stop_bits = RFCOMM_RPN_STOP_BITS(rpn->line_settings); if (rpn->param_mask & RFCOMM_RPN_PM_STOP && stop_bits != RFCOMM_RPN_STOP_1) { stop_bits = RFCOMM_RPN_STOP_1; param_mask ^= RFCOMM_RPN_PM_STOP; } parity = RFCOMM_RPN_PARITY(rpn->line_settings); if (rpn->param_mask & RFCOMM_RPN_PM_PARITY && parity != RFCOMM_RPN_PARITY_NONE) { parity = RFCOMM_RPN_PARITY_NONE; param_mask ^= RFCOMM_RPN_PM_PARITY; } flow_control = rpn->flow_control; if (rpn->param_mask & RFCOMM_RPN_PM_FLOW && flow_control != RFCOMM_RPN_FLOW_NONE) { flow_control = RFCOMM_RPN_FLOW_NONE; param_mask ^= RFCOMM_RPN_PM_FLOW; } xon_char = rpn->xon_char; if (rpn->param_mask & RFCOMM_RPN_PM_XON && xon_char != RFCOMM_RPN_XON_CHAR) { xon_char = RFCOMM_RPN_XON_CHAR; param_mask ^= RFCOMM_RPN_PM_XON; } xoff_char = rpn->xoff_char; if (rpn->param_mask & RFCOMM_RPN_PM_XOFF && xoff_char != RFCOMM_RPN_XOFF_CHAR) { xoff_char = RFCOMM_RPN_XOFF_CHAR; param_mask ^= RFCOMM_RPN_PM_XOFF; } } rpn->bit_rate = bit_rate; rpn->line_settings = RFCOMM_MKRPN_LINE_SETTINGS(data_bits, stop_bits, parity); rpn->flow_control = flow_control; rpn->xon_char = xon_char; rpn->xoff_char = xoff_char; rpn->param_mask = htole16(param_mask); /* XXX */ m0->m_pkthdr.len = m0->m_len = sizeof(*hdr) + sizeof(*rpn); hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_RPN); error = ng_btsocket_rfcomm_send_uih(s, RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0); } else NG_FREE_M(m0); /* XXX ignore response */ return (error); } /* ng_btsocket_rfcomm_receive_rpn */ /* * Receive RFCOMM RLS MCC command */ static int ng_btsocket_rfcomm_receive_rls(ng_btsocket_rfcomm_session_p s, struct mbuf *m0) { struct rfcomm_mcc_hdr *hdr = mtod(m0, struct rfcomm_mcc_hdr *); struct rfcomm_mcc_rls *rls = (struct rfcomm_mcc_rls *)(hdr + 1); int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); /* * XXX FIXME Do we have to do anything else here? Remote peer tries to * tell us something about DLCI. Just report what we have received and * return back received values as required by TS 07.10 spec. */ NG_BTSOCKET_RFCOMM_INFO( "%s: Got MCC RLS, dlci=%d, status=%#x, cr=%d, length=%d, session state=%d, " \ "flags=%#x, mtu=%d, len=%d\n", __func__, RFCOMM_DLCI(rls->address), rls->status, RFCOMM_CR(hdr->type), RFCOMM_MCC_LENGTH(hdr->length), s->state, s->flags, s->mtu, m0->m_pkthdr.len); if (RFCOMM_CR(hdr->type)) { if (rls->status & 0x1) NG_BTSOCKET_RFCOMM_ERR( "%s: Got RLS dlci=%d, error=%#x\n", __func__, RFCOMM_DLCI(rls->address), rls->status >> 1); hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_RLS); error = ng_btsocket_rfcomm_send_uih(s, RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0); } else NG_FREE_M(m0); /* XXX ignore responses */ return (error); } /* ng_btsocket_rfcomm_receive_rls */ /* * Receive RFCOMM PN MCC command */ static int ng_btsocket_rfcomm_receive_pn(ng_btsocket_rfcomm_session_p s, struct mbuf *m0) { struct rfcomm_mcc_hdr *hdr = mtod(m0, struct rfcomm_mcc_hdr*); struct rfcomm_mcc_pn *pn = (struct rfcomm_mcc_pn *)(hdr+1); ng_btsocket_rfcomm_pcb_t *pcb = NULL; int error = 0; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Got MCC PN, dlci=%d, cr=%d, length=%d, flow_control=%#x, priority=%d, " \ "ack_timer=%d, mtu=%d, max_retrans=%d, credits=%d, session state=%d, " \ "flags=%#x, session mtu=%d, len=%d\n", __func__, pn->dlci, RFCOMM_CR(hdr->type), RFCOMM_MCC_LENGTH(hdr->length), pn->flow_control, pn->priority, pn->ack_timer, le16toh(pn->mtu), pn->max_retrans, pn->credits, s->state, s->flags, s->mtu, m0->m_pkthdr.len); if (pn->dlci == 0) { NG_BTSOCKET_RFCOMM_ERR("%s: Zero dlci in MCC PN\n", __func__); NG_FREE_M(m0); return (EINVAL); } /* Check if we have this dlci */ pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, pn->dlci); if (pcb != NULL) { mtx_lock(&pcb->pcb_mtx); if (RFCOMM_CR(hdr->type)) { /* PN Request */ ng_btsocket_rfcomm_set_pn(pcb, 1, pn->flow_control, pn->credits, pn->mtu); if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) { pn->flow_control = 0xe0; pn->credits = RFCOMM_DEFAULT_CREDITS; } else { pn->flow_control = 0; pn->credits = 0; } hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_PN); error = ng_btsocket_rfcomm_send_uih(s, RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0); } else { /* PN Response - proceed with SABM. Timeout still set */ if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_CONFIGURING) { ng_btsocket_rfcomm_set_pn(pcb, 0, pn->flow_control, pn->credits, pn->mtu); pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTING; error = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_SABM, pn->dlci); } else NG_BTSOCKET_RFCOMM_WARN( "%s: Got PN response for dlci=%d in invalid state=%d\n", __func__, pn->dlci, pcb->state); NG_FREE_M(m0); } mtx_unlock(&pcb->pcb_mtx); } else if (RFCOMM_CR(hdr->type)) { /* PN request to non-existing dlci - incomming connection */ pcb = ng_btsocket_rfcomm_connect_ind(s, RFCOMM_SRVCHANNEL(pn->dlci)); if (pcb != NULL) { mtx_lock(&pcb->pcb_mtx); pcb->dlci = pn->dlci; ng_btsocket_rfcomm_set_pn(pcb, 1, pn->flow_control, pn->credits, pn->mtu); if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) { pn->flow_control = 0xe0; pn->credits = RFCOMM_DEFAULT_CREDITS; } else { pn->flow_control = 0; pn->credits = 0; } hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_PN); error = ng_btsocket_rfcomm_send_uih(s, RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0); if (error == 0) { ng_btsocket_rfcomm_timeout(pcb); pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTING; soisconnecting(pcb->so); } else ng_btsocket_rfcomm_pcb_kill(pcb, error); mtx_unlock(&pcb->pcb_mtx); } else { /* Nobody is listen()ing on this channel */ error = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_DM, pn->dlci); NG_FREE_M(m0); } } else NG_FREE_M(m0); /* XXX ignore response to non-existing dlci */ return (error); } /* ng_btsocket_rfcomm_receive_pn */ /* * Set PN parameters for dlci. Caller must hold pcb->pcb_mtx. * * From Bluetooth spec. * * "... The CL1 - CL4 field is completely redefined. (In TS07.10 this defines * the convergence layer to use, which is not applicable to RFCOMM. In RFCOMM, * in Bluetooth versions up to 1.0B, this field was forced to 0). * * In the PN request sent prior to a DLC establishment, this field must contain * the value 15 (0xF), indicating support of credit based flow control in the * sender. See Table 5.3 below. If the PN response contains any other value * than 14 (0xE) in this field, it is inferred that the peer RFCOMM entity is * not supporting the credit based flow control feature. (This is only possible * if the peer RFCOMM implementation is only conforming to Bluetooth version * 1.0B.) If a PN request is sent on an already open DLC, then this field must * contain the value zero; it is not possible to set initial credits more * than once per DLC activation. A responding implementation must set this * field in the PN response to 14 (0xE), if (and only if) the value in the PN * request was 15..." */ static void ng_btsocket_rfcomm_set_pn(ng_btsocket_rfcomm_pcb_p pcb, u_int8_t cr, u_int8_t flow_control, u_int8_t credits, u_int16_t mtu) { mtx_assert(&pcb->pcb_mtx, MA_OWNED); pcb->mtu = le16toh(mtu); if (cr) { if (flow_control == 0xf0) { pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_CFC; pcb->tx_cred = credits; } else { pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_CFC; pcb->tx_cred = 0; } } else { if (flow_control == 0xe0) { pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_CFC; pcb->tx_cred = credits; } else { pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_CFC; pcb->tx_cred = 0; } } NG_BTSOCKET_RFCOMM_INFO( "%s: cr=%d, dlci=%d, state=%d, flags=%#x, mtu=%d, rx_cred=%d, tx_cred=%d\n", __func__, cr, pcb->dlci, pcb->state, pcb->flags, pcb->mtu, pcb->rx_cred, pcb->tx_cred); } /* ng_btsocket_rfcomm_set_pn */ /* * Send RFCOMM SABM/DISC/UA/DM frames. Caller must hold s->session_mtx */ static int ng_btsocket_rfcomm_send_command(ng_btsocket_rfcomm_session_p s, u_int8_t type, u_int8_t dlci) { struct rfcomm_cmd_hdr *hdr = NULL; struct mbuf *m = NULL; int cr; mtx_assert(&s->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Sending command type %#x, session state=%d, flags=%#x, mtu=%d, dlci=%d\n", __func__, type, s->state, s->flags, s->mtu, dlci); switch (type) { case RFCOMM_FRAME_SABM: case RFCOMM_FRAME_DISC: cr = INITIATOR(s); break; case RFCOMM_FRAME_UA: case RFCOMM_FRAME_DM: cr = !INITIATOR(s); break; default: panic("%s: Invalid frame type=%#x\n", __func__, type); return (EINVAL); /* NOT REACHED */ } MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_pkthdr.len = m->m_len = sizeof(*hdr); hdr = mtod(m, struct rfcomm_cmd_hdr *); hdr->address = RFCOMM_MKADDRESS(cr, dlci); hdr->control = RFCOMM_MKCONTROL(type, 1); hdr->length = RFCOMM_MKLEN8(0); hdr->fcs = ng_btsocket_rfcomm_fcs3((u_int8_t *) hdr); NG_BT_MBUFQ_ENQUEUE(&s->outq, m); return (0); } /* ng_btsocket_rfcomm_send_command */ /* * Send RFCOMM UIH frame. Caller must hold s->session_mtx */ static int ng_btsocket_rfcomm_send_uih(ng_btsocket_rfcomm_session_p s, u_int8_t address, u_int8_t pf, u_int8_t credits, struct mbuf *data) { struct rfcomm_frame_hdr *hdr = NULL; struct mbuf *m = NULL, *mcrc = NULL; u_int16_t length; mtx_assert(&s->session_mtx, MA_OWNED); MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) { NG_FREE_M(data); return (ENOBUFS); } m->m_pkthdr.len = m->m_len = sizeof(*hdr); MGET(mcrc, M_NOWAIT, MT_DATA); if (mcrc == NULL) { NG_FREE_M(data); return (ENOBUFS); } mcrc->m_len = 1; /* Fill UIH frame header */ hdr = mtod(m, struct rfcomm_frame_hdr *); hdr->address = address; hdr->control = RFCOMM_MKCONTROL(RFCOMM_FRAME_UIH, pf); /* Calculate FCS */ mcrc->m_data[0] = ng_btsocket_rfcomm_fcs2((u_int8_t *) hdr); /* Put length back */ length = (data != NULL)? data->m_pkthdr.len : 0; if (length > 127) { u_int16_t l = htole16(RFCOMM_MKLEN16(length)); bcopy(&l, &hdr->length, sizeof(l)); m->m_pkthdr.len ++; m->m_len ++; } else hdr->length = RFCOMM_MKLEN8(length); if (pf) { m->m_data[m->m_len] = credits; m->m_pkthdr.len ++; m->m_len ++; } /* Add payload */ if (data != NULL) { m_cat(m, data); m->m_pkthdr.len += length; } /* Put FCS back */ m_cat(m, mcrc); m->m_pkthdr.len ++; NG_BTSOCKET_RFCOMM_INFO( "%s: Sending UIH state=%d, flags=%#x, address=%d, length=%d, pf=%d, " \ "credits=%d, len=%d\n", __func__, s->state, s->flags, address, length, pf, credits, m->m_pkthdr.len); NG_BT_MBUFQ_ENQUEUE(&s->outq, m); return (0); } /* ng_btsocket_rfcomm_send_uih */ /* * Send MSC request. Caller must hold pcb->pcb_mtx and pcb->session->session_mtx */ static int ng_btsocket_rfcomm_send_msc(ng_btsocket_rfcomm_pcb_p pcb) { struct mbuf *m = NULL; struct rfcomm_mcc_hdr *hdr = NULL; struct rfcomm_mcc_msc *msc = NULL; mtx_assert(&pcb->session->session_mtx, MA_OWNED); mtx_assert(&pcb->pcb_mtx, MA_OWNED); MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_pkthdr.len = m->m_len = sizeof(*hdr) + sizeof(*msc); hdr = mtod(m, struct rfcomm_mcc_hdr *); msc = (struct rfcomm_mcc_msc *)(hdr + 1); hdr->type = RFCOMM_MKMCC_TYPE(1, RFCOMM_MCC_MSC); hdr->length = RFCOMM_MKLEN8(sizeof(*msc)); msc->address = RFCOMM_MKADDRESS(1, pcb->dlci); msc->modem = pcb->lmodem; NG_BTSOCKET_RFCOMM_INFO( "%s: Sending MSC dlci=%d, state=%d, flags=%#x, address=%d, modem=%#x\n", __func__, pcb->dlci, pcb->state, pcb->flags, msc->address, msc->modem); return (ng_btsocket_rfcomm_send_uih(pcb->session, RFCOMM_MKADDRESS(INITIATOR(pcb->session), 0), 0, 0, m)); } /* ng_btsocket_rfcomm_send_msc */ /* * Send PN request. Caller must hold pcb->pcb_mtx and pcb->session->session_mtx */ static int ng_btsocket_rfcomm_send_pn(ng_btsocket_rfcomm_pcb_p pcb) { struct mbuf *m = NULL; struct rfcomm_mcc_hdr *hdr = NULL; struct rfcomm_mcc_pn *pn = NULL; mtx_assert(&pcb->session->session_mtx, MA_OWNED); mtx_assert(&pcb->pcb_mtx, MA_OWNED); MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_pkthdr.len = m->m_len = sizeof(*hdr) + sizeof(*pn); hdr = mtod(m, struct rfcomm_mcc_hdr *); pn = (struct rfcomm_mcc_pn *)(hdr + 1); hdr->type = RFCOMM_MKMCC_TYPE(1, RFCOMM_MCC_PN); hdr->length = RFCOMM_MKLEN8(sizeof(*pn)); pn->dlci = pcb->dlci; /* * Set default DLCI priority as described in GSM 07.10 * (ETSI TS 101 369) clause 5.6 page 42 */ pn->priority = (pcb->dlci < 56)? (((pcb->dlci >> 3) << 3) + 7) : 61; pn->ack_timer = 0; pn->mtu = htole16(pcb->mtu); pn->max_retrans = 0; if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) { pn->flow_control = 0xf0; pn->credits = pcb->rx_cred; } else { pn->flow_control = 0; pn->credits = 0; } NG_BTSOCKET_RFCOMM_INFO( "%s: Sending PN dlci=%d, state=%d, flags=%#x, mtu=%d, flow_control=%#x, " \ "credits=%d\n", __func__, pcb->dlci, pcb->state, pcb->flags, pcb->mtu, pn->flow_control, pn->credits); return (ng_btsocket_rfcomm_send_uih(pcb->session, RFCOMM_MKADDRESS(INITIATOR(pcb->session), 0), 0, 0, m)); } /* ng_btsocket_rfcomm_send_pn */ /* * Calculate and send credits based on available space in receive buffer */ static int ng_btsocket_rfcomm_send_credits(ng_btsocket_rfcomm_pcb_p pcb) { int error = 0; u_int8_t credits; mtx_assert(&pcb->pcb_mtx, MA_OWNED); mtx_assert(&pcb->session->session_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Sending more credits, dlci=%d, state=%d, flags=%#x, mtu=%d, " \ "space=%ld, tx_cred=%d, rx_cred=%d\n", __func__, pcb->dlci, pcb->state, pcb->flags, pcb->mtu, sbspace(&pcb->so->so_rcv), pcb->tx_cred, pcb->rx_cred); credits = sbspace(&pcb->so->so_rcv) / pcb->mtu; if (credits > 0) { if (pcb->rx_cred + credits > RFCOMM_MAX_CREDITS) credits = RFCOMM_MAX_CREDITS - pcb->rx_cred; error = ng_btsocket_rfcomm_send_uih( pcb->session, RFCOMM_MKADDRESS(INITIATOR(pcb->session), pcb->dlci), 1, credits, NULL); if (error == 0) { pcb->rx_cred += credits; NG_BTSOCKET_RFCOMM_INFO( "%s: Gave remote side %d more credits, dlci=%d, state=%d, flags=%#x, " \ "rx_cred=%d, tx_cred=%d\n", __func__, credits, pcb->dlci, pcb->state, pcb->flags, pcb->rx_cred, pcb->tx_cred); } else NG_BTSOCKET_RFCOMM_ERR( "%s: Could not send credits, error=%d, dlci=%d, state=%d, flags=%#x, " \ "mtu=%d, space=%ld, tx_cred=%d, rx_cred=%d\n", __func__, error, pcb->dlci, pcb->state, pcb->flags, pcb->mtu, sbspace(&pcb->so->so_rcv), pcb->tx_cred, pcb->rx_cred); } return (error); } /* ng_btsocket_rfcomm_send_credits */ /***************************************************************************** ***************************************************************************** ** RFCOMM DLCs ***************************************************************************** *****************************************************************************/ /* * Send data from socket send buffer * Caller must hold pcb->pcb_mtx and pcb->session->session_mtx */ static int ng_btsocket_rfcomm_pcb_send(ng_btsocket_rfcomm_pcb_p pcb, int limit) { struct mbuf *m = NULL; int sent, length, error; mtx_assert(&pcb->session->session_mtx, MA_OWNED); mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) limit = min(limit, pcb->tx_cred); else if (!(pcb->rmodem & RFCOMM_MODEM_FC)) limit = min(limit, RFCOMM_MAX_CREDITS); /* XXX ??? */ else limit = 0; if (limit == 0) { NG_BTSOCKET_RFCOMM_INFO( "%s: Could not send - remote flow control asserted, dlci=%d, flags=%#x, " \ "rmodem=%#x, tx_cred=%d\n", __func__, pcb->dlci, pcb->flags, pcb->rmodem, pcb->tx_cred); return (0); } for (error = 0, sent = 0; sent < limit; sent ++) { length = min(pcb->mtu, sbavail(&pcb->so->so_snd)); if (length == 0) break; /* Get the chunk from the socket's send buffer */ m = ng_btsocket_rfcomm_prepare_packet(&pcb->so->so_snd, length); if (m == NULL) { error = ENOBUFS; break; } sbdrop(&pcb->so->so_snd, length); error = ng_btsocket_rfcomm_send_uih(pcb->session, RFCOMM_MKADDRESS(INITIATOR(pcb->session), pcb->dlci), 0, 0, m); if (error != 0) break; } if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) pcb->tx_cred -= sent; if (error == 0 && sent > 0) { pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_SENDING; sowwakeup(pcb->so); } return (error); } /* ng_btsocket_rfcomm_pcb_send */ /* * Unlink and disconnect DLC. If ng_btsocket_rfcomm_pcb_kill() returns * non zero value than socket has no reference and has to be detached. * Caller must hold pcb->pcb_mtx and pcb->session->session_mtx */ static void ng_btsocket_rfcomm_pcb_kill(ng_btsocket_rfcomm_pcb_p pcb, int error) { ng_btsocket_rfcomm_session_p s = pcb->session; NG_BTSOCKET_RFCOMM_INFO( "%s: Killing DLC, so=%p, dlci=%d, state=%d, flags=%#x, error=%d\n", __func__, pcb->so, pcb->dlci, pcb->state, pcb->flags, error); if (pcb->session == NULL) panic("%s: DLC without session, pcb=%p, state=%d, flags=%#x\n", __func__, pcb, pcb->state, pcb->flags); mtx_assert(&pcb->session->session_mtx, MA_OWNED); mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO) ng_btsocket_rfcomm_untimeout(pcb); /* Detach DLC from the session. Does not matter which state DLC in */ LIST_REMOVE(pcb, session_next); pcb->session = NULL; /* Change DLC state and wakeup all sleepers */ pcb->state = NG_BTSOCKET_RFCOMM_DLC_CLOSED; pcb->so->so_error = error; soisdisconnected(pcb->so); wakeup(&pcb->state); /* Check if we have any DLCs left on the session */ if (LIST_EMPTY(&s->dlcs) && INITIATOR(s)) { NG_BTSOCKET_RFCOMM_INFO( "%s: Disconnecting session, state=%d, flags=%#x, mtu=%d\n", __func__, s->state, s->flags, s->mtu); switch (s->state) { case NG_BTSOCKET_RFCOMM_SESSION_CLOSED: case NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING: /* * Do not have to do anything here. We can get here * when L2CAP connection was terminated or we have * received DISC on multiplexor channel */ break; case NG_BTSOCKET_RFCOMM_SESSION_OPEN: /* Send DISC on multiplexor channel */ error = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_DISC, 0); if (error == 0) { s->state = NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING; break; } /* FALL THROUGH */ case NG_BTSOCKET_RFCOMM_SESSION_CONNECTING: case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED: s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; break; /* case NG_BTSOCKET_RFCOMM_SESSION_LISTENING: */ default: panic("%s: Invalid session state=%d, flags=%#x\n", __func__, s->state, s->flags); break; } ng_btsocket_rfcomm_task_wakeup(); } } /* ng_btsocket_rfcomm_pcb_kill */ /* * Look for given dlci for given RFCOMM session. Caller must hold s->session_mtx */ static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_pcb_by_dlci(ng_btsocket_rfcomm_session_p s, int dlci) { ng_btsocket_rfcomm_pcb_p pcb = NULL; mtx_assert(&s->session_mtx, MA_OWNED); LIST_FOREACH(pcb, &s->dlcs, session_next) if (pcb->dlci == dlci) break; return (pcb); } /* ng_btsocket_rfcomm_pcb_by_dlci */ /* * Look for socket that listens on given src address and given channel */ static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_pcb_listener(bdaddr_p src, int channel) { ng_btsocket_rfcomm_pcb_p pcb = NULL, pcb1 = NULL; mtx_lock(&ng_btsocket_rfcomm_sockets_mtx); LIST_FOREACH(pcb, &ng_btsocket_rfcomm_sockets, next) { if (pcb->channel != channel || !(pcb->so->so_options & SO_ACCEPTCONN)) continue; if (bcmp(&pcb->src, src, sizeof(*src)) == 0) break; if (bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0) pcb1 = pcb; } mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx); return ((pcb != NULL)? pcb : pcb1); } /* ng_btsocket_rfcomm_pcb_listener */ /***************************************************************************** ***************************************************************************** ** Misc. functions ***************************************************************************** *****************************************************************************/ /* * Set timeout. Caller MUST hold pcb_mtx */ static void ng_btsocket_rfcomm_timeout(ng_btsocket_rfcomm_pcb_p pcb) { mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (!(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)) { pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_TIMO; pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT; callout_reset(&pcb->timo, ng_btsocket_rfcomm_timo * hz, ng_btsocket_rfcomm_process_timeout, pcb); } else panic("%s: Duplicated socket timeout?!\n", __func__); } /* ng_btsocket_rfcomm_timeout */ /* * Unset pcb timeout. Caller MUST hold pcb_mtx */ static void ng_btsocket_rfcomm_untimeout(ng_btsocket_rfcomm_pcb_p pcb) { mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO) { callout_stop(&pcb->timo); pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_TIMO; pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT; } else panic("%s: No socket timeout?!\n", __func__); } /* ng_btsocket_rfcomm_timeout */ /* * Process pcb timeout */ static void ng_btsocket_rfcomm_process_timeout(void *xpcb) { ng_btsocket_rfcomm_pcb_p pcb = (ng_btsocket_rfcomm_pcb_p) xpcb; mtx_assert(&pcb->pcb_mtx, MA_OWNED); NG_BTSOCKET_RFCOMM_INFO( "%s: Timeout, so=%p, dlci=%d, state=%d, flags=%#x\n", __func__, pcb->so, pcb->dlci, pcb->state, pcb->flags); pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_TIMO; pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT; switch (pcb->state) { case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING: case NG_BTSOCKET_RFCOMM_DLC_CONNECTING: pcb->state = NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING; break; case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT: case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING: break; default: panic( "%s: DLC timeout in invalid state, dlci=%d, state=%d, flags=%#x\n", __func__, pcb->dlci, pcb->state, pcb->flags); break; } ng_btsocket_rfcomm_task_wakeup(); } /* ng_btsocket_rfcomm_process_timeout */ /* * Get up to length bytes from the socket buffer */ static struct mbuf * ng_btsocket_rfcomm_prepare_packet(struct sockbuf *sb, int length) { struct mbuf *top = NULL, *m = NULL, *n = NULL, *nextpkt = NULL; int mlen, noff, len; MGETHDR(top, M_NOWAIT, MT_DATA); if (top == NULL) return (NULL); top->m_pkthdr.len = length; top->m_len = 0; mlen = MHLEN; m = top; n = sb->sb_mb; nextpkt = n->m_nextpkt; noff = 0; while (length > 0 && n != NULL) { len = min(mlen - m->m_len, n->m_len - noff); if (len > length) len = length; bcopy(mtod(n, caddr_t)+noff, mtod(m, caddr_t)+m->m_len, len); m->m_len += len; noff += len; length -= len; if (length > 0 && m->m_len == mlen) { MGET(m->m_next, M_NOWAIT, MT_DATA); if (m->m_next == NULL) { NG_FREE_M(top); return (NULL); } m = m->m_next; m->m_len = 0; mlen = MLEN; } if (noff == n->m_len) { noff = 0; n = n->m_next; if (n == NULL) n = nextpkt; nextpkt = (n != NULL)? n->m_nextpkt : NULL; } } if (length < 0) panic("%s: length=%d\n", __func__, length); if (length > 0 && n == NULL) panic("%s: bogus length=%d, n=%p\n", __func__, length, n); return (top); } /* ng_btsocket_rfcomm_prepare_packet */ Index: projects/clang380-import/sys/netgraph/netflow/netflow.c =================================================================== --- projects/clang380-import/sys/netgraph/netflow/netflow.c (revision 293686) +++ projects/clang380-import/sys/netgraph/netflow/netflow.c (revision 293687) @@ -1,1189 +1,1190 @@ /*- * Copyright (c) 2010-2011 Alexander V. Chernikov * Copyright (c) 2004-2005 Gleb Smirnoff * Copyright (c) 2001-2003 Roman V. Palagin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $SourceForge: netflow.c,v 1.41 2004/09/05 11:41:10 glebius Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_route.h" #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NBUCKETS (65536) /* must be power of 2 */ /* This hash is for TCP or UDP packets. */ #define FULL_HASH(addr1, addr2, port1, port2) \ (((addr1 ^ (addr1 >> 16) ^ \ htons(addr2 ^ (addr2 >> 16))) ^ \ port1 ^ htons(port2)) & \ (NBUCKETS - 1)) /* This hash is for all other IP packets. */ #define ADDR_HASH(addr1, addr2) \ ((addr1 ^ (addr1 >> 16) ^ \ htons(addr2 ^ (addr2 >> 16))) & \ (NBUCKETS - 1)) /* Macros to shorten logical constructions */ /* XXX: priv must exist in namespace */ #define INACTIVE(fle) (time_uptime - fle->f.last > priv->nfinfo_inact_t) #define AGED(fle) (time_uptime - fle->f.first > priv->nfinfo_act_t) #define ISFREE(fle) (fle->f.packets == 0) /* * 4 is a magical number: statistically number of 4-packet flows is * bigger than 5,6,7...-packet flows by an order of magnitude. Most UDP/ICMP * scans are 1 packet (~ 90% of flow cache). TCP scans are 2-packet in case * of reachable host and 4-packet otherwise. */ #define SMALL(fle) (fle->f.packets <= 4) MALLOC_DEFINE(M_NETFLOW_HASH, "netflow_hash", "NetFlow hash"); static int export_add(item_p, struct flow_entry *); static int export_send(priv_p, fib_export_p, item_p, int); static int hash_insert(priv_p, struct flow_hash_entry *, struct flow_rec *, int, uint8_t, uint8_t); #ifdef INET6 static int hash6_insert(priv_p, struct flow_hash_entry *, struct flow6_rec *, int, uint8_t, uint8_t); #endif static void expire_flow(priv_p, fib_export_p, struct flow_entry *, int); /* * Generate hash for a given flow record. * * FIB is not used here, because: * most VRFS will carry public IPv4 addresses which are unique even * without FIB private addresses can overlap, but this is worked out * via flow_rec bcmp() containing fib id. In IPv6 world addresses are * all globally unique (it's not fully true, there is FC00::/7 for example, * but chances of address overlap are MUCH smaller) */ static inline uint32_t ip_hash(struct flow_rec *r) { switch (r->r_ip_p) { case IPPROTO_TCP: case IPPROTO_UDP: return FULL_HASH(r->r_src.s_addr, r->r_dst.s_addr, r->r_sport, r->r_dport); default: return ADDR_HASH(r->r_src.s_addr, r->r_dst.s_addr); } } #ifdef INET6 /* Generate hash for a given flow6 record. Use lower 4 octets from v6 addresses */ static inline uint32_t ip6_hash(struct flow6_rec *r) { switch (r->r_ip_p) { case IPPROTO_TCP: case IPPROTO_UDP: return FULL_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3], r->dst.r_dst6.__u6_addr.__u6_addr32[3], r->r_sport, r->r_dport); default: return ADDR_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3], r->dst.r_dst6.__u6_addr.__u6_addr32[3]); } } #endif /* * Detach export datagram from priv, if there is any. * If there is no, allocate a new one. */ static item_p get_export_dgram(priv_p priv, fib_export_p fe) { item_p item = NULL; mtx_lock(&fe->export_mtx); if (fe->exp.item != NULL) { item = fe->exp.item; fe->exp.item = NULL; } mtx_unlock(&fe->export_mtx); if (item == NULL) { struct netflow_v5_export_dgram *dgram; struct mbuf *m; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (NULL); item = ng_package_data(m, NG_NOFLAGS); if (item == NULL) return (NULL); dgram = mtod(m, struct netflow_v5_export_dgram *); dgram->header.count = 0; dgram->header.version = htons(NETFLOW_V5); dgram->header.pad = 0; } return (item); } /* * Re-attach incomplete datagram back to priv. * If there is already another one, then send incomplete. */ static void return_export_dgram(priv_p priv, fib_export_p fe, item_p item, int flags) { /* * It may happen on SMP, that some thread has already * put its item there, in this case we bail out and * send what we have to collector. */ mtx_lock(&fe->export_mtx); if (fe->exp.item == NULL) { fe->exp.item = item; mtx_unlock(&fe->export_mtx); } else { mtx_unlock(&fe->export_mtx); export_send(priv, fe, item, flags); } } /* * The flow is over. Call export_add() and free it. If datagram is * full, then call export_send(). */ static void expire_flow(priv_p priv, fib_export_p fe, struct flow_entry *fle, int flags) { struct netflow_export_item exp; uint16_t version = fle->f.version; if ((priv->export != NULL) && (version == IPVERSION)) { exp.item = get_export_dgram(priv, fe); if (exp.item == NULL) { priv->nfinfo_export_failed++; if (priv->export9 != NULL) priv->nfinfo_export9_failed++; /* fle definitely contains IPv4 flow. */ uma_zfree_arg(priv->zone, fle, priv); return; } if (export_add(exp.item, fle) > 0) export_send(priv, fe, exp.item, flags); else return_export_dgram(priv, fe, exp.item, NG_QUEUE); } if (priv->export9 != NULL) { exp.item9 = get_export9_dgram(priv, fe, &exp.item9_opt); if (exp.item9 == NULL) { priv->nfinfo_export9_failed++; if (version == IPVERSION) uma_zfree_arg(priv->zone, fle, priv); #ifdef INET6 else if (version == IP6VERSION) uma_zfree_arg(priv->zone6, fle, priv); #endif else panic("ng_netflow: Unknown IP proto: %d", version); return; } if (export9_add(exp.item9, exp.item9_opt, fle) > 0) export9_send(priv, fe, exp.item9, exp.item9_opt, flags); else return_export9_dgram(priv, fe, exp.item9, exp.item9_opt, NG_QUEUE); } if (version == IPVERSION) uma_zfree_arg(priv->zone, fle, priv); #ifdef INET6 else if (version == IP6VERSION) uma_zfree_arg(priv->zone6, fle, priv); #endif } /* Get a snapshot of node statistics */ void ng_netflow_copyinfo(priv_p priv, struct ng_netflow_info *i) { i->nfinfo_bytes = counter_u64_fetch(priv->nfinfo_bytes); i->nfinfo_packets = counter_u64_fetch(priv->nfinfo_packets); i->nfinfo_bytes6 = counter_u64_fetch(priv->nfinfo_bytes6); i->nfinfo_packets6 = counter_u64_fetch(priv->nfinfo_packets6); i->nfinfo_sbytes = counter_u64_fetch(priv->nfinfo_sbytes); i->nfinfo_spackets = counter_u64_fetch(priv->nfinfo_spackets); i->nfinfo_sbytes6 = counter_u64_fetch(priv->nfinfo_sbytes6); i->nfinfo_spackets6 = counter_u64_fetch(priv->nfinfo_spackets6); i->nfinfo_act_exp = counter_u64_fetch(priv->nfinfo_act_exp); i->nfinfo_inact_exp = counter_u64_fetch(priv->nfinfo_inact_exp); i->nfinfo_used = uma_zone_get_cur(priv->zone); #ifdef INET6 i->nfinfo_used6 = uma_zone_get_cur(priv->zone6); #endif i->nfinfo_alloc_failed = priv->nfinfo_alloc_failed; i->nfinfo_export_failed = priv->nfinfo_export_failed; i->nfinfo_export9_failed = priv->nfinfo_export9_failed; i->nfinfo_realloc_mbuf = priv->nfinfo_realloc_mbuf; i->nfinfo_alloc_fibs = priv->nfinfo_alloc_fibs; i->nfinfo_inact_t = priv->nfinfo_inact_t; i->nfinfo_act_t = priv->nfinfo_act_t; } /* * Insert a record into defined slot. * * First we get for us a free flow entry, then fill in all * possible fields in it. * * TODO: consider dropping hash mutex while filling in datagram, * as this was done in previous version. Need to test & profile * to be sure. */ static int hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r, int plen, uint8_t flags, uint8_t tcp_flags) { struct flow_entry *fle; struct sockaddr_in sin; struct rtentry *rt; mtx_assert(&hsh->mtx, MA_OWNED); fle = uma_zalloc_arg(priv->zone, priv, M_NOWAIT); if (fle == NULL) { priv->nfinfo_alloc_failed++; return (ENOMEM); } /* * Now fle is totally ours. It is detached from all lists, * we can safely edit it. */ fle->f.version = IPVERSION; bcopy(r, &fle->f.r, sizeof(struct flow_rec)); fle->f.bytes = plen; fle->f.packets = 1; fle->f.tcp_flags = tcp_flags; fle->f.first = fle->f.last = time_uptime; /* * First we do route table lookup on destination address. So we can * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases. */ if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) { bzero(&sin, sizeof(sin)); sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; sin.sin_addr = fle->f.r.r_dst; rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, r->fib); if (rt != NULL) { fle->f.fle_o_ifx = rt->rt_ifp->if_index; if (rt->rt_flags & RTF_GATEWAY && rt->rt_gateway->sa_family == AF_INET) fle->f.next_hop = ((struct sockaddr_in *)(rt->rt_gateway))->sin_addr; if (rt_mask(rt)) fle->f.dst_mask = bitcount32(((struct sockaddr_in *)rt_mask(rt))->sin_addr.s_addr); else if (rt->rt_flags & RTF_HOST) /* Give up. We can't determine mask :( */ fle->f.dst_mask = 32; RTFREE_LOCKED(rt); } } /* Do route lookup on source address, to fill in src_mask. */ if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) { bzero(&sin, sizeof(sin)); sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; sin.sin_addr = fle->f.r.r_src; rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, r->fib); if (rt != NULL) { if (rt_mask(rt)) fle->f.src_mask = bitcount32(((struct sockaddr_in *)rt_mask(rt))->sin_addr.s_addr); else if (rt->rt_flags & RTF_HOST) /* Give up. We can't determine mask :( */ fle->f.src_mask = 32; RTFREE_LOCKED(rt); } } /* Push new flow at the and of hash. */ TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash); return (0); } #ifdef INET6 /* XXX: make normal function, instead of.. */ #define ipv6_masklen(x) bitcount32((x).__u6_addr.__u6_addr32[0]) + \ bitcount32((x).__u6_addr.__u6_addr32[1]) + \ bitcount32((x).__u6_addr.__u6_addr32[2]) + \ bitcount32((x).__u6_addr.__u6_addr32[3]) #define RT_MASK6(x) (ipv6_masklen(((struct sockaddr_in6 *)rt_mask(x))->sin6_addr)) static int hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r, int plen, uint8_t flags, uint8_t tcp_flags) { struct flow6_entry *fle6; struct sockaddr_in6 sin6; struct rtentry *rt; mtx_assert(&hsh6->mtx, MA_OWNED); fle6 = uma_zalloc_arg(priv->zone6, priv, M_NOWAIT); if (fle6 == NULL) { priv->nfinfo_alloc_failed++; return (ENOMEM); } /* * Now fle is totally ours. It is detached from all lists, * we can safely edit it. */ fle6->f.version = IP6VERSION; bcopy(r, &fle6->f.r, sizeof(struct flow6_rec)); fle6->f.bytes = plen; fle6->f.packets = 1; fle6->f.tcp_flags = tcp_flags; fle6->f.first = fle6->f.last = time_uptime; /* * First we do route table lookup on destination address. So we can * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases. */ if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) { bzero(&sin6, sizeof(struct sockaddr_in6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = r->dst.r_dst6; rt = rtalloc1_fib((struct sockaddr *)&sin6, 0, 0, r->fib); if (rt != NULL) { fle6->f.fle_o_ifx = rt->rt_ifp->if_index; if (rt->rt_flags & RTF_GATEWAY && rt->rt_gateway->sa_family == AF_INET6) fle6->f.n.next_hop6 = ((struct sockaddr_in6 *)(rt->rt_gateway))->sin6_addr; if (rt_mask(rt)) fle6->f.dst_mask = RT_MASK6(rt); else fle6->f.dst_mask = 128; RTFREE_LOCKED(rt); } } if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) { /* Do route lookup on source address, to fill in src_mask. */ bzero(&sin6, sizeof(struct sockaddr_in6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = r->src.r_src6; rt = rtalloc1_fib((struct sockaddr *)&sin6, 0, 0, r->fib); if (rt != NULL) { if (rt_mask(rt)) fle6->f.src_mask = RT_MASK6(rt); else fle6->f.src_mask = 128; RTFREE_LOCKED(rt); } } /* Push new flow at the and of hash. */ TAILQ_INSERT_TAIL(&hsh6->head, (struct flow_entry *)fle6, fle_hash); return (0); } #undef ipv6_masklen #undef RT_MASK6 #endif /* * Non-static functions called from ng_netflow.c */ /* Allocate memory and set up flow cache */ void ng_netflow_cache_init(priv_p priv) { struct flow_hash_entry *hsh; int i; /* Initialize cache UMA zone. */ priv->zone = uma_zcreate("NetFlow IPv4 cache", sizeof(struct flow_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(priv->zone, CACHESIZE); #ifdef INET6 priv->zone6 = uma_zcreate("NetFlow IPv6 cache", sizeof(struct flow6_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(priv->zone6, CACHESIZE); #endif /* Allocate hash. */ priv->hash = malloc(NBUCKETS * sizeof(struct flow_hash_entry), M_NETFLOW_HASH, M_WAITOK | M_ZERO); /* Initialize hash. */ for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) { mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF); TAILQ_INIT(&hsh->head); } #ifdef INET6 /* Allocate hash. */ priv->hash6 = malloc(NBUCKETS * sizeof(struct flow_hash_entry), M_NETFLOW_HASH, M_WAITOK | M_ZERO); /* Initialize hash. */ for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++) { mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF); TAILQ_INIT(&hsh->head); } #endif priv->nfinfo_bytes = counter_u64_alloc(M_WAITOK); priv->nfinfo_packets = counter_u64_alloc(M_WAITOK); priv->nfinfo_bytes6 = counter_u64_alloc(M_WAITOK); priv->nfinfo_packets6 = counter_u64_alloc(M_WAITOK); priv->nfinfo_sbytes = counter_u64_alloc(M_WAITOK); priv->nfinfo_spackets = counter_u64_alloc(M_WAITOK); priv->nfinfo_sbytes6 = counter_u64_alloc(M_WAITOK); priv->nfinfo_spackets6 = counter_u64_alloc(M_WAITOK); priv->nfinfo_act_exp = counter_u64_alloc(M_WAITOK); priv->nfinfo_inact_exp = counter_u64_alloc(M_WAITOK); ng_netflow_v9_cache_init(priv); CTR0(KTR_NET, "ng_netflow startup()"); } /* Initialize new FIB table for v5 and v9 */ int ng_netflow_fib_init(priv_p priv, int fib) { fib_export_p fe = priv_to_fib(priv, fib); CTR1(KTR_NET, "ng_netflow(): fib init: %d", fib); if (fe != NULL) return (0); if ((fe = malloc(sizeof(struct fib_export), M_NETGRAPH, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); mtx_init(&fe->export_mtx, "export dgram lock", NULL, MTX_DEF); mtx_init(&fe->export9_mtx, "export9 dgram lock", NULL, MTX_DEF); fe->fib = fib; fe->domain_id = fib; if (atomic_cmpset_ptr((volatile uintptr_t *)&priv->fib_data[fib], (uintptr_t)NULL, (uintptr_t)fe) == 0) { /* FIB already set up by other ISR */ CTR3(KTR_NET, "ng_netflow(): fib init: %d setup %p but got %p", fib, fe, priv_to_fib(priv, fib)); mtx_destroy(&fe->export_mtx); mtx_destroy(&fe->export9_mtx); free(fe, M_NETGRAPH); } else { /* Increase counter for statistics */ CTR3(KTR_NET, "ng_netflow(): fib %d setup to %p (%p)", fib, fe, priv_to_fib(priv, fib)); priv->nfinfo_alloc_fibs++; } return (0); } /* Free all flow cache memory. Called from node close method. */ void ng_netflow_cache_flush(priv_p priv) { struct flow_entry *fle, *fle1; struct flow_hash_entry *hsh; struct netflow_export_item exp; fib_export_p fe; int i; bzero(&exp, sizeof(exp)); /* * We are going to free probably billable data. * Expire everything before freeing it. * No locking is required since callout is already drained. */ for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); fe = priv_to_fib(priv, fle->f.r.fib); expire_flow(priv, fe, fle, NG_QUEUE); } #ifdef INET6 for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++) TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); fe = priv_to_fib(priv, fle->f.r.fib); expire_flow(priv, fe, fle, NG_QUEUE); } #endif uma_zdestroy(priv->zone); /* Destroy hash mutexes. */ for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) mtx_destroy(&hsh->mtx); /* Free hash memory. */ if (priv->hash != NULL) free(priv->hash, M_NETFLOW_HASH); #ifdef INET6 uma_zdestroy(priv->zone6); /* Destroy hash mutexes. */ for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++) mtx_destroy(&hsh->mtx); /* Free hash memory. */ if (priv->hash6 != NULL) free(priv->hash6, M_NETFLOW_HASH); #endif for (i = 0; i < priv->maxfibs; i++) { if ((fe = priv_to_fib(priv, i)) == NULL) continue; if (fe->exp.item != NULL) export_send(priv, fe, fe->exp.item, NG_QUEUE); if (fe->exp.item9 != NULL) export9_send(priv, fe, fe->exp.item9, fe->exp.item9_opt, NG_QUEUE); mtx_destroy(&fe->export_mtx); mtx_destroy(&fe->export9_mtx); free(fe, M_NETGRAPH); } counter_u64_free(priv->nfinfo_bytes); counter_u64_free(priv->nfinfo_packets); counter_u64_free(priv->nfinfo_bytes6); counter_u64_free(priv->nfinfo_packets6); counter_u64_free(priv->nfinfo_sbytes); counter_u64_free(priv->nfinfo_spackets); counter_u64_free(priv->nfinfo_sbytes6); counter_u64_free(priv->nfinfo_spackets6); counter_u64_free(priv->nfinfo_act_exp); counter_u64_free(priv->nfinfo_inact_exp); ng_netflow_v9_cache_flush(priv); } /* Insert packet from into flow cache. */ int ng_netflow_flow_add(priv_p priv, fib_export_p fe, struct ip *ip, caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags, unsigned int src_if_index) { struct flow_entry *fle, *fle1; struct flow_hash_entry *hsh; struct flow_rec r; int hlen, plen; int error = 0; uint16_t eproto; uint8_t tcp_flags = 0; bzero(&r, sizeof(r)); if (ip->ip_v != IPVERSION) return (EINVAL); hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) return (EINVAL); eproto = ETHERTYPE_IP; /* Assume L4 template by default */ r.flow_type = NETFLOW_V9_FLOW_V4_L4; r.r_src = ip->ip_src; r.r_dst = ip->ip_dst; r.fib = fe->fib; plen = ntohs(ip->ip_len); r.r_ip_p = ip->ip_p; r.r_tos = ip->ip_tos; r.r_i_ifx = src_if_index; /* * XXX NOTE: only first fragment of fragmented TCP, UDP and * ICMP packet will be recorded with proper s_port and d_port. * Following fragments will be recorded simply as IP packet with * ip_proto = ip->ip_p and s_port, d_port set to zero. * I know, it looks like bug. But I don't want to re-implement * ip packet assebmling here. Anyway, (in)famous trafd works this way - * and nobody complains yet :) */ if ((ip->ip_off & htons(IP_OFFMASK)) == 0) switch(r.r_ip_p) { case IPPROTO_TCP: { struct tcphdr *tcp; tcp = (struct tcphdr *)((caddr_t )ip + hlen); r.r_sport = tcp->th_sport; r.r_dport = tcp->th_dport; tcp_flags = tcp->th_flags; break; } case IPPROTO_UDP: r.r_ports = *(uint32_t *)((caddr_t )ip + hlen); break; } counter_u64_add(priv->nfinfo_packets, 1); counter_u64_add(priv->nfinfo_bytes, plen); /* Find hash slot. */ hsh = &priv->hash[ip_hash(&r)]; mtx_lock(&hsh->mtx); /* * Go through hash and find our entry. If we encounter an * entry, that should be expired, purge it. We do a reverse * search since most active entries are first, and most * searches are done on most active entries. */ TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) { if (bcmp(&r, &fle->f.r, sizeof(struct flow_rec)) == 0) break; if ((INACTIVE(fle) && SMALL(fle)) || AGED(fle)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); counter_u64_add(priv->nfinfo_act_exp, 1); } } if (fle) { /* An existent entry. */ fle->f.bytes += plen; fle->f.packets ++; fle->f.tcp_flags |= tcp_flags; fle->f.last = time_uptime; /* * We have the following reasons to expire flow in active way: * - it hit active timeout * - a TCP connection closed * - it is going to overflow counter */ if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle) || (fle->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); counter_u64_add(priv->nfinfo_act_exp, 1); } else { /* * It is the newest, move it to the tail, * if it isn't there already. Next search will * locate it quicker. */ if (fle != TAILQ_LAST(&hsh->head, fhead)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash); } } } else /* A new flow entry. */ error = hash_insert(priv, hsh, &r, plen, flags, tcp_flags); mtx_unlock(&hsh->mtx); return (error); } #ifdef INET6 /* Insert IPv6 packet from into flow cache. */ int ng_netflow_flow6_add(priv_p priv, fib_export_p fe, struct ip6_hdr *ip6, caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags, unsigned int src_if_index) { struct flow_entry *fle = NULL, *fle1; struct flow6_entry *fle6; struct flow_hash_entry *hsh; struct flow6_rec r; int plen; int error = 0; uint8_t tcp_flags = 0; /* check version */ if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) return (EINVAL); bzero(&r, sizeof(r)); r.src.r_src6 = ip6->ip6_src; r.dst.r_dst6 = ip6->ip6_dst; r.fib = fe->fib; /* Assume L4 template by default */ r.flow_type = NETFLOW_V9_FLOW_V6_L4; plen = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr); #if 0 /* XXX: set DSCP/CoS value */ r.r_tos = ip->ip_tos; #endif if ((flags & NG_NETFLOW_IS_FRAG) == 0) { switch(upper_proto) { case IPPROTO_TCP: { struct tcphdr *tcp; tcp = (struct tcphdr *)upper_ptr; r.r_ports = *(uint32_t *)upper_ptr; tcp_flags = tcp->th_flags; break; } case IPPROTO_UDP: case IPPROTO_SCTP: r.r_ports = *(uint32_t *)upper_ptr; break; } } r.r_ip_p = upper_proto; r.r_i_ifx = src_if_index; counter_u64_add(priv->nfinfo_packets6, 1); counter_u64_add(priv->nfinfo_bytes6, plen); /* Find hash slot. */ hsh = &priv->hash6[ip6_hash(&r)]; mtx_lock(&hsh->mtx); /* * Go through hash and find our entry. If we encounter an * entry, that should be expired, purge it. We do a reverse * search since most active entries are first, and most * searches are done on most active entries. */ TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) { if (fle->f.version != IP6VERSION) continue; fle6 = (struct flow6_entry *)fle; if (bcmp(&r, &fle6->f.r, sizeof(struct flow6_rec)) == 0) break; if ((INACTIVE(fle6) && SMALL(fle6)) || AGED(fle6)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); counter_u64_add(priv->nfinfo_act_exp, 1); } } if (fle != NULL) { /* An existent entry. */ fle6 = (struct flow6_entry *)fle; fle6->f.bytes += plen; fle6->f.packets ++; fle6->f.tcp_flags |= tcp_flags; fle6->f.last = time_uptime; /* * We have the following reasons to expire flow in active way: * - it hit active timeout * - a TCP connection closed * - it is going to overflow counter */ if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle6) || (fle6->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); counter_u64_add(priv->nfinfo_act_exp, 1); } else { /* * It is the newest, move it to the tail, * if it isn't there already. Next search will * locate it quicker. */ if (fle != TAILQ_LAST(&hsh->head, fhead)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash); } } } else /* A new flow entry. */ error = hash6_insert(priv, hsh, &r, plen, flags, tcp_flags); mtx_unlock(&hsh->mtx); return (error); } #endif /* * Return records from cache to userland. * * TODO: matching particular IP should be done in kernel, here. */ int ng_netflow_flow_show(priv_p priv, struct ngnf_show_header *req, struct ngnf_show_header *resp) { struct flow_hash_entry *hsh; struct flow_entry *fle; struct flow_entry_data *data = (struct flow_entry_data *)(resp + 1); #ifdef INET6 struct flow6_entry_data *data6 = (struct flow6_entry_data *)(resp + 1); #endif int i, max; i = req->hash_id; if (i > NBUCKETS-1) return (EINVAL); #ifdef INET6 if (req->version == 6) { resp->version = 6; hsh = priv->hash6 + i; max = NREC6_AT_ONCE; } else #endif if (req->version == 4) { resp->version = 4; hsh = priv->hash + i; max = NREC_AT_ONCE; } else return (EINVAL); /* * We will transfer not more than NREC_AT_ONCE. More data * will come in next message. * We send current hash index and current record number in list * to userland, and userland should return it back to us. * Then, we will restart with new entry. * * The resulting cache snapshot can be inaccurate if flow expiration * is taking place on hash item between userland data requests for * this hash item id. */ resp->nentries = 0; for (; i < NBUCKETS; hsh++, i++) { int list_id; if (mtx_trylock(&hsh->mtx) == 0) { /* * Requested hash index is not available, * relay decision to skip or re-request data * to userland. */ resp->hash_id = i; resp->list_id = 0; return (0); } list_id = 0; TAILQ_FOREACH(fle, &hsh->head, fle_hash) { if (hsh->mtx.mtx_lock & MTX_CONTESTED) { resp->hash_id = i; resp->list_id = list_id; mtx_unlock(&hsh->mtx); return (0); } list_id++; /* Search for particular record in list. */ if (req->list_id > 0) { if (list_id < req->list_id) continue; /* Requested list position found. */ req->list_id = 0; } #ifdef INET6 if (req->version == 6) { struct flow6_entry *fle6; fle6 = (struct flow6_entry *)fle; bcopy(&fle6->f, data6 + resp->nentries, sizeof(fle6->f)); } else #endif bcopy(&fle->f, data + resp->nentries, sizeof(fle->f)); resp->nentries++; if (resp->nentries == max) { resp->hash_id = i; /* * If it was the last item in list * we simply skip to next hash_id. */ resp->list_id = list_id + 1; mtx_unlock(&hsh->mtx); return (0); } } mtx_unlock(&hsh->mtx); } resp->hash_id = resp->list_id = 0; return (0); } /* We have full datagram in privdata. Send it to export hook. */ static int export_send(priv_p priv, fib_export_p fe, item_p item, int flags) { struct mbuf *m = NGI_M(item); struct netflow_v5_export_dgram *dgram = mtod(m, struct netflow_v5_export_dgram *); struct netflow_v5_header *header = &dgram->header; struct timespec ts; int error = 0; /* Fill mbuf header. */ m->m_len = m->m_pkthdr.len = sizeof(struct netflow_v5_record) * header->count + sizeof(struct netflow_v5_header); /* Fill export header. */ header->sys_uptime = htonl(MILLIUPTIME(time_uptime)); getnanotime(&ts); header->unix_secs = htonl(ts.tv_sec); header->unix_nsecs = htonl(ts.tv_nsec); header->engine_type = 0; header->engine_id = fe->domain_id; header->pad = 0; header->flow_seq = htonl(atomic_fetchadd_32(&fe->flow_seq, header->count)); header->count = htons(header->count); if (priv->export != NULL) NG_FWD_ITEM_HOOK_FLAGS(error, item, priv->export, flags); else NG_FREE_ITEM(item); return (error); } /* Add export record to dgram. */ static int export_add(item_p item, struct flow_entry *fle) { struct netflow_v5_export_dgram *dgram = mtod(NGI_M(item), struct netflow_v5_export_dgram *); struct netflow_v5_header *header = &dgram->header; struct netflow_v5_record *rec; rec = &dgram->r[header->count]; header->count ++; KASSERT(header->count <= NETFLOW_V5_MAX_RECORDS, ("ng_netflow: export too big")); /* Fill in export record. */ rec->src_addr = fle->f.r.r_src.s_addr; rec->dst_addr = fle->f.r.r_dst.s_addr; rec->next_hop = fle->f.next_hop.s_addr; rec->i_ifx = htons(fle->f.fle_i_ifx); rec->o_ifx = htons(fle->f.fle_o_ifx); rec->packets = htonl(fle->f.packets); rec->octets = htonl(fle->f.bytes); rec->first = htonl(MILLIUPTIME(fle->f.first)); rec->last = htonl(MILLIUPTIME(fle->f.last)); rec->s_port = fle->f.r.r_sport; rec->d_port = fle->f.r.r_dport; rec->flags = fle->f.tcp_flags; rec->prot = fle->f.r.r_ip_p; rec->tos = fle->f.r.r_tos; rec->dst_mask = fle->f.dst_mask; rec->src_mask = fle->f.src_mask; rec->pad1 = 0; rec->pad2 = 0; /* Not supported fields. */ rec->src_as = rec->dst_as = 0; if (header->count == NETFLOW_V5_MAX_RECORDS) return (1); /* end of datagram */ else return (0); } /* Periodic flow expiry run. */ void ng_netflow_expire(void *arg) { struct flow_entry *fle, *fle1; struct flow_hash_entry *hsh; priv_p priv = (priv_p )arg; int used, i; /* * Going through all the cache. */ used = uma_zone_get_cur(priv->zone); for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) { /* * Skip entries, that are already being worked on. */ if (mtx_trylock(&hsh->mtx) == 0) continue; TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { /* * Interrupt thread wants this entry! * Quick! Quick! Bail out! */ if (hsh->mtx.mtx_lock & MTX_CONTESTED) break; /* * Don't expire aggressively while hash collision * ratio is predicted small. */ if (used <= (NBUCKETS*2) && !INACTIVE(fle)) break; if ((INACTIVE(fle) && (SMALL(fle) || (used > (NBUCKETS*2)))) || AGED(fle)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_NOFLAGS); used--; counter_u64_add(priv->nfinfo_inact_exp, 1); } } mtx_unlock(&hsh->mtx); } #ifdef INET6 used = uma_zone_get_cur(priv->zone6); for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++) { struct flow6_entry *fle6; /* * Skip entries, that are already being worked on. */ if (mtx_trylock(&hsh->mtx) == 0) continue; TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { fle6 = (struct flow6_entry *)fle; /* * Interrupt thread wants this entry! * Quick! Quick! Bail out! */ if (hsh->mtx.mtx_lock & MTX_CONTESTED) break; /* * Don't expire aggressively while hash collision * ratio is predicted small. */ if (used <= (NBUCKETS*2) && !INACTIVE(fle6)) break; if ((INACTIVE(fle6) && (SMALL(fle6) || (used > (NBUCKETS*2)))) || AGED(fle6)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_NOFLAGS); used--; counter_u64_add(priv->nfinfo_inact_exp, 1); } } mtx_unlock(&hsh->mtx); } #endif /* Schedule next expire. */ callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire, (void *)priv); } Index: projects/clang380-import/sys/netgraph/netflow/netflow_v9.c =================================================================== --- projects/clang380-import/sys/netgraph/netflow/netflow_v9.c (revision 293686) +++ projects/clang380-import/sys/netgraph/netflow/netflow_v9.c (revision 293687) @@ -1,490 +1,491 @@ /*- * Copyright (c) 2010 Alexander V. Chernikov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_route.h" #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DECLARE(M_NETFLOW_GENERAL); MALLOC_DEFINE(M_NETFLOW_GENERAL, "netflow_general", "plog, V9 templates data"); /* * Base V9 templates for L4+ IPv4/IPv6 protocols */ struct netflow_v9_template _netflow_v9_record_ipv4_tcp[] = { { NETFLOW_V9_FIELD_IPV4_SRC_ADDR, 4}, { NETFLOW_V9_FIELD_IPV4_DST_ADDR, 4}, { NETFLOW_V9_FIELD_IPV4_NEXT_HOP, 4}, { NETFLOW_V9_FIELD_INPUT_SNMP, 2}, { NETFLOW_V9_FIELD_OUTPUT_SNMP, 2}, { NETFLOW_V9_FIELD_IN_PKTS, sizeof(CNTR)}, { NETFLOW_V9_FIELD_IN_BYTES, sizeof(CNTR)}, { NETFLOW_V9_FIELD_OUT_PKTS, sizeof(CNTR)}, { NETFLOW_V9_FIELD_OUT_BYTES, sizeof(CNTR)}, { NETFLOW_V9_FIELD_FIRST_SWITCHED, 4}, { NETFLOW_V9_FIELD_LAST_SWITCHED, 4}, { NETFLOW_V9_FIELD_L4_SRC_PORT, 2}, { NETFLOW_V9_FIELD_L4_DST_PORT, 2}, { NETFLOW_V9_FIELD_TCP_FLAGS, 1}, { NETFLOW_V9_FIELD_PROTOCOL, 1}, { NETFLOW_V9_FIELD_TOS, 1}, { NETFLOW_V9_FIELD_SRC_AS, 4}, { NETFLOW_V9_FIELD_DST_AS, 4}, { NETFLOW_V9_FIELD_SRC_MASK, 1}, { NETFLOW_V9_FIELD_DST_MASK, 1}, {0, 0} }; struct netflow_v9_template _netflow_v9_record_ipv6_tcp[] = { { NETFLOW_V9_FIELD_IPV6_SRC_ADDR, 16}, { NETFLOW_V9_FIELD_IPV6_DST_ADDR, 16}, { NETFLOW_V9_FIELD_IPV6_NEXT_HOP, 16}, { NETFLOW_V9_FIELD_INPUT_SNMP, 2}, { NETFLOW_V9_FIELD_OUTPUT_SNMP, 2}, { NETFLOW_V9_FIELD_IN_PKTS, sizeof(CNTR)}, { NETFLOW_V9_FIELD_IN_BYTES, sizeof(CNTR)}, { NETFLOW_V9_FIELD_OUT_PKTS, sizeof(CNTR)}, { NETFLOW_V9_FIELD_OUT_BYTES, sizeof(CNTR)}, { NETFLOW_V9_FIELD_FIRST_SWITCHED, 4}, { NETFLOW_V9_FIELD_LAST_SWITCHED, 4}, { NETFLOW_V9_FIELD_L4_SRC_PORT, 2}, { NETFLOW_V9_FIELD_L4_DST_PORT, 2}, { NETFLOW_V9_FIELD_TCP_FLAGS, 1}, { NETFLOW_V9_FIELD_PROTOCOL, 1}, { NETFLOW_V9_FIELD_TOS, 1}, { NETFLOW_V9_FIELD_SRC_AS, 4}, { NETFLOW_V9_FIELD_DST_AS, 4}, { NETFLOW_V9_FIELD_SRC_MASK, 1}, { NETFLOW_V9_FIELD_DST_MASK, 1}, {0, 0} }; /* * Pre-compiles flow exporter for all possible FlowSets * so we can add flowset to packet via simple memcpy() */ static void generate_v9_templates(priv_p priv) { uint16_t *p, *template_fields_cnt; int cnt; int flowset_size = sizeof(struct netflow_v9_flowset_header) + _NETFLOW_V9_TEMPLATE_SIZE(_netflow_v9_record_ipv4_tcp) + /* netflow_v9_record_ipv4_tcp */ _NETFLOW_V9_TEMPLATE_SIZE(_netflow_v9_record_ipv6_tcp); /* netflow_v9_record_ipv6_tcp */ priv->v9_flowsets[0] = malloc(flowset_size, M_NETFLOW_GENERAL, M_WAITOK | M_ZERO); if (flowset_size % 4) flowset_size += 4 - (flowset_size % 4); /* Padding to 4-byte boundary */ priv->flowsets_count = 1; p = (uint16_t *)priv->v9_flowsets[0]; *p++ = 0; /* Flowset ID, 0 is reserved for Template FlowSets */ *p++ = htons(flowset_size); /* Total FlowSet length */ /* * Most common TCP/UDP IPv4 template, ID = 256 */ *p++ = htons(NETFLOW_V9_MAX_RESERVED_FLOWSET + NETFLOW_V9_FLOW_V4_L4); template_fields_cnt = p++; for (cnt = 0; _netflow_v9_record_ipv4_tcp[cnt].field_id != 0; cnt++) { *p++ = htons(_netflow_v9_record_ipv4_tcp[cnt].field_id); *p++ = htons(_netflow_v9_record_ipv4_tcp[cnt].field_length); } *template_fields_cnt = htons(cnt); /* * TCP/UDP IPv6 template, ID = 257 */ *p++ = htons(NETFLOW_V9_MAX_RESERVED_FLOWSET + NETFLOW_V9_FLOW_V6_L4); template_fields_cnt = p++; for (cnt = 0; _netflow_v9_record_ipv6_tcp[cnt].field_id != 0; cnt++) { *p++ = htons(_netflow_v9_record_ipv6_tcp[cnt].field_id); *p++ = htons(_netflow_v9_record_ipv6_tcp[cnt].field_length); } *template_fields_cnt = htons(cnt); priv->flowset_records[0] = 2; } /* Closes current data flowset */ static void inline close_flowset(struct mbuf *m, struct netflow_v9_packet_opt *t) { struct mbuf *m_old; uint32_t zero = 0; int offset = 0; uint16_t *flowset_length, len; /* Hack to ensure we are not crossing mbuf boundary, length is uint16_t */ m_old = m_getptr(m, t->flow_header + offsetof(struct netflow_v9_flowset_header, length), &offset); flowset_length = (uint16_t *)(mtod(m_old, char *) + offset); len = (uint16_t)(m_pktlen(m) - t->flow_header); /* Align on 4-byte boundary (RFC 3954, Clause 5.3) */ if (len % 4) { if (m_append(m, 4 - (len % 4), (void *)&zero) != 1) panic("ng_netflow: m_append() failed!"); len += 4 - (len % 4); } *flowset_length = htons(len); } /* * Non-static functions called from ng_netflow.c */ /* We have full datagram in fib data. Send it to export hook. */ int export9_send(priv_p priv, fib_export_p fe, item_p item, struct netflow_v9_packet_opt *t, int flags) { struct mbuf *m = NGI_M(item); struct netflow_v9_export_dgram *dgram = mtod(m, struct netflow_v9_export_dgram *); struct netflow_v9_header *header = &dgram->header; struct timespec ts; int error = 0; if (t == NULL) { CTR0(KTR_NET, "export9_send(): V9 export packet without tag"); NG_FREE_ITEM(item); return (0); } /* Close flowset if not closed already */ if (m_pktlen(m) != t->flow_header) close_flowset(m, t); /* Fill export header. */ header->count = t->count; header->sys_uptime = htonl(MILLIUPTIME(time_uptime)); getnanotime(&ts); header->unix_secs = htonl(ts.tv_sec); header->seq_num = htonl(atomic_fetchadd_32(&fe->flow9_seq, 1)); header->count = htons(t->count); header->source_id = htonl(fe->domain_id); if (priv->export9 != NULL) NG_FWD_ITEM_HOOK_FLAGS(error, item, priv->export9, flags); else NG_FREE_ITEM(item); free(t, M_NETFLOW_GENERAL); return (error); } /* Add V9 record to dgram. */ int export9_add(item_p item, struct netflow_v9_packet_opt *t, struct flow_entry *fle) { size_t len = 0; struct netflow_v9_flowset_header fsh; struct netflow_v9_record_general rg; struct mbuf *m = NGI_M(item); uint16_t flow_type; struct flow_entry_data *fed; #ifdef INET6 struct flow6_entry_data *fed6; #endif if (t == NULL) { CTR0(KTR_NET, "ng_netflow: V9 export packet without tag!"); return (0); } /* Prepare flow record */ fed = (struct flow_entry_data *)&fle->f; #ifdef INET6 fed6 = (struct flow6_entry_data *)&fle->f; #endif /* We can use flow_type field since fle6 offset is equal to fle */ flow_type = fed->r.flow_type; switch (flow_type) { case NETFLOW_V9_FLOW_V4_L4: { /* IPv4 TCP/UDP/[SCTP] */ struct netflow_v9_record_ipv4_tcp *rec = &rg.rec.v4_tcp; rec->src_addr = fed->r.r_src.s_addr; rec->dst_addr = fed->r.r_dst.s_addr; rec->next_hop = fed->next_hop.s_addr; rec->i_ifx = htons(fed->fle_i_ifx); rec->o_ifx = htons(fed->fle_o_ifx); rec->i_packets = htonl(fed->packets); rec->i_octets = htonl(fed->bytes); rec->o_packets = htonl(0); rec->o_octets = htonl(0); rec->first = htonl(MILLIUPTIME(fed->first)); rec->last = htonl(MILLIUPTIME(fed->last)); rec->s_port = fed->r.r_sport; rec->d_port = fed->r.r_dport; rec->flags = fed->tcp_flags; rec->prot = fed->r.r_ip_p; rec->tos = fed->r.r_tos; rec->dst_mask = fed->dst_mask; rec->src_mask = fed->src_mask; /* Not supported fields. */ rec->src_as = rec->dst_as = 0; len = sizeof(struct netflow_v9_record_ipv4_tcp); break; } #ifdef INET6 case NETFLOW_V9_FLOW_V6_L4: { /* IPv6 TCP/UDP/[SCTP] */ struct netflow_v9_record_ipv6_tcp *rec = &rg.rec.v6_tcp; rec->src_addr = fed6->r.src.r_src6; rec->dst_addr = fed6->r.dst.r_dst6; rec->next_hop = fed6->n.next_hop6; rec->i_ifx = htons(fed6->fle_i_ifx); rec->o_ifx = htons(fed6->fle_o_ifx); rec->i_packets = htonl(fed6->packets); rec->i_octets = htonl(fed6->bytes); rec->o_packets = htonl(0); rec->o_octets = htonl(0); rec->first = htonl(MILLIUPTIME(fed6->first)); rec->last = htonl(MILLIUPTIME(fed6->last)); rec->s_port = fed6->r.r_sport; rec->d_port = fed6->r.r_dport; rec->flags = fed6->tcp_flags; rec->prot = fed6->r.r_ip_p; rec->tos = fed6->r.r_tos; rec->dst_mask = fed6->dst_mask; rec->src_mask = fed6->src_mask; /* Not supported fields. */ rec->src_as = rec->dst_as = 0; len = sizeof(struct netflow_v9_record_ipv6_tcp); break; } #endif default: { CTR1(KTR_NET, "export9_add(): Don't know what to do with %d flow type!", flow_type); return (0); } } /* Check if new records has the same template */ if (flow_type != t->flow_type) { /* close old flowset */ if (t->flow_type != 0) close_flowset(m, t); t->flow_type = flow_type; t->flow_header = m_pktlen(m); /* Generate data flowset ID */ fsh.id = htons(NETFLOW_V9_MAX_RESERVED_FLOWSET + flow_type); fsh.length = 0; /* m_append should not fail since all data is already allocated */ if (m_append(m, sizeof(fsh), (void *)&fsh) != 1) panic("ng_netflow: m_append() failed"); } if (m_append(m, len, (void *)&rg.rec) != 1) panic("ng_netflow: m_append() failed"); t->count++; if (m_pktlen(m) + sizeof(struct netflow_v9_record_general) + sizeof(struct netflow_v9_flowset_header) >= _NETFLOW_V9_MAX_SIZE(t->mtu)) return (1); /* end of datagram */ return (0); } /* * Detach export datagram from fib instance, if there is any. * If there is no, allocate a new one. */ item_p get_export9_dgram(priv_p priv, fib_export_p fe, struct netflow_v9_packet_opt **tt) { item_p item = NULL; struct netflow_v9_packet_opt *t = NULL; mtx_lock(&fe->export9_mtx); if (fe->exp.item9 != NULL) { item = fe->exp.item9; fe->exp.item9 = NULL; t = fe->exp.item9_opt; fe->exp.item9_opt = NULL; } mtx_unlock(&fe->export9_mtx); if (item == NULL) { struct netflow_v9_export_dgram *dgram; struct mbuf *m; uint16_t mtu = priv->mtu; /* Allocate entire packet at once, allowing easy m_append() calls */ m = m_getm(NULL, mtu, M_NOWAIT, MT_DATA); if (m == NULL) return (NULL); t = malloc(sizeof(struct netflow_v9_packet_opt), M_NETFLOW_GENERAL, M_NOWAIT | M_ZERO); if (t == NULL) { m_free(m); return (NULL); } item = ng_package_data(m, NG_NOFLAGS); if (item == NULL) { free(t, M_NETFLOW_GENERAL); return (NULL); } dgram = mtod(m, struct netflow_v9_export_dgram *); dgram->header.count = 0; dgram->header.version = htons(NETFLOW_V9); /* Set mbuf current data length */ m->m_len = m->m_pkthdr.len = sizeof(struct netflow_v9_header); t->count = 0; t->mtu = mtu; t->flow_header = m->m_len; /* * Check if we need to insert templates into packet */ struct netflow_v9_flowset_header *fl; if ((time_uptime >= priv->templ_time + fe->templ_last_ts) || (fe->sent_packets >= priv->templ_packets + fe->templ_last_pkt)) { fe->templ_last_ts = time_uptime; fe->templ_last_pkt = fe->sent_packets; fl = priv->v9_flowsets[0]; m_append(m, ntohs(fl->length), (void *)fl); t->flow_header = m->m_len; t->count += priv->flowset_records[0]; } } *tt = t; return (item); } /* * Re-attach incomplete datagram back to fib instance. * If there is already another one, then send incomplete. */ void return_export9_dgram(priv_p priv, fib_export_p fe, item_p item, struct netflow_v9_packet_opt *t, int flags) { /* * It may happen on SMP, that some thread has already * put its item there, in this case we bail out and * send what we have to collector. */ mtx_lock(&fe->export9_mtx); if (fe->exp.item9 == NULL) { fe->exp.item9 = item; fe->exp.item9_opt = t; mtx_unlock(&fe->export9_mtx); } else { mtx_unlock(&fe->export9_mtx); export9_send(priv, fe, item, t, flags); } } /* Allocate memory and set up flow cache */ void ng_netflow_v9_cache_init(priv_p priv) { generate_v9_templates(priv); priv->templ_time = NETFLOW_V9_MAX_TIME_TEMPL; priv->templ_packets = NETFLOW_V9_MAX_PACKETS_TEMPL; priv->mtu = BASE_MTU; } /* Free all flow cache memory. Called from ng_netflow_cache_flush() */ void ng_netflow_v9_cache_flush(priv_p priv) { int i; /* Free flowsets*/ for (i = 0; i < priv->flowsets_count; i++) free(priv->v9_flowsets[i], M_NETFLOW_GENERAL); } /* Get a snapshot of NetFlow v9 settings */ void ng_netflow_copyv9info(priv_p priv, struct ng_netflow_v9info *i) { i->templ_time = priv->templ_time; i->templ_packets = priv->templ_packets; i->mtu = priv->mtu; } Index: projects/clang380-import/sys/netgraph/netflow/ng_netflow.c =================================================================== --- projects/clang380-import/sys/netgraph/netflow/ng_netflow.c (revision 293686) +++ projects/clang380-import/sys/netgraph/netflow/ng_netflow.c (revision 293687) @@ -1,1036 +1,1037 @@ /*- * Copyright (c) 2010-2011 Alexander V. Chernikov * Copyright (c) 2004-2005 Gleb Smirnoff * Copyright (c) 2001-2003 Roman V. Palagin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $SourceForge: ng_netflow.c,v 1.30 2004/09/05 11:37:43 glebius Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_route.h" #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Netgraph methods */ static ng_constructor_t ng_netflow_constructor; static ng_rcvmsg_t ng_netflow_rcvmsg; static ng_close_t ng_netflow_close; static ng_shutdown_t ng_netflow_rmnode; static ng_newhook_t ng_netflow_newhook; static ng_rcvdata_t ng_netflow_rcvdata; static ng_disconnect_t ng_netflow_disconnect; /* Parse type for struct ng_netflow_info */ static const struct ng_parse_struct_field ng_netflow_info_type_fields[] = NG_NETFLOW_INFO_TYPE; static const struct ng_parse_type ng_netflow_info_type = { &ng_parse_struct_type, &ng_netflow_info_type_fields }; /* Parse type for struct ng_netflow_ifinfo */ static const struct ng_parse_struct_field ng_netflow_ifinfo_type_fields[] = NG_NETFLOW_IFINFO_TYPE; static const struct ng_parse_type ng_netflow_ifinfo_type = { &ng_parse_struct_type, &ng_netflow_ifinfo_type_fields }; /* Parse type for struct ng_netflow_setdlt */ static const struct ng_parse_struct_field ng_netflow_setdlt_type_fields[] = NG_NETFLOW_SETDLT_TYPE; static const struct ng_parse_type ng_netflow_setdlt_type = { &ng_parse_struct_type, &ng_netflow_setdlt_type_fields }; /* Parse type for ng_netflow_setifindex */ static const struct ng_parse_struct_field ng_netflow_setifindex_type_fields[] = NG_NETFLOW_SETIFINDEX_TYPE; static const struct ng_parse_type ng_netflow_setifindex_type = { &ng_parse_struct_type, &ng_netflow_setifindex_type_fields }; /* Parse type for ng_netflow_settimeouts */ static const struct ng_parse_struct_field ng_netflow_settimeouts_type_fields[] = NG_NETFLOW_SETTIMEOUTS_TYPE; static const struct ng_parse_type ng_netflow_settimeouts_type = { &ng_parse_struct_type, &ng_netflow_settimeouts_type_fields }; /* Parse type for ng_netflow_setconfig */ static const struct ng_parse_struct_field ng_netflow_setconfig_type_fields[] = NG_NETFLOW_SETCONFIG_TYPE; static const struct ng_parse_type ng_netflow_setconfig_type = { &ng_parse_struct_type, &ng_netflow_setconfig_type_fields }; /* Parse type for ng_netflow_settemplate */ static const struct ng_parse_struct_field ng_netflow_settemplate_type_fields[] = NG_NETFLOW_SETTEMPLATE_TYPE; static const struct ng_parse_type ng_netflow_settemplate_type = { &ng_parse_struct_type, &ng_netflow_settemplate_type_fields }; /* Parse type for ng_netflow_setmtu */ static const struct ng_parse_struct_field ng_netflow_setmtu_type_fields[] = NG_NETFLOW_SETMTU_TYPE; static const struct ng_parse_type ng_netflow_setmtu_type = { &ng_parse_struct_type, &ng_netflow_setmtu_type_fields }; /* Parse type for struct ng_netflow_v9info */ static const struct ng_parse_struct_field ng_netflow_v9info_type_fields[] = NG_NETFLOW_V9INFO_TYPE; static const struct ng_parse_type ng_netflow_v9info_type = { &ng_parse_struct_type, &ng_netflow_v9info_type_fields }; /* List of commands and how to convert arguments to/from ASCII */ static const struct ng_cmdlist ng_netflow_cmds[] = { { NGM_NETFLOW_COOKIE, NGM_NETFLOW_INFO, "info", NULL, &ng_netflow_info_type }, { NGM_NETFLOW_COOKIE, NGM_NETFLOW_IFINFO, "ifinfo", &ng_parse_uint16_type, &ng_netflow_ifinfo_type }, { NGM_NETFLOW_COOKIE, NGM_NETFLOW_SETDLT, "setdlt", &ng_netflow_setdlt_type, NULL }, { NGM_NETFLOW_COOKIE, NGM_NETFLOW_SETIFINDEX, "setifindex", &ng_netflow_setifindex_type, NULL }, { NGM_NETFLOW_COOKIE, NGM_NETFLOW_SETTIMEOUTS, "settimeouts", &ng_netflow_settimeouts_type, NULL }, { NGM_NETFLOW_COOKIE, NGM_NETFLOW_SETCONFIG, "setconfig", &ng_netflow_setconfig_type, NULL }, { NGM_NETFLOW_COOKIE, NGM_NETFLOW_SETTEMPLATE, "settemplate", &ng_netflow_settemplate_type, NULL }, { NGM_NETFLOW_COOKIE, NGM_NETFLOW_SETMTU, "setmtu", &ng_netflow_setmtu_type, NULL }, { NGM_NETFLOW_COOKIE, NGM_NETFLOW_V9INFO, "v9info", NULL, &ng_netflow_v9info_type }, { 0 } }; /* Netgraph node type descriptor */ static struct ng_type ng_netflow_typestruct = { .version = NG_ABI_VERSION, .name = NG_NETFLOW_NODE_TYPE, .constructor = ng_netflow_constructor, .rcvmsg = ng_netflow_rcvmsg, .close = ng_netflow_close, .shutdown = ng_netflow_rmnode, .newhook = ng_netflow_newhook, .rcvdata = ng_netflow_rcvdata, .disconnect = ng_netflow_disconnect, .cmdlist = ng_netflow_cmds, }; NETGRAPH_INIT(netflow, &ng_netflow_typestruct); /* Called at node creation */ static int ng_netflow_constructor(node_p node) { priv_p priv; int i; /* Initialize private data */ priv = malloc(sizeof(*priv), M_NETGRAPH, M_WAITOK | M_ZERO); /* Initialize fib data */ priv->maxfibs = rt_numfibs; priv->fib_data = malloc(sizeof(fib_export_p) * priv->maxfibs, M_NETGRAPH, M_WAITOK | M_ZERO); /* Make node and its data point at each other */ NG_NODE_SET_PRIVATE(node, priv); priv->node = node; /* Initialize timeouts to default values */ priv->nfinfo_inact_t = INACTIVE_TIMEOUT; priv->nfinfo_act_t = ACTIVE_TIMEOUT; /* Set default config */ for (i = 0; i < NG_NETFLOW_MAXIFACES; i++) priv->ifaces[i].info.conf = NG_NETFLOW_CONF_INGRESS; /* Initialize callout handle */ callout_init(&priv->exp_callout, 1); /* Allocate memory and set up flow cache */ ng_netflow_cache_init(priv); return (0); } /* * ng_netflow supports two hooks: data and export. * Incoming traffic is expected on data, and expired * netflow datagrams are sent to export. */ static int ng_netflow_newhook(node_p node, hook_p hook, const char *name) { const priv_p priv = NG_NODE_PRIVATE(node); if (strncmp(name, NG_NETFLOW_HOOK_DATA, /* an iface hook? */ strlen(NG_NETFLOW_HOOK_DATA)) == 0) { iface_p iface; int ifnum = -1; const char *cp; char *eptr; cp = name + strlen(NG_NETFLOW_HOOK_DATA); if (!isdigit(*cp) || (cp[0] == '0' && cp[1] != '\0')) return (EINVAL); ifnum = (int)strtoul(cp, &eptr, 10); if (*eptr != '\0' || ifnum < 0 || ifnum >= NG_NETFLOW_MAXIFACES) return (EINVAL); /* See if hook is already connected */ if (priv->ifaces[ifnum].hook != NULL) return (EISCONN); iface = &priv->ifaces[ifnum]; /* Link private info and hook together */ NG_HOOK_SET_PRIVATE(hook, iface); iface->hook = hook; /* * In most cases traffic accounting is done on an * Ethernet interface, so default data link type * will be DLT_EN10MB. */ iface->info.ifinfo_dlt = DLT_EN10MB; } else if (strncmp(name, NG_NETFLOW_HOOK_OUT, strlen(NG_NETFLOW_HOOK_OUT)) == 0) { iface_p iface; int ifnum = -1; const char *cp; char *eptr; cp = name + strlen(NG_NETFLOW_HOOK_OUT); if (!isdigit(*cp) || (cp[0] == '0' && cp[1] != '\0')) return (EINVAL); ifnum = (int)strtoul(cp, &eptr, 10); if (*eptr != '\0' || ifnum < 0 || ifnum >= NG_NETFLOW_MAXIFACES) return (EINVAL); /* See if hook is already connected */ if (priv->ifaces[ifnum].out != NULL) return (EISCONN); iface = &priv->ifaces[ifnum]; /* Link private info and hook together */ NG_HOOK_SET_PRIVATE(hook, iface); iface->out = hook; } else if (strcmp(name, NG_NETFLOW_HOOK_EXPORT) == 0) { if (priv->export != NULL) return (EISCONN); /* Netflow version 5 supports 32-bit counters only */ if (CNTR_MAX == UINT64_MAX) return (EINVAL); priv->export = hook; /* Exporter is ready. Let's schedule expiry. */ callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire, (void *)priv); } else if (strcmp(name, NG_NETFLOW_HOOK_EXPORT9) == 0) { if (priv->export9 != NULL) return (EISCONN); priv->export9 = hook; /* Exporter is ready. Let's schedule expiry. */ callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire, (void *)priv); } else return (EINVAL); return (0); } /* Get a netgraph control message. */ static int ng_netflow_rcvmsg (node_p node, item_p item, hook_p lasthook) { const priv_p priv = NG_NODE_PRIVATE(node); struct ng_mesg *resp = NULL; int error = 0; struct ng_mesg *msg; NGI_GET_MSG(item, msg); /* Deal with message according to cookie and command */ switch (msg->header.typecookie) { case NGM_NETFLOW_COOKIE: switch (msg->header.cmd) { case NGM_NETFLOW_INFO: { struct ng_netflow_info *i; NG_MKRESPONSE(resp, msg, sizeof(struct ng_netflow_info), M_NOWAIT); i = (struct ng_netflow_info *)resp->data; ng_netflow_copyinfo(priv, i); break; } case NGM_NETFLOW_IFINFO: { struct ng_netflow_ifinfo *i; const uint16_t *index; if (msg->header.arglen != sizeof(uint16_t)) ERROUT(EINVAL); index = (uint16_t *)msg->data; if (*index >= NG_NETFLOW_MAXIFACES) ERROUT(EINVAL); /* connected iface? */ if (priv->ifaces[*index].hook == NULL) ERROUT(EINVAL); NG_MKRESPONSE(resp, msg, sizeof(struct ng_netflow_ifinfo), M_NOWAIT); i = (struct ng_netflow_ifinfo *)resp->data; memcpy((void *)i, (void *)&priv->ifaces[*index].info, sizeof(priv->ifaces[*index].info)); break; } case NGM_NETFLOW_SETDLT: { struct ng_netflow_setdlt *set; struct ng_netflow_iface *iface; if (msg->header.arglen != sizeof(struct ng_netflow_setdlt)) ERROUT(EINVAL); set = (struct ng_netflow_setdlt *)msg->data; if (set->iface >= NG_NETFLOW_MAXIFACES) ERROUT(EINVAL); iface = &priv->ifaces[set->iface]; /* connected iface? */ if (iface->hook == NULL) ERROUT(EINVAL); switch (set->dlt) { case DLT_EN10MB: iface->info.ifinfo_dlt = DLT_EN10MB; break; case DLT_RAW: iface->info.ifinfo_dlt = DLT_RAW; break; default: ERROUT(EINVAL); } break; } case NGM_NETFLOW_SETIFINDEX: { struct ng_netflow_setifindex *set; struct ng_netflow_iface *iface; if (msg->header.arglen != sizeof(struct ng_netflow_setifindex)) ERROUT(EINVAL); set = (struct ng_netflow_setifindex *)msg->data; if (set->iface >= NG_NETFLOW_MAXIFACES) ERROUT(EINVAL); iface = &priv->ifaces[set->iface]; /* connected iface? */ if (iface->hook == NULL) ERROUT(EINVAL); iface->info.ifinfo_index = set->index; break; } case NGM_NETFLOW_SETTIMEOUTS: { struct ng_netflow_settimeouts *set; if (msg->header.arglen != sizeof(struct ng_netflow_settimeouts)) ERROUT(EINVAL); set = (struct ng_netflow_settimeouts *)msg->data; priv->nfinfo_inact_t = set->inactive_timeout; priv->nfinfo_act_t = set->active_timeout; break; } case NGM_NETFLOW_SETCONFIG: { struct ng_netflow_setconfig *set; if (msg->header.arglen != sizeof(struct ng_netflow_setconfig)) ERROUT(EINVAL); set = (struct ng_netflow_setconfig *)msg->data; if (set->iface >= NG_NETFLOW_MAXIFACES) ERROUT(EINVAL); priv->ifaces[set->iface].info.conf = set->conf; break; } case NGM_NETFLOW_SETTEMPLATE: { struct ng_netflow_settemplate *set; if (msg->header.arglen != sizeof(struct ng_netflow_settemplate)) ERROUT(EINVAL); set = (struct ng_netflow_settemplate *)msg->data; priv->templ_packets = set->packets; priv->templ_time = set->time; break; } case NGM_NETFLOW_SETMTU: { struct ng_netflow_setmtu *set; if (msg->header.arglen != sizeof(struct ng_netflow_setmtu)) ERROUT(EINVAL); set = (struct ng_netflow_setmtu *)msg->data; if ((set->mtu < MIN_MTU) || (set->mtu > MAX_MTU)) ERROUT(EINVAL); priv->mtu = set->mtu; break; } case NGM_NETFLOW_SHOW: if (msg->header.arglen != sizeof(struct ngnf_show_header)) ERROUT(EINVAL); NG_MKRESPONSE(resp, msg, NGRESP_SIZE, M_NOWAIT); if (!resp) ERROUT(ENOMEM); error = ng_netflow_flow_show(priv, (struct ngnf_show_header *)msg->data, (struct ngnf_show_header *)resp->data); if (error) NG_FREE_MSG(resp); break; case NGM_NETFLOW_V9INFO: { struct ng_netflow_v9info *i; NG_MKRESPONSE(resp, msg, sizeof(struct ng_netflow_v9info), M_NOWAIT); i = (struct ng_netflow_v9info *)resp->data; ng_netflow_copyv9info(priv, i); break; } default: ERROUT(EINVAL); /* unknown command */ break; } break; default: ERROUT(EINVAL); /* incorrect cookie */ break; } /* * Take care of synchronous response, if any. * Free memory and return. */ done: NG_RESPOND_MSG(error, node, item, resp); NG_FREE_MSG(msg); return (error); } /* Receive data on hook. */ static int ng_netflow_rcvdata (hook_p hook, item_p item) { const node_p node = NG_HOOK_NODE(hook); const priv_p priv = NG_NODE_PRIVATE(node); const iface_p iface = NG_HOOK_PRIVATE(hook); hook_p out; struct mbuf *m = NULL, *m_old = NULL; struct ip *ip = NULL; struct ip6_hdr *ip6 = NULL; struct m_tag *mtag; int pullup_len = 0, off; uint8_t acct = 0, bypass = 0, flags = 0, upper_proto = 0; int error = 0, l3_off = 0; unsigned int src_if_index; caddr_t upper_ptr = NULL; fib_export_p fe; uint32_t fib; if ((hook == priv->export) || (hook == priv->export9)) { /* * Data arrived on export hook. * This must not happen. */ log(LOG_ERR, "ng_netflow: incoming data on export hook!\n"); ERROUT(EINVAL); }; if (hook == iface->hook) { if ((iface->info.conf & NG_NETFLOW_CONF_INGRESS) == 0) bypass = 1; out = iface->out; } else if (hook == iface->out) { if ((iface->info.conf & NG_NETFLOW_CONF_EGRESS) == 0) bypass = 1; out = iface->hook; } else ERROUT(EINVAL); if ((!bypass) && (iface->info.conf & (NG_NETFLOW_CONF_ONCE | NG_NETFLOW_CONF_THISONCE))) { mtag = m_tag_locate(NGI_M(item), MTAG_NETFLOW, MTAG_NETFLOW_CALLED, NULL); while (mtag != NULL) { if ((iface->info.conf & NG_NETFLOW_CONF_ONCE) || ((ng_ID_t *)(mtag + 1))[0] == NG_NODE_ID(node)) { bypass = 1; break; } mtag = m_tag_locate(NGI_M(item), MTAG_NETFLOW, MTAG_NETFLOW_CALLED, mtag); } } if (bypass) { if (out == NULL) ERROUT(ENOTCONN); NG_FWD_ITEM_HOOK(error, item, out); return (error); } if (iface->info.conf & (NG_NETFLOW_CONF_ONCE | NG_NETFLOW_CONF_THISONCE)) { mtag = m_tag_alloc(MTAG_NETFLOW, MTAG_NETFLOW_CALLED, sizeof(ng_ID_t), M_NOWAIT); if (mtag) { ((ng_ID_t *)(mtag + 1))[0] = NG_NODE_ID(node); m_tag_prepend(NGI_M(item), mtag); } } /* Import configuration flags related to flow creation */ flags = iface->info.conf & NG_NETFLOW_FLOW_FLAGS; NGI_GET_M(item, m); m_old = m; /* Increase counters. */ iface->info.ifinfo_packets++; /* * Depending on interface data link type and packet contents * we pullup enough data, so that ng_netflow_flow_add() does not * need to know about mbuf at all. We keep current length of data * needed to be contiguous in pullup_len. mtod() is done at the * very end one more time, since m can had changed after pulluping. * * In case of unrecognized data we don't return error, but just * pass data to downstream hook, if it is available. */ #define M_CHECK(length) do { \ pullup_len += length; \ if (((m)->m_pkthdr.len < (pullup_len)) || \ ((pullup_len) > MHLEN)) { \ error = EINVAL; \ goto bypass; \ } \ if ((m)->m_len < (pullup_len) && \ (((m) = m_pullup((m),(pullup_len))) == NULL)) { \ error = ENOBUFS; \ goto done; \ } \ } while (0) switch (iface->info.ifinfo_dlt) { case DLT_EN10MB: /* Ethernet */ { struct ether_header *eh; uint16_t etype; M_CHECK(sizeof(struct ether_header)); eh = mtod(m, struct ether_header *); /* Make sure this is IP frame. */ etype = ntohs(eh->ether_type); switch (etype) { case ETHERTYPE_IP: M_CHECK(sizeof(struct ip)); eh = mtod(m, struct ether_header *); ip = (struct ip *)(eh + 1); l3_off = sizeof(struct ether_header); break; #ifdef INET6 case ETHERTYPE_IPV6: /* * m_pullup() called by M_CHECK() pullups * kern.ipc.max_protohdr (default 60 bytes) * which is enough. */ M_CHECK(sizeof(struct ip6_hdr)); eh = mtod(m, struct ether_header *); ip6 = (struct ip6_hdr *)(eh + 1); l3_off = sizeof(struct ether_header); break; #endif case ETHERTYPE_VLAN: { struct ether_vlan_header *evh; M_CHECK(sizeof(struct ether_vlan_header) - sizeof(struct ether_header)); evh = mtod(m, struct ether_vlan_header *); etype = ntohs(evh->evl_proto); l3_off = sizeof(struct ether_vlan_header); if (etype == ETHERTYPE_IP) { M_CHECK(sizeof(struct ip)); ip = (struct ip *)(evh + 1); break; #ifdef INET6 } else if (etype == ETHERTYPE_IPV6) { M_CHECK(sizeof(struct ip6_hdr)); ip6 = (struct ip6_hdr *)(evh + 1); break; #endif } } default: goto bypass; /* pass this frame */ } break; } case DLT_RAW: /* IP packets */ M_CHECK(sizeof(struct ip)); ip = mtod(m, struct ip *); /* l3_off is already zero */ #ifdef INET6 /* * If INET6 is not defined IPv6 packets * will be discarded in ng_netflow_flow_add(). */ if (ip->ip_v == IP6VERSION) { ip = NULL; M_CHECK(sizeof(struct ip6_hdr) - sizeof(struct ip)); ip6 = mtod(m, struct ip6_hdr *); } #endif break; default: goto bypass; break; } off = pullup_len; if ((ip != NULL) && ((ip->ip_off & htons(IP_OFFMASK)) == 0)) { if ((ip->ip_v != IPVERSION) || ((ip->ip_hl << 2) < sizeof(struct ip))) goto bypass; /* * In case of IPv4 header with options, we haven't pulled * up enough, yet. */ M_CHECK((ip->ip_hl << 2) - sizeof(struct ip)); /* Save upper layer offset and proto */ off = pullup_len; upper_proto = ip->ip_p; /* * XXX: in case of wrong upper layer header we will * forward this packet but skip this record in netflow. */ switch (ip->ip_p) { case IPPROTO_TCP: M_CHECK(sizeof(struct tcphdr)); break; case IPPROTO_UDP: M_CHECK(sizeof(struct udphdr)); break; case IPPROTO_SCTP: M_CHECK(sizeof(struct sctphdr)); break; } } else if (ip != NULL) { /* * Nothing to save except upper layer proto, * since this is a packet fragment. */ flags |= NG_NETFLOW_IS_FRAG; upper_proto = ip->ip_p; if ((ip->ip_v != IPVERSION) || ((ip->ip_hl << 2) < sizeof(struct ip))) goto bypass; #ifdef INET6 } else if (ip6 != NULL) { int cur = ip6->ip6_nxt, hdr_off = 0; struct ip6_ext *ip6e; struct ip6_frag *ip6f; if (priv->export9 == NULL) goto bypass; /* Save upper layer info. */ off = pullup_len; upper_proto = cur; if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) goto bypass; /* * Loop thru IPv6 extended headers to get upper * layer header / frag. */ for (;;) { switch (cur) { /* * Same as in IPv4, we can forward a 'bad' * packet without accounting. */ case IPPROTO_TCP: M_CHECK(sizeof(struct tcphdr)); goto loopend; case IPPROTO_UDP: M_CHECK(sizeof(struct udphdr)); goto loopend; case IPPROTO_SCTP: M_CHECK(sizeof(struct sctphdr)); goto loopend; /* Loop until 'real' upper layer headers */ case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: M_CHECK(sizeof(struct ip6_ext)); ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off); upper_proto = ip6e->ip6e_nxt; hdr_off = (ip6e->ip6e_len + 1) << 3; break; /* RFC4302, can be before DSTOPTS */ case IPPROTO_AH: M_CHECK(sizeof(struct ip6_ext)); ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off); upper_proto = ip6e->ip6e_nxt; hdr_off = (ip6e->ip6e_len + 2) << 2; break; case IPPROTO_FRAGMENT: M_CHECK(sizeof(struct ip6_frag)); ip6f = (struct ip6_frag *)(mtod(m, caddr_t) + off); upper_proto = ip6f->ip6f_nxt; hdr_off = sizeof(struct ip6_frag); off += hdr_off; flags |= NG_NETFLOW_IS_FRAG; goto loopend; #if 0 case IPPROTO_NONE: goto loopend; #endif /* * Any unknown header (new extension or IPv6/IPv4 * header for tunnels) ends loop. */ default: goto loopend; } off += hdr_off; cur = upper_proto; } #endif } #undef M_CHECK #ifdef INET6 loopend: #endif /* Just in case of real reallocation in M_CHECK() / m_pullup() */ if (m != m_old) { priv->nfinfo_realloc_mbuf++; /* Restore ip/ipv6 pointer */ if (ip != NULL) ip = (struct ip *)(mtod(m, caddr_t) + l3_off); else if (ip6 != NULL) ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + l3_off); } upper_ptr = (caddr_t)(mtod(m, caddr_t) + off); /* Determine packet input interface. Prefer configured. */ src_if_index = 0; if (hook == iface->out || iface->info.ifinfo_index == 0) { if (m->m_pkthdr.rcvif != NULL) src_if_index = m->m_pkthdr.rcvif->if_index; } else src_if_index = iface->info.ifinfo_index; /* Check packet FIB */ fib = M_GETFIB(m); if (fib >= priv->maxfibs) { CTR2(KTR_NET, "ng_netflow_rcvdata(): packet fib %d is out of " "range of available fibs: 0 .. %d", fib, priv->maxfibs); goto bypass; } if ((fe = priv_to_fib(priv, fib)) == NULL) { /* Setup new FIB */ if (ng_netflow_fib_init(priv, fib) != 0) { /* malloc() failed */ goto bypass; } fe = priv_to_fib(priv, fib); } if (ip != NULL) error = ng_netflow_flow_add(priv, fe, ip, upper_ptr, upper_proto, flags, src_if_index); #ifdef INET6 else if (ip6 != NULL) error = ng_netflow_flow6_add(priv, fe, ip6, upper_ptr, upper_proto, flags, src_if_index); #endif else goto bypass; acct = 1; bypass: if (out != NULL) { if (acct == 0) { /* Accounting failure */ if (ip != NULL) { counter_u64_add(priv->nfinfo_spackets, 1); counter_u64_add(priv->nfinfo_sbytes, m->m_pkthdr.len); } else if (ip6 != NULL) { counter_u64_add(priv->nfinfo_spackets6, 1); counter_u64_add(priv->nfinfo_sbytes6, m->m_pkthdr.len); } } /* XXX: error gets overwritten here */ NG_FWD_NEW_DATA(error, item, out, m); return (error); } done: if (item) NG_FREE_ITEM(item); if (m) NG_FREE_M(m); return (error); } /* We will be shut down in a moment */ static int ng_netflow_close(node_p node) { const priv_p priv = NG_NODE_PRIVATE(node); callout_drain(&priv->exp_callout); ng_netflow_cache_flush(priv); return (0); } /* Do local shutdown processing. */ static int ng_netflow_rmnode(node_p node) { const priv_p priv = NG_NODE_PRIVATE(node); NG_NODE_SET_PRIVATE(node, NULL); NG_NODE_UNREF(priv->node); free(priv->fib_data, M_NETGRAPH); free(priv, M_NETGRAPH); return (0); } /* Hook disconnection. */ static int ng_netflow_disconnect(hook_p hook) { node_p node = NG_HOOK_NODE(hook); priv_p priv = NG_NODE_PRIVATE(node); iface_p iface = NG_HOOK_PRIVATE(hook); if (iface != NULL) { if (iface->hook == hook) iface->hook = NULL; if (iface->out == hook) iface->out = NULL; } /* if export hook disconnected stop running expire(). */ if (hook == priv->export) { if (priv->export9 == NULL) callout_drain(&priv->exp_callout); priv->export = NULL; } if (hook == priv->export9) { if (priv->export == NULL) callout_drain(&priv->exp_callout); priv->export9 = NULL; } /* Removal of the last link destroys the node. */ if (NG_NODE_NUMHOOKS(node) == 0) ng_rmnode_self(node); return (0); } Index: projects/clang380-import/sys/netinet/in_fib.c =================================================================== --- projects/clang380-import/sys/netinet/in_fib.c (revision 293686) +++ projects/clang380-import/sys/netinet/in_fib.c (revision 293687) @@ -1,225 +1,232 @@ /*- * Copyright (c) 2015 * Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_route.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #ifdef INET static void fib4_rte_to_nh_basic(struct rtentry *rte, struct in_addr dst, uint32_t flags, struct nhop4_basic *pnh4); static void fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst, uint32_t flags, struct nhop4_extended *pnh4); #define RNTORT(p) ((struct rtentry *)(p)) static void fib4_rte_to_nh_basic(struct rtentry *rte, struct in_addr dst, uint32_t flags, struct nhop4_basic *pnh4) { struct sockaddr_in *gw; if ((flags & NHR_IFAIF) != 0) pnh4->nh_ifp = rte->rt_ifa->ifa_ifp; else pnh4->nh_ifp = rte->rt_ifp; pnh4->nh_mtu = min(rte->rt_mtu, rte->rt_ifp->if_mtu); if (rte->rt_flags & RTF_GATEWAY) { gw = (struct sockaddr_in *)rte->rt_gateway; pnh4->nh_addr = gw->sin_addr; } else pnh4->nh_addr = dst; /* Set flags */ pnh4->nh_flags = fib_rte_to_nh_flags(rte->rt_flags); gw = (struct sockaddr_in *)rt_key(rte); if (gw->sin_addr.s_addr == 0) pnh4->nh_flags |= NHF_DEFAULT; /* TODO: Handle RTF_BROADCAST here */ } static void fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst, uint32_t flags, struct nhop4_extended *pnh4) { struct sockaddr_in *gw; struct in_ifaddr *ia; if ((flags & NHR_IFAIF) != 0) pnh4->nh_ifp = rte->rt_ifa->ifa_ifp; else pnh4->nh_ifp = rte->rt_ifp; pnh4->nh_mtu = min(rte->rt_mtu, rte->rt_ifp->if_mtu); if (rte->rt_flags & RTF_GATEWAY) { gw = (struct sockaddr_in *)rte->rt_gateway; pnh4->nh_addr = gw->sin_addr; } else pnh4->nh_addr = dst; /* Set flags */ pnh4->nh_flags = fib_rte_to_nh_flags(rte->rt_flags); gw = (struct sockaddr_in *)rt_key(rte); if (gw->sin_addr.s_addr == 0) pnh4->nh_flags |= NHF_DEFAULT; /* XXX: Set RTF_BROADCAST if GW address is broadcast */ ia = ifatoia(rte->rt_ifa); pnh4->nh_src = IA_SIN(ia)->sin_addr; } /* * Performs IPv4 route table lookup on @dst. Returns 0 on success. * Stores nexthop info provided @pnh4 structure. * Note that * - nh_ifp cannot be safely dereferenced * - nh_ifp represents logical transmit interface (rt_ifp) (e.g. if * looking up address on interface "ix0" pointer to "lo0" interface * will be returned instead of "ix0") * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed * - howewer mtu from "transmit" interface will be returned. */ int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags, uint32_t flowid, struct nhop4_basic *pnh4) { struct radix_node_head *rh; struct radix_node *rn; struct sockaddr_in sin; struct rtentry *rte; KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_basic: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET); if (rh == NULL) return (ENOENT); /* Prepare lookup key */ memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof(struct sockaddr_in); sin.sin_addr = dst; RADIX_NODE_HEAD_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin, rh); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rte = RNTORT(rn); /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(rte->rt_ifp)) { fib4_rte_to_nh_basic(rte, dst, flags, pnh4); RADIX_NODE_HEAD_RUNLOCK(rh); return (0); } } RADIX_NODE_HEAD_RUNLOCK(rh); return (ENOENT); } /* * Performs IPv4 route table lookup on @dst. Returns 0 on success. * Stores extende nexthop info provided @pnh4 structure. * Note that * - nh_ifp cannot be safely dereferenced unless NHR_REF is specified. * - in that case you need to call fib4_free_nh_ext() * - nh_ifp represents logical transmit interface (rt_ifp) (e.g. if * looking up address of interface "ix0" pointer to "lo0" interface * will be returned instead of "ix0") * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed * - howewer mtu from "transmit" interface will be returned. */ int fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags, uint32_t flowid, struct nhop4_extended *pnh4) { struct radix_node_head *rh; struct radix_node *rn; struct sockaddr_in sin; struct rtentry *rte; KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_ext: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET); if (rh == NULL) return (ENOENT); /* Prepare lookup key */ memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof(struct sockaddr_in); sin.sin_addr = dst; RADIX_NODE_HEAD_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin, rh); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rte = RNTORT(rn); +#ifdef RADIX_MPATH + rte = rt_mpath_select(rte, flowid); + if (rte == NULL) { + RADIX_NODE_HEAD_RUNLOCK(rh); + return (ENOENT); + } +#endif /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(rte->rt_ifp)) { fib4_rte_to_nh_extended(rte, dst, flags, pnh4); if ((flags & NHR_REF) != 0) { /* TODO: lwref on egress ifp's ? */ } RADIX_NODE_HEAD_RUNLOCK(rh); return (0); } } RADIX_NODE_HEAD_RUNLOCK(rh); return (ENOENT); } void fib4_free_nh_ext(uint32_t fibnum, struct nhop4_extended *pnh4) { } #endif Index: projects/clang380-import/sys/netinet/in_pcb.c =================================================================== --- projects/clang380-import/sys/netinet/in_pcb.c (revision 293686) +++ projects/clang380-import/sys/netinet/in_pcb.c (revision 293687) @@ -1,2657 +1,2658 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993, 1995 * The Regents of the University of California. * Copyright (c) 2007-2009 Robert N. M. Watson * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_ipsec.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_pcbgroup.h" #include "opt_rss.h" #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #include #endif #ifdef INET #include #endif #ifdef INET6 #include #include #include #include #endif /* INET6 */ #ifdef IPSEC #include #include #endif /* IPSEC */ #include static struct callout ipport_tick_callout; /* * These configure the range of local port addresses assigned to * "unspecified" outgoing connections/packets/whatever. */ VNET_DEFINE(int, ipport_lowfirstauto) = IPPORT_RESERVED - 1; /* 1023 */ VNET_DEFINE(int, ipport_lowlastauto) = IPPORT_RESERVEDSTART; /* 600 */ VNET_DEFINE(int, ipport_firstauto) = IPPORT_EPHEMERALFIRST; /* 10000 */ VNET_DEFINE(int, ipport_lastauto) = IPPORT_EPHEMERALLAST; /* 65535 */ VNET_DEFINE(int, ipport_hifirstauto) = IPPORT_HIFIRSTAUTO; /* 49152 */ VNET_DEFINE(int, ipport_hilastauto) = IPPORT_HILASTAUTO; /* 65535 */ /* * Reserved ports accessible only to root. There are significant * security considerations that must be accounted for when changing these, * but the security benefits can be great. Please be careful. */ VNET_DEFINE(int, ipport_reservedhigh) = IPPORT_RESERVED - 1; /* 1023 */ VNET_DEFINE(int, ipport_reservedlow); /* Variables dealing with random ephemeral port allocation. */ VNET_DEFINE(int, ipport_randomized) = 1; /* user controlled via sysctl */ VNET_DEFINE(int, ipport_randomcps) = 10; /* user controlled via sysctl */ VNET_DEFINE(int, ipport_randomtime) = 45; /* user controlled via sysctl */ VNET_DEFINE(int, ipport_stoprandom); /* toggled by ipport_tick */ VNET_DEFINE(int, ipport_tcpallocs); static VNET_DEFINE(int, ipport_tcplastcount); #define V_ipport_tcplastcount VNET(ipport_tcplastcount) static void in_pcbremlists(struct inpcb *inp); #ifdef INET static struct inpcb *in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags, struct ifnet *ifp); #define RANGECHK(var, min, max) \ if ((var) < (min)) { (var) = (min); } \ else if ((var) > (max)) { (var) = (max); } static int sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, arg1, arg2, req); if (error == 0) { RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1); RANGECHK(V_ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX); RANGECHK(V_ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX); RANGECHK(V_ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX); RANGECHK(V_ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX); } return (error); } #undef RANGECHK static SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_lowfirstauto), 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_lowlastauto), 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_firstauto), 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_lastauto), 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_hifirstauto), 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_hilastauto), 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh, CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(ipport_reservedhigh), 0, ""); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow, CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedlow), 0, ""); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipport_randomized), 0, "Enable random port allocation"); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomcps, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipport_randomcps), 0, "Maximum number of random port " "allocations before switching to a sequental one"); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipport_randomtime), 0, "Minimum time to keep sequental port " "allocation before switching to a random one"); #endif /* INET */ /* * in_pcb.c: manage the Protocol Control Blocks. * * NOTE: It is assumed that most of these functions will be called with * the pcbinfo lock held, and often, the inpcb lock held, as these utility * functions often modify hash chains or addresses in pcbs. */ /* * Initialize an inpcbinfo -- we should be able to reduce the number of * arguments in time. */ void in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name, struct inpcbhead *listhead, int hash_nelements, int porthash_nelements, char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini, uint32_t inpcbzone_flags, u_int hashfields) { INP_INFO_LOCK_INIT(pcbinfo, name); INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash"); /* XXXRW: argument? */ INP_LIST_LOCK_INIT(pcbinfo, "pcbinfolist"); #ifdef VIMAGE pcbinfo->ipi_vnet = curvnet; #endif pcbinfo->ipi_listhead = listhead; LIST_INIT(pcbinfo->ipi_listhead); pcbinfo->ipi_count = 0; pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB, &pcbinfo->ipi_hashmask); pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB, &pcbinfo->ipi_porthashmask); #ifdef PCBGROUP in_pcbgroup_init(pcbinfo, hashfields, hash_nelements); #endif pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb), NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR, inpcbzone_flags); uma_zone_set_max(pcbinfo->ipi_zone, maxsockets); uma_zone_set_warning(pcbinfo->ipi_zone, "kern.ipc.maxsockets limit reached"); } /* * Destroy an inpcbinfo. */ void in_pcbinfo_destroy(struct inpcbinfo *pcbinfo) { KASSERT(pcbinfo->ipi_count == 0, ("%s: ipi_count = %u", __func__, pcbinfo->ipi_count)); hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask); hashdestroy(pcbinfo->ipi_porthashbase, M_PCB, pcbinfo->ipi_porthashmask); #ifdef PCBGROUP in_pcbgroup_destroy(pcbinfo); #endif uma_zdestroy(pcbinfo->ipi_zone); INP_LIST_LOCK_DESTROY(pcbinfo); INP_HASH_LOCK_DESTROY(pcbinfo); INP_INFO_LOCK_DESTROY(pcbinfo); } /* * Allocate a PCB and associate it with the socket. * On success return with the PCB locked. */ int in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) { struct inpcb *inp; int error; #ifdef INVARIANTS if (pcbinfo == &V_tcbinfo) { INP_INFO_RLOCK_ASSERT(pcbinfo); } else { INP_INFO_WLOCK_ASSERT(pcbinfo); } #endif error = 0; inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT); if (inp == NULL) return (ENOBUFS); bzero(inp, inp_zero_size); inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; inp->inp_cred = crhold(so->so_cred); inp->inp_inc.inc_fibnum = so->so_fibnum; #ifdef MAC error = mac_inpcb_init(inp, M_NOWAIT); if (error != 0) goto out; mac_inpcb_create(so, inp); #endif #ifdef IPSEC error = ipsec_init_policy(so, &inp->inp_sp); if (error != 0) { #ifdef MAC mac_inpcb_destroy(inp); #endif goto out; } #endif /*IPSEC*/ #ifdef INET6 if (INP_SOCKAF(so) == AF_INET6) { inp->inp_vflag |= INP_IPV6PROTO; if (V_ip6_v6only) inp->inp_flags |= IN6P_IPV6_V6ONLY; } #endif INP_WLOCK(inp); INP_LIST_WLOCK(pcbinfo); LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list); pcbinfo->ipi_count++; so->so_pcb = (caddr_t)inp; #ifdef INET6 if (V_ip6_auto_flowlabel) inp->inp_flags |= IN6P_AUTOFLOWLABEL; #endif inp->inp_gencnt = ++pcbinfo->ipi_gencnt; refcount_init(&inp->inp_refcount, 1); /* Reference from inpcbinfo */ INP_LIST_WUNLOCK(pcbinfo); #if defined(IPSEC) || defined(MAC) out: if (error != 0) { crfree(inp->inp_cred); uma_zfree(pcbinfo->ipi_zone, inp); } #endif return (error); } #ifdef INET int in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) { int anonport, error; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) return (EINVAL); anonport = nam == NULL || ((struct sockaddr_in *)nam)->sin_port == 0; error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr, &inp->inp_lport, cred); if (error) return (error); if (in_pcbinshash(inp) != 0) { inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; return (EAGAIN); } if (anonport) inp->inp_flags |= INP_ANONPORT; return (0); } #endif /* * Select a local port (number) to use. */ #if defined(INET) || defined(INET6) int in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp, struct ucred *cred, int lookupflags) { struct inpcbinfo *pcbinfo; struct inpcb *tmpinp; unsigned short *lastport; int count, dorandom, error; u_short aux, first, last, lport; #ifdef INET struct in_addr laddr; #endif pcbinfo = inp->inp_pcbinfo; /* * Because no actual state changes occur here, a global write lock on * the pcbinfo isn't required. */ INP_LOCK_ASSERT(inp); INP_HASH_LOCK_ASSERT(pcbinfo); if (inp->inp_flags & INP_HIGHPORT) { first = V_ipport_hifirstauto; /* sysctl */ last = V_ipport_hilastauto; lastport = &pcbinfo->ipi_lasthi; } else if (inp->inp_flags & INP_LOWPORT) { error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0); if (error) return (error); first = V_ipport_lowfirstauto; /* 1023 */ last = V_ipport_lowlastauto; /* 600 */ lastport = &pcbinfo->ipi_lastlow; } else { first = V_ipport_firstauto; /* sysctl */ last = V_ipport_lastauto; lastport = &pcbinfo->ipi_lastport; } /* * For UDP(-Lite), use random port allocation as long as the user * allows it. For TCP (and as of yet unknown) connections, * use random port allocation only if the user allows it AND * ipport_tick() allows it. */ if (V_ipport_randomized && (!V_ipport_stoprandom || pcbinfo == &V_udbinfo || pcbinfo == &V_ulitecbinfo)) dorandom = 1; else dorandom = 0; /* * It makes no sense to do random port allocation if * we have the only port available. */ if (first == last) dorandom = 0; /* Make sure to not include UDP(-Lite) packets in the count. */ if (pcbinfo != &V_udbinfo || pcbinfo != &V_ulitecbinfo) V_ipport_tcpallocs++; /* * Instead of having two loops further down counting up or down * make sure that first is always <= last and go with only one * code path implementing all logic. */ if (first > last) { aux = first; first = last; last = aux; } #ifdef INET /* Make the compiler happy. */ laddr.s_addr = 0; if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4) { KASSERT(laddrp != NULL, ("%s: laddrp NULL for v4 inp %p", __func__, inp)); laddr = *laddrp; } #endif tmpinp = NULL; /* Make compiler happy. */ lport = *lportp; if (dorandom) *lastport = first + (arc4random() % (last - first)); count = last - first; do { if (count-- < 0) /* completely used? */ return (EADDRNOTAVAIL); ++*lastport; if (*lastport < first || *lastport > last) *lastport = first; lport = htons(*lastport); #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) tmpinp = in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, lport, lookupflags, cred); #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET tmpinp = in_pcblookup_local(pcbinfo, laddr, lport, lookupflags, cred); #endif } while (tmpinp != NULL); #ifdef INET if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4) laddrp->s_addr = laddr.s_addr; #endif *lportp = lport; return (0); } /* * Return cached socket options. */ short inp_so_options(const struct inpcb *inp) { short so_options; so_options = 0; if ((inp->inp_flags2 & INP_REUSEPORT) != 0) so_options |= SO_REUSEPORT; if ((inp->inp_flags2 & INP_REUSEADDR) != 0) so_options |= SO_REUSEADDR; return (so_options); } #endif /* INET || INET6 */ /* * Check if a new BINDMULTI socket is allowed to be created. * * ni points to the new inp. * oi points to the exisitng inp. * * This checks whether the existing inp also has BINDMULTI and * whether the credentials match. */ int in_pcbbind_check_bindmulti(const struct inpcb *ni, const struct inpcb *oi) { /* Check permissions match */ if ((ni->inp_flags2 & INP_BINDMULTI) && (ni->inp_cred->cr_uid != oi->inp_cred->cr_uid)) return (0); /* Check the existing inp has BINDMULTI set */ if ((ni->inp_flags2 & INP_BINDMULTI) && ((oi->inp_flags2 & INP_BINDMULTI) == 0)) return (0); /* * We're okay - either INP_BINDMULTI isn't set on ni, or * it is and it matches the checks. */ return (1); } #ifdef INET /* * Set up a bind operation on a PCB, performing port allocation * as required, but do not actually modify the PCB. Callers can * either complete the bind by setting inp_laddr/inp_lport and * calling in_pcbinshash(), or they can just use the resulting * port and address to authorise the sending of a once-off packet. * * On error, the values of *laddrp and *lportp are not changed. */ int in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, u_short *lportp, struct ucred *cred) { struct socket *so = inp->inp_socket; struct sockaddr_in *sin; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct in_addr laddr; u_short lport = 0; int lookupflags = 0, reuseport = (so->so_options & SO_REUSEPORT); int error; /* * No state changes, so read locks are sufficient here. */ INP_LOCK_ASSERT(inp); INP_HASH_LOCK_ASSERT(pcbinfo); if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */ return (EADDRNOTAVAIL); laddr.s_addr = *laddrp; if (nam != NULL && laddr.s_addr != INADDR_ANY) return (EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) lookupflags = INPLOOKUP_WILDCARD; if (nam == NULL) { if ((error = prison_local_ip4(cred, &laddr)) != 0) return (error); } else { sin = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sin)) return (EINVAL); #ifdef notdef /* * We should check the family, but old programs * incorrectly fail to initialize it. */ if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); #endif error = prison_local_ip4(cred, &sin->sin_addr); if (error) return (error); if (sin->sin_port != *lportp) { /* Don't allow the port to change. */ if (*lportp != 0) return (EINVAL); lport = sin->sin_port; } /* NB: lport is left as 0 if the port isn't being changed. */ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow complete duplication of binding if * SO_REUSEPORT is set, or if SO_REUSEADDR is set * and a multicast address is bound on both * new and duplicated sockets. */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0) reuseport = SO_REUSEADDR|SO_REUSEPORT; } else if (sin->sin_addr.s_addr != INADDR_ANY) { sin->sin_port = 0; /* yech... */ bzero(&sin->sin_zero, sizeof(sin->sin_zero)); /* * Is the address a local IP address? * If INP_BINDANY is set, then the socket may be bound * to any endpoint address, local or not. */ if ((inp->inp_flags & INP_BINDANY) == 0 && ifa_ifwithaddr_check((struct sockaddr *)sin) == 0) return (EADDRNOTAVAIL); } laddr = sin->sin_addr; if (lport) { struct inpcb *t; struct tcptw *tw; /* GROSS */ if (ntohs(lport) <= V_ipport_reservedhigh && ntohs(lport) >= V_ipport_reservedlow && priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0)) return (EACCES); if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && priv_check_cred(inp->inp_cred, PRIV_NETINET_REUSEPORT, 0) != 0) { t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, INPLOOKUP_WILDCARD, cred); /* * XXX * This entire block sorely needs a rewrite. */ if (t && ((inp->inp_flags2 & INP_BINDMULTI) == 0) && ((t->inp_flags & INP_TIMEWAIT) == 0) && (so->so_type != SOCK_STREAM || ntohl(t->inp_faddr.s_addr) == INADDR_ANY) && (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || (t->inp_flags2 & INP_REUSEPORT) == 0) && (inp->inp_cred->cr_uid != t->inp_cred->cr_uid)) return (EADDRINUSE); /* * If the socket is a BINDMULTI socket, then * the credentials need to match and the * original socket also has to have been bound * with BINDMULTI. */ if (t && (! in_pcbbind_check_bindmulti(inp, t))) return (EADDRINUSE); } t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, lookupflags, cred); if (t && (t->inp_flags & INP_TIMEWAIT)) { /* * XXXRW: If an incpb has had its timewait * state recycled, we treat the address as * being in use (for now). This is better * than a panic, but not desirable. */ tw = intotw(t); if (tw == NULL || (reuseport & tw->tw_so_options) == 0) return (EADDRINUSE); } else if (t && ((inp->inp_flags2 & INP_BINDMULTI) == 0) && (reuseport & inp_so_options(t)) == 0) { #ifdef INET6 if (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || (inp->inp_vflag & INP_IPV6PROTO) == 0 || (t->inp_vflag & INP_IPV6PROTO) == 0) #endif return (EADDRINUSE); if (t && (! in_pcbbind_check_bindmulti(inp, t))) return (EADDRINUSE); } } } if (*lportp != 0) lport = *lportp; if (lport == 0) { error = in_pcb_lport(inp, &laddr, &lport, cred, lookupflags); if (error != 0) return (error); } *laddrp = laddr.s_addr; *lportp = lport; return (0); } /* * Connect from a socket to a specified address. * Both address and port must be specified in argument sin. * If don't have a local address for this socket yet, * then pick one. */ int in_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred, struct mbuf *m) { u_short lport, fport; in_addr_t laddr, faddr; int anonport, error; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); lport = inp->inp_lport; laddr = inp->inp_laddr.s_addr; anonport = (lport == 0); error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport, NULL, cred); if (error) return (error); /* Do the initial binding of the local address if required. */ if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { inp->inp_lport = lport; inp->inp_laddr.s_addr = laddr; if (in_pcbinshash(inp) != 0) { inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; return (EAGAIN); } } /* Commit the remaining changes. */ inp->inp_lport = lport; inp->inp_laddr.s_addr = laddr; inp->inp_faddr.s_addr = faddr; inp->inp_fport = fport; in_pcbrehash_mbuf(inp, m); if (anonport) inp->inp_flags |= INP_ANONPORT; return (0); } int in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) { return (in_pcbconnect_mbuf(inp, nam, cred, NULL)); } /* * Do proper source address selection on an unbound socket in case * of connect. Take jails into account as well. */ int in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, struct ucred *cred) { struct ifaddr *ifa; struct sockaddr *sa; struct sockaddr_in *sin; struct route sro; int error; KASSERT(laddr != NULL, ("%s: laddr NULL", __func__)); /* * Bypass source address selection and use the primary jail IP * if requested. */ if (cred != NULL && !prison_saddrsel_ip4(cred, laddr)) return (0); error = 0; bzero(&sro, sizeof(sro)); sin = (struct sockaddr_in *)&sro.ro_dst; sin->sin_family = AF_INET; sin->sin_len = sizeof(struct sockaddr_in); sin->sin_addr.s_addr = faddr->s_addr; /* * If route is known our src addr is taken from the i/f, * else punt. * * Find out route to destination. */ if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) in_rtalloc_ign(&sro, 0, inp->inp_inc.inc_fibnum); /* * If we found a route, use the address corresponding to * the outgoing interface. * * Otherwise assume faddr is reachable on a directly connected * network and try to find a corresponding interface to take * the source address from. */ if (sro.ro_rt == NULL || sro.ro_rt->rt_ifp == NULL) { struct in_ifaddr *ia; struct ifnet *ifp; ia = ifatoia(ifa_ifwithdstaddr((struct sockaddr *)sin, inp->inp_socket->so_fibnum)); if (ia == NULL) ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin, 0, inp->inp_socket->so_fibnum)); if (ia == NULL) { error = ENETUNREACH; goto done; } if (cred == NULL || !prison_flag(cred, PR_IP4)) { laddr->s_addr = ia->ia_addr.sin_addr.s_addr; ifa_free(&ia->ia_ifa); goto done; } ifp = ia->ia_ifp; ifa_free(&ia->ia_ifa); ia = NULL; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { sa = ifa->ifa_addr; if (sa->sa_family != AF_INET) continue; sin = (struct sockaddr_in *)sa; if (prison_check_ip4(cred, &sin->sin_addr) == 0) { ia = (struct in_ifaddr *)ifa; break; } } if (ia != NULL) { laddr->s_addr = ia->ia_addr.sin_addr.s_addr; IF_ADDR_RUNLOCK(ifp); goto done; } IF_ADDR_RUNLOCK(ifp); /* 3. As a last resort return the 'default' jail address. */ error = prison_get_ip4(cred, laddr); goto done; } /* * If the outgoing interface on the route found is not * a loopback interface, use the address from that interface. * In case of jails do those three steps: * 1. check if the interface address belongs to the jail. If so use it. * 2. check if we have any address on the outgoing interface * belonging to this jail. If so use it. * 3. as a last resort return the 'default' jail address. */ if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) { struct in_ifaddr *ia; struct ifnet *ifp; /* If not jailed, use the default returned. */ if (cred == NULL || !prison_flag(cred, PR_IP4)) { ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa; laddr->s_addr = ia->ia_addr.sin_addr.s_addr; goto done; } /* Jailed. */ /* 1. Check if the iface address belongs to the jail. */ sin = (struct sockaddr_in *)sro.ro_rt->rt_ifa->ifa_addr; if (prison_check_ip4(cred, &sin->sin_addr) == 0) { ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa; laddr->s_addr = ia->ia_addr.sin_addr.s_addr; goto done; } /* * 2. Check if we have any address on the outgoing interface * belonging to this jail. */ ia = NULL; ifp = sro.ro_rt->rt_ifp; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { sa = ifa->ifa_addr; if (sa->sa_family != AF_INET) continue; sin = (struct sockaddr_in *)sa; if (prison_check_ip4(cred, &sin->sin_addr) == 0) { ia = (struct in_ifaddr *)ifa; break; } } if (ia != NULL) { laddr->s_addr = ia->ia_addr.sin_addr.s_addr; IF_ADDR_RUNLOCK(ifp); goto done; } IF_ADDR_RUNLOCK(ifp); /* 3. As a last resort return the 'default' jail address. */ error = prison_get_ip4(cred, laddr); goto done; } /* * The outgoing interface is marked with 'loopback net', so a route * to ourselves is here. * Try to find the interface of the destination address and then * take the address from there. That interface is not necessarily * a loopback interface. * In case of jails, check that it is an address of the jail * and if we cannot find, fall back to the 'default' jail address. */ if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { struct sockaddr_in sain; struct in_ifaddr *ia; bzero(&sain, sizeof(struct sockaddr_in)); sain.sin_family = AF_INET; sain.sin_len = sizeof(struct sockaddr_in); sain.sin_addr.s_addr = faddr->s_addr; ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sain), inp->inp_socket->so_fibnum)); if (ia == NULL) ia = ifatoia(ifa_ifwithnet(sintosa(&sain), 0, inp->inp_socket->so_fibnum)); if (ia == NULL) ia = ifatoia(ifa_ifwithaddr(sintosa(&sain))); if (cred == NULL || !prison_flag(cred, PR_IP4)) { if (ia == NULL) { error = ENETUNREACH; goto done; } laddr->s_addr = ia->ia_addr.sin_addr.s_addr; ifa_free(&ia->ia_ifa); goto done; } /* Jailed. */ if (ia != NULL) { struct ifnet *ifp; ifp = ia->ia_ifp; ifa_free(&ia->ia_ifa); ia = NULL; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { sa = ifa->ifa_addr; if (sa->sa_family != AF_INET) continue; sin = (struct sockaddr_in *)sa; if (prison_check_ip4(cred, &sin->sin_addr) == 0) { ia = (struct in_ifaddr *)ifa; break; } } if (ia != NULL) { laddr->s_addr = ia->ia_addr.sin_addr.s_addr; IF_ADDR_RUNLOCK(ifp); goto done; } IF_ADDR_RUNLOCK(ifp); } /* 3. As a last resort return the 'default' jail address. */ error = prison_get_ip4(cred, laddr); goto done; } done: if (sro.ro_rt != NULL) RTFREE(sro.ro_rt); return (error); } /* * Set up for a connect from a socket to the specified address. * On entry, *laddrp and *lportp should contain the current local * address and port for the PCB; these are updated to the values * that should be placed in inp_laddr and inp_lport to complete * the connect. * * On success, *faddrp and *fportp will be set to the remote address * and port. These are not updated in the error case. * * If the operation fails because the connection already exists, * *oinpp will be set to the PCB of that connection so that the * caller can decide to override it. In all other cases, *oinpp * is set to NULL. */ int in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp, struct inpcb **oinpp, struct ucred *cred) { struct rm_priotracker in_ifa_tracker; struct sockaddr_in *sin = (struct sockaddr_in *)nam; struct in_ifaddr *ia; struct inpcb *oinp; struct in_addr laddr, faddr; u_short lport, fport; int error; /* * Because a global state change doesn't actually occur here, a read * lock is sufficient. */ INP_LOCK_ASSERT(inp); INP_HASH_LOCK_ASSERT(inp->inp_pcbinfo); if (oinpp != NULL) *oinpp = NULL; if (nam->sa_len != sizeof (*sin)) return (EINVAL); if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); if (sin->sin_port == 0) return (EADDRNOTAVAIL); laddr.s_addr = *laddrp; lport = *lportp; faddr = sin->sin_addr; fport = sin->sin_port; if (!TAILQ_EMPTY(&V_in_ifaddrhead)) { /* * If the destination address is INADDR_ANY, * use the primary local address. * If the supplied address is INADDR_BROADCAST, * and the primary interface supports broadcast, * choose the broadcast address for that interface. */ if (faddr.s_addr == INADDR_ANY) { IN_IFADDR_RLOCK(&in_ifa_tracker); faddr = IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr; IN_IFADDR_RUNLOCK(&in_ifa_tracker); if (cred != NULL && (error = prison_get_ip4(cred, &faddr)) != 0) return (error); } else if (faddr.s_addr == (u_long)INADDR_BROADCAST) { IN_IFADDR_RLOCK(&in_ifa_tracker); if (TAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags & IFF_BROADCAST) faddr = satosin(&TAILQ_FIRST( &V_in_ifaddrhead)->ia_broadaddr)->sin_addr; IN_IFADDR_RUNLOCK(&in_ifa_tracker); } } if (laddr.s_addr == INADDR_ANY) { error = in_pcbladdr(inp, &faddr, &laddr, cred); /* * If the destination address is multicast and an outgoing * interface has been set as a multicast option, prefer the * address of that interface as our source address. */ if (IN_MULTICAST(ntohl(faddr.s_addr)) && inp->inp_moptions != NULL) { struct ip_moptions *imo; struct ifnet *ifp; imo = inp->inp_moptions; if (imo->imo_multicast_ifp != NULL) { ifp = imo->imo_multicast_ifp; IN_IFADDR_RLOCK(&in_ifa_tracker); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if ((ia->ia_ifp == ifp) && (cred == NULL || prison_check_ip4(cred, &ia->ia_addr.sin_addr) == 0)) break; } if (ia == NULL) error = EADDRNOTAVAIL; else { laddr = ia->ia_addr.sin_addr; error = 0; } IN_IFADDR_RUNLOCK(&in_ifa_tracker); } } if (error) return (error); } oinp = in_pcblookup_hash_locked(inp->inp_pcbinfo, faddr, fport, laddr, lport, 0, NULL); if (oinp != NULL) { if (oinpp != NULL) *oinpp = oinp; return (EADDRINUSE); } if (lport == 0) { error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, cred); if (error) return (error); } *laddrp = laddr.s_addr; *lportp = lport; *faddrp = faddr.s_addr; *fportp = fport; return (0); } void in_pcbdisconnect(struct inpcb *inp) { INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); inp->inp_faddr.s_addr = INADDR_ANY; inp->inp_fport = 0; in_pcbrehash(inp); } #endif /* INET */ /* * in_pcbdetach() is responsibe for disassociating a socket from an inpcb. * For most protocols, this will be invoked immediately prior to calling * in_pcbfree(). However, with TCP the inpcb may significantly outlive the * socket, in which case in_pcbfree() is deferred. */ void in_pcbdetach(struct inpcb *inp) { KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__)); inp->inp_socket->so_pcb = NULL; inp->inp_socket = NULL; } /* * in_pcbref() bumps the reference count on an inpcb in order to maintain * stability of an inpcb pointer despite the inpcb lock being released. This * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded, * but where the inpcb lock may already held, or when acquiring a reference * via a pcbgroup. * * in_pcbref() should be used only to provide brief memory stability, and * must always be followed by a call to INP_WLOCK() and in_pcbrele() to * garbage collect the inpcb if it has been in_pcbfree()'d from another * context. Until in_pcbrele() has returned that the inpcb is still valid, * lock and rele are the *only* safe operations that may be performed on the * inpcb. * * While the inpcb will not be freed, releasing the inpcb lock means that the * connection's state may change, so the caller should be careful to * revalidate any cached state on reacquiring the lock. Drop the reference * using in_pcbrele(). */ void in_pcbref(struct inpcb *inp) { KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__)); refcount_acquire(&inp->inp_refcount); } /* * Drop a refcount on an inpcb elevated using in_pcbref(); because a call to * in_pcbfree() may have been made between in_pcbref() and in_pcbrele(), we * return a flag indicating whether or not the inpcb remains valid. If it is * valid, we return with the inpcb lock held. * * Notice that, unlike in_pcbref(), the inpcb lock must be held to drop a * reference on an inpcb. Historically more work was done here (actually, in * in_pcbfree_internal()) but has been moved to in_pcbfree() to avoid the * need for the pcbinfo lock in in_pcbrele(). Deferring the free is entirely * about memory stability (and continued use of the write lock). */ int in_pcbrele_rlocked(struct inpcb *inp) { struct inpcbinfo *pcbinfo; KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__)); INP_RLOCK_ASSERT(inp); if (refcount_release(&inp->inp_refcount) == 0) { /* * If the inpcb has been freed, let the caller know, even if * this isn't the last reference. */ if (inp->inp_flags2 & INP_FREED) { INP_RUNLOCK(inp); return (1); } return (0); } KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__)); INP_RUNLOCK(inp); pcbinfo = inp->inp_pcbinfo; uma_zfree(pcbinfo->ipi_zone, inp); return (1); } int in_pcbrele_wlocked(struct inpcb *inp) { struct inpcbinfo *pcbinfo; KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__)); INP_WLOCK_ASSERT(inp); if (refcount_release(&inp->inp_refcount) == 0) { /* * If the inpcb has been freed, let the caller know, even if * this isn't the last reference. */ if (inp->inp_flags2 & INP_FREED) { INP_WUNLOCK(inp); return (1); } return (0); } KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__)); INP_WUNLOCK(inp); pcbinfo = inp->inp_pcbinfo; uma_zfree(pcbinfo->ipi_zone, inp); return (1); } /* * Temporary wrapper. */ int in_pcbrele(struct inpcb *inp) { return (in_pcbrele_wlocked(inp)); } /* * Unconditionally schedule an inpcb to be freed by decrementing its * reference count, which should occur only after the inpcb has been detached * from its socket. If another thread holds a temporary reference (acquired * using in_pcbref()) then the free is deferred until that reference is * released using in_pcbrele(), but the inpcb is still unlocked. Almost all * work, including removal from global lists, is done in this context, where * the pcbinfo lock is held. */ void in_pcbfree(struct inpcb *inp) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__)); #ifdef INVARIANTS if (pcbinfo == &V_tcbinfo) { INP_INFO_LOCK_ASSERT(pcbinfo); } else { INP_INFO_WLOCK_ASSERT(pcbinfo); } #endif INP_WLOCK_ASSERT(inp); /* XXXRW: Do as much as possible here. */ #ifdef IPSEC if (inp->inp_sp != NULL) ipsec_delete_pcbpolicy(inp); #endif INP_LIST_WLOCK(pcbinfo); inp->inp_gencnt = ++pcbinfo->ipi_gencnt; in_pcbremlists(inp); INP_LIST_WUNLOCK(pcbinfo); #ifdef INET6 if (inp->inp_vflag & INP_IPV6PROTO) { ip6_freepcbopts(inp->in6p_outputopts); if (inp->in6p_moptions != NULL) ip6_freemoptions(inp->in6p_moptions); } #endif if (inp->inp_options) (void)m_free(inp->inp_options); #ifdef INET if (inp->inp_moptions != NULL) inp_freemoptions(inp->inp_moptions); #endif inp->inp_vflag = 0; inp->inp_flags2 |= INP_FREED; crfree(inp->inp_cred); #ifdef MAC mac_inpcb_destroy(inp); #endif if (!in_pcbrele_wlocked(inp)) INP_WUNLOCK(inp); } /* * in_pcbdrop() removes an inpcb from hashed lists, releasing its address and * port reservation, and preventing it from being returned by inpcb lookups. * * It is used by TCP to mark an inpcb as unused and avoid future packet * delivery or event notification when a socket remains open but TCP has * closed. This might occur as a result of a shutdown()-initiated TCP close * or a RST on the wire, and allows the port binding to be reused while still * maintaining the invariant that so_pcb always points to a valid inpcb until * in_pcbdetach(). * * XXXRW: Possibly in_pcbdrop() should also prevent future notifications by * in_pcbnotifyall() and in_pcbpurgeif0()? */ void in_pcbdrop(struct inpcb *inp) { INP_WLOCK_ASSERT(inp); /* * XXXRW: Possibly we should protect the setting of INP_DROPPED with * the hash lock...? */ inp->inp_flags |= INP_DROPPED; if (inp->inp_flags & INP_INHASHLIST) { struct inpcbport *phd = inp->inp_phd; INP_HASH_WLOCK(inp->inp_pcbinfo); LIST_REMOVE(inp, inp_hash); LIST_REMOVE(inp, inp_portlist); if (LIST_FIRST(&phd->phd_pcblist) == NULL) { LIST_REMOVE(phd, phd_hash); free(phd, M_PCB); } INP_HASH_WUNLOCK(inp->inp_pcbinfo); inp->inp_flags &= ~INP_INHASHLIST; #ifdef PCBGROUP in_pcbgroup_remove(inp); #endif } } #ifdef INET /* * Common routines to return the socket addresses associated with inpcbs. */ struct sockaddr * in_sockaddr(in_port_t port, struct in_addr *addr_p) { struct sockaddr_in *sin; sin = malloc(sizeof *sin, M_SONAME, M_WAITOK | M_ZERO); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = *addr_p; sin->sin_port = port; return (struct sockaddr *)sin; } int in_getsockaddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; struct in_addr addr; in_port_t port; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL")); INP_RLOCK(inp); port = inp->inp_lport; addr = inp->inp_laddr; INP_RUNLOCK(inp); *nam = in_sockaddr(port, &addr); return 0; } int in_getpeeraddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; struct in_addr addr; in_port_t port; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL")); INP_RLOCK(inp); port = inp->inp_fport; addr = inp->inp_faddr; INP_RUNLOCK(inp); *nam = in_sockaddr(port, &addr); return 0; } void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno, struct inpcb *(*notify)(struct inpcb *, int)) { struct inpcb *inp, *inp_temp; INP_INFO_WLOCK(pcbinfo); LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { INP_WLOCK(inp); #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) { INP_WUNLOCK(inp); continue; } #endif if (inp->inp_faddr.s_addr != faddr.s_addr || inp->inp_socket == NULL) { INP_WUNLOCK(inp); continue; } if ((*notify)(inp, errno)) INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(pcbinfo); } void in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) { struct inpcb *inp; struct ip_moptions *imo; int i, gap; INP_INFO_WLOCK(pcbinfo); LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { INP_WLOCK(inp); imo = inp->inp_moptions; if ((inp->inp_vflag & INP_IPV4) && imo != NULL) { /* * Unselect the outgoing interface if it is being * detached. */ if (imo->imo_multicast_ifp == ifp) imo->imo_multicast_ifp = NULL; /* * Drop multicast group membership if we joined * through the interface being detached. */ for (i = 0, gap = 0; i < imo->imo_num_memberships; i++) { if (imo->imo_membership[i]->inm_ifp == ifp) { in_delmulti(imo->imo_membership[i]); gap++; } else if (gap != 0) imo->imo_membership[i - gap] = imo->imo_membership[i]; } imo->imo_num_memberships -= gap; } INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(pcbinfo); } /* * Lookup a PCB based on the local address and port. Caller must hold the * hash lock. No inpcb locks or references are acquired. */ #define INP_LOOKUP_MAPPED_PCB_COST 3 struct inpcb * in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, u_short lport, int lookupflags, struct ucred *cred) { struct inpcb *inp; #ifdef INET6 int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST; #else int matchwild = 3; #endif int wildcard; KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); INP_HASH_LOCK_ASSERT(pcbinfo); if ((lookupflags & INPLOOKUP_WILDCARD) == 0) { struct inpcbhead *head; /* * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == INADDR_ANY && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_lport == lport) { /* * Found? */ if (cred == NULL || prison_equal_ip4(cred->cr_prison, inp->inp_cred->cr_prison)) return (inp); } } /* * Not found. */ return (NULL); } else { struct inpcbporthead *porthash; struct inpcbport *phd; struct inpcb *match = NULL; /* * Best fit PCB lookup. * * First see if this local port is in use by looking on the * port hash list. */ porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, pcbinfo->ipi_porthashmask)]; LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; } if (phd != NULL) { /* * Port is in use by one or more PCBs. Look for best * fit. */ LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; if (cred != NULL && !prison_equal_ip4(inp->inp_cred->cr_prison, cred->cr_prison)) continue; #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; /* * We never select the PCB that has * INP_IPV6 flag and is bound to :: if * we have another PCB which is bound * to 0.0.0.0. If a PCB has the * INP_IPV6 flag, then we set its cost * higher than IPv4 only PCBs. * * Note that the case only happens * when a socket is bound to ::, under * the condition that the use of the * mapped address is allowed. */ if ((inp->inp_vflag & INP_IPV6) != 0) wildcard += INP_LOOKUP_MAPPED_PCB_COST; #endif if (inp->inp_faddr.s_addr != INADDR_ANY) wildcard++; if (inp->inp_laddr.s_addr != INADDR_ANY) { if (laddr.s_addr == INADDR_ANY) wildcard++; else if (inp->inp_laddr.s_addr != laddr.s_addr) continue; } else { if (laddr.s_addr != INADDR_ANY) wildcard++; } if (wildcard < matchwild) { match = inp; matchwild = wildcard; if (matchwild == 0) break; } } } return (match); } } #undef INP_LOOKUP_MAPPED_PCB_COST #ifdef PCBGROUP /* * Lookup PCB in hash list, using pcbgroup tables. */ static struct inpcb * in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, struct in_addr faddr, u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags, struct ifnet *ifp) { struct inpcbhead *head; struct inpcb *inp, *tmpinp; u_short fport = fport_arg, lport = lport_arg; /* * First look for an exact match. */ tmpinp = NULL; INP_GROUP_LOCK(pcbgroup); head = &pcbgroup->ipg_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbgroup->ipg_hashmask)]; LIST_FOREACH(inp, head, inp_pcbgrouphash) { #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == faddr.s_addr && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_fport == fport && inp->inp_lport == lport) { /* * XXX We should be able to directly return * the inp here, without any checks. * Well unless both bound with SO_REUSEPORT? */ if (prison_flag(inp->inp_cred, PR_IP4)) goto found; if (tmpinp == NULL) tmpinp = inp; } } if (tmpinp != NULL) { inp = tmpinp; goto found; } #ifdef RSS /* * For incoming connections, we may wish to do a wildcard * match for an RSS-local socket. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { struct inpcb *local_wild = NULL, *local_exact = NULL; #ifdef INET6 struct inpcb *local_wild_mapped = NULL; #endif struct inpcb *jail_wild = NULL; struct inpcbhead *head; int injail; /* * Order of socket selection - we always prefer jails. * 1. jailed, non-wild. * 2. jailed, wild. * 3. non-jailed, non-wild. * 4. non-jailed, wild. */ head = &pcbgroup->ipg_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbgroup->ipg_hashmask)]; LIST_FOREACH(inp, head, inp_pcbgrouphash) { #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr != INADDR_ANY || inp->inp_lport != lport) continue; injail = prison_flag(inp->inp_cred, PR_IP4); if (injail) { if (prison_check_ip4(inp->inp_cred, &laddr) != 0) continue; } else { if (local_exact != NULL) continue; } if (inp->inp_laddr.s_addr == laddr.s_addr) { if (injail) goto found; else local_exact = inp; } else if (inp->inp_laddr.s_addr == INADDR_ANY) { #ifdef INET6 /* XXX inp locking, NULL check */ if (inp->inp_vflag & INP_IPV6PROTO) local_wild_mapped = inp; else #endif if (injail) jail_wild = inp; else local_wild = inp; } } /* LIST_FOREACH */ inp = jail_wild; if (inp == NULL) inp = local_exact; if (inp == NULL) inp = local_wild; #ifdef INET6 if (inp == NULL) inp = local_wild_mapped; #endif if (inp != NULL) goto found; } #endif /* * Then look for a wildcard match, if requested. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { struct inpcb *local_wild = NULL, *local_exact = NULL; #ifdef INET6 struct inpcb *local_wild_mapped = NULL; #endif struct inpcb *jail_wild = NULL; struct inpcbhead *head; int injail; /* * Order of socket selection - we always prefer jails. * 1. jailed, non-wild. * 2. jailed, wild. * 3. non-jailed, non-wild. * 4. non-jailed, wild. */ head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->ipi_wildmask)]; LIST_FOREACH(inp, head, inp_pcbgroup_wild) { #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr != INADDR_ANY || inp->inp_lport != lport) continue; injail = prison_flag(inp->inp_cred, PR_IP4); if (injail) { if (prison_check_ip4(inp->inp_cred, &laddr) != 0) continue; } else { if (local_exact != NULL) continue; } if (inp->inp_laddr.s_addr == laddr.s_addr) { if (injail) goto found; else local_exact = inp; } else if (inp->inp_laddr.s_addr == INADDR_ANY) { #ifdef INET6 /* XXX inp locking, NULL check */ if (inp->inp_vflag & INP_IPV6PROTO) local_wild_mapped = inp; else #endif if (injail) jail_wild = inp; else local_wild = inp; } } /* LIST_FOREACH */ inp = jail_wild; if (inp == NULL) inp = local_exact; if (inp == NULL) inp = local_wild; #ifdef INET6 if (inp == NULL) inp = local_wild_mapped; #endif if (inp != NULL) goto found; } /* if (lookupflags & INPLOOKUP_WILDCARD) */ INP_GROUP_UNLOCK(pcbgroup); return (NULL); found: in_pcbref(inp); INP_GROUP_UNLOCK(pcbgroup); if (lookupflags & INPLOOKUP_WLOCKPCB) { INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) return (NULL); } else if (lookupflags & INPLOOKUP_RLOCKPCB) { INP_RLOCK(inp); if (in_pcbrele_rlocked(inp)) return (NULL); } else panic("%s: locking bug", __func__); return (inp); } #endif /* PCBGROUP */ /* * Lookup PCB in hash list, using pcbinfo tables. This variation assumes * that the caller has locked the hash list, and will not perform any further * locking or reference operations on either the hash list or the connection. */ static struct inpcb * in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags, struct ifnet *ifp) { struct inpcbhead *head; struct inpcb *inp, *tmpinp; u_short fport = fport_arg, lport = lport_arg; KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); INP_HASH_LOCK_ASSERT(pcbinfo); /* * First look for an exact match. */ tmpinp = NULL; head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == faddr.s_addr && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_fport == fport && inp->inp_lport == lport) { /* * XXX We should be able to directly return * the inp here, without any checks. * Well unless both bound with SO_REUSEPORT? */ if (prison_flag(inp->inp_cred, PR_IP4)) return (inp); if (tmpinp == NULL) tmpinp = inp; } } if (tmpinp != NULL) return (tmpinp); /* * Then look for a wildcard match, if requested. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { struct inpcb *local_wild = NULL, *local_exact = NULL; #ifdef INET6 struct inpcb *local_wild_mapped = NULL; #endif struct inpcb *jail_wild = NULL; int injail; /* * Order of socket selection - we always prefer jails. * 1. jailed, non-wild. * 2. jailed, wild. * 3. non-jailed, non-wild. * 4. non-jailed, wild. */ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr != INADDR_ANY || inp->inp_lport != lport) continue; injail = prison_flag(inp->inp_cred, PR_IP4); if (injail) { if (prison_check_ip4(inp->inp_cred, &laddr) != 0) continue; } else { if (local_exact != NULL) continue; } if (inp->inp_laddr.s_addr == laddr.s_addr) { if (injail) return (inp); else local_exact = inp; } else if (inp->inp_laddr.s_addr == INADDR_ANY) { #ifdef INET6 /* XXX inp locking, NULL check */ if (inp->inp_vflag & INP_IPV6PROTO) local_wild_mapped = inp; else #endif if (injail) jail_wild = inp; else local_wild = inp; } } /* LIST_FOREACH */ if (jail_wild != NULL) return (jail_wild); if (local_exact != NULL) return (local_exact); if (local_wild != NULL) return (local_wild); #ifdef INET6 if (local_wild_mapped != NULL) return (local_wild_mapped); #endif } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */ return (NULL); } /* * Lookup PCB in hash list, using pcbinfo tables. This variation locks the * hash list lock, and will return the inpcb locked (i.e., requires * INPLOOKUP_LOCKPCB). */ static struct inpcb * in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport, struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp) { struct inpcb *inp; INP_HASH_RLOCK(pcbinfo); inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport, (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp); if (inp != NULL) { in_pcbref(inp); INP_HASH_RUNLOCK(pcbinfo); if (lookupflags & INPLOOKUP_WLOCKPCB) { INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) return (NULL); } else if (lookupflags & INPLOOKUP_RLOCKPCB) { INP_RLOCK(inp); if (in_pcbrele_rlocked(inp)) return (NULL); } else panic("%s: locking bug", __func__); } else INP_HASH_RUNLOCK(pcbinfo); return (inp); } /* * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf * from which a pre-calculated hash value may be extracted. * * Possibly more of this logic should be in in_pcbgroup.c. */ struct inpcb * in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport, struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp) { #if defined(PCBGROUP) && !defined(RSS) struct inpcbgroup *pcbgroup; #endif KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); /* * When not using RSS, use connection groups in preference to the * reservation table when looking up 4-tuples. When using RSS, just * use the reservation table, due to the cost of the Toeplitz hash * in software. * * XXXRW: This policy belongs in the pcbgroup code, as in principle * we could be doing RSS with a non-Toeplitz hash that is affordable * in software. */ #if defined(PCBGROUP) && !defined(RSS) if (in_pcbgroup_enabled(pcbinfo)) { pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, fport); return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); } #endif return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, lookupflags, ifp)); } struct inpcb * in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport, struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp, struct mbuf *m) { #ifdef PCBGROUP struct inpcbgroup *pcbgroup; #endif KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); #ifdef PCBGROUP /* * If we can use a hardware-generated hash to look up the connection * group, use that connection group to find the inpcb. Otherwise * fall back on a software hash -- or the reservation table if we're * using RSS. * * XXXRW: As above, that policy belongs in the pcbgroup code. */ if (in_pcbgroup_enabled(pcbinfo) && !(M_HASHTYPE_TEST(m, M_HASHTYPE_NONE))) { pcbgroup = in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), m->m_pkthdr.flowid); if (pcbgroup != NULL) return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); #ifndef RSS pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, fport); return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); #endif } #endif return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, lookupflags, ifp)); } #endif /* INET */ /* * Insert PCB onto various hash lists. */ static int in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update) { struct inpcbhead *pcbhash; struct inpcbporthead *pcbporthash; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbport *phd; u_int32_t hashkey_faddr; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); KASSERT((inp->inp_flags & INP_INHASHLIST) == 0, ("in_pcbinshash: INP_INHASHLIST")); #ifdef INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr); else #endif hashkey_faddr = inp->inp_faddr.s_addr; pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)]; pcbporthash = &pcbinfo->ipi_porthashbase[ INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)]; /* * Go through port list and look for a head for this lport. */ LIST_FOREACH(phd, pcbporthash, phd_hash) { if (phd->phd_port == inp->inp_lport) break; } /* * If none exists, malloc one and tack it on. */ if (phd == NULL) { phd = malloc(sizeof(struct inpcbport), M_PCB, M_NOWAIT); if (phd == NULL) { return (ENOBUFS); /* XXX */ } phd->phd_port = inp->inp_lport; LIST_INIT(&phd->phd_pcblist); LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); } inp->inp_phd = phd; LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); LIST_INSERT_HEAD(pcbhash, inp, inp_hash); inp->inp_flags |= INP_INHASHLIST; #ifdef PCBGROUP if (do_pcbgroup_update) in_pcbgroup_update(inp); #endif return (0); } /* * For now, there are two public interfaces to insert an inpcb into the hash * lists -- one that does update pcbgroups, and one that doesn't. The latter * is used only in the TCP syncache, where in_pcbinshash is called before the * full 4-tuple is set for the inpcb, and we don't want to install in the * pcbgroup until later. * * XXXRW: This seems like a misfeature. in_pcbinshash should always update * connection groups, and partially initialised inpcbs should not be exposed * to either reservation hash tables or pcbgroups. */ int in_pcbinshash(struct inpcb *inp) { return (in_pcbinshash_internal(inp, 1)); } int in_pcbinshash_nopcbgroup(struct inpcb *inp) { return (in_pcbinshash_internal(inp, 0)); } /* * Move PCB to the proper hash bucket when { faddr, fport } have been * changed. NOTE: This does not handle the case of the lport changing (the * hashed port list would have to be updated as well), so the lport must * not change after in_pcbinshash() has been called. */ void in_pcbrehash_mbuf(struct inpcb *inp, struct mbuf *m) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbhead *head; u_int32_t hashkey_faddr; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); KASSERT(inp->inp_flags & INP_INHASHLIST, ("in_pcbrehash: !INP_INHASHLIST")); #ifdef INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr); else #endif hashkey_faddr = inp->inp_faddr.s_addr; head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)]; LIST_REMOVE(inp, inp_hash); LIST_INSERT_HEAD(head, inp, inp_hash); #ifdef PCBGROUP if (m != NULL) in_pcbgroup_update_mbuf(inp, m); else in_pcbgroup_update(inp); #endif } void in_pcbrehash(struct inpcb *inp) { in_pcbrehash_mbuf(inp, NULL); } /* * Remove PCB from various lists. */ static void in_pcbremlists(struct inpcb *inp) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; #ifdef INVARIANTS if (pcbinfo == &V_tcbinfo) { INP_INFO_RLOCK_ASSERT(pcbinfo); } else { INP_INFO_WLOCK_ASSERT(pcbinfo); } #endif INP_WLOCK_ASSERT(inp); INP_LIST_WLOCK_ASSERT(pcbinfo); inp->inp_gencnt = ++pcbinfo->ipi_gencnt; if (inp->inp_flags & INP_INHASHLIST) { struct inpcbport *phd = inp->inp_phd; INP_HASH_WLOCK(pcbinfo); LIST_REMOVE(inp, inp_hash); LIST_REMOVE(inp, inp_portlist); if (LIST_FIRST(&phd->phd_pcblist) == NULL) { LIST_REMOVE(phd, phd_hash); free(phd, M_PCB); } INP_HASH_WUNLOCK(pcbinfo); inp->inp_flags &= ~INP_INHASHLIST; } LIST_REMOVE(inp, inp_list); pcbinfo->ipi_count--; #ifdef PCBGROUP in_pcbgroup_remove(inp); #endif } /* * A set label operation has occurred at the socket layer, propagate the * label change into the in_pcb for the socket. */ void in_pcbsosetlabel(struct socket *so) { #ifdef MAC struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL")); INP_WLOCK(inp); SOCK_LOCK(so); mac_inpcb_sosetlabel(so, inp); SOCK_UNLOCK(so); INP_WUNLOCK(inp); #endif } /* * ipport_tick runs once per second, determining if random port allocation * should be continued. If more than ipport_randomcps ports have been * allocated in the last second, then we return to sequential port * allocation. We return to random allocation only once we drop below * ipport_randomcps for at least ipport_randomtime seconds. */ static void ipport_tick(void *xtp) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS here */ if (V_ipport_tcpallocs <= V_ipport_tcplastcount + V_ipport_randomcps) { if (V_ipport_stoprandom > 0) V_ipport_stoprandom--; } else V_ipport_stoprandom = V_ipport_randomtime; V_ipport_tcplastcount = V_ipport_tcpallocs; CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL); } static void ip_fini(void *xtp) { callout_stop(&ipport_tick_callout); } /* * The ipport_callout should start running at about the time we attach the * inet or inet6 domains. */ static void ipport_tick_init(const void *unused __unused) { /* Start ipport_tick. */ callout_init(&ipport_tick_callout, 1); callout_reset(&ipport_tick_callout, 1, ipport_tick, NULL); EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL, SHUTDOWN_PRI_DEFAULT); } SYSINIT(ipport_tick_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ipport_tick_init, NULL); void inp_wlock(struct inpcb *inp) { INP_WLOCK(inp); } void inp_wunlock(struct inpcb *inp) { INP_WUNLOCK(inp); } void inp_rlock(struct inpcb *inp) { INP_RLOCK(inp); } void inp_runlock(struct inpcb *inp) { INP_RUNLOCK(inp); } #ifdef INVARIANTS void inp_lock_assert(struct inpcb *inp) { INP_WLOCK_ASSERT(inp); } void inp_unlock_assert(struct inpcb *inp) { INP_UNLOCK_ASSERT(inp); } #endif void inp_apply_all(void (*func)(struct inpcb *, void *), void *arg) { struct inpcb *inp; INP_INFO_WLOCK(&V_tcbinfo); LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) { INP_WLOCK(inp); func(inp, arg); INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(&V_tcbinfo); } struct socket * inp_inpcbtosocket(struct inpcb *inp) { INP_WLOCK_ASSERT(inp); return (inp->inp_socket); } struct tcpcb * inp_inpcbtotcpcb(struct inpcb *inp) { INP_WLOCK_ASSERT(inp); return ((struct tcpcb *)inp->inp_ppcb); } int inp_ip_tos_get(const struct inpcb *inp) { return (inp->inp_ip_tos); } void inp_ip_tos_set(struct inpcb *inp, int val) { inp->inp_ip_tos = val; } void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, uint32_t *faddr, uint16_t *fp) { INP_LOCK_ASSERT(inp); *laddr = inp->inp_laddr.s_addr; *faddr = inp->inp_faddr.s_addr; *lp = inp->inp_lport; *fp = inp->inp_fport; } struct inpcb * so_sotoinpcb(struct socket *so) { return (sotoinpcb(so)); } struct tcpcb * so_sototcpcb(struct socket *so) { return (sototcpcb(so)); } #ifdef DDB static void db_print_indent(int indent) { int i; for (i = 0; i < indent; i++) db_printf(" "); } static void db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent) { char faddr_str[48], laddr_str[48]; db_print_indent(indent); db_printf("%s at %p\n", name, inc); indent += 2; #ifdef INET6 if (inc->inc_flags & INC_ISIPV6) { /* IPv6. */ ip6_sprintf(laddr_str, &inc->inc6_laddr); ip6_sprintf(faddr_str, &inc->inc6_faddr); } else #endif { /* IPv4. */ inet_ntoa_r(inc->inc_laddr, laddr_str); inet_ntoa_r(inc->inc_faddr, faddr_str); } db_print_indent(indent); db_printf("inc_laddr %s inc_lport %u\n", laddr_str, ntohs(inc->inc_lport)); db_print_indent(indent); db_printf("inc_faddr %s inc_fport %u\n", faddr_str, ntohs(inc->inc_fport)); } static void db_print_inpflags(int inp_flags) { int comma; comma = 0; if (inp_flags & INP_RECVOPTS) { db_printf("%sINP_RECVOPTS", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_RECVRETOPTS) { db_printf("%sINP_RECVRETOPTS", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_RECVDSTADDR) { db_printf("%sINP_RECVDSTADDR", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_HDRINCL) { db_printf("%sINP_HDRINCL", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_HIGHPORT) { db_printf("%sINP_HIGHPORT", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_LOWPORT) { db_printf("%sINP_LOWPORT", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_ANONPORT) { db_printf("%sINP_ANONPORT", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_RECVIF) { db_printf("%sINP_RECVIF", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_MTUDISC) { db_printf("%sINP_MTUDISC", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_RECVTTL) { db_printf("%sINP_RECVTTL", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_DONTFRAG) { db_printf("%sINP_DONTFRAG", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_RECVTOS) { db_printf("%sINP_RECVTOS", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_IPV6_V6ONLY) { db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_PKTINFO) { db_printf("%sIN6P_PKTINFO", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_HOPLIMIT) { db_printf("%sIN6P_HOPLIMIT", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_HOPOPTS) { db_printf("%sIN6P_HOPOPTS", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_DSTOPTS) { db_printf("%sIN6P_DSTOPTS", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_RTHDR) { db_printf("%sIN6P_RTHDR", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_RTHDRDSTOPTS) { db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_TCLASS) { db_printf("%sIN6P_TCLASS", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_AUTOFLOWLABEL) { db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_TIMEWAIT) { db_printf("%sINP_TIMEWAIT", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_ONESBCAST) { db_printf("%sINP_ONESBCAST", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_DROPPED) { db_printf("%sINP_DROPPED", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_SOCKREF) { db_printf("%sINP_SOCKREF", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_RFC2292) { db_printf("%sIN6P_RFC2292", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_MTU) { db_printf("IN6P_MTU%s", comma ? ", " : ""); comma = 1; } } static void db_print_inpvflag(u_char inp_vflag) { int comma; comma = 0; if (inp_vflag & INP_IPV4) { db_printf("%sINP_IPV4", comma ? ", " : ""); comma = 1; } if (inp_vflag & INP_IPV6) { db_printf("%sINP_IPV6", comma ? ", " : ""); comma = 1; } if (inp_vflag & INP_IPV6PROTO) { db_printf("%sINP_IPV6PROTO", comma ? ", " : ""); comma = 1; } } static void db_print_inpcb(struct inpcb *inp, const char *name, int indent) { db_print_indent(indent); db_printf("%s at %p\n", name, inp); indent += 2; db_print_indent(indent); db_printf("inp_flow: 0x%x\n", inp->inp_flow); db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent); db_print_indent(indent); db_printf("inp_ppcb: %p inp_pcbinfo: %p inp_socket: %p\n", inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket); db_print_indent(indent); db_printf("inp_label: %p inp_flags: 0x%x (", inp->inp_label, inp->inp_flags); db_print_inpflags(inp->inp_flags); db_printf(")\n"); db_print_indent(indent); db_printf("inp_sp: %p inp_vflag: 0x%x (", inp->inp_sp, inp->inp_vflag); db_print_inpvflag(inp->inp_vflag); db_printf(")\n"); db_print_indent(indent); db_printf("inp_ip_ttl: %d inp_ip_p: %d inp_ip_minttl: %d\n", inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl); db_print_indent(indent); #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { db_printf("in6p_options: %p in6p_outputopts: %p " "in6p_moptions: %p\n", inp->in6p_options, inp->in6p_outputopts, inp->in6p_moptions); db_printf("in6p_icmp6filt: %p in6p_cksum %d " "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum, inp->in6p_hops); } else #endif { db_printf("inp_ip_tos: %d inp_ip_options: %p " "inp_ip_moptions: %p\n", inp->inp_ip_tos, inp->inp_options, inp->inp_moptions); } db_print_indent(indent); db_printf("inp_phd: %p inp_gencnt: %ju\n", inp->inp_phd, (uintmax_t)inp->inp_gencnt); } DB_SHOW_COMMAND(inpcb, db_show_inpcb) { struct inpcb *inp; if (!have_addr) { db_printf("usage: show inpcb \n"); return; } inp = (struct inpcb *)addr; db_print_inpcb(inp, "inpcb", 0); } #endif /* DDB */ Index: projects/clang380-import/sys/netinet/ip_encap.c =================================================================== --- projects/clang380-import/sys/netinet/ip_encap.c (revision 293686) +++ projects/clang380-import/sys/netinet/ip_encap.c (revision 293687) @@ -1,477 +1,479 @@ /* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * My grandfather said that there's a devil inside tunnelling technology... * * We have surprisingly many protocols that want packets with IP protocol * #4 or #41. Here's a list of protocols that want protocol #41: * RFC1933 configured tunnel * RFC1933 automatic tunnel * RFC2401 IPsec tunnel * RFC2473 IPv6 generic packet tunnelling * RFC2529 6over4 tunnel * mobile-ip6 (uses RFC2473) * RFC3056 6to4 tunnel * isatap tunnel * Here's a list of protocol that want protocol #4: * RFC1853 IPv4-in-IPv4 tunnelling * RFC2003 IPv4 encapsulation within IPv4 * RFC2344 reverse tunnelling for mobile-ip4 * RFC2401 IPsec tunnel * Well, what can I say. They impose different en/decapsulation mechanism * from each other, so they need separate protocol handler. The only one * we can easily determine by protocol # is IPsec, which always has * AH/ESP/IPComp header right after outer IP header. * * So, clearly good old protosw does not work for protocol #4 and #41. * The code will let you match protocol via src/dst address pair. */ /* XXX is M_NETADDR correct? */ #include __FBSDID("$FreeBSD$"); #include "opt_mrouting.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include +#include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif #include #include #include static MALLOC_DEFINE(M_NETADDR, "encap_export_host", "Export host address structure"); static void encap_add(struct encaptab *); static int mask_match(const struct encaptab *, const struct sockaddr *, const struct sockaddr *); static void encap_fillarg(struct mbuf *, void *); /* * All global variables in ip_encap.c are locked using encapmtx. */ static struct mtx encapmtx; MTX_SYSINIT(encapmtx, &encapmtx, "encapmtx", MTX_DEF); static LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(encaptab); /* * We currently keey encap_init() for source code compatibility reasons -- * it's referenced by KAME pieces in netinet6. */ void encap_init(void) { } #ifdef INET int encap4_input(struct mbuf **mp, int *offp, int proto) { struct ip *ip; struct mbuf *m; struct sockaddr_in s, d; const struct protosw *psw; struct encaptab *ep, *match; void *arg; int matchprio, off, prio; m = *mp; off = *offp; ip = mtod(m, struct ip *); bzero(&s, sizeof(s)); s.sin_family = AF_INET; s.sin_len = sizeof(struct sockaddr_in); s.sin_addr = ip->ip_src; bzero(&d, sizeof(d)); d.sin_family = AF_INET; d.sin_len = sizeof(struct sockaddr_in); d.sin_addr = ip->ip_dst; arg = NULL; psw = NULL; match = NULL; matchprio = 0; mtx_lock(&encapmtx); LIST_FOREACH(ep, &encaptab, chain) { if (ep->af != AF_INET) continue; if (ep->proto >= 0 && ep->proto != proto) continue; if (ep->func) prio = (*ep->func)(m, off, proto, ep->arg); else { /* * it's inbound traffic, we need to match in reverse * order */ prio = mask_match(ep, (struct sockaddr *)&d, (struct sockaddr *)&s); } /* * We prioritize the matches by using bit length of the * matches. mask_match() and user-supplied matching function * should return the bit length of the matches (for example, * if both src/dst are matched for IPv4, 64 should be returned). * 0 or negative return value means "it did not match". * * The question is, since we have two "mask" portion, we * cannot really define total order between entries. * For example, which of these should be preferred? * mask_match() returns 48 (32 + 16) for both of them. * src=3ffe::/16, dst=3ffe:501::/32 * src=3ffe:501::/32, dst=3ffe::/16 * * We need to loop through all the possible candidates * to get the best match - the search takes O(n) for * n attachments (i.e. interfaces). */ if (prio <= 0) continue; if (prio > matchprio) { matchprio = prio; match = ep; } } if (match != NULL) { psw = match->psw; arg = match->arg; } mtx_unlock(&encapmtx); if (match != NULL) { /* found a match, "match" has the best one */ if (psw != NULL && psw->pr_input != NULL) { encap_fillarg(m, arg); (*psw->pr_input)(mp, offp, proto); } else m_freem(m); return (IPPROTO_DONE); } /* last resort: inject to raw socket */ return (rip_input(mp, offp, proto)); } #endif #ifdef INET6 int encap6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip6_hdr *ip6; struct sockaddr_in6 s, d; const struct protosw *psw; struct encaptab *ep, *match; void *arg; int prio, matchprio; ip6 = mtod(m, struct ip6_hdr *); bzero(&s, sizeof(s)); s.sin6_family = AF_INET6; s.sin6_len = sizeof(struct sockaddr_in6); s.sin6_addr = ip6->ip6_src; bzero(&d, sizeof(d)); d.sin6_family = AF_INET6; d.sin6_len = sizeof(struct sockaddr_in6); d.sin6_addr = ip6->ip6_dst; arg = NULL; psw = NULL; match = NULL; matchprio = 0; mtx_lock(&encapmtx); LIST_FOREACH(ep, &encaptab, chain) { if (ep->af != AF_INET6) continue; if (ep->proto >= 0 && ep->proto != proto) continue; if (ep->func) prio = (*ep->func)(m, *offp, proto, ep->arg); else { /* * it's inbound traffic, we need to match in reverse * order */ prio = mask_match(ep, (struct sockaddr *)&d, (struct sockaddr *)&s); } /* see encap4_input() for issues here */ if (prio <= 0) continue; if (prio > matchprio) { matchprio = prio; match = ep; } } if (match != NULL) { psw = match->psw; arg = match->arg; } mtx_unlock(&encapmtx); if (match != NULL) { /* found a match */ if (psw != NULL && psw->pr_input != NULL) { encap_fillarg(m, arg); return (*psw->pr_input)(mp, offp, proto); } else { m_freem(m); return (IPPROTO_DONE); } } /* last resort: inject to raw socket */ return rip6_input(mp, offp, proto); } #endif /*lint -sem(encap_add, custodial(1)) */ static void encap_add(struct encaptab *ep) { mtx_assert(&encapmtx, MA_OWNED); LIST_INSERT_HEAD(&encaptab, ep, chain); } /* * sp (src ptr) is always my side, and dp (dst ptr) is always remote side. * length of mask (sm and dm) is assumed to be same as sp/dp. * Return value will be necessary as input (cookie) for encap_detach(). */ const struct encaptab * encap_attach(int af, int proto, const struct sockaddr *sp, const struct sockaddr *sm, const struct sockaddr *dp, const struct sockaddr *dm, const struct protosw *psw, void *arg) { struct encaptab *ep; /* sanity check on args */ if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst)) return (NULL); if (sp->sa_len != dp->sa_len) return (NULL); if (af != sp->sa_family || af != dp->sa_family) return (NULL); /* check if anyone have already attached with exactly same config */ mtx_lock(&encapmtx); LIST_FOREACH(ep, &encaptab, chain) { if (ep->af != af) continue; if (ep->proto != proto) continue; if (ep->src.ss_len != sp->sa_len || bcmp(&ep->src, sp, sp->sa_len) != 0 || bcmp(&ep->srcmask, sm, sp->sa_len) != 0) continue; if (ep->dst.ss_len != dp->sa_len || bcmp(&ep->dst, dp, dp->sa_len) != 0 || bcmp(&ep->dstmask, dm, dp->sa_len) != 0) continue; mtx_unlock(&encapmtx); return (NULL); } ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/ if (ep == NULL) { mtx_unlock(&encapmtx); return (NULL); } bzero(ep, sizeof(*ep)); ep->af = af; ep->proto = proto; bcopy(sp, &ep->src, sp->sa_len); bcopy(sm, &ep->srcmask, sp->sa_len); bcopy(dp, &ep->dst, dp->sa_len); bcopy(dm, &ep->dstmask, dp->sa_len); ep->psw = psw; ep->arg = arg; encap_add(ep); mtx_unlock(&encapmtx); return (ep); } const struct encaptab * encap_attach_func(int af, int proto, int (*func)(const struct mbuf *, int, int, void *), const struct protosw *psw, void *arg) { struct encaptab *ep; /* sanity check on args */ if (!func) return (NULL); ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/ if (ep == NULL) return (NULL); bzero(ep, sizeof(*ep)); ep->af = af; ep->proto = proto; ep->func = func; ep->psw = psw; ep->arg = arg; mtx_lock(&encapmtx); encap_add(ep); mtx_unlock(&encapmtx); return (ep); } int encap_detach(const struct encaptab *cookie) { const struct encaptab *ep = cookie; struct encaptab *p; mtx_lock(&encapmtx); LIST_FOREACH(p, &encaptab, chain) { if (p == ep) { LIST_REMOVE(p, chain); mtx_unlock(&encapmtx); free(p, M_NETADDR); /*XXX*/ return 0; } } mtx_unlock(&encapmtx); return EINVAL; } static int mask_match(const struct encaptab *ep, const struct sockaddr *sp, const struct sockaddr *dp) { struct sockaddr_storage s; struct sockaddr_storage d; int i; const u_int8_t *p, *q; u_int8_t *r; int matchlen; if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d)) return 0; if (sp->sa_family != ep->af || dp->sa_family != ep->af) return 0; if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len) return 0; matchlen = 0; p = (const u_int8_t *)sp; q = (const u_int8_t *)&ep->srcmask; r = (u_int8_t *)&s; for (i = 0 ; i < sp->sa_len; i++) { r[i] = p[i] & q[i]; /* XXX estimate */ matchlen += (q[i] ? 8 : 0); } p = (const u_int8_t *)dp; q = (const u_int8_t *)&ep->dstmask; r = (u_int8_t *)&d; for (i = 0 ; i < dp->sa_len; i++) { r[i] = p[i] & q[i]; /* XXX rough estimate */ matchlen += (q[i] ? 8 : 0); } /* need to overwrite len/family portion as we don't compare them */ s.ss_len = sp->sa_len; s.ss_family = sp->sa_family; d.ss_len = dp->sa_len; d.ss_family = dp->sa_family; if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 && bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) { return matchlen; } else return 0; } static void encap_fillarg(struct mbuf *m, void *arg) { struct m_tag *tag; if (arg != NULL) { tag = m_tag_get(PACKET_TAG_ENCAP, sizeof(void *), M_NOWAIT); if (tag != NULL) { *(void**)(tag+1) = arg; m_tag_prepend(m, tag); } } } void * encap_getarg(struct mbuf *m) { void *p = NULL; struct m_tag *tag; tag = m_tag_find(m, PACKET_TAG_ENCAP, NULL); if (tag) { p = *(void**)(tag+1); m_tag_delete(m, tag); } return p; } Index: projects/clang380-import/sys/netinet/ip_mroute.c =================================================================== --- projects/clang380-import/sys/netinet/ip_mroute.c (revision 293686) +++ projects/clang380-import/sys/netinet/ip_mroute.c (revision 293687) @@ -1,2948 +1,2949 @@ /*- * Copyright (c) 1989 Stephen Deering * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Stephen Deering of Stanford University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93 */ /* * IP multicast forwarding procedures * * Written by David Waitzman, BBN Labs, August 1988. * Modified by Steve Deering, Stanford, February 1989. * Modified by Mark J. Steiglitz, Stanford, May, 1991 * Modified by Van Jacobson, LBL, January 1993 * Modified by Ajit Thyagarajan, PARC, August 1993 * Modified by Bill Fenner, PARC, April 1995 * Modified by Ahmed Helmy, SGI, June 1996 * Modified by George Edmond Eddy (Rusty), ISI, February 1998 * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000 * Modified by Hitoshi Asaeda, WIDE, August 2000 * Modified by Pavlin Radoslavov, ICSI, October 2002 * * MROUTING Revision: 3.5 * and PIM-SMv2 and PIM-DM support, advanced API support, * bandwidth metering and signaling */ /* * TODO: Prefix functions with ipmf_. * TODO: Maintain a refcount on if_allmulti() in ifnet or in the protocol * domain attachment (if_afdata) so we can track consumers of that service. * TODO: Deprecate routing socket path for SIOCGETSGCNT and SIOCGETVIFCNT, * move it to socket options. * TODO: Cleanup LSRR removal further. * TODO: Push RSVP stubs into raw_ip.c. * TODO: Use bitstring.h for vif set. * TODO: Fix mrt6_ioctl dangling ref when dynamically loaded. * TODO: Sync ip6_mroute.c with this file. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_mrouting.h" #define _PIM_VT 1 #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef KTR_IPMF #define KTR_IPMF KTR_INET #endif #define VIFI_INVALID ((vifi_t) -1) static VNET_DEFINE(uint32_t, last_tv_sec); /* last time we processed this */ #define V_last_tv_sec VNET(last_tv_sec) static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast forwarding cache"); /* * Locking. We use two locks: one for the virtual interface table and * one for the forwarding table. These locks may be nested in which case * the VIF lock must always be taken first. Note that each lock is used * to cover not only the specific data structure but also related data * structures. */ static struct mtx mrouter_mtx; #define MROUTER_LOCK() mtx_lock(&mrouter_mtx) #define MROUTER_UNLOCK() mtx_unlock(&mrouter_mtx) #define MROUTER_LOCK_ASSERT() mtx_assert(&mrouter_mtx, MA_OWNED) #define MROUTER_LOCK_INIT() \ mtx_init(&mrouter_mtx, "IPv4 multicast forwarding", NULL, MTX_DEF) #define MROUTER_LOCK_DESTROY() mtx_destroy(&mrouter_mtx) static int ip_mrouter_cnt; /* # of vnets with active mrouters */ static int ip_mrouter_unloading; /* Allow no more V_ip_mrouter sockets */ static VNET_PCPUSTAT_DEFINE(struct mrtstat, mrtstat); VNET_PCPUSTAT_SYSINIT(mrtstat); VNET_PCPUSTAT_SYSUNINIT(mrtstat); SYSCTL_VNET_PCPUSTAT(_net_inet_ip, OID_AUTO, mrtstat, struct mrtstat, mrtstat, "IPv4 Multicast Forwarding Statistics (struct mrtstat, " "netinet/ip_mroute.h)"); static VNET_DEFINE(u_long, mfchash); #define V_mfchash VNET(mfchash) #define MFCHASH(a, g) \ ((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \ ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & V_mfchash) #define MFCHASHSIZE 256 static u_long mfchashsize; /* Hash size */ static VNET_DEFINE(u_char *, nexpire); /* 0..mfchashsize-1 */ #define V_nexpire VNET(nexpire) static VNET_DEFINE(LIST_HEAD(mfchashhdr, mfc)*, mfchashtbl); #define V_mfchashtbl VNET(mfchashtbl) static struct mtx mfc_mtx; #define MFC_LOCK() mtx_lock(&mfc_mtx) #define MFC_UNLOCK() mtx_unlock(&mfc_mtx) #define MFC_LOCK_ASSERT() mtx_assert(&mfc_mtx, MA_OWNED) #define MFC_LOCK_INIT() \ mtx_init(&mfc_mtx, "IPv4 multicast forwarding cache", NULL, MTX_DEF) #define MFC_LOCK_DESTROY() mtx_destroy(&mfc_mtx) static VNET_DEFINE(vifi_t, numvifs); #define V_numvifs VNET(numvifs) static VNET_DEFINE(struct vif, viftable[MAXVIFS]); #define V_viftable VNET(viftable) SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(viftable), sizeof(V_viftable), "S,vif[MAXVIFS]", "IPv4 Multicast Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)"); static struct mtx vif_mtx; #define VIF_LOCK() mtx_lock(&vif_mtx) #define VIF_UNLOCK() mtx_unlock(&vif_mtx) #define VIF_LOCK_ASSERT() mtx_assert(&vif_mtx, MA_OWNED) #define VIF_LOCK_INIT() \ mtx_init(&vif_mtx, "IPv4 multicast interfaces", NULL, MTX_DEF) #define VIF_LOCK_DESTROY() mtx_destroy(&vif_mtx) static eventhandler_tag if_detach_event_tag = NULL; static VNET_DEFINE(struct callout, expire_upcalls_ch); #define V_expire_upcalls_ch VNET(expire_upcalls_ch) #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ #define UPCALL_EXPIRE 6 /* number of timeouts */ /* * Bandwidth meter variables and constants */ static MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters"); /* * Pending timeouts are stored in a hash table, the key being the * expiration time. Periodically, the entries are analysed and processed. */ #define BW_METER_BUCKETS 1024 static VNET_DEFINE(struct bw_meter*, bw_meter_timers[BW_METER_BUCKETS]); #define V_bw_meter_timers VNET(bw_meter_timers) static VNET_DEFINE(struct callout, bw_meter_ch); #define V_bw_meter_ch VNET(bw_meter_ch) #define BW_METER_PERIOD (hz) /* periodical handling of bw meters */ /* * Pending upcalls are stored in a vector which is flushed when * full, or periodically */ static VNET_DEFINE(struct bw_upcall, bw_upcalls[BW_UPCALLS_MAX]); #define V_bw_upcalls VNET(bw_upcalls) static VNET_DEFINE(u_int, bw_upcalls_n); /* # of pending upcalls */ #define V_bw_upcalls_n VNET(bw_upcalls_n) static VNET_DEFINE(struct callout, bw_upcalls_ch); #define V_bw_upcalls_ch VNET(bw_upcalls_ch) #define BW_UPCALLS_PERIOD (hz) /* periodical flush of bw upcalls */ static VNET_PCPUSTAT_DEFINE(struct pimstat, pimstat); VNET_PCPUSTAT_SYSINIT(pimstat); VNET_PCPUSTAT_SYSUNINIT(pimstat); SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM"); SYSCTL_VNET_PCPUSTAT(_net_inet_pim, PIMCTL_STATS, stats, struct pimstat, pimstat, "PIM Statistics (struct pimstat, netinet/pim_var.h)"); static u_long pim_squelch_wholepkt = 0; SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW, &pim_squelch_wholepkt, 0, "Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified"); extern struct domain inetdomain; static const struct protosw in_pim_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_PIM, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = pim_input, .pr_output = rip_output, .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }; static const struct encaptab *pim_encap_cookie; static int pim_encapcheck(const struct mbuf *, int, int, void *); /* * Note: the PIM Register encapsulation adds the following in front of a * data packet: * * struct pim_encap_hdr { * struct ip ip; * struct pim_encap_pimhdr pim; * } * */ struct pim_encap_pimhdr { struct pim pim; uint32_t flags; }; #define PIM_ENCAP_TTL 64 static struct ip pim_encap_iphdr = { #if BYTE_ORDER == LITTLE_ENDIAN sizeof(struct ip) >> 2, IPVERSION, #else IPVERSION, sizeof(struct ip) >> 2, #endif 0, /* tos */ sizeof(struct ip), /* total length */ 0, /* id */ 0, /* frag offset */ PIM_ENCAP_TTL, IPPROTO_PIM, 0, /* checksum */ }; static struct pim_encap_pimhdr pim_encap_pimhdr = { { PIM_MAKE_VT(PIM_VERSION, PIM_REGISTER), /* PIM vers and message type */ 0, /* reserved */ 0, /* checksum */ }, 0 /* flags */ }; static VNET_DEFINE(vifi_t, reg_vif_num) = VIFI_INVALID; #define V_reg_vif_num VNET(reg_vif_num) static VNET_DEFINE(struct ifnet, multicast_register_if); #define V_multicast_register_if VNET(multicast_register_if) /* * Private variables. */ static u_long X_ip_mcast_src(int); static int X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *); static int X_ip_mrouter_done(void); static int X_ip_mrouter_get(struct socket *, struct sockopt *); static int X_ip_mrouter_set(struct socket *, struct sockopt *); static int X_legal_vif_num(int); static int X_mrt_ioctl(u_long, caddr_t, int); static int add_bw_upcall(struct bw_upcall *); static int add_mfc(struct mfcctl2 *); static int add_vif(struct vifctl *); static void bw_meter_prepare_upcall(struct bw_meter *, struct timeval *); static void bw_meter_process(void); static void bw_meter_receive_packet(struct bw_meter *, int, struct timeval *); static void bw_upcalls_send(void); static int del_bw_upcall(struct bw_upcall *); static int del_mfc(struct mfcctl2 *); static int del_vif(vifi_t); static int del_vif_locked(vifi_t); static void expire_bw_meter_process(void *); static void expire_bw_upcalls_send(void *); static void expire_mfc(struct mfc *); static void expire_upcalls(void *); static void free_bw_list(struct bw_meter *); static int get_sg_cnt(struct sioc_sg_req *); static int get_vif_cnt(struct sioc_vif_req *); static void if_detached_event(void *, struct ifnet *); static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t); static int ip_mrouter_init(struct socket *, int); static __inline struct mfc * mfc_find(struct in_addr *, struct in_addr *); static void phyint_send(struct ip *, struct vif *, struct mbuf *); static struct mbuf * pim_register_prepare(struct ip *, struct mbuf *); static int pim_register_send(struct ip *, struct vif *, struct mbuf *, struct mfc *); static int pim_register_send_rp(struct ip *, struct vif *, struct mbuf *, struct mfc *); static int pim_register_send_upcall(struct ip *, struct vif *, struct mbuf *, struct mfc *); static void schedule_bw_meter(struct bw_meter *, struct timeval *); static void send_packet(struct vif *, struct mbuf *); static int set_api_config(uint32_t *); static int set_assert(int); static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *); static void unschedule_bw_meter(struct bw_meter *); /* * Kernel multicast forwarding API capabilities and setup. * If more API capabilities are added to the kernel, they should be * recorded in `mrt_api_support'. */ #define MRT_API_VERSION 0x0305 static const int mrt_api_version = MRT_API_VERSION; static const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF | MRT_MFC_FLAGS_BORDER_VIF | MRT_MFC_RP | MRT_MFC_BW_UPCALL); static VNET_DEFINE(uint32_t, mrt_api_config); #define V_mrt_api_config VNET(mrt_api_config) static VNET_DEFINE(int, pim_assert_enabled); #define V_pim_assert_enabled VNET(pim_assert_enabled) static struct timeval pim_assert_interval = { 3, 0 }; /* Rate limit */ /* * Find a route for a given origin IP address and multicast group address. * Statistics must be updated by the caller. */ static __inline struct mfc * mfc_find(struct in_addr *o, struct in_addr *g) { struct mfc *rt; MFC_LOCK_ASSERT(); LIST_FOREACH(rt, &V_mfchashtbl[MFCHASH(*o, *g)], mfc_hash) { if (in_hosteq(rt->mfc_origin, *o) && in_hosteq(rt->mfc_mcastgrp, *g) && TAILQ_EMPTY(&rt->mfc_stall)) break; } return (rt); } /* * Handle MRT setsockopt commands to modify the multicast forwarding tables. */ static int X_ip_mrouter_set(struct socket *so, struct sockopt *sopt) { int error, optval; vifi_t vifi; struct vifctl vifc; struct mfcctl2 mfc; struct bw_upcall bw_upcall; uint32_t i; if (so != V_ip_mrouter && sopt->sopt_name != MRT_INIT) return EPERM; error = 0; switch (sopt->sopt_name) { case MRT_INIT: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; error = ip_mrouter_init(so, optval); break; case MRT_DONE: error = ip_mrouter_done(); break; case MRT_ADD_VIF: error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc); if (error) break; error = add_vif(&vifc); break; case MRT_DEL_VIF: error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); if (error) break; error = del_vif(vifi); break; case MRT_ADD_MFC: case MRT_DEL_MFC: /* * select data size depending on API version. */ if (sopt->sopt_name == MRT_ADD_MFC && V_mrt_api_config & MRT_API_FLAGS_ALL) { error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl2), sizeof(struct mfcctl2)); } else { error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl), sizeof(struct mfcctl)); bzero((caddr_t)&mfc + sizeof(struct mfcctl), sizeof(mfc) - sizeof(struct mfcctl)); } if (error) break; if (sopt->sopt_name == MRT_ADD_MFC) error = add_mfc(&mfc); else error = del_mfc(&mfc); break; case MRT_ASSERT: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; set_assert(optval); break; case MRT_API_CONFIG: error = sooptcopyin(sopt, &i, sizeof i, sizeof i); if (!error) error = set_api_config(&i); if (!error) error = sooptcopyout(sopt, &i, sizeof i); break; case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = sooptcopyin(sopt, &bw_upcall, sizeof bw_upcall, sizeof bw_upcall); if (error) break; if (sopt->sopt_name == MRT_ADD_BW_UPCALL) error = add_bw_upcall(&bw_upcall); else error = del_bw_upcall(&bw_upcall); break; default: error = EOPNOTSUPP; break; } return error; } /* * Handle MRT getsockopt commands */ static int X_ip_mrouter_get(struct socket *so, struct sockopt *sopt) { int error; switch (sopt->sopt_name) { case MRT_VERSION: error = sooptcopyout(sopt, &mrt_api_version, sizeof mrt_api_version); break; case MRT_ASSERT: error = sooptcopyout(sopt, &V_pim_assert_enabled, sizeof V_pim_assert_enabled); break; case MRT_API_SUPPORT: error = sooptcopyout(sopt, &mrt_api_support, sizeof mrt_api_support); break; case MRT_API_CONFIG: error = sooptcopyout(sopt, &V_mrt_api_config, sizeof V_mrt_api_config); break; default: error = EOPNOTSUPP; break; } return error; } /* * Handle ioctl commands to obtain information from the cache */ static int X_mrt_ioctl(u_long cmd, caddr_t data, int fibnum __unused) { int error = 0; /* * Currently the only function calling this ioctl routine is rtioctl(). * Typically, only root can create the raw socket in order to execute * this ioctl method, however the request might be coming from a prison */ error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error) return (error); switch (cmd) { case (SIOCGETVIFCNT): error = get_vif_cnt((struct sioc_vif_req *)data); break; case (SIOCGETSGCNT): error = get_sg_cnt((struct sioc_sg_req *)data); break; default: error = EINVAL; break; } return error; } /* * returns the packet, byte, rpf-failure count for the source group provided */ static int get_sg_cnt(struct sioc_sg_req *req) { struct mfc *rt; MFC_LOCK(); rt = mfc_find(&req->src, &req->grp); if (rt == NULL) { MFC_UNLOCK(); req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; return EADDRNOTAVAIL; } req->pktcnt = rt->mfc_pkt_cnt; req->bytecnt = rt->mfc_byte_cnt; req->wrong_if = rt->mfc_wrong_if; MFC_UNLOCK(); return 0; } /* * returns the input and output packet and byte counts on the vif provided */ static int get_vif_cnt(struct sioc_vif_req *req) { vifi_t vifi = req->vifi; VIF_LOCK(); if (vifi >= V_numvifs) { VIF_UNLOCK(); return EINVAL; } req->icount = V_viftable[vifi].v_pkt_in; req->ocount = V_viftable[vifi].v_pkt_out; req->ibytes = V_viftable[vifi].v_bytes_in; req->obytes = V_viftable[vifi].v_bytes_out; VIF_UNLOCK(); return 0; } static void if_detached_event(void *arg __unused, struct ifnet *ifp) { vifi_t vifi; u_long i; MROUTER_LOCK(); if (V_ip_mrouter == NULL) { MROUTER_UNLOCK(); return; } VIF_LOCK(); MFC_LOCK(); /* * Tear down multicast forwarder state associated with this ifnet. * 1. Walk the vif list, matching vifs against this ifnet. * 2. Walk the multicast forwarding cache (mfc) looking for * inner matches with this vif's index. * 3. Expire any matching multicast forwarding cache entries. * 4. Free vif state. This should disable ALLMULTI on the interface. */ for (vifi = 0; vifi < V_numvifs; vifi++) { if (V_viftable[vifi].v_ifp != ifp) continue; for (i = 0; i < mfchashsize; i++) { struct mfc *rt, *nrt; LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) { if (rt->mfc_parent == vifi) { expire_mfc(rt); } } } del_vif_locked(vifi); } MFC_UNLOCK(); VIF_UNLOCK(); MROUTER_UNLOCK(); } /* * Enable multicast forwarding. */ static int ip_mrouter_init(struct socket *so, int version) { CTR3(KTR_IPMF, "%s: so_type %d, pr_protocol %d", __func__, so->so_type, so->so_proto->pr_protocol); if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP; if (version != 1) return ENOPROTOOPT; MROUTER_LOCK(); if (ip_mrouter_unloading) { MROUTER_UNLOCK(); return ENOPROTOOPT; } if (V_ip_mrouter != NULL) { MROUTER_UNLOCK(); return EADDRINUSE; } V_mfchashtbl = hashinit_flags(mfchashsize, M_MRTABLE, &V_mfchash, HASH_NOWAIT); callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, curvnet); callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send, curvnet); callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process, curvnet); V_ip_mrouter = so; ip_mrouter_cnt++; MROUTER_UNLOCK(); CTR1(KTR_IPMF, "%s: done", __func__); return 0; } /* * Disable multicast forwarding. */ static int X_ip_mrouter_done(void) { struct ifnet *ifp; u_long i; vifi_t vifi; MROUTER_LOCK(); if (V_ip_mrouter == NULL) { MROUTER_UNLOCK(); return EINVAL; } /* * Detach/disable hooks to the reset of the system. */ V_ip_mrouter = NULL; ip_mrouter_cnt--; V_mrt_api_config = 0; VIF_LOCK(); /* * For each phyint in use, disable promiscuous reception of all IP * multicasts. */ for (vifi = 0; vifi < V_numvifs; vifi++) { if (!in_nullhost(V_viftable[vifi].v_lcl_addr) && !(V_viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { ifp = V_viftable[vifi].v_ifp; if_allmulti(ifp, 0); } } bzero((caddr_t)V_viftable, sizeof(V_viftable)); V_numvifs = 0; V_pim_assert_enabled = 0; VIF_UNLOCK(); callout_stop(&V_expire_upcalls_ch); callout_stop(&V_bw_upcalls_ch); callout_stop(&V_bw_meter_ch); MFC_LOCK(); /* * Free all multicast forwarding cache entries. * Do not use hashdestroy(), as we must perform other cleanup. */ for (i = 0; i < mfchashsize; i++) { struct mfc *rt, *nrt; LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) { expire_mfc(rt); } } free(V_mfchashtbl, M_MRTABLE); V_mfchashtbl = NULL; bzero(V_nexpire, sizeof(V_nexpire[0]) * mfchashsize); V_bw_upcalls_n = 0; bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers)); MFC_UNLOCK(); V_reg_vif_num = VIFI_INVALID; MROUTER_UNLOCK(); CTR1(KTR_IPMF, "%s: done", __func__); return 0; } /* * Set PIM assert processing global */ static int set_assert(int i) { if ((i != 1) && (i != 0)) return EINVAL; V_pim_assert_enabled = i; return 0; } /* * Configure API capabilities */ int set_api_config(uint32_t *apival) { u_long i; /* * We can set the API capabilities only if it is the first operation * after MRT_INIT. I.e.: * - there are no vifs installed * - pim_assert is not enabled * - the MFC table is empty */ if (V_numvifs > 0) { *apival = 0; return EPERM; } if (V_pim_assert_enabled) { *apival = 0; return EPERM; } MFC_LOCK(); for (i = 0; i < mfchashsize; i++) { if (LIST_FIRST(&V_mfchashtbl[i]) != NULL) { MFC_UNLOCK(); *apival = 0; return EPERM; } } MFC_UNLOCK(); V_mrt_api_config = *apival & mrt_api_support; *apival = V_mrt_api_config; return 0; } /* * Add a vif to the vif table */ static int add_vif(struct vifctl *vifcp) { struct vif *vifp = V_viftable + vifcp->vifc_vifi; struct sockaddr_in sin = {sizeof sin, AF_INET}; struct ifaddr *ifa; struct ifnet *ifp; int error; VIF_LOCK(); if (vifcp->vifc_vifi >= MAXVIFS) { VIF_UNLOCK(); return EINVAL; } /* rate limiting is no longer supported by this code */ if (vifcp->vifc_rate_limit != 0) { log(LOG_ERR, "rate limiting is no longer supported\n"); VIF_UNLOCK(); return EINVAL; } if (!in_nullhost(vifp->v_lcl_addr)) { VIF_UNLOCK(); return EADDRINUSE; } if (in_nullhost(vifcp->vifc_lcl_addr)) { VIF_UNLOCK(); return EADDRNOTAVAIL; } /* Find the interface with an address in AF_INET family */ if (vifcp->vifc_flags & VIFF_REGISTER) { /* * XXX: Because VIFF_REGISTER does not really need a valid * local interface (e.g. it could be 127.0.0.2), we don't * check its address. */ ifp = NULL; } else { sin.sin_addr = vifcp->vifc_lcl_addr; ifa = ifa_ifwithaddr((struct sockaddr *)&sin); if (ifa == NULL) { VIF_UNLOCK(); return EADDRNOTAVAIL; } ifp = ifa->ifa_ifp; ifa_free(ifa); } if ((vifcp->vifc_flags & VIFF_TUNNEL) != 0) { CTR1(KTR_IPMF, "%s: tunnels are no longer supported", __func__); VIF_UNLOCK(); return EOPNOTSUPP; } else if (vifcp->vifc_flags & VIFF_REGISTER) { ifp = &V_multicast_register_if; CTR2(KTR_IPMF, "%s: add register vif for ifp %p", __func__, ifp); if (V_reg_vif_num == VIFI_INVALID) { if_initname(&V_multicast_register_if, "register_vif", 0); V_multicast_register_if.if_flags = IFF_LOOPBACK; V_reg_vif_num = vifcp->vifc_vifi; } } else { /* Make sure the interface supports multicast */ if ((ifp->if_flags & IFF_MULTICAST) == 0) { VIF_UNLOCK(); return EOPNOTSUPP; } /* Enable promiscuous reception of all IP multicasts from the if */ error = if_allmulti(ifp, 1); if (error) { VIF_UNLOCK(); return error; } } vifp->v_flags = vifcp->vifc_flags; vifp->v_threshold = vifcp->vifc_threshold; vifp->v_lcl_addr = vifcp->vifc_lcl_addr; vifp->v_rmt_addr = vifcp->vifc_rmt_addr; vifp->v_ifp = ifp; /* initialize per vif pkt counters */ vifp->v_pkt_in = 0; vifp->v_pkt_out = 0; vifp->v_bytes_in = 0; vifp->v_bytes_out = 0; /* Adjust numvifs up if the vifi is higher than numvifs */ if (V_numvifs <= vifcp->vifc_vifi) V_numvifs = vifcp->vifc_vifi + 1; VIF_UNLOCK(); CTR4(KTR_IPMF, "%s: add vif %d laddr %s thresh %x", __func__, (int)vifcp->vifc_vifi, inet_ntoa(vifcp->vifc_lcl_addr), (int)vifcp->vifc_threshold); return 0; } /* * Delete a vif from the vif table */ static int del_vif_locked(vifi_t vifi) { struct vif *vifp; VIF_LOCK_ASSERT(); if (vifi >= V_numvifs) { return EINVAL; } vifp = &V_viftable[vifi]; if (in_nullhost(vifp->v_lcl_addr)) { return EADDRNOTAVAIL; } if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) if_allmulti(vifp->v_ifp, 0); if (vifp->v_flags & VIFF_REGISTER) V_reg_vif_num = VIFI_INVALID; bzero((caddr_t)vifp, sizeof (*vifp)); CTR2(KTR_IPMF, "%s: delete vif %d", __func__, (int)vifi); /* Adjust numvifs down */ for (vifi = V_numvifs; vifi > 0; vifi--) if (!in_nullhost(V_viftable[vifi-1].v_lcl_addr)) break; V_numvifs = vifi; return 0; } static int del_vif(vifi_t vifi) { int cc; VIF_LOCK(); cc = del_vif_locked(vifi); VIF_UNLOCK(); return cc; } /* * update an mfc entry without resetting counters and S,G addresses. */ static void update_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) { int i; rt->mfc_parent = mfccp->mfcc_parent; for (i = 0; i < V_numvifs; i++) { rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; rt->mfc_flags[i] = mfccp->mfcc_flags[i] & V_mrt_api_config & MRT_MFC_FLAGS_ALL; } /* set the RP address */ if (V_mrt_api_config & MRT_MFC_RP) rt->mfc_rp = mfccp->mfcc_rp; else rt->mfc_rp.s_addr = INADDR_ANY; } /* * fully initialize an mfc entry from the parameter. */ static void init_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) { rt->mfc_origin = mfccp->mfcc_origin; rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; update_mfc_params(rt, mfccp); /* initialize pkt counters per src-grp */ rt->mfc_pkt_cnt = 0; rt->mfc_byte_cnt = 0; rt->mfc_wrong_if = 0; timevalclear(&rt->mfc_last_assert); } static void expire_mfc(struct mfc *rt) { struct rtdetq *rte, *nrte; MFC_LOCK_ASSERT(); free_bw_list(rt->mfc_bw_meter); TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) { m_freem(rte->m); TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link); free(rte, M_MRTABLE); } LIST_REMOVE(rt, mfc_hash); free(rt, M_MRTABLE); } /* * Add an mfc entry */ static int add_mfc(struct mfcctl2 *mfccp) { struct mfc *rt; struct rtdetq *rte, *nrte; u_long hash = 0; u_short nstl; VIF_LOCK(); MFC_LOCK(); rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp); /* If an entry already exists, just update the fields */ if (rt) { CTR4(KTR_IPMF, "%s: update mfc orig %s group %lx parent %x", __func__, inet_ntoa(mfccp->mfcc_origin), (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent); update_mfc_params(rt, mfccp); MFC_UNLOCK(); VIF_UNLOCK(); return (0); } /* * Find the entry for which the upcall was made and update */ nstl = 0; hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp); LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) { if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) && !TAILQ_EMPTY(&rt->mfc_stall)) { CTR5(KTR_IPMF, "%s: add mfc orig %s group %lx parent %x qh %p", __func__, inet_ntoa(mfccp->mfcc_origin), (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent, TAILQ_FIRST(&rt->mfc_stall)); if (nstl++) CTR1(KTR_IPMF, "%s: multiple matches", __func__); init_mfc_params(rt, mfccp); rt->mfc_expire = 0; /* Don't clean this guy up */ V_nexpire[hash]--; /* Free queued packets, but attempt to forward them first. */ TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) { if (rte->ifp != NULL) ip_mdq(rte->m, rte->ifp, rt, -1); m_freem(rte->m); TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link); rt->mfc_nstall--; free(rte, M_MRTABLE); } } } /* * It is possible that an entry is being inserted without an upcall */ if (nstl == 0) { CTR1(KTR_IPMF, "%s: adding mfc w/o upcall", __func__); LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) { if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp)) { init_mfc_params(rt, mfccp); if (rt->mfc_expire) V_nexpire[hash]--; rt->mfc_expire = 0; break; /* XXX */ } } if (rt == NULL) { /* no upcall, so make a new entry */ rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); if (rt == NULL) { MFC_UNLOCK(); VIF_UNLOCK(); return (ENOBUFS); } init_mfc_params(rt, mfccp); TAILQ_INIT(&rt->mfc_stall); rt->mfc_nstall = 0; rt->mfc_expire = 0; rt->mfc_bw_meter = NULL; /* insert new entry at head of hash chain */ LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash); } } MFC_UNLOCK(); VIF_UNLOCK(); return (0); } /* * Delete an mfc entry */ static int del_mfc(struct mfcctl2 *mfccp) { struct in_addr origin; struct in_addr mcastgrp; struct mfc *rt; origin = mfccp->mfcc_origin; mcastgrp = mfccp->mfcc_mcastgrp; CTR3(KTR_IPMF, "%s: delete mfc orig %s group %lx", __func__, inet_ntoa(origin), (u_long)ntohl(mcastgrp.s_addr)); MFC_LOCK(); rt = mfc_find(&origin, &mcastgrp); if (rt == NULL) { MFC_UNLOCK(); return EADDRNOTAVAIL; } /* * free the bw_meter entries */ free_bw_list(rt->mfc_bw_meter); rt->mfc_bw_meter = NULL; LIST_REMOVE(rt, mfc_hash); free(rt, M_MRTABLE); MFC_UNLOCK(); return (0); } /* * Send a message to the routing daemon on the multicast routing socket. */ static int socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) { if (s) { SOCKBUF_LOCK(&s->so_rcv); if (sbappendaddr_locked(&s->so_rcv, (struct sockaddr *)src, mm, NULL) != 0) { sorwakeup_locked(s); return 0; } SOCKBUF_UNLOCK(&s->so_rcv); } m_freem(mm); return -1; } /* * IP multicast forwarding function. This function assumes that the packet * pointed to by "ip" has arrived on (or is about to be sent to) the interface * pointed to by "ifp", and the packet is to be relayed to other networks * that have members of the packet's destination IP multicast group. * * The packet is returned unscathed to the caller, unless it is * erroneous, in which case a non-zero return value tells the caller to * discard it. */ #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ static int X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo) { struct mfc *rt; int error; vifi_t vifi; CTR3(KTR_IPMF, "ip_mforward: delete mfc orig %s group %lx ifp %p", inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr), ifp); if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 || ((u_char *)(ip + 1))[1] != IPOPT_LSRR ) { /* * Packet arrived via a physical interface or * an encapsulated tunnel or a register_vif. */ } else { /* * Packet arrived through a source-route tunnel. * Source-route tunnels are no longer supported. */ return (1); } VIF_LOCK(); MFC_LOCK(); if (imo && ((vifi = imo->imo_multicast_vif) < V_numvifs)) { if (ip->ip_ttl < MAXTTL) ip->ip_ttl++; /* compensate for -1 in *_send routines */ error = ip_mdq(m, ifp, NULL, vifi); MFC_UNLOCK(); VIF_UNLOCK(); return error; } /* * Don't forward a packet with time-to-live of zero or one, * or a packet destined to a local-only group. */ if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ntohl(ip->ip_dst.s_addr))) { MFC_UNLOCK(); VIF_UNLOCK(); return 0; } /* * Determine forwarding vifs from the forwarding cache table */ MRTSTAT_INC(mrts_mfc_lookups); rt = mfc_find(&ip->ip_src, &ip->ip_dst); /* Entry exists, so forward if necessary */ if (rt != NULL) { error = ip_mdq(m, ifp, rt, -1); MFC_UNLOCK(); VIF_UNLOCK(); return error; } else { /* * If we don't have a route for packet's origin, * Make a copy of the packet & send message to routing daemon */ struct mbuf *mb0; struct rtdetq *rte; u_long hash; int hlen = ip->ip_hl << 2; MRTSTAT_INC(mrts_mfc_misses); MRTSTAT_INC(mrts_no_route); CTR2(KTR_IPMF, "ip_mforward: no mfc for (%s,%lx)", inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr)); /* * Allocate mbufs early so that we don't do extra work if we are * just going to fail anyway. Make sure to pullup the header so * that other people can't step on it. */ rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, M_NOWAIT|M_ZERO); if (rte == NULL) { MFC_UNLOCK(); VIF_UNLOCK(); return ENOBUFS; } mb0 = m_copypacket(m, M_NOWAIT); if (mb0 && (!M_WRITABLE(mb0) || mb0->m_len < hlen)) mb0 = m_pullup(mb0, hlen); if (mb0 == NULL) { free(rte, M_MRTABLE); MFC_UNLOCK(); VIF_UNLOCK(); return ENOBUFS; } /* is there an upcall waiting for this flow ? */ hash = MFCHASH(ip->ip_src, ip->ip_dst); LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) { if (in_hosteq(ip->ip_src, rt->mfc_origin) && in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) && !TAILQ_EMPTY(&rt->mfc_stall)) break; } if (rt == NULL) { int i; struct igmpmsg *im; struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; struct mbuf *mm; /* * Locate the vifi for the incoming interface for this packet. * If none found, drop packet. */ for (vifi = 0; vifi < V_numvifs && V_viftable[vifi].v_ifp != ifp; vifi++) ; if (vifi >= V_numvifs) /* vif not found, drop packet */ goto non_fatal; /* no upcall, so make a new entry */ rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); if (rt == NULL) goto fail; /* Make a copy of the header to send to the user level process */ mm = m_copy(mb0, 0, hlen); if (mm == NULL) goto fail1; /* * Send message to routing daemon to install * a route into the kernel table */ im = mtod(mm, struct igmpmsg *); im->im_msgtype = IGMPMSG_NOCACHE; im->im_mbz = 0; im->im_vif = vifi; MRTSTAT_INC(mrts_upcalls); k_igmpsrc.sin_addr = ip->ip_src; if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) { CTR0(KTR_IPMF, "ip_mforward: socket queue full"); MRTSTAT_INC(mrts_upq_sockfull); fail1: free(rt, M_MRTABLE); fail: free(rte, M_MRTABLE); m_freem(mb0); MFC_UNLOCK(); VIF_UNLOCK(); return ENOBUFS; } /* insert new entry at head of hash chain */ rt->mfc_origin.s_addr = ip->ip_src.s_addr; rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; rt->mfc_expire = UPCALL_EXPIRE; V_nexpire[hash]++; for (i = 0; i < V_numvifs; i++) { rt->mfc_ttls[i] = 0; rt->mfc_flags[i] = 0; } rt->mfc_parent = -1; /* clear the RP address */ rt->mfc_rp.s_addr = INADDR_ANY; rt->mfc_bw_meter = NULL; /* initialize pkt counters per src-grp */ rt->mfc_pkt_cnt = 0; rt->mfc_byte_cnt = 0; rt->mfc_wrong_if = 0; timevalclear(&rt->mfc_last_assert); TAILQ_INIT(&rt->mfc_stall); rt->mfc_nstall = 0; /* link into table */ LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash); TAILQ_INSERT_HEAD(&rt->mfc_stall, rte, rte_link); rt->mfc_nstall++; } else { /* determine if queue has overflowed */ if (rt->mfc_nstall > MAX_UPQ) { MRTSTAT_INC(mrts_upq_ovflw); non_fatal: free(rte, M_MRTABLE); m_freem(mb0); MFC_UNLOCK(); VIF_UNLOCK(); return (0); } TAILQ_INSERT_TAIL(&rt->mfc_stall, rte, rte_link); rt->mfc_nstall++; } rte->m = mb0; rte->ifp = ifp; MFC_UNLOCK(); VIF_UNLOCK(); return 0; } } /* * Clean up the cache entry if upcall is not serviced */ static void expire_upcalls(void *arg) { u_long i; CURVNET_SET((struct vnet *) arg); MFC_LOCK(); for (i = 0; i < mfchashsize; i++) { struct mfc *rt, *nrt; if (V_nexpire[i] == 0) continue; LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) { if (TAILQ_EMPTY(&rt->mfc_stall)) continue; if (rt->mfc_expire == 0 || --rt->mfc_expire > 0) continue; /* * free the bw_meter entries */ while (rt->mfc_bw_meter != NULL) { struct bw_meter *x = rt->mfc_bw_meter; rt->mfc_bw_meter = x->bm_mfc_next; free(x, M_BWMETER); } MRTSTAT_INC(mrts_cache_cleanups); CTR3(KTR_IPMF, "%s: expire (%lx, %lx)", __func__, (u_long)ntohl(rt->mfc_origin.s_addr), (u_long)ntohl(rt->mfc_mcastgrp.s_addr)); expire_mfc(rt); } } MFC_UNLOCK(); callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, curvnet); CURVNET_RESTORE(); } /* * Packet forwarding routine once entry in the cache is made */ static int ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif) { struct ip *ip = mtod(m, struct ip *); vifi_t vifi; int plen = ntohs(ip->ip_len); VIF_LOCK_ASSERT(); /* * If xmt_vif is not -1, send on only the requested vif. * * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) */ if (xmt_vif < V_numvifs) { if (V_viftable[xmt_vif].v_flags & VIFF_REGISTER) pim_register_send(ip, V_viftable + xmt_vif, m, rt); else phyint_send(ip, V_viftable + xmt_vif, m); return 1; } /* * Don't forward if it didn't arrive from the parent vif for its origin. */ vifi = rt->mfc_parent; if ((vifi >= V_numvifs) || (V_viftable[vifi].v_ifp != ifp)) { CTR4(KTR_IPMF, "%s: rx on wrong ifp %p (vifi %d, v_ifp %p)", __func__, ifp, (int)vifi, V_viftable[vifi].v_ifp); MRTSTAT_INC(mrts_wrong_if); ++rt->mfc_wrong_if; /* * If we are doing PIM assert processing, send a message * to the routing daemon. * * XXX: A PIM-SM router needs the WRONGVIF detection so it * can complete the SPT switch, regardless of the type * of the iif (broadcast media, GRE tunnel, etc). */ if (V_pim_assert_enabled && (vifi < V_numvifs) && V_viftable[vifi].v_ifp) { if (ifp == &V_multicast_register_if) PIMSTAT_INC(pims_rcv_registers_wrongiif); /* Get vifi for the incoming packet */ for (vifi = 0; vifi < V_numvifs && V_viftable[vifi].v_ifp != ifp; vifi++) ; if (vifi >= V_numvifs) return 0; /* The iif is not found: ignore the packet. */ if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_DISABLE_WRONGVIF) return 0; /* WRONGVIF disabled: ignore the packet */ if (ratecheck(&rt->mfc_last_assert, &pim_assert_interval)) { struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; struct igmpmsg *im; int hlen = ip->ip_hl << 2; struct mbuf *mm = m_copy(m, 0, hlen); if (mm && (!M_WRITABLE(mm) || mm->m_len < hlen)) mm = m_pullup(mm, hlen); if (mm == NULL) return ENOBUFS; im = mtod(mm, struct igmpmsg *); im->im_msgtype = IGMPMSG_WRONGVIF; im->im_mbz = 0; im->im_vif = vifi; MRTSTAT_INC(mrts_upcalls); k_igmpsrc.sin_addr = im->im_src; if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) { CTR1(KTR_IPMF, "%s: socket queue full", __func__); MRTSTAT_INC(mrts_upq_sockfull); return ENOBUFS; } } } return 0; } /* If I sourced this packet, it counts as output, else it was input. */ if (in_hosteq(ip->ip_src, V_viftable[vifi].v_lcl_addr)) { V_viftable[vifi].v_pkt_out++; V_viftable[vifi].v_bytes_out += plen; } else { V_viftable[vifi].v_pkt_in++; V_viftable[vifi].v_bytes_in += plen; } rt->mfc_pkt_cnt++; rt->mfc_byte_cnt += plen; /* * For each vif, decide if a copy of the packet should be forwarded. * Forward if: * - the ttl exceeds the vif's threshold * - there are group members downstream on interface */ for (vifi = 0; vifi < V_numvifs; vifi++) if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) { V_viftable[vifi].v_pkt_out++; V_viftable[vifi].v_bytes_out += plen; if (V_viftable[vifi].v_flags & VIFF_REGISTER) pim_register_send(ip, V_viftable + vifi, m, rt); else phyint_send(ip, V_viftable + vifi, m); } /* * Perform upcall-related bw measuring. */ if (rt->mfc_bw_meter != NULL) { struct bw_meter *x; struct timeval now; microtime(&now); MFC_LOCK_ASSERT(); for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) bw_meter_receive_packet(x, plen, &now); } return 0; } /* * Check if a vif number is legal/ok. This is used by in_mcast.c. */ static int X_legal_vif_num(int vif) { int ret; ret = 0; if (vif < 0) return (ret); VIF_LOCK(); if (vif < V_numvifs) ret = 1; VIF_UNLOCK(); return (ret); } /* * Return the local address used by this vif */ static u_long X_ip_mcast_src(int vifi) { in_addr_t addr; addr = INADDR_ANY; if (vifi < 0) return (addr); VIF_LOCK(); if (vifi < V_numvifs) addr = V_viftable[vifi].v_lcl_addr.s_addr; VIF_UNLOCK(); return (addr); } static void phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m) { struct mbuf *mb_copy; int hlen = ip->ip_hl << 2; VIF_LOCK_ASSERT(); /* * Make a new reference to the packet; make sure that * the IP header is actually copied, not just referenced, * so that ip_output() only scribbles on the copy. */ mb_copy = m_copypacket(m, M_NOWAIT); if (mb_copy && (!M_WRITABLE(mb_copy) || mb_copy->m_len < hlen)) mb_copy = m_pullup(mb_copy, hlen); if (mb_copy == NULL) return; send_packet(vifp, mb_copy); } static void send_packet(struct vif *vifp, struct mbuf *m) { struct ip_moptions imo; struct in_multi *imm[2]; int error; VIF_LOCK_ASSERT(); imo.imo_multicast_ifp = vifp->v_ifp; imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; imo.imo_multicast_loop = 1; imo.imo_multicast_vif = -1; imo.imo_num_memberships = 0; imo.imo_max_memberships = 2; imo.imo_membership = &imm[0]; /* * Re-entrancy should not be a problem here, because * the packets that we send out and are looped back at us * should get rejected because they appear to come from * the loopback interface, thus preventing looping. */ error = ip_output(m, NULL, NULL, IP_FORWARDING, &imo, NULL); CTR3(KTR_IPMF, "%s: vif %td err %d", __func__, (ptrdiff_t)(vifp - V_viftable), error); } /* * Stubs for old RSVP socket shim implementation. */ static int X_ip_rsvp_vif(struct socket *so __unused, struct sockopt *sopt __unused) { return (EOPNOTSUPP); } static void X_ip_rsvp_force_done(struct socket *so __unused) { } static int X_rsvp_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m; m = *mp; *mp = NULL; if (!V_rsvp_on) m_freem(m); return (IPPROTO_DONE); } /* * Code for bandwidth monitors */ /* * Define common interface for timeval-related methods */ #define BW_TIMEVALCMP(tvp, uvp, cmp) timevalcmp((tvp), (uvp), cmp) #define BW_TIMEVALDECR(vvp, uvp) timevalsub((vvp), (uvp)) #define BW_TIMEVALADD(vvp, uvp) timevaladd((vvp), (uvp)) static uint32_t compute_bw_meter_flags(struct bw_upcall *req) { uint32_t flags = 0; if (req->bu_flags & BW_UPCALL_UNIT_PACKETS) flags |= BW_METER_UNIT_PACKETS; if (req->bu_flags & BW_UPCALL_UNIT_BYTES) flags |= BW_METER_UNIT_BYTES; if (req->bu_flags & BW_UPCALL_GEQ) flags |= BW_METER_GEQ; if (req->bu_flags & BW_UPCALL_LEQ) flags |= BW_METER_LEQ; return flags; } /* * Add a bw_meter entry */ static int add_bw_upcall(struct bw_upcall *req) { struct mfc *mfc; struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC, BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC }; struct timeval now; struct bw_meter *x; uint32_t flags; if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL)) return EOPNOTSUPP; /* Test if the flags are valid */ if (!(req->bu_flags & (BW_UPCALL_UNIT_PACKETS | BW_UPCALL_UNIT_BYTES))) return EINVAL; if (!(req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ))) return EINVAL; if ((req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) == (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) return EINVAL; /* Test if the threshold time interval is valid */ if (BW_TIMEVALCMP(&req->bu_threshold.b_time, &delta, <)) return EINVAL; flags = compute_bw_meter_flags(req); /* * Find if we have already same bw_meter entry */ MFC_LOCK(); mfc = mfc_find(&req->bu_src, &req->bu_dst); if (mfc == NULL) { MFC_UNLOCK(); return EADDRNOTAVAIL; } for (x = mfc->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) { if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, &req->bu_threshold.b_time, ==)) && (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && (x->bm_flags & BW_METER_USER_FLAGS) == flags) { MFC_UNLOCK(); return 0; /* XXX Already installed */ } } /* Allocate the new bw_meter entry */ x = (struct bw_meter *)malloc(sizeof(*x), M_BWMETER, M_NOWAIT); if (x == NULL) { MFC_UNLOCK(); return ENOBUFS; } /* Set the new bw_meter entry */ x->bm_threshold.b_time = req->bu_threshold.b_time; microtime(&now); x->bm_start_time = now; x->bm_threshold.b_packets = req->bu_threshold.b_packets; x->bm_threshold.b_bytes = req->bu_threshold.b_bytes; x->bm_measured.b_packets = 0; x->bm_measured.b_bytes = 0; x->bm_flags = flags; x->bm_time_next = NULL; x->bm_time_hash = BW_METER_BUCKETS; /* Add the new bw_meter entry to the front of entries for this MFC */ x->bm_mfc = mfc; x->bm_mfc_next = mfc->mfc_bw_meter; mfc->mfc_bw_meter = x; schedule_bw_meter(x, &now); MFC_UNLOCK(); return 0; } static void free_bw_list(struct bw_meter *list) { while (list != NULL) { struct bw_meter *x = list; list = list->bm_mfc_next; unschedule_bw_meter(x); free(x, M_BWMETER); } } /* * Delete one or multiple bw_meter entries */ static int del_bw_upcall(struct bw_upcall *req) { struct mfc *mfc; struct bw_meter *x; if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL)) return EOPNOTSUPP; MFC_LOCK(); /* Find the corresponding MFC entry */ mfc = mfc_find(&req->bu_src, &req->bu_dst); if (mfc == NULL) { MFC_UNLOCK(); return EADDRNOTAVAIL; } else if (req->bu_flags & BW_UPCALL_DELETE_ALL) { /* * Delete all bw_meter entries for this mfc */ struct bw_meter *list; list = mfc->mfc_bw_meter; mfc->mfc_bw_meter = NULL; free_bw_list(list); MFC_UNLOCK(); return 0; } else { /* Delete a single bw_meter entry */ struct bw_meter *prev; uint32_t flags = 0; flags = compute_bw_meter_flags(req); /* Find the bw_meter entry to delete */ for (prev = NULL, x = mfc->mfc_bw_meter; x != NULL; prev = x, x = x->bm_mfc_next) { if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, &req->bu_threshold.b_time, ==)) && (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && (x->bm_flags & BW_METER_USER_FLAGS) == flags) break; } if (x != NULL) { /* Delete entry from the list for this MFC */ if (prev != NULL) prev->bm_mfc_next = x->bm_mfc_next; /* remove from middle*/ else x->bm_mfc->mfc_bw_meter = x->bm_mfc_next;/* new head of list */ unschedule_bw_meter(x); MFC_UNLOCK(); /* Free the bw_meter entry */ free(x, M_BWMETER); return 0; } else { MFC_UNLOCK(); return EINVAL; } } /* NOTREACHED */ } /* * Perform bandwidth measurement processing that may result in an upcall */ static void bw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp) { struct timeval delta; MFC_LOCK_ASSERT(); delta = *nowp; BW_TIMEVALDECR(&delta, &x->bm_start_time); if (x->bm_flags & BW_METER_GEQ) { /* * Processing for ">=" type of bw_meter entry */ if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { /* Reset the bw_meter entry */ x->bm_start_time = *nowp; x->bm_measured.b_packets = 0; x->bm_measured.b_bytes = 0; x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; } /* Record that a packet is received */ x->bm_measured.b_packets++; x->bm_measured.b_bytes += plen; /* * Test if we should deliver an upcall */ if (!(x->bm_flags & BW_METER_UPCALL_DELIVERED)) { if (((x->bm_flags & BW_METER_UNIT_PACKETS) && (x->bm_measured.b_packets >= x->bm_threshold.b_packets)) || ((x->bm_flags & BW_METER_UNIT_BYTES) && (x->bm_measured.b_bytes >= x->bm_threshold.b_bytes))) { /* Prepare an upcall for delivery */ bw_meter_prepare_upcall(x, nowp); x->bm_flags |= BW_METER_UPCALL_DELIVERED; } } } else if (x->bm_flags & BW_METER_LEQ) { /* * Processing for "<=" type of bw_meter entry */ if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { /* * We are behind time with the multicast forwarding table * scanning for "<=" type of bw_meter entries, so test now * if we should deliver an upcall. */ if (((x->bm_flags & BW_METER_UNIT_PACKETS) && (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || ((x->bm_flags & BW_METER_UNIT_BYTES) && (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { /* Prepare an upcall for delivery */ bw_meter_prepare_upcall(x, nowp); } /* Reschedule the bw_meter entry */ unschedule_bw_meter(x); schedule_bw_meter(x, nowp); } /* Record that a packet is received */ x->bm_measured.b_packets++; x->bm_measured.b_bytes += plen; /* * Test if we should restart the measuring interval */ if ((x->bm_flags & BW_METER_UNIT_PACKETS && x->bm_measured.b_packets <= x->bm_threshold.b_packets) || (x->bm_flags & BW_METER_UNIT_BYTES && x->bm_measured.b_bytes <= x->bm_threshold.b_bytes)) { /* Don't restart the measuring interval */ } else { /* Do restart the measuring interval */ /* * XXX: note that we don't unschedule and schedule, because this * might be too much overhead per packet. Instead, when we process * all entries for a given timer hash bin, we check whether it is * really a timeout. If not, we reschedule at that time. */ x->bm_start_time = *nowp; x->bm_measured.b_packets = 0; x->bm_measured.b_bytes = 0; x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; } } } /* * Prepare a bandwidth-related upcall */ static void bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp) { struct timeval delta; struct bw_upcall *u; MFC_LOCK_ASSERT(); /* * Compute the measured time interval */ delta = *nowp; BW_TIMEVALDECR(&delta, &x->bm_start_time); /* * If there are too many pending upcalls, deliver them now */ if (V_bw_upcalls_n >= BW_UPCALLS_MAX) bw_upcalls_send(); /* * Set the bw_upcall entry */ u = &V_bw_upcalls[V_bw_upcalls_n++]; u->bu_src = x->bm_mfc->mfc_origin; u->bu_dst = x->bm_mfc->mfc_mcastgrp; u->bu_threshold.b_time = x->bm_threshold.b_time; u->bu_threshold.b_packets = x->bm_threshold.b_packets; u->bu_threshold.b_bytes = x->bm_threshold.b_bytes; u->bu_measured.b_time = delta; u->bu_measured.b_packets = x->bm_measured.b_packets; u->bu_measured.b_bytes = x->bm_measured.b_bytes; u->bu_flags = 0; if (x->bm_flags & BW_METER_UNIT_PACKETS) u->bu_flags |= BW_UPCALL_UNIT_PACKETS; if (x->bm_flags & BW_METER_UNIT_BYTES) u->bu_flags |= BW_UPCALL_UNIT_BYTES; if (x->bm_flags & BW_METER_GEQ) u->bu_flags |= BW_UPCALL_GEQ; if (x->bm_flags & BW_METER_LEQ) u->bu_flags |= BW_UPCALL_LEQ; } /* * Send the pending bandwidth-related upcalls */ static void bw_upcalls_send(void) { struct mbuf *m; int len = V_bw_upcalls_n * sizeof(V_bw_upcalls[0]); struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; static struct igmpmsg igmpmsg = { 0, /* unused1 */ 0, /* unused2 */ IGMPMSG_BW_UPCALL,/* im_msgtype */ 0, /* im_mbz */ 0, /* im_vif */ 0, /* unused3 */ { 0 }, /* im_src */ { 0 } }; /* im_dst */ MFC_LOCK_ASSERT(); if (V_bw_upcalls_n == 0) return; /* No pending upcalls */ V_bw_upcalls_n = 0; /* * Allocate a new mbuf, initialize it with the header and * the payload for the pending calls. */ m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n"); return; } m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg); m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&V_bw_upcalls[0]); /* * Send the upcalls * XXX do we need to set the address in k_igmpsrc ? */ MRTSTAT_INC(mrts_upcalls); if (socket_send(V_ip_mrouter, m, &k_igmpsrc) < 0) { log(LOG_WARNING, "bw_upcalls_send: ip_mrouter socket queue full\n"); MRTSTAT_INC(mrts_upq_sockfull); } } /* * Compute the timeout hash value for the bw_meter entries */ #define BW_METER_TIMEHASH(bw_meter, hash) \ do { \ struct timeval next_timeval = (bw_meter)->bm_start_time; \ \ BW_TIMEVALADD(&next_timeval, &(bw_meter)->bm_threshold.b_time); \ (hash) = next_timeval.tv_sec; \ if (next_timeval.tv_usec) \ (hash)++; /* XXX: make sure we don't timeout early */ \ (hash) %= BW_METER_BUCKETS; \ } while (0) /* * Schedule a timer to process periodically bw_meter entry of type "<=" * by linking the entry in the proper hash bucket. */ static void schedule_bw_meter(struct bw_meter *x, struct timeval *nowp) { int time_hash; MFC_LOCK_ASSERT(); if (!(x->bm_flags & BW_METER_LEQ)) return; /* XXX: we schedule timers only for "<=" entries */ /* * Reset the bw_meter entry */ x->bm_start_time = *nowp; x->bm_measured.b_packets = 0; x->bm_measured.b_bytes = 0; x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; /* * Compute the timeout hash value and insert the entry */ BW_METER_TIMEHASH(x, time_hash); x->bm_time_next = V_bw_meter_timers[time_hash]; V_bw_meter_timers[time_hash] = x; x->bm_time_hash = time_hash; } /* * Unschedule the periodic timer that processes bw_meter entry of type "<=" * by removing the entry from the proper hash bucket. */ static void unschedule_bw_meter(struct bw_meter *x) { int time_hash; struct bw_meter *prev, *tmp; MFC_LOCK_ASSERT(); if (!(x->bm_flags & BW_METER_LEQ)) return; /* XXX: we schedule timers only for "<=" entries */ /* * Compute the timeout hash value and delete the entry */ time_hash = x->bm_time_hash; if (time_hash >= BW_METER_BUCKETS) return; /* Entry was not scheduled */ for (prev = NULL, tmp = V_bw_meter_timers[time_hash]; tmp != NULL; prev = tmp, tmp = tmp->bm_time_next) if (tmp == x) break; if (tmp == NULL) panic("unschedule_bw_meter: bw_meter entry not found"); if (prev != NULL) prev->bm_time_next = x->bm_time_next; else V_bw_meter_timers[time_hash] = x->bm_time_next; x->bm_time_next = NULL; x->bm_time_hash = BW_METER_BUCKETS; } /* * Process all "<=" type of bw_meter that should be processed now, * and for each entry prepare an upcall if necessary. Each processed * entry is rescheduled again for the (periodic) processing. * * This is run periodically (once per second normally). On each round, * all the potentially matching entries are in the hash slot that we are * looking at. */ static void bw_meter_process() { uint32_t loops; int i; struct timeval now, process_endtime; microtime(&now); if (V_last_tv_sec == now.tv_sec) return; /* nothing to do */ loops = now.tv_sec - V_last_tv_sec; V_last_tv_sec = now.tv_sec; if (loops > BW_METER_BUCKETS) loops = BW_METER_BUCKETS; MFC_LOCK(); /* * Process all bins of bw_meter entries from the one after the last * processed to the current one. On entry, i points to the last bucket * visited, so we need to increment i at the beginning of the loop. */ for (i = (now.tv_sec - loops) % BW_METER_BUCKETS; loops > 0; loops--) { struct bw_meter *x, *tmp_list; if (++i >= BW_METER_BUCKETS) i = 0; /* Disconnect the list of bw_meter entries from the bin */ tmp_list = V_bw_meter_timers[i]; V_bw_meter_timers[i] = NULL; /* Process the list of bw_meter entries */ while (tmp_list != NULL) { x = tmp_list; tmp_list = tmp_list->bm_time_next; /* Test if the time interval is over */ process_endtime = x->bm_start_time; BW_TIMEVALADD(&process_endtime, &x->bm_threshold.b_time); if (BW_TIMEVALCMP(&process_endtime, &now, >)) { /* Not yet: reschedule, but don't reset */ int time_hash; BW_METER_TIMEHASH(x, time_hash); if (time_hash == i && process_endtime.tv_sec == now.tv_sec) { /* * XXX: somehow the bin processing is a bit ahead of time. * Put the entry in the next bin. */ if (++time_hash >= BW_METER_BUCKETS) time_hash = 0; } x->bm_time_next = V_bw_meter_timers[time_hash]; V_bw_meter_timers[time_hash] = x; x->bm_time_hash = time_hash; continue; } /* * Test if we should deliver an upcall */ if (((x->bm_flags & BW_METER_UNIT_PACKETS) && (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || ((x->bm_flags & BW_METER_UNIT_BYTES) && (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { /* Prepare an upcall for delivery */ bw_meter_prepare_upcall(x, &now); } /* * Reschedule for next processing */ schedule_bw_meter(x, &now); } } /* Send all upcalls that are pending delivery */ bw_upcalls_send(); MFC_UNLOCK(); } /* * A periodic function for sending all upcalls that are pending delivery */ static void expire_bw_upcalls_send(void *arg) { CURVNET_SET((struct vnet *) arg); MFC_LOCK(); bw_upcalls_send(); MFC_UNLOCK(); callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send, curvnet); CURVNET_RESTORE(); } /* * A periodic function for periodic scanning of the multicast forwarding * table for processing all "<=" bw_meter entries. */ static void expire_bw_meter_process(void *arg) { CURVNET_SET((struct vnet *) arg); if (V_mrt_api_config & MRT_MFC_BW_UPCALL) bw_meter_process(); callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process, curvnet); CURVNET_RESTORE(); } /* * End of bandwidth monitoring code */ /* * Send the packet up to the user daemon, or eventually do kernel encapsulation * */ static int pim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m, struct mfc *rt) { struct mbuf *mb_copy, *mm; /* * Do not send IGMP_WHOLEPKT notifications to userland, if the * rendezvous point was unspecified, and we were told not to. */ if (pim_squelch_wholepkt != 0 && (V_mrt_api_config & MRT_MFC_RP) && in_nullhost(rt->mfc_rp)) return 0; mb_copy = pim_register_prepare(ip, m); if (mb_copy == NULL) return ENOBUFS; /* * Send all the fragments. Note that the mbuf for each fragment * is freed by the sending machinery. */ for (mm = mb_copy; mm; mm = mb_copy) { mb_copy = mm->m_nextpkt; mm->m_nextpkt = 0; mm = m_pullup(mm, sizeof(struct ip)); if (mm != NULL) { ip = mtod(mm, struct ip *); if ((V_mrt_api_config & MRT_MFC_RP) && !in_nullhost(rt->mfc_rp)) { pim_register_send_rp(ip, vifp, mm, rt); } else { pim_register_send_upcall(ip, vifp, mm, rt); } } } return 0; } /* * Return a copy of the data packet that is ready for PIM Register * encapsulation. * XXX: Note that in the returned copy the IP header is a valid one. */ static struct mbuf * pim_register_prepare(struct ip *ip, struct mbuf *m) { struct mbuf *mb_copy = NULL; int mtu; /* Take care of delayed checksums */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(m); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } /* * Copy the old packet & pullup its IP header into the * new mbuf so we can modify it. */ mb_copy = m_copypacket(m, M_NOWAIT); if (mb_copy == NULL) return NULL; mb_copy = m_pullup(mb_copy, ip->ip_hl << 2); if (mb_copy == NULL) return NULL; /* take care of the TTL */ ip = mtod(mb_copy, struct ip *); --ip->ip_ttl; /* Compute the MTU after the PIM Register encapsulation */ mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr); if (ntohs(ip->ip_len) <= mtu) { /* Turn the IP header into a valid one */ ip->ip_sum = 0; ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); } else { /* Fragment the packet */ mb_copy->m_pkthdr.csum_flags |= CSUM_IP; if (ip_fragment(ip, &mb_copy, mtu, 0) != 0) { m_freem(mb_copy); return NULL; } } return mb_copy; } /* * Send an upcall with the data packet to the user-level process. */ static int pim_register_send_upcall(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy, struct mfc *rt) { struct mbuf *mb_first; int len = ntohs(ip->ip_len); struct igmpmsg *im; struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; VIF_LOCK_ASSERT(); /* * Add a new mbuf with an upcall header */ mb_first = m_gethdr(M_NOWAIT, MT_DATA); if (mb_first == NULL) { m_freem(mb_copy); return ENOBUFS; } mb_first->m_data += max_linkhdr; mb_first->m_pkthdr.len = len + sizeof(struct igmpmsg); mb_first->m_len = sizeof(struct igmpmsg); mb_first->m_next = mb_copy; /* Send message to routing daemon */ im = mtod(mb_first, struct igmpmsg *); im->im_msgtype = IGMPMSG_WHOLEPKT; im->im_mbz = 0; im->im_vif = vifp - V_viftable; im->im_src = ip->ip_src; im->im_dst = ip->ip_dst; k_igmpsrc.sin_addr = ip->ip_src; MRTSTAT_INC(mrts_upcalls); if (socket_send(V_ip_mrouter, mb_first, &k_igmpsrc) < 0) { CTR1(KTR_IPMF, "%s: socket queue full", __func__); MRTSTAT_INC(mrts_upq_sockfull); return ENOBUFS; } /* Keep statistics */ PIMSTAT_INC(pims_snd_registers_msgs); PIMSTAT_ADD(pims_snd_registers_bytes, len); return 0; } /* * Encapsulate the data packet in PIM Register message and send it to the RP. */ static int pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy, struct mfc *rt) { struct mbuf *mb_first; struct ip *ip_outer; struct pim_encap_pimhdr *pimhdr; int len = ntohs(ip->ip_len); vifi_t vifi = rt->mfc_parent; VIF_LOCK_ASSERT(); if ((vifi >= V_numvifs) || in_nullhost(V_viftable[vifi].v_lcl_addr)) { m_freem(mb_copy); return EADDRNOTAVAIL; /* The iif vif is invalid */ } /* * Add a new mbuf with the encapsulating header */ mb_first = m_gethdr(M_NOWAIT, MT_DATA); if (mb_first == NULL) { m_freem(mb_copy); return ENOBUFS; } mb_first->m_data += max_linkhdr; mb_first->m_len = sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr); mb_first->m_next = mb_copy; mb_first->m_pkthdr.len = len + mb_first->m_len; /* * Fill in the encapsulating IP and PIM header */ ip_outer = mtod(mb_first, struct ip *); *ip_outer = pim_encap_iphdr; ip_outer->ip_len = htons(len + sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr)); ip_outer->ip_src = V_viftable[vifi].v_lcl_addr; ip_outer->ip_dst = rt->mfc_rp; /* * Copy the inner header TOS to the outer header, and take care of the * IP_DF bit. */ ip_outer->ip_tos = ip->ip_tos; if (ip->ip_off & htons(IP_DF)) ip_outer->ip_off |= htons(IP_DF); ip_fillid(ip_outer); pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer + sizeof(pim_encap_iphdr)); *pimhdr = pim_encap_pimhdr; /* If the iif crosses a border, set the Border-bit */ if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_BORDER_VIF & V_mrt_api_config) pimhdr->flags |= htonl(PIM_BORDER_REGISTER); mb_first->m_data += sizeof(pim_encap_iphdr); pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr)); mb_first->m_data -= sizeof(pim_encap_iphdr); send_packet(vifp, mb_first); /* Keep statistics */ PIMSTAT_INC(pims_snd_registers_msgs); PIMSTAT_ADD(pims_snd_registers_bytes, len); return 0; } /* * pim_encapcheck() is called by the encap4_input() path at runtime to * determine if a packet is for PIM; allowing PIM to be dynamically loaded * into the kernel. */ static int pim_encapcheck(const struct mbuf *m, int off, int proto, void *arg) { #ifdef DIAGNOSTIC KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM")); #endif if (proto != IPPROTO_PIM) return 0; /* not for us; reject the datagram. */ return 64; /* claim the datagram. */ } /* * PIM-SMv2 and PIM-DM messages processing. * Receives and verifies the PIM control messages, and passes them * up to the listening socket, using rip_input(). * The only message with special processing is the PIM_REGISTER message * (used by PIM-SM): the PIM header is stripped off, and the inner packet * is passed to if_simloop(). */ int pim_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct pim *pim; int iphlen = *offp; int minlen; int datalen = ntohs(ip->ip_len) - iphlen; int ip_tos; *mp = NULL; /* Keep statistics */ PIMSTAT_INC(pims_rcv_total_msgs); PIMSTAT_ADD(pims_rcv_total_bytes, datalen); /* * Validate lengths */ if (datalen < PIM_MINLEN) { PIMSTAT_INC(pims_rcv_tooshort); CTR3(KTR_IPMF, "%s: short packet (%d) from %s", __func__, datalen, inet_ntoa(ip->ip_src)); m_freem(m); return (IPPROTO_DONE); } /* * If the packet is at least as big as a REGISTER, go agead * and grab the PIM REGISTER header size, to avoid another * possible m_pullup() later. * * PIM_MINLEN == pimhdr + u_int32_t == 4 + 4 = 8 * PIM_REG_MINLEN == pimhdr + reghdr + encap_iphdr == 4 + 4 + 20 = 28 */ minlen = iphlen + (datalen >= PIM_REG_MINLEN ? PIM_REG_MINLEN : PIM_MINLEN); /* * Get the IP and PIM headers in contiguous memory, and * possibly the PIM REGISTER header. */ if (m->m_len < minlen && (m = m_pullup(m, minlen)) == 0) { CTR1(KTR_IPMF, "%s: m_pullup() failed", __func__); return (IPPROTO_DONE); } /* m_pullup() may have given us a new mbuf so reset ip. */ ip = mtod(m, struct ip *); ip_tos = ip->ip_tos; /* adjust mbuf to point to the PIM header */ m->m_data += iphlen; m->m_len -= iphlen; pim = mtod(m, struct pim *); /* * Validate checksum. If PIM REGISTER, exclude the data packet. * * XXX: some older PIMv2 implementations don't make this distinction, * so for compatibility reason perform the checksum over part of the * message, and if error, then over the whole message. */ if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER && in_cksum(m, PIM_MINLEN) == 0) { /* do nothing, checksum okay */ } else if (in_cksum(m, datalen)) { PIMSTAT_INC(pims_rcv_badsum); CTR1(KTR_IPMF, "%s: invalid checksum", __func__); m_freem(m); return (IPPROTO_DONE); } /* PIM version check */ if (PIM_VT_V(pim->pim_vt) < PIM_VERSION) { PIMSTAT_INC(pims_rcv_badversion); CTR3(KTR_IPMF, "%s: bad version %d expect %d", __func__, (int)PIM_VT_V(pim->pim_vt), PIM_VERSION); m_freem(m); return (IPPROTO_DONE); } /* restore mbuf back to the outer IP */ m->m_data -= iphlen; m->m_len += iphlen; if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER) { /* * Since this is a REGISTER, we'll make a copy of the register * headers ip + pim + u_int32 + encap_ip, to be passed up to the * routing daemon. */ struct sockaddr_in dst = { sizeof(dst), AF_INET }; struct mbuf *mcp; struct ip *encap_ip; u_int32_t *reghdr; struct ifnet *vifp; VIF_LOCK(); if ((V_reg_vif_num >= V_numvifs) || (V_reg_vif_num == VIFI_INVALID)) { VIF_UNLOCK(); CTR2(KTR_IPMF, "%s: register vif not set: %d", __func__, (int)V_reg_vif_num); m_freem(m); return (IPPROTO_DONE); } /* XXX need refcnt? */ vifp = V_viftable[V_reg_vif_num].v_ifp; VIF_UNLOCK(); /* * Validate length */ if (datalen < PIM_REG_MINLEN) { PIMSTAT_INC(pims_rcv_tooshort); PIMSTAT_INC(pims_rcv_badregisters); CTR1(KTR_IPMF, "%s: register packet size too small", __func__); m_freem(m); return (IPPROTO_DONE); } reghdr = (u_int32_t *)(pim + 1); encap_ip = (struct ip *)(reghdr + 1); CTR3(KTR_IPMF, "%s: register: encap ip src %s len %d", __func__, inet_ntoa(encap_ip->ip_src), ntohs(encap_ip->ip_len)); /* verify the version number of the inner packet */ if (encap_ip->ip_v != IPVERSION) { PIMSTAT_INC(pims_rcv_badregisters); CTR1(KTR_IPMF, "%s: bad encap ip version", __func__); m_freem(m); return (IPPROTO_DONE); } /* verify the inner packet is destined to a mcast group */ if (!IN_MULTICAST(ntohl(encap_ip->ip_dst.s_addr))) { PIMSTAT_INC(pims_rcv_badregisters); CTR2(KTR_IPMF, "%s: bad encap ip dest %s", __func__, inet_ntoa(encap_ip->ip_dst)); m_freem(m); return (IPPROTO_DONE); } /* If a NULL_REGISTER, pass it to the daemon */ if ((ntohl(*reghdr) & PIM_NULL_REGISTER)) goto pim_input_to_daemon; /* * Copy the TOS from the outer IP header to the inner IP header. */ if (encap_ip->ip_tos != ip_tos) { /* Outer TOS -> inner TOS */ encap_ip->ip_tos = ip_tos; /* Recompute the inner header checksum. Sigh... */ /* adjust mbuf to point to the inner IP header */ m->m_data += (iphlen + PIM_MINLEN); m->m_len -= (iphlen + PIM_MINLEN); encap_ip->ip_sum = 0; encap_ip->ip_sum = in_cksum(m, encap_ip->ip_hl << 2); /* restore mbuf to point back to the outer IP header */ m->m_data -= (iphlen + PIM_MINLEN); m->m_len += (iphlen + PIM_MINLEN); } /* * Decapsulate the inner IP packet and loopback to forward it * as a normal multicast packet. Also, make a copy of the * outer_iphdr + pimhdr + reghdr + encap_iphdr * to pass to the daemon later, so it can take the appropriate * actions (e.g., send back PIM_REGISTER_STOP). * XXX: here m->m_data points to the outer IP header. */ mcp = m_copy(m, 0, iphlen + PIM_REG_MINLEN); if (mcp == NULL) { CTR1(KTR_IPMF, "%s: m_copy() failed", __func__); m_freem(m); return (IPPROTO_DONE); } /* Keep statistics */ /* XXX: registers_bytes include only the encap. mcast pkt */ PIMSTAT_INC(pims_rcv_registers_msgs); PIMSTAT_ADD(pims_rcv_registers_bytes, ntohs(encap_ip->ip_len)); /* * forward the inner ip packet; point m_data at the inner ip. */ m_adj(m, iphlen + PIM_MINLEN); CTR4(KTR_IPMF, "%s: forward decap'd REGISTER: src %lx dst %lx vif %d", __func__, (u_long)ntohl(encap_ip->ip_src.s_addr), (u_long)ntohl(encap_ip->ip_dst.s_addr), (int)V_reg_vif_num); /* NB: vifp was collected above; can it change on us? */ if_simloop(vifp, m, dst.sin_family, 0); /* prepare the register head to send to the mrouting daemon */ m = mcp; } pim_input_to_daemon: /* * Pass the PIM message up to the daemon; if it is a Register message, * pass the 'head' only up to the daemon. This includes the * outer IP header, PIM header, PIM-Register header and the * inner IP header. * XXX: the outer IP header pkt size of a Register is not adjust to * reflect the fact that the inner multicast data is truncated. */ *mp = m; rip_input(mp, offp, proto); return (IPPROTO_DONE); } static int sysctl_mfctable(SYSCTL_HANDLER_ARGS) { struct mfc *rt; int error, i; if (req->newptr) return (EPERM); if (V_mfchashtbl == NULL) /* XXX unlocked */ return (0); error = sysctl_wire_old_buffer(req, 0); if (error) return (error); MFC_LOCK(); for (i = 0; i < mfchashsize; i++) { LIST_FOREACH(rt, &V_mfchashtbl[i], mfc_hash) { error = SYSCTL_OUT(req, rt, sizeof(struct mfc)); if (error) goto out_locked; } } out_locked: MFC_UNLOCK(); return (error); } static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD, sysctl_mfctable, "IPv4 Multicast Forwarding Table " "(struct *mfc[mfchashsize], netinet/ip_mroute.h)"); static void vnet_mroute_init(const void *unused __unused) { MALLOC(V_nexpire, u_char *, mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO); bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers)); callout_init(&V_expire_upcalls_ch, 1); callout_init(&V_bw_upcalls_ch, 1); callout_init(&V_bw_meter_ch, 1); } VNET_SYSINIT(vnet_mroute_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mroute_init, NULL); static void vnet_mroute_uninit(const void *unused __unused) { FREE(V_nexpire, M_MRTABLE); V_nexpire = NULL; } VNET_SYSUNINIT(vnet_mroute_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, vnet_mroute_uninit, NULL); static int ip_mroute_modevent(module_t mod, int type, void *unused) { switch (type) { case MOD_LOAD: MROUTER_LOCK_INIT(); if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, if_detached_event, NULL, EVENTHANDLER_PRI_ANY); if (if_detach_event_tag == NULL) { printf("ip_mroute: unable to register " "ifnet_departure_event handler\n"); MROUTER_LOCK_DESTROY(); return (EINVAL); } MFC_LOCK_INIT(); VIF_LOCK_INIT(); mfchashsize = MFCHASHSIZE; if (TUNABLE_ULONG_FETCH("net.inet.ip.mfchashsize", &mfchashsize) && !powerof2(mfchashsize)) { printf("WARNING: %s not a power of 2; using default\n", "net.inet.ip.mfchashsize"); mfchashsize = MFCHASHSIZE; } pim_squelch_wholepkt = 0; TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt", &pim_squelch_wholepkt); pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM, pim_encapcheck, &in_pim_protosw, NULL); if (pim_encap_cookie == NULL) { printf("ip_mroute: unable to attach pim encap\n"); VIF_LOCK_DESTROY(); MFC_LOCK_DESTROY(); MROUTER_LOCK_DESTROY(); return (EINVAL); } ip_mcast_src = X_ip_mcast_src; ip_mforward = X_ip_mforward; ip_mrouter_done = X_ip_mrouter_done; ip_mrouter_get = X_ip_mrouter_get; ip_mrouter_set = X_ip_mrouter_set; ip_rsvp_force_done = X_ip_rsvp_force_done; ip_rsvp_vif = X_ip_rsvp_vif; legal_vif_num = X_legal_vif_num; mrt_ioctl = X_mrt_ioctl; rsvp_input_p = X_rsvp_input; break; case MOD_UNLOAD: /* * Typically module unload happens after the user-level * process has shutdown the kernel services (the check * below insures someone can't just yank the module out * from under a running process). But if the module is * just loaded and then unloaded w/o starting up a user * process we still need to cleanup. */ MROUTER_LOCK(); if (ip_mrouter_cnt != 0) { MROUTER_UNLOCK(); return (EINVAL); } ip_mrouter_unloading = 1; MROUTER_UNLOCK(); EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag); if (pim_encap_cookie) { encap_detach(pim_encap_cookie); pim_encap_cookie = NULL; } ip_mcast_src = NULL; ip_mforward = NULL; ip_mrouter_done = NULL; ip_mrouter_get = NULL; ip_mrouter_set = NULL; ip_rsvp_force_done = NULL; ip_rsvp_vif = NULL; legal_vif_num = NULL; mrt_ioctl = NULL; rsvp_input_p = NULL; VIF_LOCK_DESTROY(); MFC_LOCK_DESTROY(); MROUTER_LOCK_DESTROY(); break; default: return EOPNOTSUPP; } return 0; } static moduledata_t ip_mroutemod = { "ip_mroute", ip_mroute_modevent, 0 }; DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); Index: projects/clang380-import/sys/netinet/ip_output.c =================================================================== --- projects/clang380-import/sys/netinet/ip_output.c (revision 293686) +++ projects/clang380-import/sys/netinet/ip_output.c (revision 293687) @@ -1,1391 +1,1392 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_ipfw.h" #include "opt_ipsec.h" #include "opt_mbuf_stress_test.h" #include "opt_mpath.h" #include "opt_route.h" #include "opt_sctp.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #include #include #include #include #include #include #include #include #ifdef SCTP #include #include #endif #ifdef IPSEC #include #include #endif /* IPSEC*/ #include #include #ifdef MBUF_STRESS_TEST static int mbuf_frag_size = 0; SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); #endif static void ip_mloopback(struct ifnet *, const struct mbuf *, int); extern int in_mcast_loop; extern struct protosw inetsw[]; static inline int ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, struct inpcb *inp, struct sockaddr_in *dst, int *fibnum, int *error) { struct m_tag *fwd_tag = NULL; struct mbuf *m; struct in_addr odst; struct ip *ip; m = *mp; ip = mtod(m, struct ip *); /* Run through list of hooks for output packets. */ odst.s_addr = ip->ip_dst.s_addr; *error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, PFIL_OUT, inp); m = *mp; if ((*error) != 0 || m == NULL) return 1; /* Finished */ ip = mtod(m, struct ip *); /* See if destination IP address was changed by packet filter. */ if (odst.s_addr != ip->ip_dst.s_addr) { m->m_flags |= M_SKIP_FIREWALL; /* If destination is now ourself drop to ip_input(). */ if (in_localip(ip->ip_dst)) { m->m_flags |= M_FASTFWD_OURS; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif *error = netisr_queue(NETISR_IP, m); return 1; /* Finished */ } bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = ip->ip_dst; return -1; /* Reloop */ } /* See if fib was changed by packet filter. */ if ((*fibnum) != M_GETFIB(m)) { m->m_flags |= M_SKIP_FIREWALL; *fibnum = M_GETFIB(m); return -1; /* Reloop for FIB change */ } /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */ if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; *error = netisr_queue(NETISR_IP, m); return 1; /* Finished */ } /* Or forward to some other address? */ if ((m->m_flags & M_IP_NEXTHOP) && ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) { bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in)); m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP_NEXTHOP; m_tag_delete(m, fwd_tag); return -1; /* Reloop for CHANGE of dst */ } return 0; } /* * IP output. The packet in mbuf chain m contains a skeletal IP * header (with len, off, ttl, proto, tos, src, dst). * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * If route ro is present and has ro_rt initialized, route lookup would be * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, * then result of route lookup is stored in ro->ro_rt. * * In the IP forwarding case, the packet will arrive with options already * inserted, so must have a NULL opt pointer. */ int ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct ip_moptions *imo, struct inpcb *inp) { struct rm_priotracker in_ifa_tracker; struct ip *ip; struct ifnet *ifp = NULL; /* keep compiler happy */ struct mbuf *m0; int hlen = sizeof (struct ip); int mtu; int error = 0; struct sockaddr_in *dst; const struct sockaddr_in *gw; struct in_ifaddr *ia; int isbroadcast; uint16_t ip_len, ip_off; struct route iproute; struct rtentry *rte; /* cache for ro->ro_rt */ uint32_t fibnum; int have_ia_ref; #ifdef IPSEC int no_route_but_check_spd = 0; #endif M_ASSERTPKTHDR(m); if (inp != NULL) { INP_LOCK_ASSERT(inp); M_SETFIB(m, inp->inp_inc.inc_fibnum); if ((flags & IP_NODEFAULTFLOWID) == 0) { m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } } if (ro == NULL) { ro = &iproute; bzero(ro, sizeof (*ro)); } #ifdef FLOWTABLE if (ro->ro_rt == NULL) (void )flowtable_lookup(AF_INET, m, ro); #endif if (opt) { int len = 0; m = ip_insertoptions(m, opt, &len); if (len != 0) hlen = len; /* ip->ip_hl is updated above */ } ip = mtod(m, struct ip *); ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { ip->ip_v = IPVERSION; ip->ip_hl = hlen >> 2; ip_fillid(ip); IPSTAT_INC(ips_localout); } else { /* Header already set, fetch hlen from there */ hlen = ip->ip_hl << 2; } /* * dst/gw handling: * * dst can be rewritten but always points to &ro->ro_dst. * gw is readonly but can point either to dst OR rt_gateway, * therefore we need restore gw if we're redoing lookup. */ gw = dst = (struct sockaddr_in *)&ro->ro_dst; fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); rte = ro->ro_rt; /* * The address family should also be checked in case of sharing * the cache with IPv6. */ if (rte == NULL || dst->sin_family != AF_INET) { bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = ip->ip_dst; } again: ia = NULL; have_ia_ref = 0; /* * If routing to interface only, short circuit routing lookup. * The use of an all-ones broadcast address implies this; an * interface is specified by the broadcast address of an interface, * or the destination address of a ptp interface. */ if (flags & IP_SENDONES) { if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst), M_GETFIB(m)))) == NULL && (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), M_GETFIB(m)))) == NULL) { IPSTAT_INC(ips_noroute); error = ENETUNREACH; goto bad; } have_ia_ref = 1; ip->ip_dst.s_addr = INADDR_BROADCAST; dst->sin_addr = ip->ip_dst; ifp = ia->ia_ifp; ip->ip_ttl = 1; isbroadcast = 1; } else if (flags & IP_ROUTETOIF) { if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), M_GETFIB(m)))) == NULL && (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0, M_GETFIB(m)))) == NULL) { IPSTAT_INC(ips_noroute); error = ENETUNREACH; goto bad; } have_ia_ref = 1; ifp = ia->ia_ifp; ip->ip_ttl = 1; isbroadcast = in_broadcast(dst->sin_addr, ifp); } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && imo != NULL && imo->imo_multicast_ifp != NULL) { /* * Bypass the normal routing lookup for multicast * packets if the interface is specified. */ ifp = imo->imo_multicast_ifp; IFP_TO_IA(ifp, ia, &in_ifa_tracker); if (ia) have_ia_ref = 1; isbroadcast = 0; /* fool gcc */ } else { /* * We want to do any cloning requested by the link layer, * as this is probably required in all cases for correct * operation (as it is for ARP). */ if (rte == NULL) { #ifdef RADIX_MPATH rtalloc_mpath_fib(ro, ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), fibnum); #else in_rtalloc_ign(ro, 0, fibnum); #endif rte = ro->ro_rt; } if (rte == NULL || (rte->rt_flags & RTF_UP) == 0 || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) { #ifdef IPSEC /* * There is no route for this packet, but it is * possible that a matching SPD entry exists. */ no_route_but_check_spd = 1; mtu = 0; /* Silence GCC warning. */ goto sendit; #endif IPSTAT_INC(ips_noroute); error = EHOSTUNREACH; goto bad; } ia = ifatoia(rte->rt_ifa); ifp = rte->rt_ifp; counter_u64_add(rte->rt_pksent, 1); + rt_update_ro_flags(ro); if (rte->rt_flags & RTF_GATEWAY) gw = (struct sockaddr_in *)rte->rt_gateway; if (rte->rt_flags & RTF_HOST) isbroadcast = (rte->rt_flags & RTF_BROADCAST); else isbroadcast = in_broadcast(gw->sin_addr, ifp); } /* * Calculate MTU. If we have a route that is up, use that, * otherwise use the interface's MTU. */ if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST))) mtu = rte->rt_mtu; else mtu = ifp->if_mtu; /* Catch a possible divide by zero later. */ KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p", __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp)); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { m->m_flags |= M_MCAST; /* * IP destination address is multicast. Make sure "gw" * still points to the address in "ro". (It may have been * changed to point to a gateway address, above.) */ gw = dst; /* * See if the caller provided any multicast options */ if (imo != NULL) { ip->ip_ttl = imo->imo_multicast_ttl; if (imo->imo_multicast_vif != -1) ip->ip_src.s_addr = ip_mcast_src ? ip_mcast_src(imo->imo_multicast_vif) : INADDR_ANY; } else ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; /* * Confirm that the outgoing interface supports multicast. */ if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { if ((ifp->if_flags & IFF_MULTICAST) == 0) { IPSTAT_INC(ips_noroute); error = ENETUNREACH; goto bad; } } /* * If source address not specified yet, use address * of outgoing interface. */ if (ip->ip_src.s_addr == INADDR_ANY) { /* Interface may have no addresses. */ if (ia != NULL) ip->ip_src = IA_SIN(ia)->sin_addr; } if ((imo == NULL && in_mcast_loop) || (imo && imo->imo_multicast_loop)) { /* * Loop back multicast datagram if not expressly * forbidden to do so, even if we are not a member * of the group; ip_input() will filter it later, * thus deferring a hash lookup and mutex acquisition * at the expense of a cheap copy using m_copym(). */ ip_mloopback(ifp, m, hlen); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. The multicast forwarding function * recursively calls this function, using the * IP_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip_mloopback(), * above, will be forwarded by the ip_input() routine, * if necessary. */ if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) { /* * If rsvp daemon is not running, do not * set ip_moptions. This ensures that the packet * is multicast and not just sent down one link * as prescribed by rsvpd. */ if (!V_rsvp_on) imo = NULL; if (ip_mforward && ip_mforward(ip, ifp, m, imo) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a time-to-live of zero may be looped- * back, above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip_mloopback() will * loop back a copy. ip_input() will drop the copy if * this host does not belong to the destination group on * the loopback interface. */ if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { m_freem(m); goto done; } goto sendit; } /* * If the source address is not specified yet, use the address * of the outoing interface. */ if (ip->ip_src.s_addr == INADDR_ANY) { /* Interface may have no addresses. */ if (ia != NULL) { ip->ip_src = IA_SIN(ia)->sin_addr; } } /* * Look for broadcast address and * verify user is allowed to send * such a packet. */ if (isbroadcast) { if ((ifp->if_flags & IFF_BROADCAST) == 0) { error = EADDRNOTAVAIL; goto bad; } if ((flags & IP_ALLOWBROADCAST) == 0) { error = EACCES; goto bad; } /* don't allow broadcast messages to be fragmented */ if (ip_len > mtu) { error = EMSGSIZE; goto bad; } m->m_flags |= M_BCAST; } else { m->m_flags &= ~M_BCAST; } sendit: #ifdef IPSEC switch(ip_ipsec_output(&m, inp, &error)) { case 1: goto bad; case -1: goto done; case 0: default: break; /* Continue with packet processing. */ } /* * Check if there was a route for this packet; return error if not. */ if (no_route_but_check_spd) { IPSTAT_INC(ips_noroute); error = EHOSTUNREACH; goto bad; } /* Update variables that are affected by ipsec4_output(). */ ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; #endif /* IPSEC */ /* Jump over all PFIL processing if hooks are not active. */ if (PFIL_HOOKED(&V_inet_pfil_hook)) { switch (ip_output_pfil(&m, ifp, inp, dst, &fibnum, &error)) { case 1: /* Finished */ goto done; case 0: /* Continue normally */ ip = mtod(m, struct ip *); break; case -1: /* Need to try again */ /* Reset everything for a new round */ RO_RTFREE(ro); if (have_ia_ref) ifa_free(&ia->ia_ifa); ro->ro_prepend = NULL; rte = NULL; gw = dst; ip = mtod(m, struct ip *); goto again; } } /* 127/8 must not appear on wire - RFC1122. */ if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { if ((ifp->if_flags & IFF_LOOPBACK) == 0) { IPSTAT_INC(ips_badaddr); error = EADDRNOTAVAIL; goto bad; } } m->m_pkthdr.csum_flags |= CSUM_IP; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) { in_delayed_cksum(m); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) { sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); m->m_pkthdr.csum_flags &= ~CSUM_SCTP; } #endif /* * If small enough for interface, or the interface will take * care of the fragmentation for us, we can just send directly. */ if (ip_len <= mtu || (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) { ip->ip_sum = 0; if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) { ip->ip_sum = in_cksum(m, hlen); m->m_pkthdr.csum_flags &= ~CSUM_IP; } /* * Record statistics for this interface address. * With CSUM_TSO the byte/packet count will be slightly * incorrect because we count the IP+TCP headers only * once instead of for every generated packet. */ if (!(flags & IP_FORWARDING) && ia) { if (m->m_pkthdr.csum_flags & CSUM_TSO) counter_u64_add(ia->ia_ifa.ifa_opackets, m->m_pkthdr.len / m->m_pkthdr.tso_segsz); else counter_u64_add(ia->ia_ifa.ifa_opackets, 1); counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } #ifdef MBUF_STRESS_TEST if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) m = m_fragment(m, M_NOWAIT, mbuf_frag_size); #endif /* * Reset layer specific mbuf flags * to avoid confusing lower layers. */ m_clrprotoflags(m); IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro); goto done; } /* Balk when DF bit is set or the interface didn't support TSO. */ if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) { error = EMSGSIZE; IPSTAT_INC(ips_cantfrag); goto bad; } /* * Too large for interface; fragment if possible. If successful, * on return, m will point to a list of packets to be sent. */ error = ip_fragment(ip, &m, mtu, ifp->if_hwassist); if (error) goto bad; for (; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { /* Record statistics for this interface address. */ if (ia != NULL) { counter_u64_add(ia->ia_ifa.ifa_opackets, 1); counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } /* * Reset layer specific mbuf flags * to avoid confusing upper layers. */ m_clrprotoflags(m); IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro); } else m_freem(m); } if (error == 0) IPSTAT_INC(ips_fragmented); done: if (ro == &iproute) RO_RTFREE(ro); else if (rte == NULL) /* * If the caller supplied a route but somehow the reference * to it has been released need to prevent the caller * calling RTFREE on it again. */ ro->ro_rt = NULL; if (have_ia_ref) ifa_free(&ia->ia_ifa); return (error); bad: m_freem(m); goto done; } /* * Create a chain of fragments which fit the given mtu. m_frag points to the * mbuf to be fragmented; on return it points to the chain with the fragments. * Return 0 if no error. If error, m_frag may contain a partially built * chain of fragments that should be freed by the caller. * * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) */ int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, u_long if_hwassist_flags) { int error = 0; int hlen = ip->ip_hl << 2; int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ int off; struct mbuf *m0 = *m_frag; /* the original packet */ int firstlen; struct mbuf **mnext; int nfrags; uint16_t ip_len, ip_off; ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); if (ip_off & IP_DF) { /* Fragmentation not allowed */ IPSTAT_INC(ips_cantfrag); return EMSGSIZE; } /* * Must be able to put at least 8 bytes per fragment. */ if (len < 8) return EMSGSIZE; /* * If the interface will not calculate checksums on * fragmented packets, then do it here. */ if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(m0); m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } #ifdef SCTP if (m0->m_pkthdr.csum_flags & CSUM_SCTP) { sctp_delayed_cksum(m0, hlen); m0->m_pkthdr.csum_flags &= ~CSUM_SCTP; } #endif if (len > PAGE_SIZE) { /* * Fragment large datagrams such that each segment * contains a multiple of PAGE_SIZE amount of data, * plus headers. This enables a receiver to perform * page-flipping zero-copy optimizations. * * XXX When does this help given that sender and receiver * could have different page sizes, and also mtu could * be less than the receiver's page size ? */ int newlen; off = MIN(mtu, m0->m_pkthdr.len); /* * firstlen (off - hlen) must be aligned on an * 8-byte boundary */ if (off < hlen) goto smart_frag_failure; off = ((off - hlen) & ~7) + hlen; newlen = (~PAGE_MASK) & mtu; if ((newlen + sizeof (struct ip)) > mtu) { /* we failed, go back the default */ smart_frag_failure: newlen = len; off = hlen + len; } len = newlen; } else { off = hlen + len; } firstlen = off - hlen; mnext = &m0->m_nextpkt; /* pointer to next packet */ /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto chain. * Here, m0 is the original packet, m is the fragment being created. * The fragments are linked off the m_nextpkt of the original * packet, which after processing serves as the first fragment. */ for (nfrags = 1; off < ip_len; off += len, nfrags++) { struct ip *mhip; /* ip header on the fragment */ struct mbuf *m; int mhlen = sizeof (struct ip); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { error = ENOBUFS; IPSTAT_INC(ips_odropped); goto done; } /* * Make sure the complete packet header gets copied * from the originating mbuf to the newly created * mbuf. This also ensures that existing firewall * classification(s), VLAN tags and so on get copied * to the resulting fragmented packet(s): */ if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) { m_free(m); error = ENOBUFS; IPSTAT_INC(ips_odropped); goto done; } /* * In the first mbuf, leave room for the link header, then * copy the original IP header including options. The payload * goes into an additional mbuf chain returned by m_copym(). */ m->m_data += max_linkhdr; mhip = mtod(m, struct ip *); *mhip = *ip; if (hlen > sizeof (struct ip)) { mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); mhip->ip_v = IPVERSION; mhip->ip_hl = mhlen >> 2; } m->m_len = mhlen; /* XXX do we need to add ip_off below ? */ mhip->ip_off = ((off - hlen) >> 3) + ip_off; if (off + len >= ip_len) len = ip_len - off; else mhip->ip_off |= IP_MF; mhip->ip_len = htons((u_short)(len + mhlen)); m->m_next = m_copym(m0, off, len, M_NOWAIT); if (m->m_next == NULL) { /* copy failed */ m_free(m); error = ENOBUFS; /* ??? */ IPSTAT_INC(ips_odropped); goto done; } m->m_pkthdr.len = mhlen + len; #ifdef MAC mac_netinet_fragment(m0, m); #endif mhip->ip_off = htons(mhip->ip_off); mhip->ip_sum = 0; if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) { mhip->ip_sum = in_cksum(m, mhlen); m->m_pkthdr.csum_flags &= ~CSUM_IP; } *mnext = m; mnext = &m->m_nextpkt; } IPSTAT_ADD(ips_ofragments, nfrags); /* * Update first fragment by trimming what's been copied out * and updating header. */ m_adj(m0, hlen + firstlen - ip_len); m0->m_pkthdr.len = hlen + firstlen; ip->ip_len = htons((u_short)m0->m_pkthdr.len); ip->ip_off = htons(ip_off | IP_MF); ip->ip_sum = 0; if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) { ip->ip_sum = in_cksum(m0, hlen); m0->m_pkthdr.csum_flags &= ~CSUM_IP; } done: *m_frag = m0; return error; } void in_delayed_cksum(struct mbuf *m) { struct ip *ip; uint16_t csum, offset, ip_len; ip = mtod(m, struct ip *); offset = ip->ip_hl << 2 ; ip_len = ntohs(ip->ip_len); csum = in_cksum_skip(m, ip_len, offset); if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) csum = 0xffff; offset += m->m_pkthdr.csum_data; /* checksum offset */ /* find the mbuf in the chain where the checksum starts*/ while ((m != NULL) && (offset >= m->m_len)) { offset -= m->m_len; m = m->m_next; } KASSERT(m != NULL, ("in_delayed_cksum: checksum outside mbuf chain.")); KASSERT(offset + sizeof(u_short) <= m->m_len, ("in_delayed_cksum: checksum split between mbufs.")); *(u_short *)(m->m_data + offset) = csum; } /* * IP socket option processing. */ int ip_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp = sotoinpcb(so); int error, optval; #ifdef RSS uint32_t rss_bucket; int retval; #endif error = optval = 0; if (sopt->sopt_level != IPPROTO_IP) { error = EINVAL; if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_dir == SOPT_SET) { switch (sopt->sopt_name) { case SO_REUSEADDR: INP_WLOCK(inp); if ((so->so_options & SO_REUSEADDR) != 0) inp->inp_flags2 |= INP_REUSEADDR; else inp->inp_flags2 &= ~INP_REUSEADDR; INP_WUNLOCK(inp); error = 0; break; case SO_REUSEPORT: INP_WLOCK(inp); if ((so->so_options & SO_REUSEPORT) != 0) inp->inp_flags2 |= INP_REUSEPORT; else inp->inp_flags2 &= ~INP_REUSEPORT; INP_WUNLOCK(inp); error = 0; break; case SO_SETFIB: INP_WLOCK(inp); inp->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(inp); error = 0; break; default: break; } } return (error); } switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { case IP_OPTIONS: #ifdef notyet case IP_RETOPTS: #endif { struct mbuf *m; if (sopt->sopt_valsize > MLEN) { error = EMSGSIZE; break; } m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA); if (m == NULL) { error = ENOBUFS; break; } m->m_len = sopt->sopt_valsize; error = sooptcopyin(sopt, mtod(m, char *), m->m_len, m->m_len); if (error) { m_free(m); break; } INP_WLOCK(inp); error = ip_pcbopts(inp, sopt->sopt_name, m); INP_WUNLOCK(inp); return (error); } case IP_BINDANY: if (sopt->sopt_td != NULL) { error = priv_check(sopt->sopt_td, PRIV_NETINET_BINDANY); if (error) break; } /* FALLTHROUGH */ case IP_BINDMULTI: #ifdef RSS case IP_RSS_LISTEN_BUCKET: #endif case IP_TOS: case IP_TTL: case IP_MINTTL: case IP_RECVOPTS: case IP_RECVRETOPTS: case IP_RECVDSTADDR: case IP_RECVTTL: case IP_RECVIF: case IP_ONESBCAST: case IP_DONTFRAG: case IP_RECVTOS: case IP_RECVFLOWID: #ifdef RSS case IP_RECVRSSBUCKETID: #endif error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (sopt->sopt_name) { case IP_TOS: inp->inp_ip_tos = optval; break; case IP_TTL: inp->inp_ip_ttl = optval; break; case IP_MINTTL: if (optval >= 0 && optval <= MAXTTL) inp->inp_ip_minttl = optval; else error = EINVAL; break; #define OPTSET(bit) do { \ INP_WLOCK(inp); \ if (optval) \ inp->inp_flags |= bit; \ else \ inp->inp_flags &= ~bit; \ INP_WUNLOCK(inp); \ } while (0) #define OPTSET2(bit, val) do { \ INP_WLOCK(inp); \ if (val) \ inp->inp_flags2 |= bit; \ else \ inp->inp_flags2 &= ~bit; \ INP_WUNLOCK(inp); \ } while (0) case IP_RECVOPTS: OPTSET(INP_RECVOPTS); break; case IP_RECVRETOPTS: OPTSET(INP_RECVRETOPTS); break; case IP_RECVDSTADDR: OPTSET(INP_RECVDSTADDR); break; case IP_RECVTTL: OPTSET(INP_RECVTTL); break; case IP_RECVIF: OPTSET(INP_RECVIF); break; case IP_ONESBCAST: OPTSET(INP_ONESBCAST); break; case IP_DONTFRAG: OPTSET(INP_DONTFRAG); break; case IP_BINDANY: OPTSET(INP_BINDANY); break; case IP_RECVTOS: OPTSET(INP_RECVTOS); break; case IP_BINDMULTI: OPTSET2(INP_BINDMULTI, optval); break; case IP_RECVFLOWID: OPTSET2(INP_RECVFLOWID, optval); break; #ifdef RSS case IP_RSS_LISTEN_BUCKET: if ((optval >= 0) && (optval < rss_getnumbuckets())) { inp->inp_rss_listen_bucket = optval; OPTSET2(INP_RSS_BUCKET_SET, 1); } else { error = EINVAL; } break; case IP_RECVRSSBUCKETID: OPTSET2(INP_RECVRSSBUCKETID, optval); break; #endif } break; #undef OPTSET #undef OPTSET2 /* * Multicast socket options are processed by the in_mcast * module. */ case IP_MULTICAST_IF: case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: case IP_ADD_MEMBERSHIP: case IP_DROP_MEMBERSHIP: case IP_ADD_SOURCE_MEMBERSHIP: case IP_DROP_SOURCE_MEMBERSHIP: case IP_BLOCK_SOURCE: case IP_UNBLOCK_SOURCE: case IP_MSFILTER: case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: error = inp_setmoptions(inp, sopt); break; case IP_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; INP_WLOCK(inp); switch (optval) { case IP_PORTRANGE_DEFAULT: inp->inp_flags &= ~(INP_LOWPORT); inp->inp_flags &= ~(INP_HIGHPORT); break; case IP_PORTRANGE_HIGH: inp->inp_flags &= ~(INP_LOWPORT); inp->inp_flags |= INP_HIGHPORT; break; case IP_PORTRANGE_LOW: inp->inp_flags &= ~(INP_HIGHPORT); inp->inp_flags |= INP_LOWPORT; break; default: error = EINVAL; break; } INP_WUNLOCK(inp); break; #ifdef IPSEC case IP_IPSEC_POLICY: { caddr_t req; struct mbuf *m; if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ break; req = mtod(m, caddr_t); error = ipsec_set_policy(inp, sopt->sopt_name, req, m->m_len, (sopt->sopt_td != NULL) ? sopt->sopt_td->td_ucred : NULL); m_freem(m); break; } #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (sopt->sopt_name) { case IP_OPTIONS: case IP_RETOPTS: if (inp->inp_options) error = sooptcopyout(sopt, mtod(inp->inp_options, char *), inp->inp_options->m_len); else sopt->sopt_valsize = 0; break; case IP_TOS: case IP_TTL: case IP_MINTTL: case IP_RECVOPTS: case IP_RECVRETOPTS: case IP_RECVDSTADDR: case IP_RECVTTL: case IP_RECVIF: case IP_PORTRANGE: case IP_ONESBCAST: case IP_DONTFRAG: case IP_BINDANY: case IP_RECVTOS: case IP_BINDMULTI: case IP_FLOWID: case IP_FLOWTYPE: case IP_RECVFLOWID: #ifdef RSS case IP_RSSBUCKETID: case IP_RECVRSSBUCKETID: #endif switch (sopt->sopt_name) { case IP_TOS: optval = inp->inp_ip_tos; break; case IP_TTL: optval = inp->inp_ip_ttl; break; case IP_MINTTL: optval = inp->inp_ip_minttl; break; #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) #define OPTBIT2(bit) (inp->inp_flags2 & bit ? 1 : 0) case IP_RECVOPTS: optval = OPTBIT(INP_RECVOPTS); break; case IP_RECVRETOPTS: optval = OPTBIT(INP_RECVRETOPTS); break; case IP_RECVDSTADDR: optval = OPTBIT(INP_RECVDSTADDR); break; case IP_RECVTTL: optval = OPTBIT(INP_RECVTTL); break; case IP_RECVIF: optval = OPTBIT(INP_RECVIF); break; case IP_PORTRANGE: if (inp->inp_flags & INP_HIGHPORT) optval = IP_PORTRANGE_HIGH; else if (inp->inp_flags & INP_LOWPORT) optval = IP_PORTRANGE_LOW; else optval = 0; break; case IP_ONESBCAST: optval = OPTBIT(INP_ONESBCAST); break; case IP_DONTFRAG: optval = OPTBIT(INP_DONTFRAG); break; case IP_BINDANY: optval = OPTBIT(INP_BINDANY); break; case IP_RECVTOS: optval = OPTBIT(INP_RECVTOS); break; case IP_FLOWID: optval = inp->inp_flowid; break; case IP_FLOWTYPE: optval = inp->inp_flowtype; break; case IP_RECVFLOWID: optval = OPTBIT2(INP_RECVFLOWID); break; #ifdef RSS case IP_RSSBUCKETID: retval = rss_hash2bucket(inp->inp_flowid, inp->inp_flowtype, &rss_bucket); if (retval == 0) optval = rss_bucket; else error = EINVAL; break; case IP_RECVRSSBUCKETID: optval = OPTBIT2(INP_RECVRSSBUCKETID); break; #endif case IP_BINDMULTI: optval = OPTBIT2(INP_BINDMULTI); break; } error = sooptcopyout(sopt, &optval, sizeof optval); break; /* * Multicast socket options are processed by the in_mcast * module. */ case IP_MULTICAST_IF: case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: case IP_MSFILTER: error = inp_getmoptions(inp, sopt); break; #ifdef IPSEC case IP_IPSEC_POLICY: { struct mbuf *m = NULL; caddr_t req = NULL; size_t len = 0; if (m != 0) { req = mtod(m, caddr_t); len = m->m_len; } error = ipsec_get_policy(sotoinpcb(so), req, len, &m); if (error == 0) error = soopt_mcopyout(sopt, m); /* XXX */ if (error == 0) m_freem(m); break; } #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; } return (error); } /* * Routine called from ip_output() to loop back a copy of an IP multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be a loopback interface -- evil, but easier than * replicating that code here. */ static void ip_mloopback(struct ifnet *ifp, const struct mbuf *m, int hlen) { struct ip *ip; struct mbuf *copym; /* * Make a deep copy of the packet because we're going to * modify the pack in order to generate checksums. */ copym = m_dup(m, M_NOWAIT); if (copym != NULL && (!M_WRITABLE(copym) || copym->m_len < hlen)) copym = m_pullup(copym, hlen); if (copym != NULL) { /* If needed, compute the checksum and mark it as valid. */ if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(copym); copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; copym->m_pkthdr.csum_data = 0xffff; } /* * We don't bother to fragment if the IP length is greater * than the interface's MTU. Can this possibly matter? */ ip = mtod(copym, struct ip *); ip->ip_sum = 0; ip->ip_sum = in_cksum(copym, hlen); if_simloop(ifp, copym, AF_INET, 0); } } Index: projects/clang380-import/sys/netinet/raw_ip.c =================================================================== --- projects/clang380-import/sys/netinet/raw_ip.c (revision 293686) +++ projects/clang380-import/sys/netinet/raw_ip.c (revision 293687) @@ -1,1131 +1,1132 @@ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #endif /*IPSEC*/ #include #include VNET_DEFINE(int, ip_defttl) = IPDEFTTL; SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_defttl), 0, "Maximum TTL on IP packets"); VNET_DEFINE(struct inpcbhead, ripcb); VNET_DEFINE(struct inpcbinfo, ripcbinfo); #define V_ripcb VNET(ripcb) #define V_ripcbinfo VNET(ripcbinfo) /* * Control and data hooks for ipfw, dummynet, divert and so on. * The data hooks are not used here but it is convenient * to keep them all in one place. */ VNET_DEFINE(ip_fw_chk_ptr_t, ip_fw_chk_ptr) = NULL; VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL; int (*ip_dn_ctl_ptr)(struct sockopt *); int (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *); void (*ip_divert_ptr)(struct mbuf *, int); int (*ng_ipfw_input_p)(struct mbuf **, int, struct ip_fw_args *, int); #ifdef INET /* * Hooks for multicast routing. They all default to NULL, so leave them not * initialized and rely on BSS being set to 0. */ /* * The socket used to communicate with the multicast routing daemon. */ VNET_DEFINE(struct socket *, ip_mrouter); /* * The various mrouter and rsvp functions. */ int (*ip_mrouter_set)(struct socket *, struct sockopt *); int (*ip_mrouter_get)(struct socket *, struct sockopt *); int (*ip_mrouter_done)(void); int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *); int (*mrt_ioctl)(u_long, caddr_t, int); int (*legal_vif_num)(int); u_long (*ip_mcast_src)(int); int (*rsvp_input_p)(struct mbuf **, int *, int); int (*ip_rsvp_vif)(struct socket *, struct sockopt *); void (*ip_rsvp_force_done)(struct socket *); #endif /* INET */ u_long rip_sendspace = 9216; SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); u_long rip_recvspace = 9216; SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams"); /* * Hash functions */ #define INP_PCBHASH_RAW_SIZE 256 #define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \ (((proto) + (laddr) + (faddr)) % (mask) + 1) #ifdef INET static void rip_inshash(struct inpcb *inp) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbhead *pcbhash; int hash; INP_INFO_WLOCK_ASSERT(pcbinfo); INP_WLOCK_ASSERT(inp); if (inp->inp_ip_p != 0 && inp->inp_laddr.s_addr != INADDR_ANY && inp->inp_faddr.s_addr != INADDR_ANY) { hash = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr, inp->inp_faddr.s_addr, pcbinfo->ipi_hashmask); } else hash = 0; pcbhash = &pcbinfo->ipi_hashbase[hash]; LIST_INSERT_HEAD(pcbhash, inp, inp_hash); } static void rip_delhash(struct inpcb *inp) { INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); INP_WLOCK_ASSERT(inp); LIST_REMOVE(inp, inp_hash); } #endif /* INET */ /* * Raw interface to IP protocol. */ /* * Initialize raw connection block q. */ static void rip_zone_change(void *tag) { uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets); } static int rip_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp = mem; INP_LOCK_INIT(inp, "inp", "rawinp"); return (0); } void rip_init(void) { in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE, 1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE, IPI_HASHFIELDS_NONE); EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL, EVENTHANDLER_PRI_ANY); } #ifdef VIMAGE void rip_destroy(void) { in_pcbinfo_destroy(&V_ripcbinfo); } #endif #ifdef INET static int rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n, struct sockaddr_in *ripsrc) { int policyfail = 0; INP_LOCK_ASSERT(last); #ifdef IPSEC /* check AH/ESP integrity. */ if (ipsec4_in_reject(n, last)) { policyfail = 1; } #endif /* IPSEC */ #ifdef MAC if (!policyfail && mac_inpcb_check_deliver(last, n) != 0) policyfail = 1; #endif /* Check the minimum TTL for socket. */ if (last->inp_ip_minttl && last->inp_ip_minttl > ip->ip_ttl) policyfail = 1; if (!policyfail) { struct mbuf *opts = NULL; struct socket *so; so = last->inp_socket; if ((last->inp_flags & INP_CONTROLOPTS) || (so->so_options & (SO_TIMESTAMP | SO_BINTIME))) ip_savecontrol(last, &opts, ip, n); SOCKBUF_LOCK(&so->so_rcv); if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)ripsrc, n, opts) == 0) { /* should notify about lost packet */ m_freem(n); if (opts) m_freem(opts); SOCKBUF_UNLOCK(&so->so_rcv); } else sorwakeup_locked(so); } else m_freem(n); return (policyfail); } /* * Setup generic address and protocol structures for raw_input routine, then * pass them along with mbuf chain. */ int rip_input(struct mbuf **mp, int *offp, int proto) { struct ifnet *ifp; struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct inpcb *inp, *last; struct sockaddr_in ripsrc; int hash; *mp = NULL; bzero(&ripsrc, sizeof(ripsrc)); ripsrc.sin_len = sizeof(ripsrc); ripsrc.sin_family = AF_INET; ripsrc.sin_addr = ip->ip_src; last = NULL; ifp = m->m_pkthdr.rcvif; hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr, ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask); INP_INFO_RLOCK(&V_ripcbinfo); LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) { if (inp->inp_ip_p != proto) continue; #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr) continue; if (inp->inp_faddr.s_addr != ip->ip_src.s_addr) continue; if (jailed_without_vnet(inp->inp_cred)) { /* * XXX: If faddr was bound to multicast group, * jailed raw socket will drop datagram. */ if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0) continue; } if (last != NULL) { struct mbuf *n; n = m_copy(m, 0, (int)M_COPYALL); if (n != NULL) (void) rip_append(last, ip, n, &ripsrc); /* XXX count dropped packet */ INP_RUNLOCK(last); } INP_RLOCK(inp); last = inp; } LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) { if (inp->inp_ip_p && inp->inp_ip_p != proto) continue; #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (!in_nullhost(inp->inp_laddr) && !in_hosteq(inp->inp_laddr, ip->ip_dst)) continue; if (!in_nullhost(inp->inp_faddr) && !in_hosteq(inp->inp_faddr, ip->ip_src)) continue; if (jailed_without_vnet(inp->inp_cred)) { /* * Allow raw socket in jail to receive multicast; * assume process had PRIV_NETINET_RAW at attach, * and fall through into normal filter path if so. */ if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0) continue; } /* * If this raw socket has multicast state, and we * have received a multicast, check if this socket * should receive it, as multicast filtering is now * the responsibility of the transport layer. */ if (inp->inp_moptions != NULL && IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { /* * If the incoming datagram is for IGMP, allow it * through unconditionally to the raw socket. * * In the case of IGMPv2, we may not have explicitly * joined the group, and may have set IFF_ALLMULTI * on the interface. imo_multi_filter() may discard * control traffic we actually need to see. * * Userland multicast routing daemons should continue * filter the control traffic appropriately. */ int blocked; blocked = MCAST_PASS; if (proto != IPPROTO_IGMP) { struct sockaddr_in group; bzero(&group, sizeof(struct sockaddr_in)); group.sin_len = sizeof(struct sockaddr_in); group.sin_family = AF_INET; group.sin_addr = ip->ip_dst; blocked = imo_multi_filter(inp->inp_moptions, ifp, (struct sockaddr *)&group, (struct sockaddr *)&ripsrc); } if (blocked != MCAST_PASS) { IPSTAT_INC(ips_notmember); continue; } } if (last != NULL) { struct mbuf *n; n = m_copy(m, 0, (int)M_COPYALL); if (n != NULL) (void) rip_append(last, ip, n, &ripsrc); /* XXX count dropped packet */ INP_RUNLOCK(last); } INP_RLOCK(inp); last = inp; } INP_INFO_RUNLOCK(&V_ripcbinfo); if (last != NULL) { if (rip_append(last, ip, m, &ripsrc) != 0) IPSTAT_INC(ips_delivered); INP_RUNLOCK(last); } else { m_freem(m); IPSTAT_INC(ips_noproto); IPSTAT_DEC(ips_delivered); } return (IPPROTO_DONE); } /* * Generate IP header and pass packet to ip_output. Tack on options user may * have setup with control call. */ int rip_output(struct mbuf *m, struct socket *so, ...) { struct ip *ip; int error; struct inpcb *inp = sotoinpcb(so); va_list ap; u_long dst; int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) | IP_ALLOWBROADCAST; va_start(ap, so); dst = va_arg(ap, u_long); va_end(ap); /* * If the user handed us a complete IP packet, use it. Otherwise, * allocate an mbuf for a header and fill it in. */ if ((inp->inp_flags & INP_HDRINCL) == 0) { if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) { m_freem(m); return(EMSGSIZE); } M_PREPEND(m, sizeof(struct ip), M_NOWAIT); if (m == NULL) return(ENOBUFS); INP_RLOCK(inp); ip = mtod(m, struct ip *); ip->ip_tos = inp->inp_ip_tos; if (inp->inp_flags & INP_DONTFRAG) ip->ip_off = htons(IP_DF); else ip->ip_off = htons(0); ip->ip_p = inp->inp_ip_p; ip->ip_len = htons(m->m_pkthdr.len); ip->ip_src = inp->inp_laddr; ip->ip_dst.s_addr = dst; if (jailed(inp->inp_cred)) { /* * prison_local_ip4() would be good enough but would * let a source of INADDR_ANY pass, which we do not * want to see from jails. */ if (ip->ip_src.s_addr == INADDR_ANY) { error = in_pcbladdr(inp, &ip->ip_dst, &ip->ip_src, inp->inp_cred); } else { error = prison_local_ip4(inp->inp_cred, &ip->ip_src); } if (error != 0) { INP_RUNLOCK(inp); m_freem(m); return (error); } } ip->ip_ttl = inp->inp_ip_ttl; } else { if (m->m_pkthdr.len > IP_MAXPACKET) { m_freem(m); return(EMSGSIZE); } INP_RLOCK(inp); ip = mtod(m, struct ip *); error = prison_check_ip4(inp->inp_cred, &ip->ip_src); if (error != 0) { INP_RUNLOCK(inp); m_freem(m); return (error); } /* * Don't allow both user specified and setsockopt options, * and don't allow packet length sizes that will crash. */ if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) || (ntohs(ip->ip_len) > m->m_pkthdr.len) || (ntohs(ip->ip_len) < (ip->ip_hl << 2))) { INP_RUNLOCK(inp); m_freem(m); return (EINVAL); } /* * This doesn't allow application to specify ID of zero, * but we got this limitation from the beginning of history. */ if (ip->ip_id == 0) ip_fillid(ip); /* * XXX prevent ip_output from overwriting header fields. */ flags |= IP_RAWOUTPUT; IPSTAT_INC(ips_rawout); } if (inp->inp_flags & INP_ONESBCAST) flags |= IP_SENDONES; #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif error = ip_output(m, inp->inp_options, NULL, flags, inp->inp_moptions, inp); INP_RUNLOCK(inp); return (error); } /* * Raw IP socket option processing. * * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could * only be created by a privileged process, and as such, socket option * operations to manage system properties on any raw socket were allowed to * take place without explicit additional access control checks. However, * raw sockets can now also be created in jail(), and therefore explicit * checks are now required. Likewise, raw sockets can be used by a process * after it gives up privilege, so some caution is required. For options * passed down to the IP layer via ip_ctloutput(), checks are assumed to be * performed in ip_ctloutput() and therefore no check occurs here. * Unilaterally checking priv_check() here breaks normal IP socket option * operations on raw sockets. * * When adding new socket options here, make sure to add access control * checks here as necessary. * * XXX-BZ inp locking? */ int rip_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp = sotoinpcb(so); int error, optval; if (sopt->sopt_level != IPPROTO_IP) { if ((sopt->sopt_level == SOL_SOCKET) && (sopt->sopt_name == SO_SETFIB)) { inp->inp_inc.inc_fibnum = so->so_fibnum; return (0); } return (EINVAL); } error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case IP_HDRINCL: optval = inp->inp_flags & INP_HDRINCL; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IP_FW3: /* generic ipfw v.3 functions */ case IP_FW_ADD: /* ADD actually returns the body... */ case IP_FW_GET: case IP_FW_TABLE_GETSIZE: case IP_FW_TABLE_LIST: case IP_FW_NAT_GET_CONFIG: case IP_FW_NAT_GET_LOG: if (V_ip_fw_ctl_ptr != NULL) error = V_ip_fw_ctl_ptr(sopt); else error = ENOPROTOOPT; break; case IP_DUMMYNET3: /* generic dummynet v.3 functions */ case IP_DUMMYNET_GET: if (ip_dn_ctl_ptr != NULL) error = ip_dn_ctl_ptr(sopt); else error = ENOPROTOOPT; break ; case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_mrouter_get ? ip_mrouter_get(so, sopt) : EOPNOTSUPP; break; default: error = ip_ctloutput(so, sopt); break; } break; case SOPT_SET: switch (sopt->sopt_name) { case IP_HDRINCL: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; if (optval) inp->inp_flags |= INP_HDRINCL; else inp->inp_flags &= ~INP_HDRINCL; break; case IP_FW3: /* generic ipfw v.3 functions */ case IP_FW_ADD: case IP_FW_DEL: case IP_FW_FLUSH: case IP_FW_ZERO: case IP_FW_RESETLOG: case IP_FW_TABLE_ADD: case IP_FW_TABLE_DEL: case IP_FW_TABLE_FLUSH: case IP_FW_NAT_CFG: case IP_FW_NAT_DEL: if (V_ip_fw_ctl_ptr != NULL) error = V_ip_fw_ctl_ptr(sopt); else error = ENOPROTOOPT; break; case IP_DUMMYNET3: /* generic dummynet v.3 functions */ case IP_DUMMYNET_CONFIGURE: case IP_DUMMYNET_DEL: case IP_DUMMYNET_FLUSH: if (ip_dn_ctl_ptr != NULL) error = ip_dn_ctl_ptr(sopt); else error = ENOPROTOOPT ; break ; case IP_RSVP_ON: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_init(so); break; case IP_RSVP_OFF: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_done(); break; case IP_RSVP_VIF_ON: case IP_RSVP_VIF_OFF: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_vif ? ip_rsvp_vif(so, sopt) : EINVAL; break; case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_mrouter_set ? ip_mrouter_set(so, sopt) : EOPNOTSUPP; break; default: error = ip_ctloutput(so, sopt); break; } break; } return (error); } /* * This function exists solely to receive the PRC_IFDOWN messages which are * sent by if_down(). It looks for an ifaddr whose ifa_addr is sa, and calls * in_ifadown() to remove all routes corresponding to that address. It also * receives the PRC_IFUP messages from if_up() and reinstalls the interface * routes. */ void rip_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct rm_priotracker in_ifa_tracker; struct in_ifaddr *ia; struct ifnet *ifp; int err; int flags; switch (cmd) { case PRC_IFDOWN: IN_IFADDR_RLOCK(&in_ifa_tracker); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (ia->ia_ifa.ifa_addr == sa && (ia->ia_flags & IFA_ROUTE)) { ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(&in_ifa_tracker); /* * in_scrubprefix() kills the interface route. */ in_scrubprefix(ia, 0); /* * in_ifadown gets rid of all the rest of the * routes. This is not quite the right thing * to do, but at least if we are running a * routing process they will come back. */ in_ifadown(&ia->ia_ifa, 0); ifa_free(&ia->ia_ifa); break; } } if (ia == NULL) /* If ia matched, already unlocked. */ IN_IFADDR_RUNLOCK(&in_ifa_tracker); break; case PRC_IFUP: IN_IFADDR_RLOCK(&in_ifa_tracker); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (ia->ia_ifa.ifa_addr == sa) break; } if (ia == NULL || (ia->ia_flags & IFA_ROUTE)) { IN_IFADDR_RUNLOCK(&in_ifa_tracker); return; } ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(&in_ifa_tracker); flags = RTF_UP; ifp = ia->ia_ifa.ifa_ifp; if ((ifp->if_flags & IFF_LOOPBACK) || (ifp->if_flags & IFF_POINTOPOINT)) flags |= RTF_HOST; err = ifa_del_loopback_route((struct ifaddr *)ia, sa); err = rtinit(&ia->ia_ifa, RTM_ADD, flags); if (err == 0) ia->ia_flags |= IFA_ROUTE; err = ifa_add_loopback_route((struct ifaddr *)ia, sa); ifa_free(&ia->ia_ifa); break; } } static int rip_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp == NULL, ("rip_attach: inp != NULL")); error = priv_check(td, PRIV_NETINET_RAW); if (error) return (error); if (proto >= IPPROTO_MAX || proto < 0) return EPROTONOSUPPORT; error = soreserve(so, rip_sendspace, rip_recvspace); if (error) return (error); INP_INFO_WLOCK(&V_ripcbinfo); error = in_pcballoc(so, &V_ripcbinfo); if (error) { INP_INFO_WUNLOCK(&V_ripcbinfo); return (error); } inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV4; inp->inp_ip_p = proto; inp->inp_ip_ttl = V_ip_defttl; rip_inshash(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); INP_WUNLOCK(inp); return (0); } static void rip_detach(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_detach: inp == NULL")); KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, ("rip_detach: not closed")); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); if (so == V_ip_mrouter && ip_mrouter_done) ip_mrouter_done(); if (ip_rsvp_force_done) ip_rsvp_force_done(so); if (so == V_ip_rsvpd) ip_rsvp_done(); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); } static void rip_dodisconnect(struct socket *so, struct inpcb *inp) { struct inpcbinfo *pcbinfo; pcbinfo = inp->inp_pcbinfo; INP_INFO_WLOCK(pcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_faddr.s_addr = INADDR_ANY; rip_inshash(inp); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; SOCK_UNLOCK(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(pcbinfo); } static void rip_abort(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_abort: inp == NULL")); rip_dodisconnect(so, inp); } static void rip_close(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_close: inp == NULL")); rip_dodisconnect(so, inp); } static int rip_disconnect(struct socket *so) { struct inpcb *inp; if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_disconnect: inp == NULL")); rip_dodisconnect(so, inp); return (0); } static int rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_in *addr = (struct sockaddr_in *)nam; struct inpcb *inp; int error; if (nam->sa_len != sizeof(*addr)) return (EINVAL); error = prison_check_ip4(td->td_ucred, &addr->sin_addr); if (error != 0) return (error); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_bind: inp == NULL")); if (TAILQ_EMPTY(&V_ifnet) || (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) || (addr->sin_addr.s_addr && (inp->inp_flags & INP_BINDANY) == 0 && ifa_ifwithaddr_check((struct sockaddr *)addr) == 0)) return (EADDRNOTAVAIL); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_laddr = addr->sin_addr; rip_inshash(inp); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_in *addr = (struct sockaddr_in *)nam; struct inpcb *inp; if (nam->sa_len != sizeof(*addr)) return (EINVAL); if (TAILQ_EMPTY(&V_ifnet)) return (EADDRNOTAVAIL); if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) return (EAFNOSUPPORT); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_connect: inp == NULL")); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_faddr = addr->sin_addr; rip_inshash(inp); soisconnected(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return (0); } static int rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct inpcb *inp; u_long dst; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_send: inp == NULL")); /* * Note: 'dst' reads below are unlocked. */ if (so->so_state & SS_ISCONNECTED) { if (nam) { m_freem(m); return (EISCONN); } dst = inp->inp_faddr.s_addr; /* Unlocked read. */ } else { if (nam == NULL) { m_freem(m); return (ENOTCONN); } dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr; } return (rip_output(m, so, dst)); } #endif /* INET */ static int rip_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = V_ripcbinfo.ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return (0); } if (req->newptr != 0) return (EPERM); /* * OK, now we're committed to doing something. */ INP_INFO_RLOCK(&V_ripcbinfo); gencnt = V_ripcbinfo.ipi_gencnt; n = V_ripcbinfo.ipi_count; INP_INFO_RUNLOCK(&V_ripcbinfo); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == 0) return (ENOMEM); INP_INFO_RLOCK(&V_ripcbinfo); for (inp = LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt && cr_canseeinpcb(req->td->td_ucred, inp) == 0) { in_pcbref(inp); inp_list[i++] = inp; } INP_WUNLOCK(inp); } INP_INFO_RUNLOCK(&V_ripcbinfo); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; bzero(&xi, sizeof(xi)); xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ bcopy(inp, &xi.xi_inp, sizeof *inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); } else INP_RUNLOCK(inp); } INP_INFO_WLOCK(&V_ripcbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (!in_pcbrele_rlocked(inp)) INP_RUNLOCK(inp); } INP_INFO_WUNLOCK(&V_ripcbinfo); if (!error) { /* * Give the user an updated idea of our state. If the * generation differs from what we told her before, she knows * that something happened while we were processing this * request, and it might be necessary to retry. */ INP_INFO_RLOCK(&V_ripcbinfo); xig.xig_gen = V_ripcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_ripcbinfo.ipi_count; INP_INFO_RUNLOCK(&V_ripcbinfo); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); #ifdef INET struct pr_usrreqs rip_usrreqs = { .pru_abort = rip_abort, .pru_attach = rip_attach, .pru_bind = rip_bind, .pru_connect = rip_connect, .pru_control = in_control, .pru_detach = rip_detach, .pru_disconnect = rip_disconnect, .pru_peeraddr = in_getpeeraddr, .pru_send = rip_send, .pru_shutdown = rip_shutdown, .pru_sockaddr = in_getsockaddr, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = rip_close, }; #endif /* INET */ Index: projects/clang380-import/sys/netinet/tcp_reass.c =================================================================== --- projects/clang380-import/sys/netinet/tcp_reass.c (revision 293686) +++ projects/clang380-import/sys/netinet/tcp_reass.c (revision 293687) @@ -1,332 +1,333 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_tcpdebug.h" #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef TCPDEBUG #include #endif /* TCPDEBUG */ static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0, "TCP Segment Reassembly Queue"); static int tcp_reass_maxseg = 0; SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN, &tcp_reass_maxseg, 0, "Global maximum number of TCP Segments in Reassembly Queue"); static uma_zone_t tcp_reass_zone; SYSCTL_UMA_CUR(_net_inet_tcp_reass, OID_AUTO, cursegments, 0, &tcp_reass_zone, "Global number of TCP Segments currently in Reassembly Queue"); /* Initialize TCP reassembly queue */ static void tcp_reass_zone_change(void *tag) { /* Set the zone limit and read back the effective value. */ tcp_reass_maxseg = nmbclusters / 16; tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg); } void tcp_reass_global_init(void) { tcp_reass_maxseg = nmbclusters / 16; TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments", &tcp_reass_maxseg); tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); /* Set the zone limit and read back the effective value. */ tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg); EVENTHANDLER_REGISTER(nmbclusters_change, tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY); } void tcp_reass_flush(struct tcpcb *tp) { struct tseg_qent *qe; INP_WLOCK_ASSERT(tp->t_inpcb); while ((qe = LIST_FIRST(&tp->t_segq)) != NULL) { LIST_REMOVE(qe, tqe_q); m_freem(qe->tqe_m); uma_zfree(tcp_reass_zone, qe); tp->t_segqlen--; } KASSERT((tp->t_segqlen == 0), ("TCP reass queue %p segment count is %d instead of 0 after flush.", tp, tp->t_segqlen)); } int tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m) { struct tseg_qent *q; struct tseg_qent *p = NULL; struct tseg_qent *nq; struct tseg_qent *te = NULL; struct socket *so = tp->t_inpcb->inp_socket; char *s = NULL; int flags; struct tseg_qent tqs; INP_WLOCK_ASSERT(tp->t_inpcb); /* * XXX: tcp_reass() is rather inefficient with its data structures * and should be rewritten (see NetBSD for optimizations). */ /* * Call with th==NULL after become established to * force pre-ESTABLISHED data up to user socket. */ if (th == NULL) goto present; /* * Limit the number of segments that can be queued to reduce the * potential for mbuf exhaustion. For best performance, we want to be * able to queue a full window's worth of segments. The size of the * socket receive buffer determines our advertised window and grows * automatically when socket buffer autotuning is enabled. Use it as the * basis for our queue limit. * Always let the missing segment through which caused this queue. * NB: Access to the socket buffer is left intentionally unlocked as we * can tolerate stale information here. * * XXXLAS: Using sbspace(so->so_rcv) instead of so->so_rcv.sb_hiwat * should work but causes packets to be dropped when they shouldn't. * Investigate why and re-evaluate the below limit after the behaviour * is understood. */ if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) && tp->t_segqlen >= (so->so_rcv.sb_hiwat / tp->t_maxseg) + 1) { TCPSTAT_INC(tcps_rcvreassfull); *tlenp = 0; if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: queue limit reached, " "segment dropped\n", s, __func__); free(s, M_TCPLOG); } m_freem(m); return (0); } /* * Allocate a new queue entry. If we can't, or hit the zone limit * just drop the pkt. * * Use a temporary structure on the stack for the missing segment * when the zone is exhausted. Otherwise we may get stuck. */ te = uma_zalloc(tcp_reass_zone, M_NOWAIT); if (te == NULL) { if (th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) { TCPSTAT_INC(tcps_rcvmemdrop); m_freem(m); *tlenp = 0; if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: global zone limit " "reached, segment dropped\n", s, __func__); free(s, M_TCPLOG); } return (0); } else { bzero(&tqs, sizeof(struct tseg_qent)); te = &tqs; if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: global zone limit reached, using " "stack for missing segment\n", s, __func__); free(s, M_TCPLOG); } } } tp->t_segqlen++; /* * Find a segment which begins after this one does. */ LIST_FOREACH(q, &tp->t_segq, tqe_q) { if (SEQ_GT(q->tqe_th->th_seq, th->th_seq)) break; p = q; } /* * If there is a preceding segment, it may provide some of * our data already. If so, drop the data from the incoming * segment. If it provides all of our data, drop us. */ if (p != NULL) { int i; /* conversion to int (in i) handles seq wraparound */ i = p->tqe_th->th_seq + p->tqe_len - th->th_seq; if (i > 0) { if (i >= *tlenp) { TCPSTAT_INC(tcps_rcvduppack); TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp); m_freem(m); if (te != &tqs) uma_zfree(tcp_reass_zone, te); tp->t_segqlen--; /* * Try to present any queued data * at the left window edge to the user. * This is needed after the 3-WHS * completes. */ goto present; /* ??? */ } m_adj(m, i); *tlenp -= i; th->th_seq += i; } } tp->t_rcvoopack++; TCPSTAT_INC(tcps_rcvoopack); TCPSTAT_ADD(tcps_rcvoobyte, *tlenp); /* * While we overlap succeeding segments trim them or, * if they are completely covered, dequeue them. */ while (q) { int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq; if (i <= 0) break; if (i < q->tqe_len) { q->tqe_th->th_seq += i; q->tqe_len -= i; m_adj(q->tqe_m, i); break; } nq = LIST_NEXT(q, tqe_q); LIST_REMOVE(q, tqe_q); m_freem(q->tqe_m); uma_zfree(tcp_reass_zone, q); tp->t_segqlen--; q = nq; } /* Insert the new segment queue entry into place. */ te->tqe_m = m; te->tqe_th = th; te->tqe_len = *tlenp; if (p == NULL) { LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q); } else { KASSERT(te != &tqs, ("%s: temporary stack based entry not " "first element in queue", __func__)); LIST_INSERT_AFTER(p, te, tqe_q); } present: /* * Present data to user, advancing rcv_nxt through * completed sequence space. */ if (!TCPS_HAVEESTABLISHED(tp->t_state)) return (0); q = LIST_FIRST(&tp->t_segq); if (!q || q->tqe_th->th_seq != tp->rcv_nxt) return (0); SOCKBUF_LOCK(&so->so_rcv); do { tp->rcv_nxt += q->tqe_len; flags = q->tqe_th->th_flags & TH_FIN; nq = LIST_NEXT(q, tqe_q); LIST_REMOVE(q, tqe_q); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) m_freem(q->tqe_m); else sbappendstream_locked(&so->so_rcv, q->tqe_m, 0); if (q != &tqs) uma_zfree(tcp_reass_zone, q); tp->t_segqlen--; q = nq; } while (q && q->tqe_th->th_seq == tp->rcv_nxt); sorwakeup_locked(so); return (flags); } Index: projects/clang380-import/sys/netinet/tcp_subr.c =================================================================== --- projects/clang380-import/sys/netinet/tcp_subr.c (revision 293686) +++ projects/clang380-import/sys/netinet/tcp_subr.c (revision 293687) @@ -1,2920 +1,2921 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_tcpdebug.h" #include #include #include +#include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #include #include #endif #ifdef TCP_RFC7413 #include #endif #include #include #include #include #include #ifdef INET6 #include #endif #include #ifdef TCPPCAP #include #endif #ifdef TCPDEBUG #include #endif #ifdef INET6 #include #endif #ifdef TCP_OFFLOAD #include #endif #ifdef IPSEC #include #include #ifdef INET6 #include #endif #include #include #endif /*IPSEC*/ #include #include #include VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS; #ifdef INET6 VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS; #endif struct rwlock tcp_function_lock; static int sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS) { int error, new; new = V_tcp_mssdflt; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr) { if (new < TCP_MINMSS) error = EINVAL; else V_tcp_mssdflt = new; } return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_mssdflt), 0, &sysctl_net_inet_tcp_mss_check, "I", "Default TCP Maximum Segment Size"); #ifdef INET6 static int sysctl_net_inet_tcp_mss_v6_check(SYSCTL_HANDLER_ARGS) { int error, new; new = V_tcp_v6mssdflt; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr) { if (new < TCP_MINMSS) error = EINVAL; else V_tcp_v6mssdflt = new; } return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_v6mssdflt), 0, &sysctl_net_inet_tcp_mss_v6_check, "I", "Default TCP Maximum Segment Size for IPv6"); #endif /* INET6 */ /* * Minimum MSS we accept and use. This prevents DoS attacks where * we are forced to a ridiculous low MSS like 20 and send hundreds * of packets instead of one. The effect scales with the available * bandwidth and quickly saturates the CPU and network interface * with packet generation and sending. Set to zero to disable MINMSS * checking. This setting prevents us from sending too small packets. */ VNET_DEFINE(int, tcp_minmss) = TCP_MINMSS; SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_minmss), 0, "Minimum TCP Maximum Segment Size"); VNET_DEFINE(int, tcp_do_rfc1323) = 1; SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_rfc1323), 0, "Enable rfc1323 (high performance TCP) extensions"); static int tcp_log_debug = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW, &tcp_log_debug, 0, "Log errors caused by incoming TCP segments"); static int tcp_tcbhashsize; SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable"); static int do_tcpdrain = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, "Enable tcp_drain routine for extra help when low on mbufs"); SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcbinfo.ipi_count), 0, "Number of active PCBs"); static VNET_DEFINE(int, icmp_may_rst) = 1; #define V_icmp_may_rst VNET(icmp_may_rst) SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp_may_rst), 0, "Certain ICMP unreachable messages may abort connections in SYN_SENT"); static VNET_DEFINE(int, tcp_isn_reseed_interval) = 0; #define V_tcp_isn_reseed_interval VNET(tcp_isn_reseed_interval) SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_isn_reseed_interval), 0, "Seconds between reseeding of ISN secret"); static int tcp_soreceive_stream; SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN, &tcp_soreceive_stream, 0, "Using soreceive_stream for TCP sockets"); #ifdef TCP_SIGNATURE static int tcp_sig_checksigs = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, signature_verify_input, CTLFLAG_RW, &tcp_sig_checksigs, 0, "Verify RFC2385 digests on inbound traffic"); #endif VNET_DEFINE(uma_zone_t, sack_hole_zone); #define V_sack_hole_zone VNET(sack_hole_zone) VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]); static struct inpcb *tcp_notify(struct inpcb *, int); static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int); static void tcp_mtudisc(struct inpcb *, int); static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr); static void tcp_timer_discard(struct tcpcb *, uint32_t); static struct tcp_function_block tcp_def_funcblk = { "default", tcp_output, tcp_do_segment, tcp_default_ctloutput, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0 }; struct tcp_funchead t_functions; static struct tcp_function_block *tcp_func_set_ptr = &tcp_def_funcblk; static struct tcp_function_block * find_tcp_functions_locked(struct tcp_function_set *fs) { struct tcp_function *f; struct tcp_function_block *blk=NULL; TAILQ_FOREACH(f, &t_functions, tf_next) { if (strcmp(f->tf_fb->tfb_tcp_block_name, fs->function_set_name) == 0) { blk = f->tf_fb; break; } } return(blk); } static struct tcp_function_block * find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s) { struct tcp_function_block *rblk=NULL; struct tcp_function *f; TAILQ_FOREACH(f, &t_functions, tf_next) { if (f->tf_fb == blk) { rblk = blk; if (s) { *s = f; } break; } } return (rblk); } struct tcp_function_block * find_and_ref_tcp_functions(struct tcp_function_set *fs) { struct tcp_function_block *blk; rw_rlock(&tcp_function_lock); blk = find_tcp_functions_locked(fs); if (blk) refcount_acquire(&blk->tfb_refcnt); rw_runlock(&tcp_function_lock); return(blk); } struct tcp_function_block * find_and_ref_tcp_fb(struct tcp_function_block *blk) { struct tcp_function_block *rblk; rw_rlock(&tcp_function_lock); rblk = find_tcp_fb_locked(blk, NULL); if (rblk) refcount_acquire(&rblk->tfb_refcnt); rw_runlock(&tcp_function_lock); return(rblk); } static int sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS) { int error=ENOENT; struct tcp_function_set fs; struct tcp_function_block *blk; memset(&fs, 0, sizeof(fs)); rw_rlock(&tcp_function_lock); blk = find_tcp_fb_locked(tcp_func_set_ptr, NULL); if (blk) { /* Found him */ strcpy(fs.function_set_name, blk->tfb_tcp_block_name); fs.pcbcnt = blk->tfb_refcnt; } rw_runlock(&tcp_function_lock); error = sysctl_handle_string(oidp, fs.function_set_name, sizeof(fs.function_set_name), req); /* Check for error or no change */ if (error != 0 || req->newptr == NULL) return(error); rw_wlock(&tcp_function_lock); blk = find_tcp_functions_locked(&fs); if ((blk == NULL) || (blk->tfb_flags & TCP_FUNC_BEING_REMOVED)) { error = ENOENT; goto done; } tcp_func_set_ptr = blk; done: rw_wunlock(&tcp_function_lock); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_default, CTLTYPE_STRING | CTLFLAG_RW, NULL, 0, sysctl_net_inet_default_tcp_functions, "A", "Set/get the default TCP functions"); static int sysctl_net_inet_list_available(SYSCTL_HANDLER_ARGS) { int error, cnt, linesz; struct tcp_function *f; char *buffer, *cp; size_t bufsz, outsz; cnt = 0; rw_rlock(&tcp_function_lock); TAILQ_FOREACH(f, &t_functions, tf_next) { cnt++; } rw_runlock(&tcp_function_lock); bufsz = (cnt+2) * (TCP_FUNCTION_NAME_LEN_MAX + 12) + 1; buffer = malloc(bufsz, M_TEMP, M_WAITOK); error = 0; cp = buffer; linesz = snprintf(cp, bufsz, "\n%-32s%c %s\n", "Stack", 'D', "PCB count"); cp += linesz; bufsz -= linesz; outsz = linesz; rw_rlock(&tcp_function_lock); TAILQ_FOREACH(f, &t_functions, tf_next) { linesz = snprintf(cp, bufsz, "%-32s%c %u\n", f->tf_fb->tfb_tcp_block_name, (f->tf_fb == tcp_func_set_ptr) ? '*' : ' ', f->tf_fb->tfb_refcnt); if (linesz >= bufsz) { error = EOVERFLOW; break; } cp += linesz; bufsz -= linesz; outsz += linesz; } rw_runlock(&tcp_function_lock); if (error == 0) error = sysctl_handle_string(oidp, buffer, outsz + 1, req); free(buffer, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available, CTLTYPE_STRING|CTLFLAG_RD, NULL, 0, sysctl_net_inet_list_available, "A", "list available TCP Function sets"); /* * Target size of TCP PCB hash tables. Must be a power of two. * * Note that this can be overridden by the kernel environment * variable net.inet.tcp.tcbhashsize */ #ifndef TCBHASHSIZE #define TCBHASHSIZE 0 #endif /* * XXX * Callouts should be moved into struct tcp directly. They are currently * separate because the tcpcb structure is exported to userland for sysctl * parsing purposes, which do not know about callouts. */ struct tcpcb_mem { struct tcpcb tcb; struct tcp_timer tt; struct cc_var ccv; struct osd osd; }; static VNET_DEFINE(uma_zone_t, tcpcb_zone); #define V_tcpcb_zone VNET(tcpcb_zone) MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers"); MALLOC_DEFINE(M_TCPFUNCTIONS, "tcpfunc", "TCP function set memory"); static struct mtx isn_mtx; #define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF) #define ISN_LOCK() mtx_lock(&isn_mtx) #define ISN_UNLOCK() mtx_unlock(&isn_mtx) /* * TCP initialization. */ static void tcp_zone_change(void *tag) { uma_zone_set_max(V_tcbinfo.ipi_zone, maxsockets); uma_zone_set_max(V_tcpcb_zone, maxsockets); tcp_tw_zone_change(); } static int tcp_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp = mem; INP_LOCK_INIT(inp, "inp", "tcpinp"); return (0); } /* * Take a value and get the next power of 2 that doesn't overflow. * Used to size the tcp_inpcb hash buckets. */ static int maketcp_hashsize(int size) { int hashsize; /* * auto tune. * get the next power of 2 higher than maxsockets. */ hashsize = 1 << fls(size); /* catch overflow, and just go one power of 2 smaller */ if (hashsize < size) { hashsize = 1 << (fls(size) - 1); } return (hashsize); } int register_tcp_functions(struct tcp_function_block *blk, int wait) { struct tcp_function_block *lblk; struct tcp_function *n; struct tcp_function_set fs; if ((blk->tfb_tcp_output == NULL) || (blk->tfb_tcp_do_segment == NULL) || (blk->tfb_tcp_ctloutput == NULL) || (strlen(blk->tfb_tcp_block_name) == 0)) { /* * These functions are required and you * need a name. */ return (EINVAL); } if (blk->tfb_tcp_timer_stop_all || blk->tfb_tcp_timers_left || blk->tfb_tcp_timer_activate || blk->tfb_tcp_timer_active || blk->tfb_tcp_timer_stop) { /* * If you define one timer function you * must have them all. */ if ((blk->tfb_tcp_timer_stop_all == NULL) || (blk->tfb_tcp_timers_left == NULL) || (blk->tfb_tcp_timer_activate == NULL) || (blk->tfb_tcp_timer_active == NULL) || (blk->tfb_tcp_timer_stop == NULL)) { return (EINVAL); } } n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait); if (n == NULL) { return (ENOMEM); } n->tf_fb = blk; strcpy(fs.function_set_name, blk->tfb_tcp_block_name); rw_wlock(&tcp_function_lock); lblk = find_tcp_functions_locked(&fs); if (lblk) { /* Duplicate name space not allowed */ rw_wunlock(&tcp_function_lock); free(n, M_TCPFUNCTIONS); return (EALREADY); } refcount_init(&blk->tfb_refcnt, 0); blk->tfb_flags = 0; TAILQ_INSERT_TAIL(&t_functions, n, tf_next); rw_wunlock(&tcp_function_lock); return(0); } int deregister_tcp_functions(struct tcp_function_block *blk) { struct tcp_function_block *lblk; struct tcp_function *f; int error=ENOENT; if (strcmp(blk->tfb_tcp_block_name, "default") == 0) { /* You can't un-register the default */ return (EPERM); } rw_wlock(&tcp_function_lock); if (blk == tcp_func_set_ptr) { /* You can't free the current default */ rw_wunlock(&tcp_function_lock); return (EBUSY); } if (blk->tfb_refcnt) { /* Still tcb attached, mark it. */ blk->tfb_flags |= TCP_FUNC_BEING_REMOVED; rw_wunlock(&tcp_function_lock); return (EBUSY); } lblk = find_tcp_fb_locked(blk, &f); if (lblk) { /* Found */ TAILQ_REMOVE(&t_functions, f, tf_next); f->tf_fb = NULL; free(f, M_TCPFUNCTIONS); error = 0; } rw_wunlock(&tcp_function_lock); return (error); } void tcp_init(void) { const char *tcbhash_tuneable; int hashsize; tcbhash_tuneable = "net.inet.tcp.tcbhashsize"; if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN, &V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register helper hook\n", __func__); if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, &V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register helper hook\n", __func__); hashsize = TCBHASHSIZE; TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize); if (hashsize == 0) { /* * Auto tune the hash size based on maxsockets. * A perfect hash would have a 1:1 mapping * (hashsize = maxsockets) however it's been * suggested that O(2) average is better. */ hashsize = maketcp_hashsize(maxsockets / 4); /* * Our historical default is 512, * do not autotune lower than this. */ if (hashsize < 512) hashsize = 512; if (bootverbose && IS_DEFAULT_VNET(curvnet)) printf("%s: %s auto tuned to %d\n", __func__, tcbhash_tuneable, hashsize); } /* * We require a hashsize to be a power of two. * Previously if it was not a power of two we would just reset it * back to 512, which could be a nasty surprise if you did not notice * the error message. * Instead what we do is clip it to the closest power of two lower * than the specified hash value. */ if (!powerof2(hashsize)) { int oldhashsize = hashsize; hashsize = maketcp_hashsize(hashsize); /* prevent absurdly low value */ if (hashsize < 16) hashsize = 16; printf("%s: WARNING: TCB hash size not a power of 2, " "clipped from %d to %d.\n", __func__, oldhashsize, hashsize); } in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize, "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE, IPI_HASHFIELDS_4TUPLE); /* * These have to be type stable for the benefit of the timers. */ V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(V_tcpcb_zone, maxsockets); uma_zone_set_warning(V_tcpcb_zone, "kern.ipc.maxsockets limit reached"); tcp_tw_init(); syncache_init(); tcp_hc_init(); TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack); V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); /* Skip initialization of globals for non-default instances. */ if (!IS_DEFAULT_VNET(curvnet)) return; tcp_reass_global_init(); /* XXX virtualize those bellow? */ tcp_delacktime = TCPTV_DELACK; tcp_keepinit = TCPTV_KEEP_INIT; tcp_keepidle = TCPTV_KEEP_IDLE; tcp_keepintvl = TCPTV_KEEPINTVL; tcp_maxpersistidle = TCPTV_KEEP_IDLE; tcp_msl = TCPTV_MSL; tcp_rexmit_min = TCPTV_MIN; if (tcp_rexmit_min < 1) tcp_rexmit_min = 1; tcp_rexmit_slop = TCPTV_CPU_VAR; tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT; tcp_tcbhashsize = hashsize; /* Setup the tcp function block list */ TAILQ_INIT(&t_functions); rw_init_flags(&tcp_function_lock, "tcp_func_lock" , 0); register_tcp_functions(&tcp_def_funcblk, M_WAITOK); if (tcp_soreceive_stream) { #ifdef INET tcp_usrreqs.pru_soreceive = soreceive_stream; #endif #ifdef INET6 tcp6_usrreqs.pru_soreceive = soreceive_stream; #endif /* INET6 */ } #ifdef INET6 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) #else /* INET6 */ #define TCP_MINPROTOHDR (sizeof(struct tcpiphdr)) #endif /* INET6 */ if (max_protohdr < TCP_MINPROTOHDR) max_protohdr = TCP_MINPROTOHDR; if (max_linkhdr + TCP_MINPROTOHDR > MHLEN) panic("tcp_init"); #undef TCP_MINPROTOHDR ISN_LOCK_INIT(); EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL, SHUTDOWN_PRI_DEFAULT); EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL, EVENTHANDLER_PRI_ANY); #ifdef TCPPCAP tcp_pcap_init(); #endif #ifdef TCP_RFC7413 tcp_fastopen_init(); #endif } #ifdef VIMAGE void tcp_destroy(void) { int error; #ifdef TCP_RFC7413 tcp_fastopen_destroy(); #endif tcp_hc_destroy(); syncache_destroy(); tcp_tw_destroy(); in_pcbinfo_destroy(&V_tcbinfo); uma_zdestroy(V_sack_hole_zone); uma_zdestroy(V_tcpcb_zone); error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_IN]); if (error != 0) { printf("%s: WARNING: unable to deregister helper hook " "type=%d, id=%d: error %d returned\n", __func__, HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN, error); } error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_OUT]); if (error != 0) { printf("%s: WARNING: unable to deregister helper hook " "type=%d, id=%d: error %d returned\n", __func__, HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, error); } } #endif void tcp_fini(void *xtp) { } /* * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb. * tcp_template used to store this data in mbufs, but we now recopy it out * of the tcpcb each time to conserve mbufs. */ void tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr) { struct tcphdr *th = (struct tcphdr *)tcp_ptr; INP_WLOCK_ASSERT(inp); #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { struct ip6_hdr *ip6; ip6 = (struct ip6_hdr *)ip_ptr; ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (inp->inp_flow & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_plen = htons(sizeof(struct tcphdr)); ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = inp->in6p_faddr; } #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET { struct ip *ip; ip = (struct ip *)ip_ptr; ip->ip_v = IPVERSION; ip->ip_hl = 5; ip->ip_tos = inp->inp_ip_tos; ip->ip_len = 0; ip->ip_id = 0; ip->ip_off = 0; ip->ip_ttl = inp->inp_ip_ttl; ip->ip_sum = 0; ip->ip_p = IPPROTO_TCP; ip->ip_src = inp->inp_laddr; ip->ip_dst = inp->inp_faddr; } #endif /* INET */ th->th_sport = inp->inp_lport; th->th_dport = inp->inp_fport; th->th_seq = 0; th->th_ack = 0; th->th_x2 = 0; th->th_off = 5; th->th_flags = 0; th->th_win = 0; th->th_urp = 0; th->th_sum = 0; /* in_pseudo() is called later for ipv4 */ } /* * Create template to be used to send tcp packets on a connection. * Allocates an mbuf and fills in a skeletal tcp/ip header. The only * use for this function is in keepalives, which use tcp_respond. */ struct tcptemp * tcpip_maketemplate(struct inpcb *inp) { struct tcptemp *t; t = malloc(sizeof(*t), M_TEMP, M_NOWAIT); if (t == NULL) return (NULL); tcpip_fillheaders(inp, (void *)&t->tt_ipgen, (void *)&t->tt_t); return (t); } /* * Send a single message to the TCP at address specified by * the given TCP/IP header. If m == NULL, then we make a copy * of the tcpiphdr at th and send directly to the addressed host. * This is used to force keep alive messages out using the TCP * template for a connection. If flags are given then we send * a message back to the TCP which originated the segment th, * and discard the mbuf containing it and any other attached mbufs. * * In any case the ack and sequence number of the transmitted * segment are as specified by the parameters. * * NOTE: If m != NULL, then th must point to *inside* the mbuf. */ void tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, tcp_seq ack, tcp_seq seq, int flags) { int tlen; int win = 0; struct ip *ip; struct tcphdr *nth; #ifdef INET6 struct ip6_hdr *ip6; int isipv6; #endif /* INET6 */ int ipflags = 0; struct inpcb *inp; KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL")); #ifdef INET6 isipv6 = ((struct ip *)ipgen)->ip_v == (IPV6_VERSION >> 4); ip6 = ipgen; #endif /* INET6 */ ip = ipgen; if (tp != NULL) { inp = tp->t_inpcb; KASSERT(inp != NULL, ("tcp control block w/o inpcb")); INP_WLOCK_ASSERT(inp); } else inp = NULL; if (tp != NULL) { if (!(flags & TH_RST)) { win = sbspace(&inp->inp_socket->so_rcv); if (win > (long)TCP_MAXWIN << tp->rcv_scale) win = (long)TCP_MAXWIN << tp->rcv_scale; } } if (m == NULL) { m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; tlen = 0; m->m_data += max_linkhdr; #ifdef INET6 if (isipv6) { bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(struct ip6_hdr)); ip6 = mtod(m, struct ip6_hdr *); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); ip = mtod(m, struct ip *); nth = (struct tcphdr *)(ip + 1); } bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); flags = TH_ACK; } else { /* * reuse the mbuf. * XXX MRT We inherrit the FIB, which is lucky. */ m_freem(m->m_next); m->m_next = NULL; m->m_data = (caddr_t)ipgen; /* m_len is set later */ tlen = 0; #define xchg(a,b,type) { type t; t=a; a=b; b=t; } #ifdef INET6 if (isipv6) { xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t); nth = (struct tcphdr *)(ip + 1); } if (th != nth) { /* * this is usually a case when an extension header * exists between the IPv6 header and the * TCP header. */ nth->th_sport = th->th_sport; nth->th_dport = th->th_dport; } xchg(nth->th_dport, nth->th_sport, uint16_t); #undef xchg } #ifdef INET6 if (isipv6) { ip6->ip6_flow = 0; ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_nxt = IPPROTO_TCP; tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr); ip6->ip6_plen = htons(tlen - sizeof(*ip6)); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { tlen += sizeof (struct tcpiphdr); ip->ip_len = htons(tlen); ip->ip_ttl = V_ip_defttl; if (V_path_mtu_discovery) ip->ip_off |= htons(IP_DF); } #endif m->m_len = tlen; m->m_pkthdr.len = tlen; m->m_pkthdr.rcvif = NULL; #ifdef MAC if (inp != NULL) { /* * Packet is associated with a socket, so allow the * label of the response to reflect the socket label. */ INP_WLOCK_ASSERT(inp); mac_inpcb_create_mbuf(inp, m); } else { /* * Packet is not associated with a socket, so possibly * update the label in place. */ mac_netinet_tcp_reply(m); } #endif nth->th_seq = htonl(seq); nth->th_ack = htonl(ack); nth->th_x2 = 0; nth->th_off = sizeof (struct tcphdr) >> 2; nth->th_flags = flags; if (tp != NULL) nth->th_win = htons((u_short) (win >> tp->rcv_scale)); else nth->th_win = htons((u_short)win); nth->th_urp = 0; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (isipv6) { m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; nth->th_sum = in6_cksum_pseudo(ip6, tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0); ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb : NULL, NULL); } #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET { m->m_pkthdr.csum_flags = CSUM_TCP; nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); } #endif /* INET */ #ifdef TCPDEBUG if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0); #endif TCP_PROBE3(debug__input, tp, th, mtod(m, const char *)); if (flags & TH_RST) TCP_PROBE5(accept__refused, NULL, NULL, mtod(m, const char *), tp, nth); TCP_PROBE5(send, NULL, tp, mtod(m, const char *), tp, nth); #ifdef INET6 if (isipv6) (void) ip6_output(m, NULL, NULL, ipflags, NULL, NULL, inp); #endif /* INET6 */ #if defined(INET) && defined(INET6) else #endif #ifdef INET (void) ip_output(m, NULL, NULL, ipflags, NULL, inp); #endif } /* * Create a new TCP control block, making an * empty reassembly queue and hooking it to the argument * protocol control block. The `inp' parameter must have * come from the zone allocator set up in tcp_init(). */ struct tcpcb * tcp_newtcpcb(struct inpcb *inp) { struct tcpcb_mem *tm; struct tcpcb *tp; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ tm = uma_zalloc(V_tcpcb_zone, M_NOWAIT | M_ZERO); if (tm == NULL) return (NULL); tp = &tm->tcb; /* Initialise cc_var struct for this tcpcb. */ tp->ccv = &tm->ccv; tp->ccv->type = IPPROTO_TCP; tp->ccv->ccvc.tcp = tp; rw_rlock(&tcp_function_lock); tp->t_fb = tcp_func_set_ptr; refcount_acquire(&tp->t_fb->tfb_refcnt); rw_runlock(&tcp_function_lock); if (tp->t_fb->tfb_tcp_fb_init) { (*tp->t_fb->tfb_tcp_fb_init)(tp); } /* * Use the current system default CC algorithm. */ CC_LIST_RLOCK(); KASSERT(!STAILQ_EMPTY(&cc_list), ("cc_list is empty!")); CC_ALGO(tp) = CC_DEFAULT(); CC_LIST_RUNLOCK(); if (CC_ALGO(tp)->cb_init != NULL) if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) { if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp); refcount_release(&tp->t_fb->tfb_refcnt); uma_zfree(V_tcpcb_zone, tm); return (NULL); } tp->osd = &tm->osd; if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) { if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp); refcount_release(&tp->t_fb->tfb_refcnt); uma_zfree(V_tcpcb_zone, tm); return (NULL); } #ifdef VIMAGE tp->t_vnet = inp->inp_vnet; #endif tp->t_timers = &tm->tt; /* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */ tp->t_maxseg = #ifdef INET6 isipv6 ? V_tcp_v6mssdflt : #endif /* INET6 */ V_tcp_mssdflt; /* Set up our timeouts. */ callout_init(&tp->t_timers->tt_rexmt, 1); callout_init(&tp->t_timers->tt_persist, 1); callout_init(&tp->t_timers->tt_keep, 1); callout_init(&tp->t_timers->tt_2msl, 1); callout_init(&tp->t_timers->tt_delack, 1); if (V_tcp_do_rfc1323) tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); if (V_tcp_do_sack) tp->t_flags |= TF_SACK_PERMIT; TAILQ_INIT(&tp->snd_holes); /* * The tcpcb will hold a reference on its inpcb until tcp_discardcb() * is called. */ in_pcbref(inp); /* Reference for tcpcb */ tp->t_inpcb = inp; /* * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives * reasonable initial retransmit time. */ tp->t_srtt = TCPTV_SRTTBASE; tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4; tp->t_rttmin = tcp_rexmit_min; tp->t_rxtcur = TCPTV_RTOBASE; tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->t_rcvtime = ticks; /* * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = V_ip_defttl; inp->inp_ppcb = tp; #ifdef TCPPCAP /* * Init the TCP PCAP queues. */ tcp_pcap_tcpcb_init(tp); #endif return (tp); /* XXX */ } /* * Switch the congestion control algorithm back to NewReno for any active * control blocks using an algorithm which is about to go away. * This ensures the CC framework can allow the unload to proceed without leaving * any dangling pointers which would trigger a panic. * Returning non-zero would inform the CC framework that something went wrong * and it would be unsafe to allow the unload to proceed. However, there is no * way for this to occur with this implementation so we always return zero. */ int tcp_ccalgounload(struct cc_algo *unload_algo) { struct cc_algo *tmpalgo; struct inpcb *inp; struct tcpcb *tp; VNET_ITERATOR_DECL(vnet_iter); /* * Check all active control blocks across all network stacks and change * any that are using "unload_algo" back to NewReno. If "unload_algo" * requires cleanup code to be run, call it. */ VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); INP_INFO_WLOCK(&V_tcbinfo); /* * New connections already part way through being initialised * with the CC algo we're removing will not race with this code * because the INP_INFO_WLOCK is held during initialisation. We * therefore don't enter the loop below until the connection * list has stabilised. */ LIST_FOREACH(inp, &V_tcb, inp_list) { INP_WLOCK(inp); /* Important to skip tcptw structs. */ if (!(inp->inp_flags & INP_TIMEWAIT) && (tp = intotcpcb(inp)) != NULL) { /* * By holding INP_WLOCK here, we are assured * that the connection is not currently * executing inside the CC module's functions * i.e. it is safe to make the switch back to * NewReno. */ if (CC_ALGO(tp) == unload_algo) { tmpalgo = CC_ALGO(tp); /* NewReno does not require any init. */ CC_ALGO(tp) = &newreno_cc_algo; if (tmpalgo->cb_destroy != NULL) tmpalgo->cb_destroy(tp->ccv); } } INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK(); return (0); } /* * Drop a TCP connection, reporting * the specified error. If connection is synchronized, * then send a RST to peer. */ struct tcpcb * tcp_drop(struct tcpcb *tp, int errno) { struct socket *so = tp->t_inpcb->inp_socket; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (TCPS_HAVERCVDSYN(tp->t_state)) { tcp_state_change(tp, TCPS_CLOSED); (void) tp->t_fb->tfb_tcp_output(tp); TCPSTAT_INC(tcps_drops); } else TCPSTAT_INC(tcps_conndrops); if (errno == ETIMEDOUT && tp->t_softerror) errno = tp->t_softerror; so->so_error = errno; return (tcp_close(tp)); } void tcp_discardcb(struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ int released; INP_WLOCK_ASSERT(inp); /* * Make sure that all of our timers are stopped before we delete the * PCB. * * If stopping a timer fails, we schedule a discard function in same * callout, and the last discard function called will take care of * deleting the tcpcb. */ tcp_timer_stop(tp, TT_REXMT); tcp_timer_stop(tp, TT_PERSIST); tcp_timer_stop(tp, TT_KEEP); tcp_timer_stop(tp, TT_2MSL); tcp_timer_stop(tp, TT_DELACK); if (tp->t_fb->tfb_tcp_timer_stop_all) { /* Call the stop-all function of the methods */ tp->t_fb->tfb_tcp_timer_stop_all(tp); } /* * If we got enough samples through the srtt filter, * save the rtt and rttvar in the routing entry. * 'Enough' is arbitrarily defined as 4 rtt samples. * 4 samples is enough for the srtt filter to converge * to within enough % of the correct value; fewer samples * and we could save a bogus rtt. The danger is not high * as tcp quickly recovers from everything. * XXX: Works very well but needs some more statistics! */ if (tp->t_rttupdated >= 4) { struct hc_metrics_lite metrics; u_long ssthresh; bzero(&metrics, sizeof(metrics)); /* * Update the ssthresh always when the conditions below * are satisfied. This gives us better new start value * for the congestion avoidance for new connections. * ssthresh is only set if packet loss occured on a session. * * XXXRW: 'so' may be NULL here, and/or socket buffer may be * being torn down. Ideally this code would not use 'so'. */ ssthresh = tp->snd_ssthresh; if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) { /* * convert the limit from user data bytes to * packets then to packet data bytes. */ ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg; if (ssthresh < 2) ssthresh = 2; ssthresh *= (u_long)(tp->t_maxseg + #ifdef INET6 (isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : #endif sizeof (struct tcpiphdr) #ifdef INET6 ) #endif ); } else ssthresh = 0; metrics.rmx_ssthresh = ssthresh; metrics.rmx_rtt = tp->t_srtt; metrics.rmx_rttvar = tp->t_rttvar; metrics.rmx_cwnd = tp->snd_cwnd; metrics.rmx_sendpipe = 0; metrics.rmx_recvpipe = 0; tcp_hc_update(&inp->inp_inc, &metrics); } /* free the reassembly queue, if any */ tcp_reass_flush(tp); #ifdef TCP_OFFLOAD /* Disconnect offload device, if any. */ if (tp->t_flags & TF_TOE) tcp_offload_detach(tp); #endif tcp_free_sackholes(tp); #ifdef TCPPCAP /* Free the TCP PCAP queues. */ tcp_pcap_drain(&(tp->t_inpkts)); tcp_pcap_drain(&(tp->t_outpkts)); #endif /* Allow the CC algorithm to clean up after itself. */ if (CC_ALGO(tp)->cb_destroy != NULL) CC_ALGO(tp)->cb_destroy(tp->ccv); khelp_destroy_osd(tp->osd); CC_ALGO(tp) = NULL; inp->inp_ppcb = NULL; if ((tp->t_timers->tt_flags & TT_MASK) == 0) { /* We own the last reference on tcpcb, let's free it. */ if ((tp->t_fb->tfb_tcp_timers_left) && (tp->t_fb->tfb_tcp_timers_left(tp))) { /* Some fb timers left running! */ return; } if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp); refcount_release(&tp->t_fb->tfb_refcnt); tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp); released = in_pcbrele_wlocked(inp); KASSERT(!released, ("%s: inp %p should not have been released " "here", __func__, inp)); } } void tcp_timer_2msl_discard(void *xtp) { tcp_timer_discard((struct tcpcb *)xtp, TT_2MSL); } void tcp_timer_keep_discard(void *xtp) { tcp_timer_discard((struct tcpcb *)xtp, TT_KEEP); } void tcp_timer_persist_discard(void *xtp) { tcp_timer_discard((struct tcpcb *)xtp, TT_PERSIST); } void tcp_timer_rexmt_discard(void *xtp) { tcp_timer_discard((struct tcpcb *)xtp, TT_REXMT); } void tcp_timer_delack_discard(void *xtp) { tcp_timer_discard((struct tcpcb *)xtp, TT_DELACK); } void tcp_timer_discard(struct tcpcb *tp, uint32_t timer_type) { struct inpcb *inp; CURVNET_SET(tp->t_vnet); INP_INFO_RLOCK(&V_tcbinfo); inp = tp->t_inpcb; KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); INP_WLOCK(inp); KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0, ("%s: tcpcb has to be stopped here", __func__)); KASSERT((tp->t_timers->tt_flags & timer_type) != 0, ("%s: discard callout should be running", __func__)); tp->t_timers->tt_flags &= ~timer_type; if ((tp->t_timers->tt_flags & TT_MASK) == 0) { /* We own the last reference on this tcpcb, let's free it. */ if ((tp->t_fb->tfb_tcp_timers_left) && (tp->t_fb->tfb_tcp_timers_left(tp))) { /* Some fb timers left running! */ goto leave; } if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp); refcount_release(&tp->t_fb->tfb_refcnt); tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp); if (in_pcbrele_wlocked(inp)) { INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } } leave: INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } /* * Attempt to close a TCP control block, marking it as dropped, and freeing * the socket if we hold the only reference. */ struct tcpcb * tcp_close(struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; struct socket *so; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); #ifdef TCP_OFFLOAD if (tp->t_state == TCPS_LISTEN) tcp_offload_listen_stop(tp); #endif #ifdef TCP_RFC7413 /* * This releases the TFO pending counter resource for TFO listen * sockets as well as passively-created TFO sockets that transition * from SYN_RECEIVED to CLOSED. */ if (tp->t_tfo_pending) { tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; } #endif in_pcbdrop(inp); TCPSTAT_INC(tcps_closed); KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL")); so = inp->inp_socket; soisdisconnected(so); if (inp->inp_flags & INP_SOCKREF) { KASSERT(so->so_state & SS_PROTOREF, ("tcp_close: !SS_PROTOREF")); inp->inp_flags &= ~INP_SOCKREF; INP_WUNLOCK(inp); ACCEPT_LOCK(); SOCK_LOCK(so); so->so_state &= ~SS_PROTOREF; sofree(so); return (NULL); } return (tp); } void tcp_drain(void) { VNET_ITERATOR_DECL(vnet_iter); if (!do_tcpdrain) return; VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); struct inpcb *inpb; struct tcpcb *tcpb; /* * Walk the tcpbs, if existing, and flush the reassembly queue, * if there is one... * XXX: The "Net/3" implementation doesn't imply that the TCP * reassembly queue should be flushed, but in a situation * where we're really low on mbufs, this is potentially * useful. */ INP_INFO_WLOCK(&V_tcbinfo); LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) { if (inpb->inp_flags & INP_TIMEWAIT) continue; INP_WLOCK(inpb); if ((tcpb = intotcpcb(inpb)) != NULL) { tcp_reass_flush(tcpb); tcp_clean_sackreport(tcpb); } INP_WUNLOCK(inpb); } INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } /* * Notify a tcp user of an asynchronous error; * store error as soft error, but wake up user * (for now, won't do anything until can select for soft error). * * Do not wake up user since there currently is no mechanism for * reporting soft errors (yet - a kqueue filter may be added). */ static struct inpcb * tcp_notify(struct inpcb *inp, int error) { struct tcpcb *tp; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || (inp->inp_flags & INP_DROPPED)) return (inp); tp = intotcpcb(inp); KASSERT(tp != NULL, ("tcp_notify: tp == NULL")); /* * Ignore some errors if we are hooked up. * If connection hasn't completed, has retransmitted several times, * and receives a second error, give up now. This is better * than waiting a long time to establish a connection that * can never complete. */ if (tp->t_state == TCPS_ESTABLISHED && (error == EHOSTUNREACH || error == ENETUNREACH || error == EHOSTDOWN)) { return (inp); } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && tp->t_softerror) { tp = tcp_drop(tp, error); if (tp != NULL) return (inp); else return (NULL); } else { tp->t_softerror = error; return (inp); } #if 0 wakeup( &so->so_timeo); sorwakeup(so); sowwakeup(so); #endif } static int tcp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, m, n, pcb_count; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == NULL) { n = V_tcbinfo.ipi_count + syncache_pcbcount(); n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb); return (0); } if (req->newptr != NULL) return (EPERM); /* * OK, now we're committed to doing something. */ INP_LIST_RLOCK(&V_tcbinfo); gencnt = V_tcbinfo.ipi_gencnt; n = V_tcbinfo.ipi_count; INP_LIST_RUNLOCK(&V_tcbinfo); m = syncache_pcbcount(); error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) + (n + m) * sizeof(struct xtcpcb)); if (error != 0) return (error); xig.xig_len = sizeof xig; xig.xig_count = n + m; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); error = syncache_pcblist(req, m, &pcb_count); if (error) return (error); inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == NULL) return (ENOMEM); INP_INFO_WLOCK(&V_tcbinfo); for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0; inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt) { /* * XXX: This use of cr_cansee(), introduced with * TCP state changes, is not quite right, but for * now, better than nothing. */ if (inp->inp_flags & INP_TIMEWAIT) { if (intotw(inp) != NULL) error = cr_cansee(req->td->td_ucred, intotw(inp)->tw_cred); else error = EINVAL; /* Skip this inp. */ } else error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) { in_pcbref(inp); inp_list[i++] = inp; } } INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(&V_tcbinfo); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xtcpcb xt; void *inp_ppcb; bzero(&xt, sizeof(xt)); xt.xt_len = sizeof xt; /* XXX should avoid extra copy */ bcopy(inp, &xt.xt_inp, sizeof *inp); inp_ppcb = inp->inp_ppcb; if (inp_ppcb == NULL) bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); else if (inp->inp_flags & INP_TIMEWAIT) { bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); xt.xt_tp.t_state = TCPS_TIME_WAIT; } else { bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); if (xt.xt_tp.t_timers) tcp_timer_to_xtimer(&xt.xt_tp, xt.xt_tp.t_timers, &xt.xt_timer); } if (inp->inp_socket != NULL) sotoxsocket(inp->inp_socket, &xt.xt_socket); else { bzero(&xt.xt_socket, sizeof xt.xt_socket); xt.xt_socket.xso_protocol = IPPROTO_TCP; } xt.xt_inp.inp_gencnt = inp->inp_gencnt; INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xt, sizeof xt); } else INP_RUNLOCK(inp); } INP_INFO_RLOCK(&V_tcbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (!in_pcbrele_rlocked(inp)) INP_RUNLOCK(inp); } INP_INFO_RUNLOCK(&V_tcbinfo); if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. */ INP_LIST_RLOCK(&V_tcbinfo); xig.xig_gen = V_tcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_tcbinfo.ipi_count + pcb_count; INP_LIST_RUNLOCK(&V_tcbinfo); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); #ifdef INET static int tcp_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in addrs[2]; struct inpcb *inp; int error; error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); inp = in_pcblookup(&V_tcbinfo, addrs[1].sin_addr, addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, tcp_getcred, "S,xucred", "Get the xucred of a TCP connection"); #endif /* INET */ #ifdef INET6 static int tcp6_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in6 addrs[2]; struct inpcb *inp; int error; #ifdef INET int mapped = 0; #endif error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 || (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) { return (error); } if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) { #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr)) mapped = 1; else #endif return (EINVAL); } #ifdef INET if (mapped == 1) inp = in_pcblookup(&V_tcbinfo, *(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12], addrs[1].sin6_port, *(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12], addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL); else #endif inp = in6_pcblookup(&V_tcbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, tcp6_getcred, "S,xucred", "Get the xucred of a TCP6 connection"); #endif /* INET6 */ #ifdef INET void tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct ip *ip = vip; struct tcphdr *th; struct in_addr faddr; struct inpcb *inp; struct tcpcb *tp; struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; struct icmp *icp; struct in_conninfo inc; tcp_seq icmp_tcp_seq; int mtu; faddr = ((struct sockaddr_in *)sa)->sin_addr; if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc_notify; else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && ip) notify = tcp_drop_syn_sent; /* * Redirects don't need to be handled up here. */ else if (PRC_IS_REDIRECT(cmd)) return; /* * Hostdead is ugly because it goes linearly through all PCBs. * XXX: We never get this from ICMP, otherwise it makes an * excellent DoS attack on machines with many connections. */ else if (cmd == PRC_HOSTDEAD) ip = NULL; else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) return; if (ip == NULL) { in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify); return; } icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip)); th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); INP_INFO_RLOCK(&V_tcbinfo); inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL); if (inp != NULL) { if (!(inp->inp_flags & INP_TIMEWAIT) && !(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket == NULL)) { icmp_tcp_seq = ntohl(th->th_seq); tp = intotcpcb(inp); if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) && SEQ_LT(icmp_tcp_seq, tp->snd_max)) { if (cmd == PRC_MSGSIZE) { /* * MTU discovery: * If we got a needfrag set the MTU * in the route to the suggested new * value (if given) and then notify. */ mtu = ntohs(icp->icmp_nextmtu); /* * If no alternative MTU was * proposed, try the next smaller * one. */ if (!mtu) mtu = ip_next_mtu( ntohs(ip->ip_len), 1); if (mtu < V_tcp_minmss + sizeof(struct tcpiphdr)) mtu = V_tcp_minmss + sizeof(struct tcpiphdr); /* * Only process the offered MTU if it * is smaller than the current one. */ if (mtu < tp->t_maxseg + sizeof(struct tcpiphdr)) { bzero(&inc, sizeof(inc)); inc.inc_faddr = faddr; inc.inc_fibnum = inp->inp_inc.inc_fibnum; tcp_hc_updatemtu(&inc, mtu); tcp_mtudisc(inp, mtu); } } else inp = (*notify)(inp, inetctlerrmap[cmd]); } } if (inp != NULL) INP_WUNLOCK(inp); } else { bzero(&inc, sizeof(inc)); inc.inc_fport = th->th_dport; inc.inc_lport = th->th_sport; inc.inc_faddr = faddr; inc.inc_laddr = ip->ip_src; syncache_unreach(&inc, th); } INP_INFO_RUNLOCK(&V_tcbinfo); } #endif /* INET */ #ifdef INET6 void tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) { struct tcphdr th; struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; struct ip6_hdr *ip6; struct mbuf *m; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; int off; struct tcp_portonly { u_int16_t th_sport; u_int16_t th_dport; } *thp; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc_notify; else if (!PRC_IS_REDIRECT(cmd) && ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0)) return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; sa6_src = ip6cp->ip6c_src; } else { m = NULL; ip6 = NULL; off = 0; /* fool gcc */ sa6_src = &sa6_any; } if (ip6 != NULL) { struct in_conninfo inc; /* * XXX: We assume that when IPV6 is non NULL, * M and OFF are valid. */ /* check if we can safely examine src and dst ports */ if (m->m_pkthdr.len < off + sizeof(*thp)) return; bzero(&th, sizeof(th)); m_copydata(m, off, sizeof(*thp), (caddr_t)&th); in6_pcbnotify(&V_tcbinfo, sa, th.th_dport, (struct sockaddr *)ip6cp->ip6c_src, th.th_sport, cmd, NULL, notify); bzero(&inc, sizeof(inc)); inc.inc_fport = th.th_dport; inc.inc_lport = th.th_sport; inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr; inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr; inc.inc_flags |= INC_ISIPV6; INP_INFO_RLOCK(&V_tcbinfo); syncache_unreach(&inc, &th); INP_INFO_RUNLOCK(&V_tcbinfo); } else in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, NULL, notify); } #endif /* INET6 */ /* * Following is where TCP initial sequence number generation occurs. * * There are two places where we must use initial sequence numbers: * 1. In SYN-ACK packets. * 2. In SYN packets. * * All ISNs for SYN-ACK packets are generated by the syncache. See * tcp_syncache.c for details. * * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling * depends on this property. In addition, these ISNs should be * unguessable so as to prevent connection hijacking. To satisfy * the requirements of this situation, the algorithm outlined in * RFC 1948 is used, with only small modifications. * * Implementation details: * * Time is based off the system timer, and is corrected so that it * increases by one megabyte per second. This allows for proper * recycling on high speed LANs while still leaving over an hour * before rollover. * * As reading the *exact* system time is too expensive to be done * whenever setting up a TCP connection, we increment the time * offset in two ways. First, a small random positive increment * is added to isn_offset for each connection that is set up. * Second, the function tcp_isn_tick fires once per clock tick * and increments isn_offset as necessary so that sequence numbers * are incremented at approximately ISN_BYTES_PER_SECOND. The * random positive increments serve only to ensure that the same * exact sequence number is never sent out twice (as could otherwise * happen when a port is recycled in less than the system tick * interval.) * * net.inet.tcp.isn_reseed_interval controls the number of seconds * between seeding of isn_secret. This is normally set to zero, * as reseeding should not be necessary. * * Locking of the global variables isn_secret, isn_last_reseed, isn_offset, * isn_offset_old, and isn_ctx is performed using the TCP pcbinfo lock. In * general, this means holding an exclusive (write) lock. */ #define ISN_BYTES_PER_SECOND 1048576 #define ISN_STATIC_INCREMENT 4096 #define ISN_RANDOM_INCREMENT (4096 - 1) static VNET_DEFINE(u_char, isn_secret[32]); static VNET_DEFINE(int, isn_last); static VNET_DEFINE(int, isn_last_reseed); static VNET_DEFINE(u_int32_t, isn_offset); static VNET_DEFINE(u_int32_t, isn_offset_old); #define V_isn_secret VNET(isn_secret) #define V_isn_last VNET(isn_last) #define V_isn_last_reseed VNET(isn_last_reseed) #define V_isn_offset VNET(isn_offset) #define V_isn_offset_old VNET(isn_offset_old) tcp_seq tcp_new_isn(struct tcpcb *tp) { MD5_CTX isn_ctx; u_int32_t md5_buffer[4]; tcp_seq new_isn; u_int32_t projected_offset; INP_WLOCK_ASSERT(tp->t_inpcb); ISN_LOCK(); /* Seed if this is the first use, reseed if requested. */ if ((V_isn_last_reseed == 0) || ((V_tcp_isn_reseed_interval > 0) && (((u_int)V_isn_last_reseed + (u_int)V_tcp_isn_reseed_interval*hz) < (u_int)ticks))) { read_random(&V_isn_secret, sizeof(V_isn_secret)); V_isn_last_reseed = ticks; } /* Compute the md5 hash and return the ISN. */ MD5Init(&isn_ctx); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_fport, sizeof(u_short)); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_lport, sizeof(u_short)); #ifdef INET6 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) { MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_faddr, sizeof(struct in6_addr)); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_laddr, sizeof(struct in6_addr)); } else #endif { MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_faddr, sizeof(struct in_addr)); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_laddr, sizeof(struct in_addr)); } MD5Update(&isn_ctx, (u_char *) &V_isn_secret, sizeof(V_isn_secret)); MD5Final((u_char *) &md5_buffer, &isn_ctx); new_isn = (tcp_seq) md5_buffer[0]; V_isn_offset += ISN_STATIC_INCREMENT + (arc4random() & ISN_RANDOM_INCREMENT); if (ticks != V_isn_last) { projected_offset = V_isn_offset_old + ISN_BYTES_PER_SECOND / hz * (ticks - V_isn_last); if (SEQ_GT(projected_offset, V_isn_offset)) V_isn_offset = projected_offset; V_isn_offset_old = V_isn_offset; V_isn_last = ticks; } new_isn += V_isn_offset; ISN_UNLOCK(); return (new_isn); } /* * When a specific ICMP unreachable message is received and the * connection state is SYN-SENT, drop the connection. This behavior * is controlled by the icmp_may_rst sysctl. */ struct inpcb * tcp_drop_syn_sent(struct inpcb *inp, int errno) { struct tcpcb *tp; INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || (inp->inp_flags & INP_DROPPED)) return (inp); tp = intotcpcb(inp); if (tp->t_state != TCPS_SYN_SENT) return (inp); tp = tcp_drop(tp, errno); if (tp != NULL) return (inp); else return (NULL); } /* * When `need fragmentation' ICMP is received, update our idea of the MSS * based on the new value. Also nudge TCP to send something, since we * know the packet we just sent was dropped. * This duplicates some code in the tcp_mss() function in tcp_input.c. */ static struct inpcb * tcp_mtudisc_notify(struct inpcb *inp, int error) { tcp_mtudisc(inp, -1); return (inp); } static void tcp_mtudisc(struct inpcb *inp, int mtuoffer) { struct tcpcb *tp; struct socket *so; INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || (inp->inp_flags & INP_DROPPED)) return; tp = intotcpcb(inp); KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL")); tcp_mss_update(tp, -1, mtuoffer, NULL, NULL); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_snd); /* If the mss is larger than the socket buffer, decrease the mss. */ if (so->so_snd.sb_hiwat < tp->t_maxseg) tp->t_maxseg = so->so_snd.sb_hiwat; SOCKBUF_UNLOCK(&so->so_snd); TCPSTAT_INC(tcps_mturesent); tp->t_rtttime = 0; tp->snd_nxt = tp->snd_una; tcp_free_sackholes(tp); tp->snd_recover = tp->snd_max; if (tp->t_flags & TF_SACK_PERMIT) EXIT_FASTRECOVERY(tp->t_flags); tp->t_fb->tfb_tcp_output(tp); } #ifdef INET /* * Look-up the routing entry to the peer of this inpcb. If no route * is found and it cannot be allocated, then return 0. This routine * is called by TCP routines that access the rmx structure and by * tcp_mss_update to get the peer/interface MTU. */ u_long tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap) { struct route sro; struct sockaddr_in *dst; struct ifnet *ifp; u_long maxmtu = 0; KASSERT(inc != NULL, ("tcp_maxmtu with NULL in_conninfo pointer")); bzero(&sro, sizeof(sro)); if (inc->inc_faddr.s_addr != INADDR_ANY) { dst = (struct sockaddr_in *)&sro.ro_dst; dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = inc->inc_faddr; in_rtalloc_ign(&sro, 0, inc->inc_fibnum); } if (sro.ro_rt != NULL) { ifp = sro.ro_rt->rt_ifp; if (sro.ro_rt->rt_mtu == 0) maxmtu = ifp->if_mtu; else maxmtu = min(sro.ro_rt->rt_mtu, ifp->if_mtu); /* Report additional interface capabilities. */ if (cap != NULL) { if (ifp->if_capenable & IFCAP_TSO4 && ifp->if_hwassist & CSUM_TSO) { cap->ifcap |= CSUM_TSO; cap->tsomax = ifp->if_hw_tsomax; cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } RTFREE(sro.ro_rt); } return (maxmtu); } #endif /* INET */ #ifdef INET6 u_long tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap) { struct route_in6 sro6; struct ifnet *ifp; u_long maxmtu = 0; KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer")); bzero(&sro6, sizeof(sro6)); if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) { sro6.ro_dst.sin6_family = AF_INET6; sro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); sro6.ro_dst.sin6_addr = inc->inc6_faddr; in6_rtalloc_ign(&sro6, 0, inc->inc_fibnum); } if (sro6.ro_rt != NULL) { ifp = sro6.ro_rt->rt_ifp; if (sro6.ro_rt->rt_mtu == 0) maxmtu = IN6_LINKMTU(sro6.ro_rt->rt_ifp); else maxmtu = min(sro6.ro_rt->rt_mtu, IN6_LINKMTU(sro6.ro_rt->rt_ifp)); /* Report additional interface capabilities. */ if (cap != NULL) { if (ifp->if_capenable & IFCAP_TSO6 && ifp->if_hwassist & CSUM_TSO) { cap->ifcap |= CSUM_TSO; cap->tsomax = ifp->if_hw_tsomax; cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } RTFREE(sro6.ro_rt); } return (maxmtu); } #endif /* INET6 */ /* * Calculate effective SMSS per RFC5681 definition for a given TCP * connection at its current state, taking into account SACK and etc. */ u_int tcp_maxseg(const struct tcpcb *tp) { u_int optlen; if (tp->t_flags & TF_NOOPT) return (tp->t_maxseg); /* * Here we have a simplified code from tcp_addoptions(), * without a proper loop, and having most of paddings hardcoded. * We might make mistakes with padding here in some edge cases, * but this is harmless, since result of tcp_maxseg() is used * only in cwnd and ssthresh estimations. */ #define PAD(len) ((((len) / 4) + !!((len) % 4)) * 4) if (TCPS_HAVEESTABLISHED(tp->t_state)) { if (tp->t_flags & TF_RCVD_TSTMP) optlen = TCPOLEN_TSTAMP_APPA; else optlen = 0; #ifdef TCP_SIGNATURE if (tp->t_flags & TF_SIGNATURE) optlen += PAD(TCPOLEN_SIGNATURE); #endif if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) { optlen += TCPOLEN_SACKHDR; optlen += tp->rcv_numsacks * TCPOLEN_SACK; optlen = PAD(optlen); } } else { if (tp->t_flags & TF_REQ_TSTMP) optlen = TCPOLEN_TSTAMP_APPA; else optlen = PAD(TCPOLEN_MAXSEG); if (tp->t_flags & TF_REQ_SCALE) optlen += PAD(TCPOLEN_WINDOW); #ifdef TCP_SIGNATURE if (tp->t_flags & TF_SIGNATURE) optlen += PAD(TCPOLEN_SIGNATURE); #endif if (tp->t_flags & TF_SACK_PERMIT) optlen += PAD(TCPOLEN_SACK_PERMITTED); } #undef PAD optlen = min(optlen, TCP_MAXOLEN); return (tp->t_maxseg - optlen); } #ifdef IPSEC /* compute ESP/AH header size for TCP, including outer IP header. */ size_t ipsec_hdrsiz_tcp(struct tcpcb *tp) { struct inpcb *inp; struct mbuf *m; size_t hdrsiz; struct ip *ip; #ifdef INET6 struct ip6_hdr *ip6; #endif struct tcphdr *th; if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL) || (!key_havesp(IPSEC_DIR_OUTBOUND))) return (0); m = m_gethdr(M_NOWAIT, MT_DATA); if (!m) return (0); #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)(ip6 + 1); m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); tcpip_fillheaders(inp, ip6, th); hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); } else #endif /* INET6 */ { ip = mtod(m, struct ip *); th = (struct tcphdr *)(ip + 1); m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr); tcpip_fillheaders(inp, ip, th); hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); } m_free(m); return (hdrsiz); } #endif /* IPSEC */ #ifdef TCP_SIGNATURE /* * Callback function invoked by m_apply() to digest TCP segment data * contained within an mbuf chain. */ static int tcp_signature_apply(void *fstate, void *data, u_int len) { MD5Update(fstate, (u_char *)data, len); return (0); } /* * XXX The key is retrieved from the system's PF_KEY SADB, by keying a * search with the destination IP address, and a 'magic SPI' to be * determined by the application. This is hardcoded elsewhere to 1179 */ struct secasvar * tcp_get_sav(struct mbuf *m, u_int direction) { union sockaddr_union dst; struct secasvar *sav; struct ip *ip; #ifdef INET6 struct ip6_hdr *ip6; char ip6buf[INET6_ADDRSTRLEN]; #endif /* Extract the destination from the IP header in the mbuf. */ bzero(&dst, sizeof(union sockaddr_union)); ip = mtod(m, struct ip *); #ifdef INET6 ip6 = NULL; /* Make the compiler happy. */ #endif switch (ip->ip_v) { #ifdef INET case IPVERSION: dst.sa.sa_len = sizeof(struct sockaddr_in); dst.sa.sa_family = AF_INET; dst.sin.sin_addr = (direction == IPSEC_DIR_INBOUND) ? ip->ip_src : ip->ip_dst; break; #endif #ifdef INET6 case (IPV6_VERSION >> 4): ip6 = mtod(m, struct ip6_hdr *); dst.sa.sa_len = sizeof(struct sockaddr_in6); dst.sa.sa_family = AF_INET6; dst.sin6.sin6_addr = (direction == IPSEC_DIR_INBOUND) ? ip6->ip6_src : ip6->ip6_dst; break; #endif default: return (NULL); /* NOTREACHED */ break; } /* Look up an SADB entry which matches the address of the peer. */ sav = KEY_ALLOCSA(&dst, IPPROTO_TCP, htonl(TCP_SIG_SPI)); if (sav == NULL) { ipseclog((LOG_ERR, "%s: SADB lookup failed for %s\n", __func__, (ip->ip_v == IPVERSION) ? inet_ntoa(dst.sin.sin_addr) : #ifdef INET6 (ip->ip_v == (IPV6_VERSION >> 4)) ? ip6_sprintf(ip6buf, &dst.sin6.sin6_addr) : #endif "(unsupported)")); } return (sav); } /* * Compute TCP-MD5 hash of a TCP segment. (RFC2385) * * Parameters: * m pointer to head of mbuf chain * len length of TCP segment data, excluding options * optlen length of TCP segment options * buf pointer to storage for computed MD5 digest * sav pointer to security assosiation * * We do this over ip, tcphdr, segment data, and the key in the SADB. * When called from tcp_input(), we can be sure that th_sum has been * zeroed out and verified already. * * Releases reference to SADB key before return. * * Return 0 if successful, otherwise return -1. * */ int tcp_signature_do_compute(struct mbuf *m, int len, int optlen, u_char *buf, struct secasvar *sav) { #ifdef INET struct ippseudo ippseudo; #endif MD5_CTX ctx; int doff; struct ip *ip; #ifdef INET struct ipovly *ipovly; #endif struct tcphdr *th; #ifdef INET6 struct ip6_hdr *ip6; struct in6_addr in6; uint32_t plen; uint16_t nhdr; #endif u_short savecsum; KASSERT(m != NULL, ("NULL mbuf chain")); KASSERT(buf != NULL, ("NULL signature pointer")); /* Extract the destination from the IP header in the mbuf. */ ip = mtod(m, struct ip *); #ifdef INET6 ip6 = NULL; /* Make the compiler happy. */ #endif MD5Init(&ctx); /* * Step 1: Update MD5 hash with IP(v6) pseudo-header. * * XXX The ippseudo header MUST be digested in network byte order, * or else we'll fail the regression test. Assume all fields we've * been doing arithmetic on have been in host byte order. * XXX One cannot depend on ipovly->ih_len here. When called from * tcp_output(), the underlying ip_len member has not yet been set. */ switch (ip->ip_v) { #ifdef INET case IPVERSION: ipovly = (struct ipovly *)ip; ippseudo.ippseudo_src = ipovly->ih_src; ippseudo.ippseudo_dst = ipovly->ih_dst; ippseudo.ippseudo_pad = 0; ippseudo.ippseudo_p = IPPROTO_TCP; ippseudo.ippseudo_len = htons(len + sizeof(struct tcphdr) + optlen); MD5Update(&ctx, (char *)&ippseudo, sizeof(struct ippseudo)); th = (struct tcphdr *)((u_char *)ip + sizeof(struct ip)); doff = sizeof(struct ip) + sizeof(struct tcphdr) + optlen; break; #endif #ifdef INET6 /* * RFC 2385, 2.0 Proposal * For IPv6, the pseudo-header is as described in RFC 2460, namely the * 128-bit source IPv6 address, 128-bit destination IPv6 address, zero- * extended next header value (to form 32 bits), and 32-bit segment * length. * Note: Upper-Layer Packet Length comes before Next Header. */ case (IPV6_VERSION >> 4): in6 = ip6->ip6_src; in6_clearscope(&in6); MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr)); in6 = ip6->ip6_dst; in6_clearscope(&in6); MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr)); plen = htonl(len + sizeof(struct tcphdr) + optlen); MD5Update(&ctx, (char *)&plen, sizeof(uint32_t)); nhdr = 0; MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); nhdr = IPPROTO_TCP; MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); th = (struct tcphdr *)((u_char *)ip6 + sizeof(struct ip6_hdr)); doff = sizeof(struct ip6_hdr) + sizeof(struct tcphdr) + optlen; break; #endif default: KEY_FREESAV(&sav); return (-1); /* NOTREACHED */ break; } /* * Step 2: Update MD5 hash with TCP header, excluding options. * The TCP checksum must be set to zero. */ savecsum = th->th_sum; th->th_sum = 0; MD5Update(&ctx, (char *)th, sizeof(struct tcphdr)); th->th_sum = savecsum; /* * Step 3: Update MD5 hash with TCP segment data. * Use m_apply() to avoid an early m_pullup(). */ if (len > 0) m_apply(m, doff, len, tcp_signature_apply, &ctx); /* * Step 4: Update MD5 hash with shared secret. */ MD5Update(&ctx, sav->key_auth->key_data, _KEYLEN(sav->key_auth)); MD5Final(buf, &ctx); key_sa_recordxfer(sav, m); KEY_FREESAV(&sav); return (0); } /* * Compute TCP-MD5 hash of a TCP segment. (RFC2385) * * Return 0 if successful, otherwise return -1. */ int tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen, u_char *buf, u_int direction) { struct secasvar *sav; if ((sav = tcp_get_sav(m, direction)) == NULL) return (-1); return (tcp_signature_do_compute(m, len, optlen, buf, sav)); } /* * Verify the TCP-MD5 hash of a TCP segment. (RFC2385) * * Parameters: * m pointer to head of mbuf chain * len length of TCP segment data, excluding options * optlen length of TCP segment options * buf pointer to storage for computed MD5 digest * direction direction of flow (IPSEC_DIR_INBOUND or OUTBOUND) * * Return 1 if successful, otherwise return 0. */ int tcp_signature_verify(struct mbuf *m, int off0, int tlen, int optlen, struct tcpopt *to, struct tcphdr *th, u_int tcpbflag) { char tmpdigest[TCP_SIGLEN]; if (tcp_sig_checksigs == 0) return (1); if ((tcpbflag & TF_SIGNATURE) == 0) { if ((to->to_flags & TOF_SIGNATURE) != 0) { /* * If this socket is not expecting signature but * the segment contains signature just fail. */ TCPSTAT_INC(tcps_sig_err_sigopt); TCPSTAT_INC(tcps_sig_rcvbadsig); return (0); } /* Signature is not expected, and not present in segment. */ return (1); } /* * If this socket is expecting signature but the segment does not * contain any just fail. */ if ((to->to_flags & TOF_SIGNATURE) == 0) { TCPSTAT_INC(tcps_sig_err_nosigopt); TCPSTAT_INC(tcps_sig_rcvbadsig); return (0); } if (tcp_signature_compute(m, off0, tlen, optlen, &tmpdigest[0], IPSEC_DIR_INBOUND) == -1) { TCPSTAT_INC(tcps_sig_err_buildsig); TCPSTAT_INC(tcps_sig_rcvbadsig); return (0); } if (bcmp(to->to_signature, &tmpdigest[0], TCP_SIGLEN) != 0) { TCPSTAT_INC(tcps_sig_rcvbadsig); return (0); } TCPSTAT_INC(tcps_sig_rcvgoodsig); return (1); } #endif /* TCP_SIGNATURE */ static int sysctl_drop(SYSCTL_HANDLER_ARGS) { /* addrs[0] is a foreign socket, addrs[1] is a local one. */ struct sockaddr_storage addrs[2]; struct inpcb *inp; struct tcpcb *tp; struct tcptw *tw; struct sockaddr_in *fin, *lin; #ifdef INET6 struct sockaddr_in6 *fin6, *lin6; #endif int error; inp = NULL; fin = lin = NULL; #ifdef INET6 fin6 = lin6 = NULL; #endif error = 0; if (req->oldptr != NULL || req->oldlen != 0) return (EINVAL); if (req->newptr == NULL) return (EPERM); if (req->newlen < sizeof(addrs)) return (ENOMEM); error = SYSCTL_IN(req, &addrs, sizeof(addrs)); if (error) return (error); switch (addrs[0].ss_family) { #ifdef INET6 case AF_INET6: fin6 = (struct sockaddr_in6 *)&addrs[0]; lin6 = (struct sockaddr_in6 *)&addrs[1]; if (fin6->sin6_len != sizeof(struct sockaddr_in6) || lin6->sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) { if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr)) return (EINVAL); in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[0]); in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[1]); fin = (struct sockaddr_in *)&addrs[0]; lin = (struct sockaddr_in *)&addrs[1]; break; } error = sa6_embedscope(fin6, V_ip6_use_defzone); if (error) return (error); error = sa6_embedscope(lin6, V_ip6_use_defzone); if (error) return (error); break; #endif #ifdef INET case AF_INET: fin = (struct sockaddr_in *)&addrs[0]; lin = (struct sockaddr_in *)&addrs[1]; if (fin->sin_len != sizeof(struct sockaddr_in) || lin->sin_len != sizeof(struct sockaddr_in)) return (EINVAL); break; #endif default: return (EINVAL); } INP_INFO_RLOCK(&V_tcbinfo); switch (addrs[0].ss_family) { #ifdef INET6 case AF_INET6: inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr, fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port, INPLOOKUP_WLOCKPCB, NULL); break; #endif #ifdef INET case AF_INET: inp = in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port, lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL); break; #endif } if (inp != NULL) { if (inp->inp_flags & INP_TIMEWAIT) { /* * XXXRW: There currently exists a state where an * inpcb is present, but its timewait state has been * discarded. For now, don't allow dropping of this * type of inpcb. */ tw = intotw(inp); if (tw != NULL) tcp_twclose(tw, 0); else INP_WUNLOCK(inp); } else if (!(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket->so_options & SO_ACCEPTCONN)) { tp = intotcpcb(inp); tp = tcp_drop(tp, ECONNABORTED); if (tp != NULL) INP_WUNLOCK(inp); } else INP_WUNLOCK(inp); } else error = ESRCH; INP_INFO_RUNLOCK(&V_tcbinfo); return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP, NULL, 0, sysctl_drop, "", "Drop TCP connection"); /* * Generate a standardized TCP log line for use throughout the * tcp subsystem. Memory allocation is done with M_NOWAIT to * allow use in the interrupt context. * * NB: The caller MUST free(s, M_TCPLOG) the returned string. * NB: The function may return NULL if memory allocation failed. * * Due to header inclusion and ordering limitations the struct ip * and ip6_hdr pointers have to be passed as void pointers. */ char * tcp_log_vain(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr) { /* Is logging enabled? */ if (tcp_log_in_vain == 0) return (NULL); return (tcp_log_addr(inc, th, ip4hdr, ip6hdr)); } char * tcp_log_addrs(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr) { /* Is logging enabled? */ if (tcp_log_debug == 0) return (NULL); return (tcp_log_addr(inc, th, ip4hdr, ip6hdr)); } static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr) { char *s, *sp; size_t size; struct ip *ip; #ifdef INET6 const struct ip6_hdr *ip6; ip6 = (const struct ip6_hdr *)ip6hdr; #endif /* INET6 */ ip = (struct ip *)ip4hdr; /* * The log line looks like this: * "TCP: [1.2.3.4]:50332 to [1.2.3.4]:80 tcpflags 0x2" */ size = sizeof("TCP: []:12345 to []:12345 tcpflags 0x2<>") + sizeof(PRINT_TH_FLAGS) + 1 + #ifdef INET6 2 * INET6_ADDRSTRLEN; #else 2 * INET_ADDRSTRLEN; #endif /* INET6 */ s = malloc(size, M_TCPLOG, M_ZERO|M_NOWAIT); if (s == NULL) return (NULL); strcat(s, "TCP: ["); sp = s + strlen(s); if (inc && ((inc->inc_flags & INC_ISIPV6) == 0)) { inet_ntoa_r(inc->inc_faddr, sp); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(inc->inc_fport)); sp = s + strlen(s); inet_ntoa_r(inc->inc_laddr, sp); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(inc->inc_lport)); #ifdef INET6 } else if (inc) { ip6_sprintf(sp, &inc->inc6_faddr); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(inc->inc_fport)); sp = s + strlen(s); ip6_sprintf(sp, &inc->inc6_laddr); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(inc->inc_lport)); } else if (ip6 && th) { ip6_sprintf(sp, &ip6->ip6_src); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(th->th_sport)); sp = s + strlen(s); ip6_sprintf(sp, &ip6->ip6_dst); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(th->th_dport)); #endif /* INET6 */ #ifdef INET } else if (ip && th) { inet_ntoa_r(ip->ip_src, sp); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(th->th_sport)); sp = s + strlen(s); inet_ntoa_r(ip->ip_dst, sp); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(th->th_dport)); #endif /* INET */ } else { free(s, M_TCPLOG); return (NULL); } sp = s + strlen(s); if (th) sprintf(sp, " tcpflags 0x%b", th->th_flags, PRINT_TH_FLAGS); if (*(s + size - 1) != '\0') panic("%s: string too long", __func__); return (s); } /* * A subroutine which makes it easy to track TCP state changes with DTrace. * This function shouldn't be called for t_state initializations that don't * correspond to actual TCP state transitions. */ void tcp_state_change(struct tcpcb *tp, int newstate) { #if defined(KDTRACE_HOOKS) int pstate = tp->t_state; #endif tp->t_state = newstate; TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, pstate); } Index: projects/clang380-import/sys/netinet/toecore.c =================================================================== --- projects/clang380-import/sys/netinet/toecore.c (revision 293686) +++ projects/clang380-import/sys/netinet/toecore.c (revision 293687) @@ -1,584 +1,569 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #include static struct mtx toedev_lock; static TAILQ_HEAD(, toedev) toedev_list; static eventhandler_tag listen_start_eh; static eventhandler_tag listen_stop_eh; static eventhandler_tag lle_event_eh; -static eventhandler_tag route_redirect_eh; static int toedev_connect(struct toedev *tod __unused, struct socket *so __unused, struct rtentry *rt __unused, struct sockaddr *nam __unused) { return (ENOTSUP); } static int toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused) { return (ENOTSUP); } static int toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused) { return (ENOTSUP); } static void toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused, struct mbuf *m) { m_freem(m); return; } static void toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused) { return; } static int toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused) { return (ENOTSUP); } static void toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused) { return; } static void toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused, struct sockaddr *sa __unused, uint8_t *lladdr __unused, uint16_t vtag __unused) { return; } static void toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused, struct rtentry *rt0 __unused, struct rtentry *rt1 __unused) { return; } static void toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused) { return; } static void toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused) { return; } static int toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused, struct mbuf *m) { m_freem(m); return (0); } static void toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused, struct socket *so __unused) { return; } static void toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused, int sopt_dir __unused, int sopt_name __unused) { return; } /* * Inform one or more TOE devices about a listening socket. */ static void toe_listen_start(struct inpcb *inp, void *arg) { struct toedev *t, *tod; struct tcpcb *tp; INP_WLOCK_ASSERT(inp); KASSERT(inp->inp_pcbinfo == &V_tcbinfo, ("%s: inp is not a TCP inp", __func__)); if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) return; tp = intotcpcb(inp); if (tp->t_state != TCPS_LISTEN) return; t = arg; mtx_lock(&toedev_lock); TAILQ_FOREACH(tod, &toedev_list, link) { if (t == NULL || t == tod) tod->tod_listen_start(tod, tp); } mtx_unlock(&toedev_lock); } static void toe_listen_start_event(void *arg __unused, struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; INP_WLOCK_ASSERT(inp); KASSERT(tp->t_state == TCPS_LISTEN, ("%s: t_state %s", __func__, tcpstates[tp->t_state])); toe_listen_start(inp, NULL); } static void toe_listen_stop_event(void *arg __unused, struct tcpcb *tp) { struct toedev *tod; #ifdef INVARIANTS struct inpcb *inp = tp->t_inpcb; #endif INP_WLOCK_ASSERT(inp); KASSERT(tp->t_state == TCPS_LISTEN, ("%s: t_state %s", __func__, tcpstates[tp->t_state])); mtx_lock(&toedev_lock); TAILQ_FOREACH(tod, &toedev_list, link) tod->tod_listen_stop(tod, tp); mtx_unlock(&toedev_lock); } /* * Fill up a freshly allocated toedev struct with reasonable defaults. */ void init_toedev(struct toedev *tod) { tod->tod_softc = NULL; /* * Provide no-op defaults so that the kernel can call any toedev * function without having to check whether the TOE driver supplied one * or not. */ tod->tod_connect = toedev_connect; tod->tod_listen_start = toedev_listen_start; tod->tod_listen_stop = toedev_listen_stop; tod->tod_input = toedev_input; tod->tod_rcvd = toedev_rcvd; tod->tod_output = toedev_output; tod->tod_send_rst = toedev_output; tod->tod_send_fin = toedev_output; tod->tod_pcb_detach = toedev_pcb_detach; tod->tod_l2_update = toedev_l2_update; tod->tod_route_redirect = toedev_route_redirect; tod->tod_syncache_added = toedev_syncache_added; tod->tod_syncache_removed = toedev_syncache_removed; tod->tod_syncache_respond = toedev_syncache_respond; tod->tod_offload_socket = toedev_offload_socket; tod->tod_ctloutput = toedev_ctloutput; } /* * Register an active TOE device with the system. This allows it to receive * notifications from the kernel. */ int register_toedev(struct toedev *tod) { struct toedev *t; mtx_lock(&toedev_lock); TAILQ_FOREACH(t, &toedev_list, link) { if (t == tod) { mtx_unlock(&toedev_lock); return (EEXIST); } } TAILQ_INSERT_TAIL(&toedev_list, tod, link); registered_toedevs++; mtx_unlock(&toedev_lock); inp_apply_all(toe_listen_start, tod); return (0); } /* * Remove the TOE device from the global list of active TOE devices. It is the * caller's responsibility to ensure that the TOE device is quiesced prior to * this call. */ int unregister_toedev(struct toedev *tod) { struct toedev *t, *t2; int rc = ENODEV; mtx_lock(&toedev_lock); TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) { if (t == tod) { TAILQ_REMOVE(&toedev_list, tod, link); registered_toedevs--; rc = 0; break; } } KASSERT(registered_toedevs >= 0, ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs)); mtx_unlock(&toedev_lock); return (rc); } void toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct inpcb *inp, void *tod, void *todctx) { struct socket *lso = inp->inp_socket; INP_WLOCK_ASSERT(inp); syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx); } int toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct socket **lsop) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); return (syncache_expand(inc, to, th, lsop, NULL)); } /* * General purpose check to see if a 4-tuple is in use by the kernel. If a TCP * header (presumably for an incoming SYN) is also provided, an existing 4-tuple * in TIME_WAIT may be assassinated freeing it up for re-use. * * Note that the TCP header must have been run through tcp_fields_to_host() or * equivalent. */ int toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp) { struct inpcb *inp; if (inc->inc_flags & INC_ISIPV6) { inp = in6_pcblookup(&V_tcbinfo, &inc->inc6_faddr, inc->inc_fport, &inc->inc6_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp); } else { inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport, inc->inc_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp); } if (inp != NULL) { INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* for twcheck */ if (!tcp_twcheck(inp, NULL, th, NULL, 0)) return (EADDRINUSE); } else { INP_WUNLOCK(inp); return (EADDRINUSE); } } return (0); } static void toe_lle_event(void *arg __unused, struct llentry *lle, int evt) { struct toedev *tod; struct ifnet *ifp; struct sockaddr *sa; uint8_t *lladdr; uint16_t vtag; int family; struct sockaddr_in6 sin6; LLE_WLOCK_ASSERT(lle); ifp = lltable_get_ifp(lle->lle_tbl); family = lltable_get_af(lle->lle_tbl); if (family != AF_INET && family != AF_INET6) return; /* * Not interested if the interface's TOE capability is not enabled. */ if ((family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) || (family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6))) return; tod = TOEDEV(ifp); if (tod == NULL) return; sa = (struct sockaddr *)&sin6; lltable_fill_sa_entry(lle, sa); vtag = 0xfff; if (evt != LLENTRY_RESOLVED) { /* * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean * this entry is going to be deleted. */ lladdr = NULL; } else { KASSERT(lle->la_flags & LLE_VALID, ("%s: %p resolved but not valid?", __func__, lle)); lladdr = (uint8_t *)lle->ll_addr; #ifdef VLAN_TAG VLAN_TAG(ifp, &vtag); #endif } tod->tod_l2_update(tod, ifp, sa, lladdr, vtag); } /* - * XXX: implement. - */ -static void -toe_route_redirect_event(void *arg __unused, struct rtentry *rt0, - struct rtentry *rt1, struct sockaddr *sa) -{ - - return; -} - -/* * Returns 0 or EWOULDBLOCK on success (any other value is an error). 0 means * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's * tod_l2_update will be called later, when the entry is resolved or times out. */ int toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa, uint8_t *lladdr, uint16_t *vtag) { int rc; switch (sa->sa_family) { #ifdef INET case AF_INET: rc = arpresolve(ifp, 0, NULL, sa, lladdr, NULL); break; #endif #ifdef INET6 case AF_INET6: rc = nd6_resolve(ifp, 0, NULL, sa, lladdr, NULL); break; #endif default: return (EPROTONOSUPPORT); } if (rc == 0) { #ifdef VLAN_TAG if (VLAN_TAG(ifp, vtag) != 0) #endif *vtag = 0xfff; } return (rc); } void toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err) { INP_WLOCK_ASSERT(inp); if (!(inp->inp_flags & INP_DROPPED)) { struct tcpcb *tp = intotcpcb(inp); KASSERT(tp->t_flags & TF_TOE, ("%s: tp %p not offloaded.", __func__, tp)); if (err == EAGAIN) { /* * Temporary failure during offload, take this PCB back. * Detach from the TOE driver and do the rest of what * TCP's pru_connect would have done if the connection * wasn't offloaded. */ tod->tod_pcb_detach(tod, tp); KASSERT(!(tp->t_flags & TF_TOE), ("%s: tp %p still offloaded.", __func__, tp)); tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); (void) tp->t_fb->tfb_tcp_output(tp); } else { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tp = tcp_drop(tp, err); if (tp == NULL) INP_WLOCK(inp); /* re-acquire */ } } INP_WLOCK_ASSERT(inp); } static int toecore_load(void) { mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF); TAILQ_INIT(&toedev_list); listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start, toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY); listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop, toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY); lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL, EVENTHANDLER_PRI_ANY); - route_redirect_eh = EVENTHANDLER_REGISTER(route_redirect_event, - toe_route_redirect_event, NULL, EVENTHANDLER_PRI_ANY); return (0); } static int toecore_unload(void) { mtx_lock(&toedev_lock); if (!TAILQ_EMPTY(&toedev_list)) { mtx_unlock(&toedev_lock); return (EBUSY); } EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh); EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh); EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh); - EVENTHANDLER_DEREGISTER(route_redirect_event, route_redirect_eh); mtx_unlock(&toedev_lock); mtx_destroy(&toedev_lock); return (0); } static int toecore_mod_handler(module_t mod, int cmd, void *arg) { if (cmd == MOD_LOAD) return (toecore_load()); if (cmd == MOD_UNLOAD) return (toecore_unload()); return (EOPNOTSUPP); } static moduledata_t mod_data= { "toecore", toecore_mod_handler, 0 }; MODULE_VERSION(toecore, 1); DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY); Index: projects/clang380-import/sys/netinet6/frag6.c =================================================================== --- projects/clang380-import/sys/netinet6/frag6.c (revision 293686) +++ projects/clang380-import/sys/netinet6/frag6.c (revision 293687) @@ -1,819 +1,820 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: frag6.c,v 1.33 2002/01/07 11:34:48 kjc Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_rss.h" #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for ECN definitions */ #include /* for ECN definitions */ #include static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *); static void frag6_deq(struct ip6asfrag *); static void frag6_insque(struct ip6q *, struct ip6q *); static void frag6_remque(struct ip6q *); static void frag6_freef(struct ip6q *); static struct mtx ip6qlock; /* * These fields all protected by ip6qlock. */ static VNET_DEFINE(u_int, frag6_nfragpackets); static VNET_DEFINE(u_int, frag6_nfrags); static VNET_DEFINE(struct ip6q, ip6q); /* ip6 reassemble queue */ #define V_frag6_nfragpackets VNET(frag6_nfragpackets) #define V_frag6_nfrags VNET(frag6_nfrags) #define V_ip6q VNET(ip6q) #define IP6Q_LOCK_INIT() mtx_init(&ip6qlock, "ip6qlock", NULL, MTX_DEF); #define IP6Q_LOCK() mtx_lock(&ip6qlock) #define IP6Q_TRYLOCK() mtx_trylock(&ip6qlock) #define IP6Q_LOCK_ASSERT() mtx_assert(&ip6qlock, MA_OWNED) #define IP6Q_UNLOCK() mtx_unlock(&ip6qlock) static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header"); /* * Initialise reassembly queue and fragment identifier. */ static void frag6_change(void *tag) { V_ip6_maxfragpackets = nmbclusters / 4; V_ip6_maxfrags = nmbclusters / 4; } void frag6_init(void) { V_ip6_maxfragpackets = nmbclusters / 4; V_ip6_maxfrags = nmbclusters / 4; V_ip6q.ip6q_next = V_ip6q.ip6q_prev = &V_ip6q; if (!IS_DEFAULT_VNET(curvnet)) return; EVENTHANDLER_REGISTER(nmbclusters_change, frag6_change, NULL, EVENTHANDLER_PRI_ANY); IP6Q_LOCK_INIT(); } /* * In RFC2460, fragment and reassembly rule do not agree with each other, * in terms of next header field handling in fragment header. * While the sender will use the same value for all of the fragmented packets, * receiver is suggested not to check the consistency. * * fragment rule (p20): * (2) A Fragment header containing: * The Next Header value that identifies the first header of * the Fragmentable Part of the original packet. * -> next header field is same for all fragments * * reassembly rule (p21): * The Next Header field of the last header of the Unfragmentable * Part is obtained from the Next Header field of the first * fragment's Fragment header. * -> should grab it from the first fragment only * * The following note also contradicts with fragment rule - noone is going to * send different fragment with different next header field. * * additional note (p22): * The Next Header values in the Fragment headers of different * fragments of the same original packet may differ. Only the value * from the Offset zero fragment packet is used for reassembly. * -> should grab it from the first fragment only * * There is no explicit reason given in the RFC. Historical reason maybe? */ /* * Fragment input */ int frag6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp, *t; struct ip6_hdr *ip6; struct ip6_frag *ip6f; struct ip6q *q6; struct ip6asfrag *af6, *ip6af, *af6dwn; struct in6_ifaddr *ia; int offset = *offp, nxt, i, next; int first_frag = 0; int fragoff, frgpartlen; /* must be larger than u_int16_t */ struct ifnet *dstifp; u_int8_t ecn, ecn0; #ifdef RSS struct m_tag *mtag; struct ip6_direct_ctx *ip6dc; #endif #if 0 char ip6buf[INET6_ADDRSTRLEN]; #endif ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE); ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset); #else IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f)); if (ip6f == NULL) return (IPPROTO_DONE); #endif dstifp = NULL; /* find the destination interface of the packet. */ ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia != NULL) { dstifp = ia->ia_ifp; ifa_free(&ia->ia_ifa); } /* jumbo payload can't contain a fragment header */ if (ip6->ip6_plen == 0) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); in6_ifstat_inc(dstifp, ifs6_reass_fail); return IPPROTO_DONE; } /* * check whether fragment packet's fragment length is * multiple of 8 octets. * sizeof(struct ip6_frag) == 8 * sizeof(struct ip6_hdr) = 40 */ if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) && (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offsetof(struct ip6_hdr, ip6_plen)); in6_ifstat_inc(dstifp, ifs6_reass_fail); return IPPROTO_DONE; } IP6STAT_INC(ip6s_fragments); in6_ifstat_inc(dstifp, ifs6_reass_reqd); /* offset now points to data portion */ offset += sizeof(struct ip6_frag); /* * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0) * upfront, unrelated to any reassembly. Just skip the fragment header. */ if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) { /* XXX-BZ we want dedicated counters for this. */ IP6STAT_INC(ip6s_reassembled); in6_ifstat_inc(dstifp, ifs6_reass_ok); *offp = offset; return (ip6f->ip6f_nxt); } IP6Q_LOCK(); /* * Enforce upper bound on number of fragments. * If maxfrag is 0, never accept fragments. * If maxfrag is -1, accept all fragments without limitation. */ if (V_ip6_maxfrags < 0) ; else if (V_frag6_nfrags >= (u_int)V_ip6_maxfrags) goto dropfrag; for (q6 = V_ip6q.ip6q_next; q6 != &V_ip6q; q6 = q6->ip6q_next) if (ip6f->ip6f_ident == q6->ip6q_ident && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) && IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst) #ifdef MAC && mac_ip6q_match(m, q6) #endif ) break; if (q6 == &V_ip6q) { /* * the first fragment to arrive, create a reassembly queue. */ first_frag = 1; /* * Enforce upper bound on number of fragmented packets * for which we attempt reassembly; * If maxfragpackets is 0, never accept fragments. * If maxfragpackets is -1, accept all fragments without * limitation. */ if (V_ip6_maxfragpackets < 0) ; else if (V_frag6_nfragpackets >= (u_int)V_ip6_maxfragpackets) goto dropfrag; V_frag6_nfragpackets++; q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE, M_NOWAIT); if (q6 == NULL) goto dropfrag; bzero(q6, sizeof(*q6)); #ifdef MAC if (mac_ip6q_init(q6, M_NOWAIT) != 0) { free(q6, M_FTABLE); goto dropfrag; } mac_ip6q_create(m, q6); #endif frag6_insque(q6, &V_ip6q); /* ip6q_nxt will be filled afterwards, from 1st fragment */ q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6; #ifdef notyet q6->ip6q_nxtp = (u_char *)nxtp; #endif q6->ip6q_ident = ip6f->ip6f_ident; q6->ip6q_ttl = IPV6_FRAGTTL; q6->ip6q_src = ip6->ip6_src; q6->ip6q_dst = ip6->ip6_dst; q6->ip6q_ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */ q6->ip6q_nfrag = 0; } /* * If it's the 1st fragment, record the length of the * unfragmentable part and the next header of the fragment header. */ fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK); if (fragoff == 0) { q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag); q6->ip6q_nxt = ip6f->ip6f_nxt; } /* * Check that the reassembled packet would not exceed 65535 bytes * in size. * If it would exceed, discard the fragment and return an ICMP error. */ frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset; if (q6->ip6q_unfrglen >= 0) { /* The 1st fragment has already arrived. */ if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset - sizeof(struct ip6_frag) + offsetof(struct ip6_frag, ip6f_offlg)); IP6Q_UNLOCK(); return (IPPROTO_DONE); } } else if (fragoff + frgpartlen > IPV6_MAXPACKET) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset - sizeof(struct ip6_frag) + offsetof(struct ip6_frag, ip6f_offlg)); IP6Q_UNLOCK(); return (IPPROTO_DONE); } /* * If it's the first fragment, do the above check for each * fragment already stored in the reassembly queue. */ if (fragoff == 0) { for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = af6dwn) { af6dwn = af6->ip6af_down; if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen > IPV6_MAXPACKET) { struct mbuf *merr = IP6_REASS_MBUF(af6); struct ip6_hdr *ip6err; int erroff = af6->ip6af_offset; /* dequeue the fragment. */ frag6_deq(af6); free(af6, M_FTABLE); /* adjust pointer. */ ip6err = mtod(merr, struct ip6_hdr *); /* * Restore source and destination addresses * in the erroneous IPv6 header. */ ip6err->ip6_src = q6->ip6q_src; ip6err->ip6_dst = q6->ip6q_dst; icmp6_error(merr, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff - sizeof(struct ip6_frag) + offsetof(struct ip6_frag, ip6f_offlg)); } } } ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FTABLE, M_NOWAIT); if (ip6af == NULL) goto dropfrag; bzero(ip6af, sizeof(*ip6af)); ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG; ip6af->ip6af_off = fragoff; ip6af->ip6af_frglen = frgpartlen; ip6af->ip6af_offset = offset; IP6_REASS_MBUF(ip6af) = m; if (first_frag) { af6 = (struct ip6asfrag *)q6; goto insert; } /* * Handle ECN by comparing this segment with the first one; * if CE is set, do not lose CE. * drop if CE and not-ECT are mixed for the same packet. */ ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; ecn0 = q6->ip6q_ecn; if (ecn == IPTOS_ECN_CE) { if (ecn0 == IPTOS_ECN_NOTECT) { free(ip6af, M_FTABLE); goto dropfrag; } if (ecn0 != IPTOS_ECN_CE) q6->ip6q_ecn = IPTOS_ECN_CE; } if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) { free(ip6af, M_FTABLE); goto dropfrag; } /* * Find a segment which begins after this one does. */ for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = af6->ip6af_down) if (af6->ip6af_off > ip6af->ip6af_off) break; #if 0 /* * If there is a preceding segment, it may provide some of * our data already. If so, drop the data from the incoming * segment. If it provides all of our data, drop us. */ if (af6->ip6af_up != (struct ip6asfrag *)q6) { i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen - ip6af->ip6af_off; if (i > 0) { if (i >= ip6af->ip6af_frglen) goto dropfrag; m_adj(IP6_REASS_MBUF(ip6af), i); ip6af->ip6af_off += i; ip6af->ip6af_frglen -= i; } } /* * While we overlap succeeding segments trim them or, * if they are completely covered, dequeue them. */ while (af6 != (struct ip6asfrag *)q6 && ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) { i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off; if (i < af6->ip6af_frglen) { af6->ip6af_frglen -= i; af6->ip6af_off += i; m_adj(IP6_REASS_MBUF(af6), i); break; } af6 = af6->ip6af_down; m_freem(IP6_REASS_MBUF(af6->ip6af_up)); frag6_deq(af6->ip6af_up); } #else /* * If the incoming framgent overlaps some existing fragments in * the reassembly queue, drop it, since it is dangerous to override * existing fragments from a security point of view. * We don't know which fragment is the bad guy - here we trust * fragment that came in earlier, with no real reason. * * Note: due to changes after disabling this part, mbuf passed to * m_adj() below now does not meet the requirement. */ if (af6->ip6af_up != (struct ip6asfrag *)q6) { i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen - ip6af->ip6af_off; if (i > 0) { #if 0 /* suppress the noisy log */ log(LOG_ERR, "%d bytes of a fragment from %s " "overlaps the previous fragment\n", i, ip6_sprintf(ip6buf, &q6->ip6q_src)); #endif free(ip6af, M_FTABLE); goto dropfrag; } } if (af6 != (struct ip6asfrag *)q6) { i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off; if (i > 0) { #if 0 /* suppress the noisy log */ log(LOG_ERR, "%d bytes of a fragment from %s " "overlaps the succeeding fragment", i, ip6_sprintf(ip6buf, &q6->ip6q_src)); #endif free(ip6af, M_FTABLE); goto dropfrag; } } #endif insert: #ifdef MAC if (!first_frag) mac_ip6q_update(m, q6); #endif /* * Stick new segment in its place; * check for complete reassembly. * Move to front of packet queue, as we are * the most recently active fragmented packet. */ frag6_enq(ip6af, af6->ip6af_up); V_frag6_nfrags++; q6->ip6q_nfrag++; #if 0 /* xxx */ if (q6 != V_ip6q.ip6q_next) { frag6_remque(q6); frag6_insque(q6, &V_ip6q); } #endif next = 0; for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = af6->ip6af_down) { if (af6->ip6af_off != next) { IP6Q_UNLOCK(); return IPPROTO_DONE; } next += af6->ip6af_frglen; } if (af6->ip6af_up->ip6af_mff) { IP6Q_UNLOCK(); return IPPROTO_DONE; } /* * Reassembly is complete; concatenate fragments. */ ip6af = q6->ip6q_down; t = m = IP6_REASS_MBUF(ip6af); af6 = ip6af->ip6af_down; frag6_deq(ip6af); while (af6 != (struct ip6asfrag *)q6) { af6dwn = af6->ip6af_down; frag6_deq(af6); while (t->m_next) t = t->m_next; m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset); m_cat(t, IP6_REASS_MBUF(af6)); free(af6, M_FTABLE); af6 = af6dwn; } /* adjust offset to point where the original next header starts */ offset = ip6af->ip6af_offset - sizeof(struct ip6_frag); free(ip6af, M_FTABLE); ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr)); if (q6->ip6q_ecn == IPTOS_ECN_CE) ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20); nxt = q6->ip6q_nxt; #ifdef notyet *q6->ip6q_nxtp = (u_char)(nxt & 0xff); #endif if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) { frag6_remque(q6); V_frag6_nfrags -= q6->ip6q_nfrag; #ifdef MAC mac_ip6q_destroy(q6); #endif free(q6, M_FTABLE); V_frag6_nfragpackets--; goto dropfrag; } /* * Store NXT to the original. */ { char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */ *prvnxtp = nxt; } frag6_remque(q6); V_frag6_nfrags -= q6->ip6q_nfrag; #ifdef MAC mac_ip6q_reassemble(q6, m); mac_ip6q_destroy(q6); #endif free(q6, M_FTABLE); V_frag6_nfragpackets--; if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */ int plen = 0; for (t = m; t; t = t->m_next) plen += t->m_len; m->m_pkthdr.len = plen; } #ifdef RSS mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc), M_NOWAIT); if (mtag == NULL) goto dropfrag; ip6dc = (struct ip6_direct_ctx *)(mtag + 1); ip6dc->ip6dc_nxt = nxt; ip6dc->ip6dc_off = offset; m_tag_prepend(m, mtag); #endif IP6Q_UNLOCK(); IP6STAT_INC(ip6s_reassembled); in6_ifstat_inc(dstifp, ifs6_reass_ok); #ifdef RSS /* * Queue/dispatch for reprocessing. */ netisr_dispatch(NETISR_IPV6_DIRECT, m); return IPPROTO_DONE; #endif /* * Tell launch routine the next header */ *mp = m; *offp = offset; return nxt; dropfrag: IP6Q_UNLOCK(); in6_ifstat_inc(dstifp, ifs6_reass_fail); IP6STAT_INC(ip6s_fragdropped); m_freem(m); return IPPROTO_DONE; } /* * Free a fragment reassembly header and all * associated datagrams. */ void frag6_freef(struct ip6q *q6) { struct ip6asfrag *af6, *down6; IP6Q_LOCK_ASSERT(); for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = down6) { struct mbuf *m = IP6_REASS_MBUF(af6); down6 = af6->ip6af_down; frag6_deq(af6); /* * Return ICMP time exceeded error for the 1st fragment. * Just free other fragments. */ if (af6->ip6af_off == 0) { struct ip6_hdr *ip6; /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); /* restore source and destination addresses */ ip6->ip6_src = q6->ip6q_src; ip6->ip6_dst = q6->ip6q_dst; icmp6_error(m, ICMP6_TIME_EXCEEDED, ICMP6_TIME_EXCEED_REASSEMBLY, 0); } else m_freem(m); free(af6, M_FTABLE); } frag6_remque(q6); V_frag6_nfrags -= q6->ip6q_nfrag; #ifdef MAC mac_ip6q_destroy(q6); #endif free(q6, M_FTABLE); V_frag6_nfragpackets--; } /* * Put an ip fragment on a reassembly chain. * Like insque, but pointers in middle of structure. */ void frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6) { IP6Q_LOCK_ASSERT(); af6->ip6af_up = up6; af6->ip6af_down = up6->ip6af_down; up6->ip6af_down->ip6af_up = af6; up6->ip6af_down = af6; } /* * To frag6_enq as remque is to insque. */ void frag6_deq(struct ip6asfrag *af6) { IP6Q_LOCK_ASSERT(); af6->ip6af_up->ip6af_down = af6->ip6af_down; af6->ip6af_down->ip6af_up = af6->ip6af_up; } void frag6_insque(struct ip6q *new, struct ip6q *old) { IP6Q_LOCK_ASSERT(); new->ip6q_prev = old; new->ip6q_next = old->ip6q_next; old->ip6q_next->ip6q_prev= new; old->ip6q_next = new; } void frag6_remque(struct ip6q *p6) { IP6Q_LOCK_ASSERT(); p6->ip6q_prev->ip6q_next = p6->ip6q_next; p6->ip6q_next->ip6q_prev = p6->ip6q_prev; } /* * IPv6 reassembling timer processing; * if a timer expires on a reassembly * queue, discard it. */ void frag6_slowtimo(void) { VNET_ITERATOR_DECL(vnet_iter); struct ip6q *q6; VNET_LIST_RLOCK_NOSLEEP(); IP6Q_LOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); q6 = V_ip6q.ip6q_next; if (q6) while (q6 != &V_ip6q) { --q6->ip6q_ttl; q6 = q6->ip6q_next; if (q6->ip6q_prev->ip6q_ttl == 0) { IP6STAT_INC(ip6s_fragtimeout); /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ frag6_freef(q6->ip6q_prev); } } /* * If we are over the maximum number of fragments * (due to the limit being lowered), drain off * enough to get down to the new limit. */ while (V_frag6_nfragpackets > (u_int)V_ip6_maxfragpackets && V_ip6q.ip6q_prev) { IP6STAT_INC(ip6s_fragoverflow); /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ frag6_freef(V_ip6q.ip6q_prev); } CURVNET_RESTORE(); } IP6Q_UNLOCK(); VNET_LIST_RUNLOCK_NOSLEEP(); } /* * Drain off all datagram fragments. */ void frag6_drain(void) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); if (IP6Q_TRYLOCK() == 0) { VNET_LIST_RUNLOCK_NOSLEEP(); return; } VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); while (V_ip6q.ip6q_next != &V_ip6q) { IP6STAT_INC(ip6s_fragdropped); /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ frag6_freef(V_ip6q.ip6q_next); } CURVNET_RESTORE(); } IP6Q_UNLOCK(); VNET_LIST_RUNLOCK_NOSLEEP(); } int ip6_deletefraghdr(struct mbuf *m, int offset, int wait) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct mbuf *t; /* Delete frag6 header. */ if (m->m_len >= offset + sizeof(struct ip6_frag)) { /* This is the only possible case with !PULLDOWN_TEST. */ bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag), offset); m->m_data += sizeof(struct ip6_frag); m->m_len -= sizeof(struct ip6_frag); } else { /* This comes with no copy if the boundary is on cluster. */ if ((t = m_split(m, offset, wait)) == NULL) return (ENOMEM); m_adj(t, sizeof(struct ip6_frag)); m_cat(m, t); } return (0); } Index: projects/clang380-import/sys/netinet6/icmp6.c =================================================================== --- projects/clang380-import/sys/netinet6/icmp6.c (revision 293686) +++ projects/clang380-import/sys/netinet6/icmp6.c (revision 293687) @@ -1,2869 +1,2870 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #define MBUF_PRIVATE /* XXXRW: Optimisation tries to avoid M_EXT mbufs */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern struct domain inet6domain; VNET_PCPUSTAT_DEFINE(struct icmp6stat, icmp6stat); VNET_PCPUSTAT_SYSINIT(icmp6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(icmp6stat); #endif /* VIMAGE */ VNET_DECLARE(struct inpcbinfo, ripcbinfo); VNET_DECLARE(struct inpcbhead, ripcb); VNET_DECLARE(int, icmp6errppslim); static VNET_DEFINE(int, icmp6errpps_count) = 0; static VNET_DEFINE(struct timeval, icmp6errppslim_last); VNET_DECLARE(int, icmp6_nodeinfo); #define V_ripcbinfo VNET(ripcbinfo) #define V_ripcb VNET(ripcb) #define V_icmp6errppslim VNET(icmp6errppslim) #define V_icmp6errpps_count VNET(icmp6errpps_count) #define V_icmp6errppslim_last VNET(icmp6errppslim_last) #define V_icmp6_nodeinfo VNET(icmp6_nodeinfo) static void icmp6_errcount(int, int); static int icmp6_rip6_input(struct mbuf **, int); static int icmp6_ratelimit(const struct in6_addr *, const int, const int); static const char *icmp6_redirect_diag(struct in6_addr *, struct in6_addr *, struct in6_addr *); static struct mbuf *ni6_input(struct mbuf *, int); static struct mbuf *ni6_nametodns(const char *, int, int); static int ni6_dnsmatch(const char *, int, const char *, int); static int ni6_addrs(struct icmp6_nodeinfo *, struct mbuf *, struct ifnet **, struct in6_addr *); static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, struct ifnet *, int); static int icmp6_notify_error(struct mbuf **, int, int, int); /* * Kernel module interface for updating icmp6stat. The argument is an index * into icmp6stat treated as an array of u_quad_t. While this encodes the * general layout of icmp6stat into the caller, it doesn't encode its * location, so that future changes to add, for example, per-CPU stats * support won't cause binary compatibility problems for kernel modules. */ void kmod_icmp6stat_inc(int statnum) { counter_u64_add(VNET(icmp6stat)[statnum], 1); } static void icmp6_errcount(int type, int code) { switch (type) { case ICMP6_DST_UNREACH: switch (code) { case ICMP6_DST_UNREACH_NOROUTE: ICMP6STAT_INC(icp6s_odst_unreach_noroute); return; case ICMP6_DST_UNREACH_ADMIN: ICMP6STAT_INC(icp6s_odst_unreach_admin); return; case ICMP6_DST_UNREACH_BEYONDSCOPE: ICMP6STAT_INC(icp6s_odst_unreach_beyondscope); return; case ICMP6_DST_UNREACH_ADDR: ICMP6STAT_INC(icp6s_odst_unreach_addr); return; case ICMP6_DST_UNREACH_NOPORT: ICMP6STAT_INC(icp6s_odst_unreach_noport); return; } break; case ICMP6_PACKET_TOO_BIG: ICMP6STAT_INC(icp6s_opacket_too_big); return; case ICMP6_TIME_EXCEEDED: switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: ICMP6STAT_INC(icp6s_otime_exceed_transit); return; case ICMP6_TIME_EXCEED_REASSEMBLY: ICMP6STAT_INC(icp6s_otime_exceed_reassembly); return; } break; case ICMP6_PARAM_PROB: switch (code) { case ICMP6_PARAMPROB_HEADER: ICMP6STAT_INC(icp6s_oparamprob_header); return; case ICMP6_PARAMPROB_NEXTHEADER: ICMP6STAT_INC(icp6s_oparamprob_nextheader); return; case ICMP6_PARAMPROB_OPTION: ICMP6STAT_INC(icp6s_oparamprob_option); return; } break; case ND_REDIRECT: ICMP6STAT_INC(icp6s_oredirect); return; } ICMP6STAT_INC(icp6s_ounknown); } /* * A wrapper function for icmp6_error() necessary when the erroneous packet * may not contain enough scope zone information. */ void icmp6_error2(struct mbuf *m, int type, int code, int param, struct ifnet *ifp) { struct ip6_hdr *ip6; if (ifp == NULL) return; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif ip6 = mtod(m, struct ip6_hdr *); if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0) return; if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0) return; icmp6_error(m, type, code, param); } /* * Generate an error packet of type error in response to bad IP6 packet. */ void icmp6_error(struct mbuf *m, int type, int code, int param) { struct ip6_hdr *oip6, *nip6; struct icmp6_hdr *icmp6; u_int preplen; int off; int nxt; ICMP6STAT_INC(icp6s_error); /* count per-type-code statistics */ icmp6_errcount(type, code); #ifdef M_DECRYPTED /*not openbsd*/ if (m->m_flags & M_DECRYPTED) { ICMP6STAT_INC(icp6s_canterror); goto freeit; } #endif #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif oip6 = mtod(m, struct ip6_hdr *); /* * If the destination address of the erroneous packet is a multicast * address, or the packet was sent using link-layer multicast, * we should basically suppress sending an error (RFC 2463, Section * 2.4). * We have two exceptions (the item e.2 in that section): * - the Packet Too Big message can be sent for path MTU discovery. * - the Parameter Problem Message that can be allowed an icmp6 error * in the option type field. This check has been done in * ip6_unknown_opt(), so we can just check the type and code. */ if ((m->m_flags & (M_BCAST|M_MCAST) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) && (type != ICMP6_PACKET_TOO_BIG && (type != ICMP6_PARAM_PROB || code != ICMP6_PARAMPROB_OPTION))) goto freeit; /* * RFC 2463, 2.4 (e.5): source address check. * XXX: the case of anycast source? */ if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_src)) goto freeit; /* * If we are about to send ICMPv6 against ICMPv6 error/redirect, * don't do it. */ nxt = -1; off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); if (off >= 0 && nxt == IPPROTO_ICMPV6) { struct icmp6_hdr *icp; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), ); icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off, sizeof(*icp)); if (icp == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif if (icp->icmp6_type < ICMP6_ECHO_REQUEST || icp->icmp6_type == ND_REDIRECT) { /* * ICMPv6 error * Special case: for redirect (which is * informational) we must not send icmp6 error. */ ICMP6STAT_INC(icp6s_canterror); goto freeit; } else { /* ICMPv6 informational - send the error */ } } else { /* non-ICMPv6 - send the error */ } oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */ /* Finally, do rate limitation check. */ if (icmp6_ratelimit(&oip6->ip6_src, type, code)) { ICMP6STAT_INC(icp6s_toofreq); goto freeit; } /* * OK, ICMP6 can be generated. */ if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN) m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len); preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); M_PREPEND(m, preplen, M_NOWAIT); /* FIB is also copied over. */ if (m == NULL) { nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__)); return; } nip6 = mtod(m, struct ip6_hdr *); nip6->ip6_src = oip6->ip6_src; nip6->ip6_dst = oip6->ip6_dst; in6_clearscope(&oip6->ip6_src); in6_clearscope(&oip6->ip6_dst); icmp6 = (struct icmp6_hdr *)(nip6 + 1); icmp6->icmp6_type = type; icmp6->icmp6_code = code; icmp6->icmp6_pptr = htonl((u_int32_t)param); /* * icmp6_reflect() is designed to be in the input path. * icmp6_error() can be called from both input and output path, * and if we are in output path rcvif could contain bogus value. * clear m->m_pkthdr.rcvif for safety, we should have enough scope * information in ip header (nip6). */ m->m_pkthdr.rcvif = NULL; ICMP6STAT_INC(icp6s_outhist[type]); icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */ return; freeit: /* * If we can't tell whether or not we can generate ICMP6, free it. */ m_freem(m); } /* * Process a received ICMP6 message. */ int icmp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp, *n; struct ifnet *ifp; struct ip6_hdr *ip6, *nip6; struct icmp6_hdr *icmp6, *nicmp6; int off = *offp; int icmp6len = m->m_pkthdr.len - *offp; int code, sum, noff; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; int ip6len, error; ifp = m->m_pkthdr.rcvif; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE); /* m might change if M_LOOP. So, call mtod after this */ #endif /* * Locate icmp6 structure in mbuf, and check * that not corrupted and of at least minimum length */ ip6 = mtod(m, struct ip6_hdr *); ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); if (icmp6len < sizeof(struct icmp6_hdr)) { ICMP6STAT_INC(icp6s_tooshort); goto freeit; } /* * Check multicast group membership. * Note: SSM filters are not applied for ICMPv6 traffic. */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct in6_multi *inm; inm = in6m_lookup(ifp, &ip6->ip6_dst); if (inm == NULL) { IP6STAT_INC(ip6s_notmember); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); goto freeit; } } /* * calculate the checksum */ #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return IPPROTO_DONE; } #endif code = icmp6->icmp6_code; if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) { nd6log((LOG_ERR, "ICMP6 checksum error(%d|%x) %s\n", icmp6->icmp6_type, sum, ip6_sprintf(ip6bufs, &ip6->ip6_src))); ICMP6STAT_INC(icp6s_checksum); goto freeit; } ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]); icmp6_ifstat_inc(ifp, ifs6_in_msg); if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK) icmp6_ifstat_inc(ifp, ifs6_in_error); switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: icmp6_ifstat_inc(ifp, ifs6_in_dstunreach); switch (code) { case ICMP6_DST_UNREACH_NOROUTE: code = PRC_UNREACH_NET; break; case ICMP6_DST_UNREACH_ADMIN: icmp6_ifstat_inc(ifp, ifs6_in_adminprohib); code = PRC_UNREACH_PROTOCOL; /* is this a good code? */ break; case ICMP6_DST_UNREACH_ADDR: code = PRC_HOSTDEAD; break; case ICMP6_DST_UNREACH_BEYONDSCOPE: /* I mean "source address was incorrect." */ code = PRC_PARAMPROB; break; case ICMP6_DST_UNREACH_NOPORT: code = PRC_UNREACH_PORT; break; default: goto badcode; } goto deliver; break; case ICMP6_PACKET_TOO_BIG: icmp6_ifstat_inc(ifp, ifs6_in_pkttoobig); /* validation is made in icmp6_mtudisc_update */ code = PRC_MSGSIZE; /* * Updating the path MTU will be done after examining * intermediate extension headers. */ goto deliver; break; case ICMP6_TIME_EXCEEDED: icmp6_ifstat_inc(ifp, ifs6_in_timeexceed); switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: code = PRC_TIMXCEED_INTRANS; break; case ICMP6_TIME_EXCEED_REASSEMBLY: code = PRC_TIMXCEED_REASS; break; default: goto badcode; } goto deliver; break; case ICMP6_PARAM_PROB: icmp6_ifstat_inc(ifp, ifs6_in_paramprob); switch (code) { case ICMP6_PARAMPROB_NEXTHEADER: code = PRC_UNREACH_PROTOCOL; break; case ICMP6_PARAMPROB_HEADER: case ICMP6_PARAMPROB_OPTION: code = PRC_PARAMPROB; break; default: goto badcode; } goto deliver; break; case ICMP6_ECHO_REQUEST: icmp6_ifstat_inc(ifp, ifs6_in_echo); if (code != 0) goto badcode; if ((n = m_copy(m, 0, M_COPYALL)) == NULL) { /* Give up remote */ break; } if (!M_WRITABLE(n) || n->m_len < off + sizeof(struct icmp6_hdr)) { struct mbuf *n0 = n; int n0len; CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) <= MHLEN); n = m_gethdr(M_NOWAIT, n0->m_type); if (n == NULL) { /* Give up remote */ m_freem(n0); break; } m_move_pkthdr(n, n0); /* FIB copied. */ n0len = n0->m_pkthdr.len; /* save for use below */ /* * Copy IPv6 and ICMPv6 only. */ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); noff = sizeof(struct ip6_hdr); /* new mbuf contains only ipv6+icmpv6 headers */ n->m_len = noff + sizeof(struct icmp6_hdr); /* * Adjust mbuf. ip6_plen will be adjusted in * ip6_output(). */ m_adj(n0, off + sizeof(struct icmp6_hdr)); /* recalculate complete packet size */ n->m_pkthdr.len = n0len + (noff - off); n->m_next = n0; } else { nip6 = mtod(n, struct ip6_hdr *); IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off, sizeof(*nicmp6)); noff = off; } nicmp6->icmp6_type = ICMP6_ECHO_REPLY; nicmp6->icmp6_code = 0; if (n) { ICMP6STAT_INC(icp6s_reflect); ICMP6STAT_INC(icp6s_outhist[ICMP6_ECHO_REPLY]); icmp6_reflect(n, noff); } break; case ICMP6_ECHO_REPLY: icmp6_ifstat_inc(ifp, ifs6_in_echoreply); if (code != 0) goto badcode; break; case MLD_LISTENER_QUERY: case MLD_LISTENER_REPORT: case MLD_LISTENER_DONE: case MLDV2_LISTENER_REPORT: /* * Drop MLD traffic which is not link-local, has a hop limit * of greater than 1 hop, or which does not have the * IPv6 HBH Router Alert option. * As IPv6 HBH options are stripped in ip6_input() we must * check an mbuf header flag. * XXX Should we also sanity check that these messages * were directed to a link-local multicast prefix? */ if ((ip6->ip6_hlim != 1) || (m->m_flags & M_RTALERT_MLD) == 0) goto freeit; if (mld_input(m, off, icmp6len) != 0) return (IPPROTO_DONE); /* m stays. */ break; case ICMP6_WRUREQUEST: /* ICMP6_FQDN_QUERY */ { enum { WRU, FQDN } mode; if (!V_icmp6_nodeinfo) break; if (icmp6len == sizeof(struct icmp6_hdr) + 4) mode = WRU; else if (icmp6len >= sizeof(struct icmp6_nodeinfo)) mode = FQDN; else goto badlen; if (mode == FQDN) { #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo), IPPROTO_DONE); #endif n = m_copy(m, 0, M_COPYALL); if (n) n = ni6_input(n, off); /* XXX meaningless if n == NULL */ noff = sizeof(struct ip6_hdr); } else { struct prison *pr; u_char *p; int maxhlen, hlen; /* * XXX: this combination of flags is pointless, * but should we keep this for compatibility? */ if ((V_icmp6_nodeinfo & 5) != 5) break; if (code != 0) goto badcode; CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) + 4 <= MHLEN); n = m_gethdr(M_NOWAIT, m->m_type); if (n == NULL) { /* Give up remote */ break; } if (!m_dup_pkthdr(n, m, M_NOWAIT)) { /* * Previous code did a blind M_COPY_PKTHDR * and said "just for rcvif". If true, then * we could tolerate the dup failing (due to * the deep copy of the tag chain). For now * be conservative and just fail. */ m_free(n); n = NULL; } maxhlen = M_TRAILINGSPACE(n) - (sizeof(*nip6) + sizeof(*nicmp6) + 4); pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); hlen = strlen(pr->pr_hostname); if (maxhlen > hlen) maxhlen = hlen; /* * Copy IPv6 and ICMPv6 only. */ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); p = (u_char *)(nicmp6 + 1); bzero(p, 4); /* meaningless TTL */ bcopy(pr->pr_hostname, p + 4, maxhlen); mtx_unlock(&pr->pr_mtx); noff = sizeof(struct ip6_hdr); n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + 4 + maxhlen; nicmp6->icmp6_type = ICMP6_WRUREPLY; nicmp6->icmp6_code = 0; } if (n) { ICMP6STAT_INC(icp6s_reflect); ICMP6STAT_INC(icp6s_outhist[ICMP6_WRUREPLY]); icmp6_reflect(n, noff); } break; } case ICMP6_WRUREPLY: if (code != 0) goto badcode; break; case ND_ROUTER_SOLICIT: icmp6_ifstat_inc(ifp, ifs6_in_routersolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* give up local */ /* Send incoming SeND packet to user space. */ if (send_sendso_input_hook != NULL) { IP6_EXTHDR_CHECK(m, off, icmp6len, IPPROTO_DONE); error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); /* -1 == no app on SEND socket */ if (error == 0) return (IPPROTO_DONE); nd6_rs_input(m, off, icmp6len); } else nd6_rs_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { IP6_EXTHDR_CHECK(n, off, icmp6len, IPPROTO_DONE); error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; /* -1 == no app on SEND socket */ nd6_rs_input(n, off, icmp6len); } else nd6_rs_input(n, off, icmp6len); /* m stays. */ break; case ND_ROUTER_ADVERT: icmp6_ifstat_inc(ifp, ifs6_in_routeradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* Send incoming SeND-protected/ND packet to user space. */ if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); nd6_ra_input(m, off, icmp6len); } else nd6_ra_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; nd6_ra_input(n, off, icmp6len); } else nd6_ra_input(n, off, icmp6len); /* m stays. */ break; case ND_NEIGHBOR_SOLICIT: icmp6_ifstat_inc(ifp, ifs6_in_neighborsolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); nd6_ns_input(m, off, icmp6len); } else nd6_ns_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; nd6_ns_input(n, off, icmp6len); } else nd6_ns_input(n, off, icmp6len); /* m stays. */ break; case ND_NEIGHBOR_ADVERT: icmp6_ifstat_inc(ifp, ifs6_in_neighboradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* Send incoming SeND-protected/ND packet to user space. */ if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); nd6_na_input(m, off, icmp6len); } else nd6_na_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; nd6_na_input(n, off, icmp6len); } else nd6_na_input(n, off, icmp6len); /* m stays. */ break; case ND_REDIRECT: icmp6_ifstat_inc(ifp, ifs6_in_redirect); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_redirect)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); icmp6_redirect_input(m, off); } else icmp6_redirect_input(m, off); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; icmp6_redirect_input(n, off); } else icmp6_redirect_input(n, off); /* m stays. */ break; case ICMP6_ROUTER_RENUMBERING: if (code != ICMP6_ROUTER_RENUMBERING_COMMAND && code != ICMP6_ROUTER_RENUMBERING_RESULT) goto badcode; if (icmp6len < sizeof(struct icmp6_router_renum)) goto badlen; break; default: nd6log((LOG_DEBUG, "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n", icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ifp ? ifp->if_index : 0)); if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) { /* ICMPv6 error: MUST deliver it by spec... */ code = PRC_NCMDS; /* deliver */ } else { /* ICMPv6 informational: MUST not deliver */ break; } deliver: if (icmp6_notify_error(&m, off, icmp6len, code) != 0) { /* In this case, m should've been freed. */ return (IPPROTO_DONE); } break; badcode: ICMP6STAT_INC(icp6s_badcode); break; badlen: ICMP6STAT_INC(icp6s_badlen); break; } /* deliver the packet to appropriate sockets */ icmp6_rip6_input(&m, *offp); return IPPROTO_DONE; freeit: m_freem(m); return IPPROTO_DONE; } static int icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code) { struct mbuf *m = *mp; struct icmp6_hdr *icmp6; struct ip6_hdr *eip6; u_int32_t notifymtu; struct sockaddr_in6 icmp6src, icmp6dst; if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) { ICMP6STAT_INC(icp6s_tooshort); goto freeit; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1); icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif eip6 = (struct ip6_hdr *)(icmp6 + 1); /* Detect the upper level protocol */ { void (*ctlfunc)(int, struct sockaddr *, void *); u_int8_t nxt = eip6->ip6_nxt; int eoff = off + sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr); struct ip6ctlparam ip6cp; struct in6_addr *finaldst = NULL; int icmp6type = icmp6->icmp6_type; struct ip6_frag *fh; struct ip6_rthdr *rth; struct ip6_rthdr0 *rth0; int rthlen; while (1) { /* XXX: should avoid infinite loop explicitly? */ struct ip6_ext *eh; switch (nxt) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: case IPPROTO_AH: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_ext), -1); eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(eh, struct ip6_ext *, m, eoff, sizeof(*eh)); if (eh == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif if (nxt == IPPROTO_AH) eoff += (eh->ip6e_len + 2) << 2; else eoff += (eh->ip6e_len + 1) << 3; nxt = eh->ip6e_nxt; break; case IPPROTO_ROUTING: /* * When the erroneous packet contains a * routing header, we should examine the * header to determine the final destination. * Otherwise, we can't properly update * information that depends on the final * destination (e.g. path MTU). */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1); rth = (struct ip6_rthdr *) (mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m, eoff, sizeof(*rth)); if (rth == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif rthlen = (rth->ip6r_len + 1) << 3; /* * XXX: currently there is no * officially defined type other * than type-0. * Note that if the segment left field * is 0, all intermediate hops must * have been passed. */ if (rth->ip6r_segleft && rth->ip6r_type == IPV6_RTHDR_TYPE_0) { int hops; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1); rth0 = (struct ip6_rthdr0 *) (mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth0, struct ip6_rthdr0 *, m, eoff, rthlen); if (rth0 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* just ignore a bogus header */ if ((rth0->ip6r0_len % 2) == 0 && (hops = rth0->ip6r0_len/2)) finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1); } eoff += rthlen; nxt = rth->ip6r_nxt; break; case IPPROTO_FRAGMENT: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_frag), -1); fh = (struct ip6_frag *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(fh, struct ip6_frag *, m, eoff, sizeof(*fh)); if (fh == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* * Data after a fragment header is meaningless * unless it is the first fragment, but * we'll go to the notify label for path MTU * discovery. */ if (fh->ip6f_offlg & IP6F_OFF_MASK) goto notify; eoff += sizeof(struct ip6_frag); nxt = fh->ip6f_nxt; break; default: /* * This case includes ESP and the No Next * Header. In such cases going to the notify * label does not have any meaning * (i.e. ctlfunc will be NULL), but we go * anyway since we might have to update * path MTU information. */ goto notify; } } notify: #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* * retrieve parameters from the inner IPv6 header, and convert * them into sockaddr structures. * XXX: there is no guarantee that the source or destination * addresses of the inner packet are in the same scope as * the addresses of the icmp packet. But there is no other * way to determine the zone. */ eip6 = (struct ip6_hdr *)(icmp6 + 1); bzero(&icmp6dst, sizeof(icmp6dst)); icmp6dst.sin6_len = sizeof(struct sockaddr_in6); icmp6dst.sin6_family = AF_INET6; if (finaldst == NULL) icmp6dst.sin6_addr = eip6->ip6_dst; else icmp6dst.sin6_addr = *finaldst; if (in6_setscope(&icmp6dst.sin6_addr, m->m_pkthdr.rcvif, NULL)) goto freeit; bzero(&icmp6src, sizeof(icmp6src)); icmp6src.sin6_len = sizeof(struct sockaddr_in6); icmp6src.sin6_family = AF_INET6; icmp6src.sin6_addr = eip6->ip6_src; if (in6_setscope(&icmp6src.sin6_addr, m->m_pkthdr.rcvif, NULL)) goto freeit; icmp6src.sin6_flowinfo = (eip6->ip6_flow & IPV6_FLOWLABEL_MASK); if (finaldst == NULL) finaldst = &eip6->ip6_dst; ip6cp.ip6c_m = m; ip6cp.ip6c_icmp6 = icmp6; ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1); ip6cp.ip6c_off = eoff; ip6cp.ip6c_finaldst = finaldst; ip6cp.ip6c_src = &icmp6src; ip6cp.ip6c_nxt = nxt; if (icmp6type == ICMP6_PACKET_TOO_BIG) { notifymtu = ntohl(icmp6->icmp6_mtu); ip6cp.ip6c_cmdarg = (void *)¬ifymtu; icmp6_mtudisc_update(&ip6cp, 1); /*XXX*/ } ctlfunc = (void (*)(int, struct sockaddr *, void *)) (inet6sw[ip6_protox[nxt]].pr_ctlinput); if (ctlfunc) { (void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst, &ip6cp); } } *mp = m; return (0); freeit: m_freem(m); return (-1); } void icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated) { struct in6_addr *dst = ip6cp->ip6c_finaldst; struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6; struct mbuf *m = ip6cp->ip6c_m; /* will be necessary for scope issue */ u_int mtu = ntohl(icmp6->icmp6_mtu); struct in_conninfo inc; #if 0 /* * RFC2460 section 5, last paragraph. * even though minimum link MTU for IPv6 is IPV6_MMTU, * we may see ICMPv6 too big with mtu < IPV6_MMTU * due to packet translator in the middle. * see ip6_output() and ip6_getpmtu() "alwaysfrag" case for * special handling. */ if (mtu < IPV6_MMTU) return; #endif /* * we reject ICMPv6 too big with abnormally small value. * XXX what is the good definition of "abnormally small"? */ if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8) return; if (!validated) return; /* * In case the suggested mtu is less than IPV6_MMTU, we * only need to remember that it was for above mentioned * "alwaysfrag" case. * Try to be as close to the spec as possible. */ if (mtu < IPV6_MMTU) mtu = IPV6_MMTU - 8; bzero(&inc, sizeof(inc)); inc.inc_fibnum = M_GETFIB(m); inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL)) return; if (mtu < tcp_maxmtu6(&inc, NULL)) { tcp_hc_updatemtu(&inc, mtu); ICMP6STAT_INC(icp6s_pmtuchg); } } /* * Process a Node Information Query packet, based on * draft-ietf-ipngwg-icmp-name-lookups-07. * * Spec incompatibilities: * - IPv6 Subject address handling * - IPv4 Subject address handling support missing * - Proxy reply (answer even if it's not for me) * - joins NI group address at in6_ifattach() time only, does not cope * with hostname changes by sethostname(3) */ static struct mbuf * ni6_input(struct mbuf *m, int off) { struct icmp6_nodeinfo *ni6, *nni6; struct mbuf *n = NULL; struct prison *pr; u_int16_t qtype; int subjlen; int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); struct ni_reply_fqdn *fqdn; int addrs; /* for NI_QTYPE_NODEADDR */ struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */ struct in6_addr in6_subj; /* subject address */ struct ip6_hdr *ip6; int oldfqdn = 0; /* if 1, return pascal string (03 draft) */ char *subj = NULL; struct in6_ifaddr *ia6 = NULL; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6)); if (ni6 == NULL) { /* m is already reclaimed */ return (NULL); } #endif /* * Validate IPv6 source address. * The default configuration MUST be to refuse answering queries from * global-scope addresses according to RFC4602. * Notes: * - it's not very clear what "refuse" means; this implementation * simply drops it. * - it's not very easy to identify global-scope (unicast) addresses * since there are many prefixes for them. It should be safer * and in practice sufficient to check "all" but loopback and * link-local (note that site-local unicast was deprecated and * ULA is defined as global scope-wise) */ if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_GLOBALOK) == 0 && !IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && !IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) goto bad; /* * Validate IPv6 destination address. * * The Responder must discard the Query without further processing * unless it is one of the Responder's unicast or anycast addresses, or * a link-local scope multicast address which the Responder has joined. * [RFC4602, Section 5.] */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) goto bad; /* else it's a link-local multicast, fine */ } else { /* unicast or anycast */ ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia6 == NULL) goto bad; /* XXX impossible */ if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) && !(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) { ifa_free(&ia6->ia_ifa); nd6log((LOG_DEBUG, "ni6_input: ignore node info to " "a temporary address in %s:%d", __FILE__, __LINE__)); goto bad; } ifa_free(&ia6->ia_ifa); } /* validate query Subject field. */ qtype = ntohs(ni6->ni_qtype); subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo); switch (qtype) { case NI_QTYPE_NOOP: case NI_QTYPE_SUPTYPES: /* 07 draft */ if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0) break; /* FALLTHROUGH */ case NI_QTYPE_FQDN: case NI_QTYPE_NODEADDR: case NI_QTYPE_IPV4ADDR: switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: #if ICMP6_NI_SUBJ_IPV6 != 0 case 0: #endif /* * backward compatibility - try to accept 03 draft * format, where no Subject is present. */ if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 && subjlen == 0) { oldfqdn++; break; } #if ICMP6_NI_SUBJ_IPV6 != 0 if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6) goto bad; #endif if (subjlen != sizeof(struct in6_addr)) goto bad; /* * Validate Subject address. * * Not sure what exactly "address belongs to the node" * means in the spec, is it just unicast, or what? * * At this moment we consider Subject address as * "belong to the node" if the Subject address equals * to the IPv6 destination address; validation for * IPv6 destination address should have done enough * check for us. * * We do not do proxy at this moment. */ /* m_pulldown instead of copy? */ m_copydata(m, off + sizeof(struct icmp6_nodeinfo), subjlen, (caddr_t)&in6_subj); if (in6_setscope(&in6_subj, m->m_pkthdr.rcvif, NULL)) goto bad; subj = (char *)&in6_subj; if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj)) break; /* * XXX if we are to allow other cases, we should really * be careful about scope here. * basically, we should disallow queries toward IPv6 * destination X with subject Y, * if scope(X) > scope(Y). * if we allow scope(X) > scope(Y), it will result in * information leakage across scope boundary. */ goto bad; case ICMP6_NI_SUBJ_FQDN: /* * Validate Subject name with gethostname(3). * * The behavior may need some debate, since: * - we are not sure if the node has FQDN as * hostname (returned by gethostname(3)). * - the code does wildcard match for truncated names. * however, we are not sure if we want to perform * wildcard match, if gethostname(3) side has * truncated hostname. */ pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); n = ni6_nametodns(pr->pr_hostname, strlen(pr->pr_hostname), 0); mtx_unlock(&pr->pr_mtx); if (!n || n->m_next || n->m_len == 0) goto bad; IP6_EXTHDR_GET(subj, char *, m, off + sizeof(struct icmp6_nodeinfo), subjlen); if (subj == NULL) goto bad; if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *), n->m_len)) { goto bad; } m_freem(n); n = NULL; break; case ICMP6_NI_SUBJ_IPV4: /* XXX: to be implemented? */ default: goto bad; } break; } /* refuse based on configuration. XXX ICMP6_NI_REFUSED? */ switch (qtype) { case NI_QTYPE_FQDN: if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_FQDNOK) == 0) goto bad; break; case NI_QTYPE_NODEADDR: case NI_QTYPE_IPV4ADDR: if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_NODEADDROK) == 0) goto bad; break; } /* guess reply length */ switch (qtype) { case NI_QTYPE_NOOP: break; /* no reply data */ case NI_QTYPE_SUPTYPES: replylen += sizeof(u_int32_t); break; case NI_QTYPE_FQDN: /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); break; case NI_QTYPE_NODEADDR: addrs = ni6_addrs(ni6, m, &ifp, (struct in6_addr *)subj); if ((replylen += addrs * (sizeof(struct in6_addr) + sizeof(u_int32_t))) > MCLBYTES) replylen = MCLBYTES; /* XXX: will truncate pkt later */ break; case NI_QTYPE_IPV4ADDR: /* unsupported - should respond with unknown Qtype? */ break; default: /* * XXX: We must return a reply with the ICMP6 code * `unknown Qtype' in this case. However we regard the case * as an FQDN query for backward compatibility. * Older versions set a random value to this field, * so it rarely varies in the defined qtypes. * But the mechanism is not reliable... * maybe we should obsolete older versions. */ qtype = NI_QTYPE_FQDN; /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); oldfqdn++; break; } /* Allocate an mbuf to reply. */ if (replylen > MCLBYTES) { /* * XXX: should we try to allocate more? But MCLBYTES * is probably much larger than IPV6_MMTU... */ goto bad; } if (replylen > MHLEN) n = m_getcl(M_NOWAIT, m->m_type, M_PKTHDR); else n = m_gethdr(M_NOWAIT, m->m_type); if (n == NULL) { m_freem(m); return (NULL); } m_move_pkthdr(n, m); /* just for recvif and FIB */ n->m_pkthdr.len = n->m_len = replylen; /* copy mbuf header and IPv6 + Node Information base headers */ bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr)); nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1); bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo)); /* qtype dependent procedure */ switch (qtype) { case NI_QTYPE_NOOP: nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = 0; break; case NI_QTYPE_SUPTYPES: { u_int32_t v; nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = htons(0x0000); /* raw bitmap */ /* supports NOOP, SUPTYPES, FQDN, and NODEADDR */ v = (u_int32_t)htonl(0x0000000f); bcopy(&v, nni6 + 1, sizeof(u_int32_t)); break; } case NI_QTYPE_FQDN: nni6->ni_code = ICMP6_NI_SUCCESS; fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) + sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo)); nni6->ni_flags = 0; /* XXX: meaningless TTL */ fqdn->ni_fqdn_ttl = 0; /* ditto. */ /* * XXX do we really have FQDN in hostname? */ pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); n->m_next = ni6_nametodns(pr->pr_hostname, strlen(pr->pr_hostname), oldfqdn); mtx_unlock(&pr->pr_mtx); if (n->m_next == NULL) goto bad; /* XXX we assume that n->m_next is not a chain */ if (n->m_next->m_next != NULL) goto bad; n->m_pkthdr.len += n->m_next->m_len; break; case NI_QTYPE_NODEADDR: { int lenlim, copied; nni6->ni_code = ICMP6_NI_SUCCESS; n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); lenlim = M_TRAILINGSPACE(n); copied = ni6_store_addrs(ni6, nni6, ifp, lenlim); /* XXX: reset mbuf length */ n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo) + copied; break; } default: break; /* XXX impossible! */ } nni6->ni_type = ICMP6_NI_REPLY; m_freem(m); return (n); bad: m_freem(m); if (n) m_freem(n); return (NULL); } /* * make a mbuf with DNS-encoded string. no compression support. * * XXX names with less than 2 dots (like "foo" or "foo.section") will be * treated as truncated name (two \0 at the end). this is a wild guess. * * old - return pascal string if non-zero */ static struct mbuf * ni6_nametodns(const char *name, int namelen, int old) { struct mbuf *m; char *cp, *ep; const char *p, *q; int i, len, nterm; if (old) len = namelen + 1; else len = MCLBYTES; /* Because MAXHOSTNAMELEN is usually 256, we use cluster mbuf. */ if (len > MLEN) m = m_getcl(M_NOWAIT, MT_DATA, 0); else m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) goto fail; if (old) { m->m_len = len; *mtod(m, char *) = namelen; bcopy(name, mtod(m, char *) + 1, namelen); return m; } else { m->m_len = 0; cp = mtod(m, char *); ep = mtod(m, char *) + M_TRAILINGSPACE(m); /* if not certain about my name, return empty buffer */ if (namelen == 0) return m; /* * guess if it looks like shortened hostname, or FQDN. * shortened hostname needs two trailing "\0". */ i = 0; for (p = name; p < name + namelen; p++) { if (*p && *p == '.') i++; } if (i < 2) nterm = 2; else nterm = 1; p = name; while (cp < ep && p < name + namelen) { i = 0; for (q = p; q < name + namelen && *q && *q != '.'; q++) i++; /* result does not fit into mbuf */ if (cp + i + 1 >= ep) goto fail; /* * DNS label length restriction, RFC1035 page 8. * "i == 0" case is included here to avoid returning * 0-length label on "foo..bar". */ if (i <= 0 || i >= 64) goto fail; *cp++ = i; bcopy(p, cp, i); cp += i; p = q; if (p < name + namelen && *p == '.') p++; } /* termination */ if (cp + nterm >= ep) goto fail; while (nterm-- > 0) *cp++ = '\0'; m->m_len = cp - mtod(m, char *); return m; } panic("should not reach here"); /* NOTREACHED */ fail: if (m) m_freem(m); return NULL; } /* * check if two DNS-encoded string matches. takes care of truncated * form (with \0\0 at the end). no compression support. * XXX upper/lowercase match (see RFC2065) */ static int ni6_dnsmatch(const char *a, int alen, const char *b, int blen) { const char *a0, *b0; int l; /* simplest case - need validation? */ if (alen == blen && bcmp(a, b, alen) == 0) return 1; a0 = a; b0 = b; /* termination is mandatory */ if (alen < 2 || blen < 2) return 0; if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0') return 0; alen--; blen--; while (a - a0 < alen && b - b0 < blen) { if (a - a0 + 1 > alen || b - b0 + 1 > blen) return 0; if ((signed char)a[0] < 0 || (signed char)b[0] < 0) return 0; /* we don't support compression yet */ if (a[0] >= 64 || b[0] >= 64) return 0; /* truncated case */ if (a[0] == 0 && a - a0 == alen - 1) return 1; if (b[0] == 0 && b - b0 == blen - 1) return 1; if (a[0] == 0 || b[0] == 0) return 0; if (a[0] != b[0]) return 0; l = a[0]; if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen) return 0; if (bcmp(a + 1, b + 1, l) != 0) return 0; a += 1 + l; b += 1 + l; } if (a - a0 == alen && b - b0 == blen) return 1; else return 0; } /* * calculate the number of addresses to be returned in the node info reply. */ static int ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp, struct in6_addr *subj) { struct ifnet *ifp; struct in6_ifaddr *ifa6; struct ifaddr *ifa; int addrs = 0, addrsofif, iffound = 0; int niflags = ni6->ni_flags; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) { switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: if (subj == NULL) /* must be impossible... */ return (0); break; default: /* * XXX: we only support IPv6 subject address for * this Qtype. */ return (0); } } IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { addrsofif = 0; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 && IN6_ARE_ADDR_EQUAL(subj, &ifa6->ia_addr.sin6_addr)) iffound = 1; /* * IPv4-mapped addresses can only be returned by a * Node Information proxy, since they represent * addresses of IPv4-only nodes, which perforce do * not implement this protocol. * [icmp-name-lookups-07, Section 5.4] * So we don't support NI_NODEADDR_FLAG_COMPAT in * this function at this moment. */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec. */ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; /* we need only unicast addresses */ if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { continue; } addrsofif++; /* count the address */ } IF_ADDR_RUNLOCK(ifp); if (iffound) { *ifpp = ifp; IFNET_RUNLOCK_NOSLEEP(); return (addrsofif); } addrs += addrsofif; } IFNET_RUNLOCK_NOSLEEP(); return (addrs); } static int ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, struct ifnet *ifp0, int resid) { struct ifnet *ifp; struct in6_ifaddr *ifa6; struct ifaddr *ifa; struct ifnet *ifp_dep = NULL; int copied = 0, allow_deprecated = 0; u_char *cp = (u_char *)(nni6 + 1); int niflags = ni6->ni_flags; u_int32_t ltime; if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL)) return (0); /* needless to copy */ IFNET_RLOCK_NOSLEEP(); ifp = ifp0 ? ifp0 : TAILQ_FIRST(&V_ifnet); again: for (; ifp; ifp = TAILQ_NEXT(ifp, if_link)) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 && allow_deprecated == 0) { /* * prefererred address should be put before * deprecated addresses. */ /* record the interface for later search */ if (ifp_dep == NULL) ifp_dep = ifp; continue; } else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 && allow_deprecated != 0) continue; /* we now collect deprecated addrs */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec. */ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { continue; } /* now we can copy the address */ if (resid < sizeof(struct in6_addr) + sizeof(u_int32_t)) { IF_ADDR_RUNLOCK(ifp); /* * We give up much more copy. * Set the truncate flag and return. */ nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE; IFNET_RUNLOCK_NOSLEEP(); return (copied); } /* * Set the TTL of the address. * The TTL value should be one of the following * according to the specification: * * 1. The remaining lifetime of a DHCP lease on the * address, or * 2. The remaining Valid Lifetime of a prefix from * which the address was derived through Stateless * Autoconfiguration. * * Note that we currently do not support stateful * address configuration by DHCPv6, so the former * case can't happen. */ if (ifa6->ia6_lifetime.ia6t_expire == 0) ltime = ND6_INFINITE_LIFETIME; else { if (ifa6->ia6_lifetime.ia6t_expire > time_uptime) ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_uptime); else ltime = 0; } bcopy(<ime, cp, sizeof(u_int32_t)); cp += sizeof(u_int32_t); /* copy the address itself */ bcopy(&ifa6->ia_addr.sin6_addr, cp, sizeof(struct in6_addr)); in6_clearscope((struct in6_addr *)cp); /* XXX */ cp += sizeof(struct in6_addr); resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t)); copied += (sizeof(struct in6_addr) + sizeof(u_int32_t)); } IF_ADDR_RUNLOCK(ifp); if (ifp0) /* we need search only on the specified IF */ break; } if (allow_deprecated == 0 && ifp_dep != NULL) { ifp = ifp_dep; allow_deprecated = 1; goto again; } IFNET_RUNLOCK_NOSLEEP(); return (copied); } /* * XXX almost dup'ed code with rip6_input. */ static int icmp6_rip6_input(struct mbuf **mp, int off) { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct inpcb *in6p; struct inpcb *last = NULL; struct sockaddr_in6 fromsa; struct icmp6_hdr *icmp6; struct mbuf *opts = NULL; #ifndef PULLDOWN_TEST /* this is assumed to be safe. */ icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { /* m is already reclaimed */ return (IPPROTO_DONE); } #endif /* * XXX: the address may have embedded scope zone ID, which should be * hidden from applications. */ bzero(&fromsa, sizeof(fromsa)); fromsa.sin6_family = AF_INET6; fromsa.sin6_len = sizeof(struct sockaddr_in6); fromsa.sin6_addr = ip6->ip6_src; if (sa6_recoverscope(&fromsa)) { m_freem(m); return (IPPROTO_DONE); } INP_INFO_RLOCK(&V_ripcbinfo); LIST_FOREACH(in6p, &V_ripcb, inp_list) { if ((in6p->inp_vflag & INP_IPV6) == 0) continue; if (in6p->inp_ip_p != IPPROTO_ICMPV6) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; INP_RLOCK(in6p); if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type, in6p->in6p_icmp6filt)) { INP_RUNLOCK(in6p); continue; } if (last != NULL) { struct mbuf *n = NULL; /* * Recent network drivers tend to allocate a single * mbuf cluster, rather than to make a couple of * mbufs without clusters. Also, since the IPv6 code * path tries to avoid m_pullup(), it is highly * probable that we still have an mbuf cluster here * even though the necessary length can be stored in an * mbuf's internal buffer. * Meanwhile, the default size of the receive socket * buffer for raw sockets is not so large. This means * the possibility of packet loss is relatively higher * than before. To avoid this scenario, we copy the * received data to a separate mbuf that does not use * a cluster, if possible. * XXX: it is better to copy the data after stripping * intermediate headers. */ if ((m->m_flags & M_EXT) && m->m_next == NULL && m->m_len <= MHLEN) { n = m_get(M_NOWAIT, m->m_type); if (n != NULL) { if (m_dup_pkthdr(n, m, M_NOWAIT)) { bcopy(m->m_data, n->m_data, m->m_len); n->m_len = m->m_len; } else { m_free(n); n = NULL; } } } if (n != NULL || (n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { if (last->inp_flags & INP_CONTROLOPTS) ip6_savecontrol(last, n, &opts); /* strip intermediate headers */ m_adj(n, off); SOCKBUF_LOCK(&last->inp_socket->so_rcv); if (sbappendaddr_locked( &last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, n, opts) == 0) { /* should notify about lost packet */ m_freem(n); if (opts) { m_freem(opts); } SOCKBUF_UNLOCK( &last->inp_socket->so_rcv); } else sorwakeup_locked(last->inp_socket); opts = NULL; } INP_RUNLOCK(last); } last = in6p; } INP_INFO_RUNLOCK(&V_ripcbinfo); if (last != NULL) { if (last->inp_flags & INP_CONTROLOPTS) ip6_savecontrol(last, m, &opts); /* strip intermediate headers */ m_adj(m, off); /* avoid using mbuf clusters if possible (see above) */ if ((m->m_flags & M_EXT) && m->m_next == NULL && m->m_len <= MHLEN) { struct mbuf *n; n = m_get(M_NOWAIT, m->m_type); if (n != NULL) { if (m_dup_pkthdr(n, m, M_NOWAIT)) { bcopy(m->m_data, n->m_data, m->m_len); n->m_len = m->m_len; m_freem(m); m = n; } else { m_freem(n); n = NULL; } } } SOCKBUF_LOCK(&last->inp_socket->so_rcv); if (sbappendaddr_locked(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { m_freem(m); if (opts) m_freem(opts); SOCKBUF_UNLOCK(&last->inp_socket->so_rcv); } else sorwakeup_locked(last->inp_socket); INP_RUNLOCK(last); } else { m_freem(m); IP6STAT_DEC(ip6s_delivered); } return IPPROTO_DONE; } /* * Reflect the ip6 packet back to the source. * OFF points to the icmp6 header, counted from the top of the mbuf. */ void icmp6_reflect(struct mbuf *m, size_t off) { - struct in6_addr src, *srcp = NULL; + struct in6_addr src6, *srcp; struct ip6_hdr *ip6; struct icmp6_hdr *icmp6; struct in6_ifaddr *ia = NULL; struct ifnet *outif = NULL; int plen; - int type, code; + int type, code, hlim; /* too short to reflect */ if (off < sizeof(struct ip6_hdr)) { nd6log((LOG_DEBUG, "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n", (u_long)off, (u_long)sizeof(struct ip6_hdr), __FILE__, __LINE__)); goto bad; } /* * If there are extra headers between IPv6 and ICMPv6, strip * off that header first. */ #ifdef DIAGNOSTIC if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN) panic("assumption failed in icmp6_reflect"); #endif if (off > sizeof(struct ip6_hdr)) { size_t l; struct ip6_hdr nip6; l = off - sizeof(struct ip6_hdr); m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6); m_adj(m, l); l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) return; } bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6)); } else /* off == sizeof(struct ip6_hdr) */ { size_t l; l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) return; } } plen = m->m_pkthdr.len - sizeof(struct ip6_hdr); ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_nxt = IPPROTO_ICMPV6; icmp6 = (struct icmp6_hdr *)(ip6 + 1); type = icmp6->icmp6_type; /* keep type for statistics */ code = icmp6->icmp6_code; /* ditto. */ + hlim = 0; + srcp = NULL; /* * If the incoming packet was addressed directly to us (i.e. unicast), * use dst as the src for the reply. * The IN6_IFF_NOTREADY case should be VERY rare, but is possible * (for example) when we encounter an error while forwarding procedure * destined to a duplicated address of ours. */ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia != NULL && !(ia->ia6_flags & - (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) - srcp = &ia->ia_addr.sin6_addr; + (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) { + src6 = ia->ia_addr.sin6_addr; + srcp = &src6; + + if (m->m_pkthdr.rcvif != NULL) { + /* XXX: This may not be the outgoing interface */ + hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim; + } else + hlim = V_ip6_defhlim; + } + if (ia != NULL) + ifa_free(&ia->ia_ifa); } if (srcp == NULL) { - int e; - struct sockaddr_in6 sin6; + int error; + struct in6_addr dst6; + uint32_t scopeid; /* * This case matches to multicasts, our anycast, or unicasts * that we do not own. Select a source address based on the * source address of the erroneous packet. */ - bzero(&sin6, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(sin6); - sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */ + in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid); + error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, + scopeid, NULL, &src6, &hlim); - e = in6_selectsrc(&sin6, NULL, NULL, NULL, &outif, &src); - if (e) { + if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "icmp6_reflect: source can't be determined: " "dst=%s, error=%d\n", - ip6_sprintf(ip6buf, &sin6.sin6_addr), e)); + ip6_sprintf(ip6buf, &ip6->ip6_dst), error)); goto bad; } - srcp = &src; + srcp = &src6; } /* * ip6_input() drops a packet if its src is multicast. * So, the src is never multicast. */ ip6->ip6_dst = ip6->ip6_src; ip6->ip6_src = *srcp; ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; - if (outif) - ip6->ip6_hlim = ND_IFINFO(outif)->chlim; - else if (m->m_pkthdr.rcvif) { - /* XXX: This may not be the outgoing interface */ - ip6->ip6_hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim; - } else - ip6->ip6_hlim = V_ip6_defhlim; + ip6->ip6_hlim = hlim; icmp6->icmp6_cksum = 0; icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), plen); /* * XXX option handling */ m->m_flags &= ~(M_BCAST|M_MCAST); ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) icmp6_ifoutstat_inc(outif, type, code); - if (ia != NULL) - ifa_free(&ia->ia_ifa); return; bad: - if (ia != NULL) - ifa_free(&ia->ia_ifa); m_freem(m); return; } void icmp6_fasttimo(void) { mld_fasttimo(); } void icmp6_slowtimo(void) { mld_slowtimo(); } static const char * icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6, struct in6_addr *tgt6) { static char buf[1024]; char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; char ip6buft[INET6_ADDRSTRLEN]; snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)", ip6_sprintf(ip6bufs, src6), ip6_sprintf(ip6bufd, dst6), ip6_sprintf(ip6buft, tgt6)); return buf; } void icmp6_redirect_input(struct mbuf *m, int off) { struct ifnet *ifp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_redirect *nd_rd; int icmp6len = ntohs(ip6->ip6_plen); char *lladdr = NULL; int lladdrlen = 0; int is_router; int is_onlink; struct in6_addr src6 = ip6->ip6_src; struct in6_addr redtgt6; struct in6_addr reddst6; union nd_opts ndopts; char ip6buf[INET6_ADDRSTRLEN]; M_ASSERTPKTHDR(m); KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: no rcvif", __func__)); ifp = m->m_pkthdr.rcvif; /* XXX if we are router, we don't update route by icmp6 redirect */ if (V_ip6_forwarding) goto freeit; if (!V_icmp6_rediraccept) goto freeit; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len); if (nd_rd == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif redtgt6 = nd_rd->nd_rd_target; reddst6 = nd_rd->nd_rd_dst; if (in6_setscope(&redtgt6, m->m_pkthdr.rcvif, NULL) || in6_setscope(&reddst6, m->m_pkthdr.rcvif, NULL)) { goto freeit; } /* validation */ if (!IN6_IS_ADDR_LINKLOCAL(&src6)) { nd6log((LOG_ERR, "ICMP6 redirect sent from %s rejected; " "must be from linklocal\n", ip6_sprintf(ip6buf, &src6))); goto bad; } if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "ICMP6 redirect sent from %s rejected; " "hlim=%d (must be 255)\n", ip6_sprintf(ip6buf, &src6), ip6->ip6_hlim)); goto bad; } { /* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */ struct nhop6_basic nh6; struct in6_addr kdst; uint32_t scopeid; in6_splitscope(&reddst6, &kdst, &scopeid); if (fib6_lookup_nh_basic(RT_DEFAULT_FIB, &kdst, scopeid, 0, 0,&nh6)==0){ if ((nh6.nh_flags & NHF_GATEWAY) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; no route " "with inet6 gateway found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } if (IN6_ARE_ADDR_EQUAL(&src6, &nh6.nh_addr) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "not equal to gw-for-src=%s (must be same): " "%s\n", ip6_sprintf(ip6buf, &nh6.nh_addr), icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } } else { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "no route found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } } if (IN6_IS_ADDR_MULTICAST(&reddst6)) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "redirect dst must be unicast: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } is_router = is_onlink = 0; if (IN6_IS_ADDR_LINKLOCAL(&redtgt6)) is_router = 1; /* router case */ if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0) is_onlink = 1; /* on-link destination case */ if (!is_router && !is_onlink) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "neither router case nor onlink case: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } icmp6len -= sizeof(*nd_rd); nd6_option_init(nd_rd + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "%s: invalid ND option, rejected: %s\n", __func__, icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_tgt_lladdr) { lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1); lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "%s: lladdrlen mismatch for %s " "(if %d, icmp6 packet %d): %s\n", __func__, ip6_sprintf(ip6buf, &redtgt6), ifp->if_addrlen, lladdrlen - 2, icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } /* Validation passed. */ /* RFC 2461 8.3 */ nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT, is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER); /* * Install a gateway route in the better-router case or an interface * route in the on-link-destination case. */ { struct sockaddr_in6 sdst; struct sockaddr_in6 sgw; struct sockaddr_in6 ssrc; struct sockaddr *gw; int rt_flags; u_int fibnum; bzero(&sdst, sizeof(sdst)); bzero(&ssrc, sizeof(ssrc)); sdst.sin6_family = ssrc.sin6_family = AF_INET6; sdst.sin6_len = ssrc.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr)); rt_flags = RTF_HOST; if (is_router) { bzero(&sgw, sizeof(sgw)); sgw.sin6_family = AF_INET6; sgw.sin6_len = sizeof(struct sockaddr_in6); bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr)); gw = (struct sockaddr *)&sgw; rt_flags |= RTF_GATEWAY; } else gw = ifp->if_addr->ifa_addr; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) in6_rtredirect((struct sockaddr *)&sdst, gw, (struct sockaddr *)NULL, rt_flags, (struct sockaddr *)&ssrc, fibnum); } /* finally update cached route in each socket via pfctlinput */ { struct sockaddr_in6 sdst; bzero(&sdst, sizeof(sdst)); sdst.sin6_family = AF_INET6; sdst.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst); } freeit: m_freem(m); return; bad: ICMP6STAT_INC(icp6s_badredirect); m_freem(m); } void icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) { struct ifnet *ifp; /* my outgoing interface */ struct in6_addr *ifp_ll6; struct in6_addr *router_ll6; struct ip6_hdr *sip6; /* m0 as struct ip6_hdr */ struct mbuf *m = NULL; /* newly allocated one */ struct m_tag *mtag; struct ip6_hdr *ip6; /* m as struct ip6_hdr */ struct nd_redirect *nd_rd; struct llentry *ln = NULL; size_t maxlen; u_char *p; struct ifnet *outif = NULL; struct sockaddr_in6 src_sa; icmp6_errcount(ND_REDIRECT, 0); /* if we are not router, we don't send icmp6 redirect */ if (!V_ip6_forwarding) goto fail; /* sanity check */ if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp)) goto fail; /* * Address check: * the source address must identify a neighbor, and * the destination address must not be a multicast address * [RFC 2461, sec 8.2] */ sip6 = mtod(m0, struct ip6_hdr *); bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = sip6->ip6_src; if (nd6_is_addr_neighbor(&src_sa, ifp) == 0) goto fail; if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst)) goto fail; /* what should we do here? */ /* rate limit */ if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0)) goto fail; /* * Since we are going to append up to 1280 bytes (= IPV6_MMTU), * we almost always ask for an mbuf cluster for simplicity. * (MHLEN < IPV6_MMTU is almost always true) */ #if IPV6_MMTU >= MCLBYTES # error assumption failed about IPV6_MMTU and MCLBYTES #endif m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) goto fail; M_SETFIB(m, rt->rt_fibnum); maxlen = M_TRAILINGSPACE(m); maxlen = min(IPV6_MMTU, maxlen); /* just for safety */ if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) { goto fail; } { /* get ip6 linklocal address for ifp(my outgoing interface). */ struct in6_ifaddr *ia; if ((ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY| IN6_IFF_ANYCAST)) == NULL) goto fail; ifp_ll6 = &ia->ia_addr.sin6_addr; /* XXXRW: reference released prematurely. */ ifa_free(&ia->ia_ifa); } /* get ip6 linklocal address for the router. */ if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)rt->rt_gateway; router_ll6 = &sin6->sin6_addr; if (!IN6_IS_ADDR_LINKLOCAL(router_ll6)) router_ll6 = (struct in6_addr *)NULL; } else router_ll6 = (struct in6_addr *)NULL; /* ip6 */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; /* ip6->ip6_plen will be set later */ ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; /* ip6->ip6_src must be linklocal addr for my outgoing if. */ bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr)); bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr)); /* ND Redirect */ nd_rd = (struct nd_redirect *)(ip6 + 1); nd_rd->nd_rd_type = ND_REDIRECT; nd_rd->nd_rd_code = 0; nd_rd->nd_rd_reserved = 0; if (rt->rt_flags & RTF_GATEWAY) { /* * nd_rd->nd_rd_target must be a link-local address in * better router cases. */ if (!router_ll6) goto fail; bcopy(router_ll6, &nd_rd->nd_rd_target, sizeof(nd_rd->nd_rd_target)); bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } else { /* make sure redtgt == reddst */ bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target, sizeof(nd_rd->nd_rd_target)); bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } p = (u_char *)(nd_rd + 1); if (!router_ll6) goto nolladdropt; { /* target lladdr option */ int len; struct nd_opt_hdr *nd_opt; char *lladdr; IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(router_ll6, 0, ifp); IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) goto nolladdropt; len = sizeof(*nd_opt) + ifp->if_addrlen; len = (len + 7) & ~7; /* round by 8 */ /* safety check */ if (len + (p - (u_char *)ip6) > maxlen) goto nolladdropt; if (ln->la_flags & LLE_VALID) { nd_opt = (struct nd_opt_hdr *)p; nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = len >> 3; lladdr = (char *)(nd_opt + 1); bcopy(ln->ll_addr, lladdr, ifp->if_addrlen); p += len; } } nolladdropt: if (ln != NULL) LLE_RUNLOCK(ln); m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; /* just to be safe */ #ifdef M_DECRYPTED /*not openbsd*/ if (m0->m_flags & M_DECRYPTED) goto noredhdropt; #endif if (p - (u_char *)ip6 > maxlen) goto noredhdropt; { /* redirected header option */ int len; struct nd_opt_rd_hdr *nd_opt_rh; /* * compute the maximum size for icmp6 redirect header option. * XXX room for auth header? */ len = maxlen - (p - (u_char *)ip6); len &= ~7; /* This is just for simplicity. */ if (m0->m_pkthdr.len != m0->m_len) { if (m0->m_next) { m_freem(m0->m_next); m0->m_next = NULL; } m0->m_pkthdr.len = m0->m_len; } /* * Redirected header option spec (RFC2461 4.6.3) talks nothing * about padding/truncate rule for the original IP packet. * From the discussion on IPv6imp in Feb 1999, * the consensus was: * - "attach as much as possible" is the goal * - pad if not aligned (original size can be guessed by * original ip6 header) * Following code adds the padding if it is simple enough, * and truncates if not. */ if (m0->m_next || m0->m_pkthdr.len != m0->m_len) panic("assumption failed in %s:%d", __FILE__, __LINE__); if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) { /* not enough room, truncate */ m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh); } else { /* enough room, pad or truncate */ size_t extra; extra = m0->m_pkthdr.len % 8; if (extra) { /* pad if easy enough, truncate if not */ if (8 - extra <= M_TRAILINGSPACE(m0)) { /* pad */ m0->m_len += (8 - extra); m0->m_pkthdr.len += (8 - extra); } else { /* truncate */ m0->m_pkthdr.len -= extra; m0->m_len -= extra; } } len = m0->m_pkthdr.len + sizeof(*nd_opt_rh); m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh); } nd_opt_rh = (struct nd_opt_rd_hdr *)p; bzero(nd_opt_rh, sizeof(*nd_opt_rh)); nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER; nd_opt_rh->nd_opt_rh_len = len >> 3; p += sizeof(*nd_opt_rh); m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; /* connect m0 to m */ m_tag_delete_chain(m0, NULL); m0->m_flags &= ~M_PKTHDR; m->m_next = m0; m->m_pkthdr.len = m->m_len + m0->m_len; m0 = NULL; } noredhdropt:; if (m0) { m_freem(m0); m0 = NULL; } /* XXX: clear embedded link IDs in the inner header */ in6_clearscope(&sip6->ip6_src); in6_clearscope(&sip6->ip6_dst); in6_clearscope(&nd_rd->nd_rd_target); in6_clearscope(&nd_rd->nd_rd_dst); ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); nd_rd->nd_rd_cksum = 0; nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen)); if (send_sendso_input_hook != NULL) { mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto fail; *(unsigned short *)(mtag + 1) = nd_rd->nd_rd_type; m_tag_prepend(m, mtag); } /* send the packet to outside... */ ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) { icmp6_ifstat_inc(outif, ifs6_out_msg); icmp6_ifstat_inc(outif, ifs6_out_redirect); } ICMP6STAT_INC(icp6s_outhist[ND_REDIRECT]); return; fail: if (m) m_freem(m); if (m0) m_freem(m0); } /* * ICMPv6 socket option processing. */ int icmp6_ctloutput(struct socket *so, struct sockopt *sopt) { int error = 0; int optlen; struct inpcb *inp = sotoinpcb(so); int level, op, optname; if (sopt) { level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; } else level = op = optname = optlen = 0; if (level != IPPROTO_ICMPV6) { return EINVAL; } switch (op) { case PRCO_SETOPT: switch (optname) { case ICMP6_FILTER: { struct icmp6_filter ic6f; if (optlen != sizeof(ic6f)) { error = EMSGSIZE; break; } error = sooptcopyin(sopt, &ic6f, optlen, optlen); if (error == 0) { INP_WLOCK(inp); *inp->in6p_icmp6filt = ic6f; INP_WUNLOCK(inp); } break; } default: error = ENOPROTOOPT; break; } break; case PRCO_GETOPT: switch (optname) { case ICMP6_FILTER: { struct icmp6_filter ic6f; INP_RLOCK(inp); ic6f = *inp->in6p_icmp6filt; INP_RUNLOCK(inp); error = sooptcopyout(sopt, &ic6f, sizeof(ic6f)); break; } default: error = ENOPROTOOPT; break; } break; } return (error); } /* * Perform rate limit check. * Returns 0 if it is okay to send the icmp6 packet. * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate * limitation. * * XXX per-destination/type check necessary? * * dst - not used at this moment * type - not used at this moment * code - not used at this moment */ static int icmp6_ratelimit(const struct in6_addr *dst, const int type, const int code) { int ret; ret = 0; /* okay to send */ /* PPS limit */ if (!ppsratecheck(&V_icmp6errppslim_last, &V_icmp6errpps_count, V_icmp6errppslim)) { /* The packet is subject to rate limit */ ret++; } return ret; } Index: projects/clang380-import/sys/netinet6/in6_fib.c =================================================================== --- projects/clang380-import/sys/netinet6/in6_fib.c (revision 293686) +++ projects/clang380-import/sys/netinet6/in6_fib.c (revision 293687) @@ -1,268 +1,275 @@ /*- * Copyright (c) 2015 * Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_route.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #include #include #include #include #include #include #ifdef INET6 static void fib6_rte_to_nh_extended(struct rtentry *rte, const struct in6_addr *dst, uint32_t flags, struct nhop6_extended *pnh6); static void fib6_rte_to_nh_basic(struct rtentry *rte, const struct in6_addr *dst, uint32_t flags, struct nhop6_basic *pnh6); static struct ifnet *fib6_get_ifaifp(struct rtentry *rte); #define RNTORT(p) ((struct rtentry *)(p)) /* * Gets real interface for the @rte. * Returns rt_ifp for !IFF_LOOPBACK routers. * Extracts "real" address interface from interface address * loopback routes. */ static struct ifnet * fib6_get_ifaifp(struct rtentry *rte) { struct ifnet *ifp; struct sockaddr_dl *sdl; ifp = rte->rt_ifp; if ((ifp->if_flags & IFF_LOOPBACK) && rte->rt_gateway->sa_family == AF_LINK) { sdl = (struct sockaddr_dl *)rte->rt_gateway; return (ifnet_byindex(sdl->sdl_index)); } return (ifp); } static void fib6_rte_to_nh_basic(struct rtentry *rte, const struct in6_addr *dst, uint32_t flags, struct nhop6_basic *pnh6) { struct sockaddr_in6 *gw; /* Do explicit nexthop zero unless we're copying it */ memset(pnh6, 0, sizeof(*pnh6)); if ((flags & NHR_IFAIF) != 0) pnh6->nh_ifp = fib6_get_ifaifp(rte); else pnh6->nh_ifp = rte->rt_ifp; pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp)); if (rte->rt_flags & RTF_GATEWAY) { gw = (struct sockaddr_in6 *)rte->rt_gateway; pnh6->nh_addr = gw->sin6_addr; in6_clearscope(&pnh6->nh_addr); } else pnh6->nh_addr = *dst; /* Set flags */ pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags); gw = (struct sockaddr_in6 *)rt_key(rte); if (IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr)) pnh6->nh_flags |= NHF_DEFAULT; } static void fib6_rte_to_nh_extended(struct rtentry *rte, const struct in6_addr *dst, uint32_t flags, struct nhop6_extended *pnh6) { struct sockaddr_in6 *gw; /* Do explicit nexthop zero unless we're copying it */ memset(pnh6, 0, sizeof(*pnh6)); if ((flags & NHR_IFAIF) != 0) pnh6->nh_ifp = fib6_get_ifaifp(rte); else pnh6->nh_ifp = rte->rt_ifp; pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp)); if (rte->rt_flags & RTF_GATEWAY) { gw = (struct sockaddr_in6 *)rte->rt_gateway; pnh6->nh_addr = gw->sin6_addr; in6_clearscope(&pnh6->nh_addr); } else pnh6->nh_addr = *dst; /* Set flags */ pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags); gw = (struct sockaddr_in6 *)rt_key(rte); if (IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr)) pnh6->nh_flags |= NHF_DEFAULT; } /* * Performs IPv6 route table lookup on @dst. Returns 0 on success. * Stores basic nexthop info into provided @pnh6 structure. * Note that * - nh_ifp represents logical transmit interface (rt_ifp) by default * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed * - mtu from logical transmit interface will be returned. * - nh_ifp cannot be safely dereferenced * - nh_ifp represents rt_ifp (e.g. if looking up address on * interface "ix0" pointer to "ix0" interface will be returned instead * of "lo0") * - howewer mtu from "transmit" interface will be returned. * - scope will be embedded in nh_addr */ int fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid, uint32_t flags, uint32_t flowid, struct nhop6_basic *pnh6) { struct radix_node_head *rh; struct radix_node *rn; struct sockaddr_in6 sin6; struct rtentry *rte; KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_basic: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET6); if (rh == NULL) return (ENOENT); /* Prepare lookup key */ memset(&sin6, 0, sizeof(sin6)); sin6.sin6_addr = *dst; sin6.sin6_len = sizeof(struct sockaddr_in6); /* Assume scopeid is valid and embed it directly */ if (IN6_IS_SCOPE_LINKLOCAL(dst)) sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff); RADIX_NODE_HEAD_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin6, rh); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rte = RNTORT(rn); /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(rte->rt_ifp)) { fib6_rte_to_nh_basic(rte, &sin6.sin6_addr, flags, pnh6); RADIX_NODE_HEAD_RUNLOCK(rh); return (0); } } RADIX_NODE_HEAD_RUNLOCK(rh); return (ENOENT); } /* * Performs IPv6 route table lookup on @dst. Returns 0 on success. * Stores extended nexthop info into provided @pnh6 structure. * Note that * - nh_ifp cannot be safely dereferenced unless NHR_REF is specified. * - in that case you need to call fib6_free_nh_ext() * - nh_ifp represents logical transmit interface (rt_ifp) by default * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed * - mtu from logical transmit interface will be returned. * - scope will be embedded in nh_addr */ int fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid, uint32_t flags, uint32_t flowid, struct nhop6_extended *pnh6) { struct radix_node_head *rh; struct radix_node *rn; struct sockaddr_in6 sin6; struct rtentry *rte; KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_ext: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET6); if (rh == NULL) return (ENOENT); /* Prepare lookup key */ memset(&sin6, 0, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_addr = *dst; /* Assume scopeid is valid and embed it directly */ if (IN6_IS_SCOPE_LINKLOCAL(dst)) sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff); RADIX_NODE_HEAD_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin6, rh); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rte = RNTORT(rn); +#ifdef RADIX_MPATH + rte = rt_mpath_select(rte, flowid); + if (rte == NULL) { + RADIX_NODE_HEAD_RUNLOCK(rh); + return (ENOENT); + } +#endif /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(rte->rt_ifp)) { fib6_rte_to_nh_extended(rte, &sin6.sin6_addr, flags, pnh6); if ((flags & NHR_REF) != 0) { /* TODO: Do lwref on egress ifp's */ } RADIX_NODE_HEAD_RUNLOCK(rh); return (0); } } RADIX_NODE_HEAD_RUNLOCK(rh); return (ENOENT); } void fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6) { } #endif Index: projects/clang380-import/sys/netinet6/in6_pcb.c =================================================================== --- projects/clang380-import/sys/netinet6/in6_pcb.c (revision 293686) +++ projects/clang380-import/sys/netinet6/in6_pcb.c (revision 293687) @@ -1,1281 +1,1275 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_pcbgroup.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct inpcb *in6_pcblookup_hash_locked(struct inpcbinfo *, struct in6_addr *, u_int, struct in6_addr *, u_int, int, struct ifnet *); int in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) { struct socket *so = inp->inp_socket; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; u_short lport = 0; int error, lookupflags = 0; int reuseport = (so->so_options & SO_REUSEPORT); INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); if (TAILQ_EMPTY(&V_in6_ifaddrhead)) /* XXX broken! */ return (EADDRNOTAVAIL); if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) return (EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) lookupflags = INPLOOKUP_WILDCARD; if (nam == NULL) { if ((error = prison_local_ip6(cred, &inp->in6p_laddr, ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) return (error); } else { sin6 = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof(*sin6)) return (EINVAL); /* * family check. */ if (nam->sa_family != AF_INET6) return (EAFNOSUPPORT); if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) return(error); if ((error = prison_local_ip6(cred, &sin6->sin6_addr, ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) return (error); lport = sin6->sin6_port; if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow compepte duplication of binding if * SO_REUSEPORT is set, or if SO_REUSEADDR is set * and a multicast address is bound on both * new and duplicated sockets. */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0) reuseport = SO_REUSEADDR|SO_REUSEPORT; } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct ifaddr *ifa; sin6->sin6_port = 0; /* yech... */ if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) == NULL && (inp->inp_flags & INP_BINDANY) == 0) { return (EADDRNOTAVAIL); } /* * XXX: bind to an anycast address might accidentally * cause sending a packet with anycast source address. * We should allow to bind to a deprecated address, since * the application dares to use it. */ if (ifa != NULL && ((struct in6_ifaddr *)ifa)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) { ifa_free(ifa); return (EADDRNOTAVAIL); } if (ifa != NULL) ifa_free(ifa); } if (lport) { struct inpcb *t; struct tcptw *tw; /* GROSS */ if (ntohs(lport) <= V_ipport_reservedhigh && ntohs(lport) >= V_ipport_reservedlow && priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0)) return (EACCES); if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) && priv_check_cred(inp->inp_cred, PRIV_NETINET_REUSEPORT, 0) != 0) { t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, lport, INPLOOKUP_WILDCARD, cred); if (t && ((inp->inp_flags2 & INP_BINDMULTI) == 0) && ((t->inp_flags & INP_TIMEWAIT) == 0) && (so->so_type != SOCK_STREAM || IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) && (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || (t->inp_flags2 & INP_REUSEPORT) == 0) && (inp->inp_cred->cr_uid != t->inp_cred->cr_uid)) return (EADDRINUSE); /* * If the socket is a BINDMULTI socket, then * the credentials need to match and the * original socket also has to have been bound * with BINDMULTI. */ if (t && (! in_pcbbind_check_bindmulti(inp, t))) return (EADDRINUSE); #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); t = in_pcblookup_local(pcbinfo, sin.sin_addr, lport, INPLOOKUP_WILDCARD, cred); if (t && ((inp->inp_flags2 & INP_BINDMULTI) == 0) && ((t->inp_flags & INP_TIMEWAIT) == 0) && (so->so_type != SOCK_STREAM || ntohl(t->inp_faddr.s_addr) == INADDR_ANY) && (inp->inp_cred->cr_uid != t->inp_cred->cr_uid)) return (EADDRINUSE); if (t && (! in_pcbbind_check_bindmulti(inp, t))) return (EADDRINUSE); } #endif } t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, lport, lookupflags, cred); if (t && (t->inp_flags & INP_TIMEWAIT)) { /* * XXXRW: If an incpb has had its timewait * state recycled, we treat the address as * being in use (for now). This is better * than a panic, but not desirable. */ tw = intotw(t); if (tw == NULL || (reuseport & tw->tw_so_options) == 0) return (EADDRINUSE); } else if (t && (reuseport & inp_so_options(t)) == 0) { return (EADDRINUSE); } #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); t = in_pcblookup_local(pcbinfo, sin.sin_addr, lport, lookupflags, cred); if (t && t->inp_flags & INP_TIMEWAIT) { tw = intotw(t); if (tw == NULL) return (EADDRINUSE); if ((reuseport & tw->tw_so_options) == 0 && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || ((inp->inp_vflag & INP_IPV6PROTO) == (t->inp_vflag & INP_IPV6PROTO)))) return (EADDRINUSE); } else if (t && (reuseport & inp_so_options(t)) == 0 && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || (t->inp_vflag & INP_IPV6PROTO) != 0)) return (EADDRINUSE); } #endif } inp->in6p_laddr = sin6->sin6_addr; } if (lport == 0) { if ((error = in6_pcbsetport(&inp->in6p_laddr, inp, cred)) != 0) { /* Undo an address bind that may have occurred. */ inp->in6p_laddr = in6addr_any; return (error); } } else { inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; return (EAGAIN); } } return (0); } /* * Transform old in6_pcbconnect() into an inner subroutine for new * in6_pcbconnect(): Do some validity-checking on the remote * address (in mbuf 'nam') and then determine local host address * (i.e., which interface) to use to access that remote host. * * This preserves definition of in6_pcbconnect(), while supporting a * slightly different version for T/TCP. (This is more than * a bit of a kludge, but cleaning up the internal interfaces would * have forced minor changes in every protocol). */ static int in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam, struct in6_addr *plocal_addr6) { register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; int error = 0; - struct ifnet *ifp = NULL; int scope_ambiguous = 0; struct in6_addr in6a; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); /* XXXRW: why? */ if (nam->sa_len != sizeof (*sin6)) return (EINVAL); if (sin6->sin6_family != AF_INET6) return (EAFNOSUPPORT); if (sin6->sin6_port == 0) return (EADDRNOTAVAIL); if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) return(error); if (!TAILQ_EMPTY(&V_in6_ifaddrhead)) { /* * If the destination address is UNSPECIFIED addr, * use the loopback addr, e.g ::1. */ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) sin6->sin6_addr = in6addr_loopback; } if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0) return (error); - error = in6_selectsrc(sin6, inp->in6p_outputopts, - inp, inp->inp_cred, &ifp, &in6a); + error = in6_selectsrc_socket(sin6, inp->in6p_outputopts, + inp, inp->inp_cred, scope_ambiguous, &in6a, NULL); if (error) return (error); - if (ifp && scope_ambiguous && - (error = in6_setscope(&sin6->sin6_addr, ifp, NULL)) != 0) { - return(error); - } - /* * Do not update this earlier, in case we return with an error. * - * XXX: this in6_selectsrc result might replace the bound local + * XXX: this in6_selectsrc_socket result might replace the bound local * address with the address specified by setsockopt(IPV6_PKTINFO). * Is it the intended behavior? */ *plocal_addr6 = in6a; /* * Don't do pcblookup call here; return interface in * plocal_addr6 * and exit to caller, that will do the lookup. */ return (0); } /* * Outer subroutine: * Connect from a socket to a specified address. * Both address and port must be specified in argument sin. * If don't have a local address for this socket yet, * then pick one. */ int in6_pcbconnect_mbuf(register struct inpcb *inp, struct sockaddr *nam, struct ucred *cred, struct mbuf *m) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; struct in6_addr addr6; int error; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); /* * Call inner routine, to assign local interface address. * in6_pcbladdr() may automatically fill in sin6_scope_id. */ if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0) return (error); if (in6_pcblookup_hash_locked(pcbinfo, &sin6->sin6_addr, sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ? &addr6 : &inp->in6p_laddr, inp->inp_lport, 0, NULL) != NULL) { return (EADDRINUSE); } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (inp->inp_lport == 0) { error = in6_pcbbind(inp, (struct sockaddr *)0, cred); if (error) return (error); } inp->in6p_laddr = addr6; } inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; if (inp->inp_flags & IN6P_AUTOFLOWLABEL) inp->inp_flow |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); in_pcbrehash_mbuf(inp, m); return (0); } int in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) { return (in6_pcbconnect_mbuf(inp, nam, cred, NULL)); } void in6_pcbdisconnect(struct inpcb *inp) { INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); inp->inp_fport = 0; /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; in_pcbrehash(inp); } struct sockaddr * in6_sockaddr(in_port_t port, struct in6_addr *addr_p) { struct sockaddr_in6 *sin6; sin6 = malloc(sizeof *sin6, M_SONAME, M_WAITOK); bzero(sin6, sizeof *sin6); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); sin6->sin6_port = port; sin6->sin6_addr = *addr_p; (void)sa6_recoverscope(sin6); /* XXX: should catch errors */ return (struct sockaddr *)sin6; } struct sockaddr * in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p) { struct sockaddr_in sin; struct sockaddr_in6 *sin6_p; bzero(&sin, sizeof sin); sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); sin.sin_port = port; sin.sin_addr = *addr_p; sin6_p = malloc(sizeof *sin6_p, M_SONAME, M_WAITOK); in6_sin_2_v4mapsin6(&sin, sin6_p); return (struct sockaddr *)sin6_p; } int in6_getsockaddr(struct socket *so, struct sockaddr **nam) { register struct inpcb *inp; struct in6_addr addr; in_port_t port; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL")); INP_RLOCK(inp); port = inp->inp_lport; addr = inp->in6p_laddr; INP_RUNLOCK(inp); *nam = in6_sockaddr(port, &addr); return 0; } int in6_getpeeraddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; struct in6_addr addr; in_port_t port; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in6_getpeeraddr: inp == NULL")); INP_RLOCK(inp); port = inp->inp_fport; addr = inp->in6p_faddr; INP_RUNLOCK(inp); *nam = in6_sockaddr(port, &addr); return 0; } int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in6_mapped_sockaddr: inp == NULL")); #ifdef INET if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { error = in_getsockaddr(so, nam); if (error == 0) in6_sin_2_v4mapsin6_in_sock(nam); } else #endif { /* scope issues will be handled in in6_getsockaddr(). */ error = in6_getsockaddr(so, nam); } return error; } int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in6_mapped_peeraddr: inp == NULL")); #ifdef INET if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { error = in_getpeeraddr(so, nam); if (error == 0) in6_sin_2_v4mapsin6_in_sock(nam); } else #endif /* scope issues will be handled in in6_getpeeraddr(). */ error = in6_getpeeraddr(so, nam); return error; } /* * Pass some notification to all connections of a protocol * associated with address dst. The local address and/or port numbers * may be specified to limit the search. The "usual action" will be * taken, depending on the ctlinput cmd. The caller must filter any * cmds that are uninteresting (e.g., no error in the map). * Call the protocol specific routine (if any) to report * any errors for each matching socket. */ void in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, u_int fport_arg, const struct sockaddr *src, u_int lport_arg, int cmd, void *cmdarg, struct inpcb *(*notify)(struct inpcb *, int)) { struct inpcb *inp, *inp_temp; struct sockaddr_in6 sa6_src, *sa6_dst; u_short fport = fport_arg, lport = lport_arg; u_int32_t flowinfo; int errno; if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6) return; sa6_dst = (struct sockaddr_in6 *)dst; if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr)) return; /* * note that src can be NULL when we get notify by local fragmentation. */ sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src; flowinfo = sa6_src.sin6_flowinfo; /* * Redirects go to all references to the destination, * and use in6_rtchange to invalidate the route cache. * Dead host indications: also use in6_rtchange to invalidate * the cache, and deliver the error to all the sockets. * Otherwise, if we have knowledge of the local port and address, * deliver only to that socket. */ if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { fport = 0; lport = 0; bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr)); if (cmd != PRC_HOSTDEAD) notify = in6_rtchange; } errno = inet6ctlerrmap[cmd]; INP_INFO_WLOCK(pcbinfo); LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { INP_WLOCK(inp); if ((inp->inp_vflag & INP_IPV6) == 0) { INP_WUNLOCK(inp); continue; } /* * If the error designates a new path MTU for a destination * and the application (associated with this socket) wanted to * know the value, notify. * XXX: should we avoid to notify the value to TCP sockets? */ if (cmd == PRC_MSGSIZE && cmdarg != NULL) ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst, *(u_int32_t *)cmdarg); /* * Detect if we should notify the error. If no source and * destination ports are specifed, but non-zero flowinfo and * local address match, notify the error. This is the case * when the error is delivered with an encrypted buffer * by ESP. Otherwise, just compare addresses and ports * as usual. */ if (lport == 0 && fport == 0 && flowinfo && inp->inp_socket != NULL && flowinfo == (inp->inp_flow & IPV6_FLOWLABEL_MASK) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) goto do_notify; else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr) || inp->inp_socket == 0 || (lport && inp->inp_lport != lport) || (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) || (fport && inp->inp_fport != fport)) { INP_WUNLOCK(inp); continue; } do_notify: if (notify) { if ((*notify)(inp, errno)) INP_WUNLOCK(inp); } else INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(pcbinfo); } /* * Lookup a PCB based on the local address and port. Caller must hold the * hash lock. No inpcb locks or references are acquired. */ struct inpcb * in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr, u_short lport, int lookupflags, struct ucred *cred) { register struct inpcb *inp; int matchwild = 3, wildcard; KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); INP_HASH_WLOCK_ASSERT(pcbinfo); if ((lookupflags & INPLOOKUP_WILDCARD) == 0) { struct inpcbhead *head; /* * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ head = &pcbinfo->ipi_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(&in6addr_any), lport, 0, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_lport == lport) { /* Found. */ if (cred == NULL || prison_equal_ip6(cred->cr_prison, inp->inp_cred->cr_prison)) return (inp); } } /* * Not found. */ return (NULL); } else { struct inpcbporthead *porthash; struct inpcbport *phd; struct inpcb *match = NULL; /* * Best fit PCB lookup. * * First see if this local port is in use by looking on the * port hash list. */ porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, pcbinfo->ipi_porthashmask)]; LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; } if (phd != NULL) { /* * Port is in use by one or more PCBs. Look for best * fit. */ LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; if (cred != NULL && !prison_equal_ip6(cred->cr_prison, inp->inp_cred->cr_prison)) continue; /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) wildcard++; if (!IN6_IS_ADDR_UNSPECIFIED( &inp->in6p_laddr)) { if (IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; else if (!IN6_ARE_ADDR_EQUAL( &inp->in6p_laddr, laddr)) continue; } else { if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; } if (wildcard < matchwild) { match = inp; matchwild = wildcard; if (matchwild == 0) break; } } } return (match); } } void in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) { struct inpcb *in6p; struct ip6_moptions *im6o; int i, gap; INP_INFO_WLOCK(pcbinfo); LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) { INP_WLOCK(in6p); im6o = in6p->in6p_moptions; if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) { /* * Unselect the outgoing ifp for multicast if it * is being detached. */ if (im6o->im6o_multicast_ifp == ifp) im6o->im6o_multicast_ifp = NULL; /* * Drop multicast group membership if we joined * through the interface being detached. */ gap = 0; for (i = 0; i < im6o->im6o_num_memberships; i++) { if (im6o->im6o_membership[i]->in6m_ifp == ifp) { in6_mc_leave(im6o->im6o_membership[i], NULL); gap++; } else if (gap != 0) { im6o->im6o_membership[i - gap] = im6o->im6o_membership[i]; } } im6o->im6o_num_memberships -= gap; } INP_WUNLOCK(in6p); } INP_INFO_WUNLOCK(pcbinfo); } /* * Check for alternatives when higher level complains * about service problems. For now, invalidate cached * routing information. If the route was created dynamically * (by a redirect), time to try a default gateway again. */ void in6_losing(struct inpcb *in6p) { /* * We don't store route pointers in the routing table anymore */ return; } /* * After a routing change, flush old routing * and allocate a (hopefully) better one. */ struct inpcb * in6_rtchange(struct inpcb *inp, int errno) { /* * We don't store route pointers in the routing table anymore */ return inp; } #ifdef PCBGROUP /* * Lookup PCB in hash list, using pcbgroup tables. */ static struct inpcb * in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int lookupflags, struct ifnet *ifp) { struct inpcbhead *head; struct inpcb *inp, *tmpinp; u_short fport = fport_arg, lport = lport_arg; /* * First look for an exact match. */ tmpinp = NULL; INP_GROUP_LOCK(pcbgroup); head = &pcbgroup->ipg_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(faddr), lport, fport, pcbgroup->ipg_hashmask)]; LIST_FOREACH(inp, head, inp_pcbgrouphash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_fport == fport && inp->inp_lport == lport) { /* * XXX We should be able to directly return * the inp here, without any checks. * Well unless both bound with SO_REUSEPORT? */ if (prison_flag(inp->inp_cred, PR_IP6)) goto found; if (tmpinp == NULL) tmpinp = inp; } } if (tmpinp != NULL) { inp = tmpinp; goto found; } /* * Then look for a wildcard match in the pcbgroup. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { struct inpcb *local_wild = NULL, *local_exact = NULL; struct inpcb *jail_wild = NULL; int injail; /* * Order of socket selection - we always prefer jails. * 1. jailed, non-wild. * 2. jailed, wild. * 3. non-jailed, non-wild. * 4. non-jailed, wild. */ head = &pcbgroup->ipg_hashbase[ INP_PCBHASH(INADDR_ANY, lport, 0, pcbgroup->ipg_hashmask)]; LIST_FOREACH(inp, head, inp_pcbgrouphash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || inp->inp_lport != lport) { continue; } injail = prison_flag(inp->inp_cred, PR_IP6); if (injail) { if (prison_check_ip6(inp->inp_cred, laddr) != 0) continue; } else { if (local_exact != NULL) continue; } if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { if (injail) goto found; else local_exact = inp; } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (injail) jail_wild = inp; else local_wild = inp; } } /* LIST_FOREACH */ inp = jail_wild; if (inp == NULL) inp = jail_wild; if (inp == NULL) inp = local_exact; if (inp == NULL) inp = local_wild; if (inp != NULL) goto found; } /* * Then look for a wildcard match, if requested. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { struct inpcb *local_wild = NULL, *local_exact = NULL; struct inpcb *jail_wild = NULL; int injail; /* * Order of socket selection - we always prefer jails. * 1. jailed, non-wild. * 2. jailed, wild. * 3. non-jailed, non-wild. * 4. non-jailed, wild. */ head = &pcbinfo->ipi_wildbase[INP_PCBHASH( INP6_PCBHASHKEY(&in6addr_any), lport, 0, pcbinfo->ipi_wildmask)]; LIST_FOREACH(inp, head, inp_pcbgroup_wild) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || inp->inp_lport != lport) { continue; } injail = prison_flag(inp->inp_cred, PR_IP6); if (injail) { if (prison_check_ip6(inp->inp_cred, laddr) != 0) continue; } else { if (local_exact != NULL) continue; } if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { if (injail) goto found; else local_exact = inp; } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (injail) jail_wild = inp; else local_wild = inp; } } /* LIST_FOREACH */ inp = jail_wild; if (inp == NULL) inp = jail_wild; if (inp == NULL) inp = local_exact; if (inp == NULL) inp = local_wild; if (inp != NULL) goto found; } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */ INP_GROUP_UNLOCK(pcbgroup); return (NULL); found: in_pcbref(inp); INP_GROUP_UNLOCK(pcbgroup); if (lookupflags & INPLOOKUP_WLOCKPCB) { INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) return (NULL); } else if (lookupflags & INPLOOKUP_RLOCKPCB) { INP_RLOCK(inp); if (in_pcbrele_rlocked(inp)) return (NULL); } else panic("%s: locking buf", __func__); return (inp); } #endif /* PCBGROUP */ /* * Lookup PCB in hash list. */ static struct inpcb * in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int lookupflags, struct ifnet *ifp) { struct inpcbhead *head; struct inpcb *inp, *tmpinp; u_short fport = fport_arg, lport = lport_arg; KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); INP_HASH_LOCK_ASSERT(pcbinfo); /* * First look for an exact match. */ tmpinp = NULL; head = &pcbinfo->ipi_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(faddr), lport, fport, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_fport == fport && inp->inp_lport == lport) { /* * XXX We should be able to directly return * the inp here, without any checks. * Well unless both bound with SO_REUSEPORT? */ if (prison_flag(inp->inp_cred, PR_IP6)) return (inp); if (tmpinp == NULL) tmpinp = inp; } } if (tmpinp != NULL) return (tmpinp); /* * Then look for a wildcard match, if requested. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { struct inpcb *local_wild = NULL, *local_exact = NULL; struct inpcb *jail_wild = NULL; int injail; /* * Order of socket selection - we always prefer jails. * 1. jailed, non-wild. * 2. jailed, wild. * 3. non-jailed, non-wild. * 4. non-jailed, wild. */ head = &pcbinfo->ipi_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(&in6addr_any), lport, 0, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || inp->inp_lport != lport) { continue; } injail = prison_flag(inp->inp_cred, PR_IP6); if (injail) { if (prison_check_ip6(inp->inp_cred, laddr) != 0) continue; } else { if (local_exact != NULL) continue; } if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { if (injail) return (inp); else local_exact = inp; } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (injail) jail_wild = inp; else local_wild = inp; } } /* LIST_FOREACH */ if (jail_wild != NULL) return (jail_wild); if (local_exact != NULL) return (local_exact); if (local_wild != NULL) return (local_wild); } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */ /* * Not found. */ return (NULL); } /* * Lookup PCB in hash list, using pcbinfo tables. This variation locks the * hash list lock, and will return the inpcb locked (i.e., requires * INPLOOKUP_LOCKPCB). */ static struct inpcb * in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp) { struct inpcb *inp; INP_HASH_RLOCK(pcbinfo); inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport, (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp); if (inp != NULL) { in_pcbref(inp); INP_HASH_RUNLOCK(pcbinfo); if (lookupflags & INPLOOKUP_WLOCKPCB) { INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) return (NULL); } else if (lookupflags & INPLOOKUP_RLOCKPCB) { INP_RLOCK(inp); if (in_pcbrele_rlocked(inp)) return (NULL); } else panic("%s: locking bug", __func__); } else INP_HASH_RUNLOCK(pcbinfo); return (inp); } /* * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf * from which a pre-calculated hash value may be extracted. * * Possibly more of this logic should be in in6_pcbgroup.c. */ struct inpcb * in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp) { #if defined(PCBGROUP) && !defined(RSS) struct inpcbgroup *pcbgroup; #endif KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); /* * When not using RSS, use connection groups in preference to the * reservation table when looking up 4-tuples. When using RSS, just * use the reservation table, due to the cost of the Toeplitz hash * in software. * * XXXRW: This policy belongs in the pcbgroup code, as in principle * we could be doing RSS with a non-Toeplitz hash that is affordable * in software. */ #if defined(PCBGROUP) && !defined(RSS) if (in_pcbgroup_enabled(pcbinfo)) { pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, fport); return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); } #endif return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, lookupflags, ifp)); } struct inpcb * in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp, struct mbuf *m) { #ifdef PCBGROUP struct inpcbgroup *pcbgroup; #endif KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); #ifdef PCBGROUP /* * If we can use a hardware-generated hash to look up the connection * group, use that connection group to find the inpcb. Otherwise * fall back on a software hash -- or the reservation table if we're * using RSS. * * XXXRW: As above, that policy belongs in the pcbgroup code. */ if (in_pcbgroup_enabled(pcbinfo) && M_HASHTYPE_TEST(m, M_HASHTYPE_NONE) == 0) { pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), m->m_pkthdr.flowid); if (pcbgroup != NULL) return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); #ifndef RSS pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, fport); return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); #endif } #endif return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, lookupflags, ifp)); } void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) { struct ip6_hdr *ip; ip = mtod(m, struct ip6_hdr *); bzero(sin6, sizeof(*sin6)); sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_addr = ip->ip6_src; (void)sa6_recoverscope(sin6); /* XXX: should catch errors... */ return; } Index: projects/clang380-import/sys/netinet6/in6_rmx.c =================================================================== --- projects/clang380-import/sys/netinet6/in6_rmx.c (revision 293686) +++ projects/clang380-import/sys/netinet6/in6_rmx.c (revision 293687) @@ -1,311 +1,283 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6_rmx.c,v 1.11 2001/07/26 06:53:16 jinmei Exp $ */ /*- * Copyright 1994, 1995 Massachusetts Institute of Technology * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern int in6_inithead(void **head, int off); #ifdef VIMAGE extern int in6_detachhead(void **head, int off); #endif /* * Do what we need to do when inserting a route. */ static struct radix_node * in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, struct radix_node *treenodes) { struct rtentry *rt = (struct rtentry *)treenodes; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt); - struct radix_node *ret; RADIX_NODE_HEAD_WLOCK_ASSERT(head); if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) rt->rt_flags |= RTF_MULTICAST; /* * A little bit of help for both IPv6 output and input: * For local addresses, we make sure that RTF_LOCAL is set, * with the thought that this might one day be used to speed up * ip_input(). * * We also mark routes to multicast addresses as such, because * it's easy to do and might be useful (but this is much more * dubious since it's so easy to inspect the address). (This * is done above.) * * XXX * should elaborate the code. */ if (rt->rt_flags & RTF_HOST) { if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr) ->sin6_addr, &sin6->sin6_addr)) { rt->rt_flags |= RTF_LOCAL; } } if (rt->rt_ifp != NULL) { /* * Check route MTU: * inherit interface MTU if not set or * check if MTU is too large. */ if (rt->rt_mtu == 0) { rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp); } else if (rt->rt_mtu > IN6_LINKMTU(rt->rt_ifp)) rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp); } - ret = rn_addroute(v_arg, n_arg, head, treenodes); - if (ret == NULL) { - struct rtentry *rt2; - /* - * We are trying to add a net route, but can't. - * The following case should be allowed, so we'll make a - * special check for this: - * Two IPv6 addresses with the same prefix is assigned - * to a single interrface. - * # ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1) - * # ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2) - * In this case, (*1) and (*2) want to add the same - * net route entry, 3ffe:0501:: -> if0. - * This case should not raise an error. - */ - rt2 = in6_rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED, - rt->rt_fibnum); - if (rt2) { - if (((rt2->rt_flags & (RTF_HOST|RTF_GATEWAY)) == 0) - && rt2->rt_gateway - && rt2->rt_gateway->sa_family == AF_LINK - && rt2->rt_ifp == rt->rt_ifp) { - ret = rt2->rt_nodes; - } - RTFREE_LOCKED(rt2); - } - } - return (ret); + return (rn_addroute(v_arg, n_arg, head, treenodes)); } /* * Age old PMTUs. */ struct mtuex_arg { struct radix_node_head *rnh; time_t nextstop; }; static VNET_DEFINE(struct callout, rtq_mtutimer); #define V_rtq_mtutimer VNET(rtq_mtutimer) static int in6_mtuexpire(struct rtentry *rt, void *rock) { struct mtuex_arg *ap = rock; if (rt->rt_expire && !(rt->rt_flags & RTF_PROBEMTU)) { if (rt->rt_expire <= time_uptime) { rt->rt_flags |= RTF_PROBEMTU; } else { ap->nextstop = lmin(ap->nextstop, rt->rt_expire); } } return (0); } #define MTUTIMO_DEFAULT (60*1) static void in6_mtutimo_setwa(struct radix_node_head *rnh, uint32_t fibum, int af, void *_arg) { struct mtuex_arg *arg; arg = (struct mtuex_arg *)_arg; arg->rnh = rnh; } static void in6_mtutimo(void *rock) { CURVNET_SET_QUIET((struct vnet *) rock); struct timeval atv; struct mtuex_arg arg; rt_foreach_fib_walk(AF_INET6, in6_mtutimo_setwa, in6_mtuexpire, &arg); atv.tv_sec = MTUTIMO_DEFAULT; atv.tv_usec = 0; callout_reset(&V_rtq_mtutimer, tvtohz(&atv), in6_mtutimo, rock); CURVNET_RESTORE(); } /* * Initialize our routing tree. */ static VNET_DEFINE(int, _in6_rt_was_here); #define V__in6_rt_was_here VNET(_in6_rt_was_here) int in6_inithead(void **head, int off) { struct radix_node_head *rnh; if (!rn_inithead(head, offsetof(struct sockaddr_in6, sin6_addr) << 3)) return (0); rnh = *head; RADIX_NODE_HEAD_LOCK_INIT(rnh); rnh->rnh_addaddr = in6_addroute; if (V__in6_rt_was_here == 0) { callout_init(&V_rtq_mtutimer, 1); in6_mtutimo(curvnet); /* kick off timeout first time */ V__in6_rt_was_here = 1; } return (1); } #ifdef VIMAGE int in6_detachhead(void **head, int off) { callout_drain(&V_rtq_mtutimer); return (rn_detachhead(head)); } #endif /* * Extended API for IPv6 FIB support. */ void in6_rtredirect(struct sockaddr *dst, struct sockaddr *gw, struct sockaddr *nm, int flags, struct sockaddr *src, u_int fibnum) { rtredirect_fib(dst, gw, nm, flags, src, fibnum); } int in6_rtrequest(int req, struct sockaddr *dst, struct sockaddr *gw, struct sockaddr *mask, int flags, struct rtentry **ret_nrt, u_int fibnum) { return (rtrequest_fib(req, dst, gw, mask, flags, ret_nrt, fibnum)); } void in6_rtalloc(struct route_in6 *ro, u_int fibnum) { rtalloc_ign_fib((struct route *)ro, 0ul, fibnum); } void in6_rtalloc_ign(struct route_in6 *ro, u_long ignflags, u_int fibnum) { rtalloc_ign_fib((struct route *)ro, ignflags, fibnum); } struct rtentry * in6_rtalloc1(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum) { return (rtalloc1_fib(dst, report, ignflags, fibnum)); } Index: projects/clang380-import/sys/netinet6/in6_src.c =================================================================== --- projects/clang380-import/sys/netinet6/in6_src.c (revision 293686) +++ projects/clang380-import/sys/netinet6/in6_src.c (revision 293687) @@ -1,1168 +1,1244 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $ */ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct mtx addrsel_lock; #define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF) #define ADDRSEL_LOCK() mtx_lock(&addrsel_lock) #define ADDRSEL_UNLOCK() mtx_unlock(&addrsel_lock) #define ADDRSEL_LOCK_ASSERT() mtx_assert(&addrsel_lock, MA_OWNED) static struct sx addrsel_sxlock; #define ADDRSEL_SXLOCK_INIT() sx_init(&addrsel_sxlock, "addrsel_sxlock") #define ADDRSEL_SLOCK() sx_slock(&addrsel_sxlock) #define ADDRSEL_SUNLOCK() sx_sunlock(&addrsel_sxlock) #define ADDRSEL_XLOCK() sx_xlock(&addrsel_sxlock) #define ADDRSEL_XUNLOCK() sx_xunlock(&addrsel_sxlock) #define ADDR_LABEL_NOTAPP (-1) static VNET_DEFINE(struct in6_addrpolicy, defaultaddrpolicy); #define V_defaultaddrpolicy VNET(defaultaddrpolicy) VNET_DEFINE(int, ip6_prefer_tempaddr) = 0; static int selectroute(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, struct rtentry **, int, u_int); static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct ifnet **, struct ifnet *, u_int); +static int in6_selectsrc(uint32_t, struct sockaddr_in6 *, + struct ip6_pktopts *, struct inpcb *, struct ucred *, + struct ifnet **, struct in6_addr *); static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *); static void init_policy_queue(void); static int add_addrsel_policyent(struct in6_addrpolicy *); static int delete_addrsel_policyent(struct in6_addrpolicy *); static int walk_addrsel_policy(int (*)(struct in6_addrpolicy *, void *), void *); static int dump_addrsel_policyent(struct in6_addrpolicy *, void *); static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *); /* * Return an IPv6 address, which is the most appropriate for a given * destination and user specified options. * If necessary, this function lookups the routing table and returns * an entry to the caller for later use. */ #define REPLACE(r) do {\ IP6STAT_INC(ip6s_sources_rule[(r)]); \ rule = (r); \ /* { \ char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \ printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \ } */ \ goto replace; \ } while(0) #define NEXT(r) do {\ /* { \ char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \ printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \ } */ \ goto next; /* XXX: we can't use 'continue' here */ \ } while(0) #define BREAK(r) do { \ IP6STAT_INC(ip6s_sources_rule[(r)]); \ rule = (r); \ goto out; /* XXX: we can't use 'break' here */ \ } while(0) -int -in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, - struct inpcb *inp, struct ucred *cred, +static int +in6_selectsrc(uint32_t fibnum, struct sockaddr_in6 *dstsock, + struct ip6_pktopts *opts, struct inpcb *inp, struct ucred *cred, struct ifnet **ifpp, struct in6_addr *srcp) { struct rm_priotracker in6_ifa_tracker; struct in6_addr dst, tmp; struct ifnet *ifp = NULL, *oifp = NULL; struct in6_ifaddr *ia = NULL, *ia_best = NULL; struct in6_pktinfo *pi = NULL; int dst_scope = -1, best_scope = -1, best_matchlen = -1; struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL; u_int32_t odstzone; int prefer_tempaddr; int error, rule; struct ip6_moptions *mopts; KASSERT(srcp != NULL, ("%s: srcp is NULL", __func__)); dst = dstsock->sin6_addr; /* make a copy for local operation */ if (ifpp) { /* * Save a possibly passed in ifp for in6_selectsrc. Only * neighbor discovery code should use this feature, where * we may know the interface but not the FIB number holding * the connected subnet in case someone deleted it from the * default FIB and we need to check the interface. */ if (*ifpp != NULL) oifp = *ifpp; *ifpp = NULL; } if (inp != NULL) { INP_LOCK_ASSERT(inp); mopts = inp->in6p_moptions; } else { mopts = NULL; } /* * If the source address is explicitly specified by the caller, * check if the requested source address is indeed a unicast address * assigned to the node, and can be used as the packet's source * address. If everything is okay, use the address as source. */ if (opts && (pi = opts->ip6po_pktinfo) && !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { struct sockaddr_in6 srcsock; struct in6_ifaddr *ia6; /* get the outgoing interface */ if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp, - (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) + fibnum)) != 0) return (error); /* * determine the appropriate zone id of the source based on * the zone of the destination and the outgoing interface. * If the specified address is ambiguous wrt the scope zone, * the interface must be specified; otherwise, ifa_ifwithaddr() * will fail matching the address. */ bzero(&srcsock, sizeof(srcsock)); srcsock.sin6_family = AF_INET6; srcsock.sin6_len = sizeof(srcsock); srcsock.sin6_addr = pi->ipi6_addr; if (ifp) { error = in6_setscope(&srcsock.sin6_addr, ifp, NULL); if (error) return (error); } if (cred != NULL && (error = prison_local_ip6(cred, &srcsock.sin6_addr, (inp != NULL && (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) return (error); ia6 = (struct in6_ifaddr *)ifa_ifwithaddr( (struct sockaddr *)&srcsock); if (ia6 == NULL || (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { if (ia6 != NULL) ifa_free(&ia6->ia_ifa); return (EADDRNOTAVAIL); } pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */ if (ifpp) *ifpp = ifp; bcopy(&ia6->ia_addr.sin6_addr, srcp, sizeof(*srcp)); ifa_free(&ia6->ia_ifa); return (0); } /* * Otherwise, if the socket has already bound the source, just use it. */ if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (cred != NULL && (error = prison_local_ip6(cred, &inp->in6p_laddr, ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) return (error); bcopy(&inp->in6p_laddr, srcp, sizeof(*srcp)); return (0); } /* * Bypass source address selection and use the primary jail IP * if requested. */ if (cred != NULL && !prison_saddrsel_ip6(cred, srcp)) return (0); /* * If the address is not specified, choose the best one based on * the outgoing interface and the destination address. */ /* get the outgoing interface */ if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp, (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) != 0) return (error); #ifdef DIAGNOSTIC if (ifp == NULL) /* this should not happen */ panic("in6_selectsrc: NULL ifp"); #endif error = in6_setscope(&dst, ifp, &odstzone); if (error) return (error); rule = 0; IN6_IFADDR_RLOCK(&in6_ifa_tracker); TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { int new_scope = -1, new_matchlen = -1; struct in6_addrpolicy *new_policy = NULL; u_int32_t srczone, osrczone, dstzone; struct in6_addr src; struct ifnet *ifp1 = ia->ia_ifp; /* * We'll never take an address that breaks the scope zone * of the destination. We also skip an address if its zone * does not contain the outgoing interface. * XXX: we should probably use sin6_scope_id here. */ if (in6_setscope(&dst, ifp1, &dstzone) || odstzone != dstzone) { continue; } src = ia->ia_addr.sin6_addr; if (in6_setscope(&src, ifp, &osrczone) || in6_setscope(&src, ifp1, &srczone) || osrczone != srczone) { continue; } /* avoid unusable addresses */ if ((ia->ia6_flags & (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { continue; } if (!V_ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) continue; /* If jailed only take addresses of the jail into account. */ if (cred != NULL && prison_check_ip6(cred, &ia->ia_addr.sin6_addr) != 0) continue; /* Rule 1: Prefer same address */ if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) { ia_best = ia; BREAK(1); /* there should be no better candidate */ } if (ia_best == NULL) REPLACE(0); /* Rule 2: Prefer appropriate scope */ if (dst_scope < 0) dst_scope = in6_addrscope(&dst); new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) REPLACE(2); NEXT(2); } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) NEXT(2); REPLACE(2); } /* * Rule 3: Avoid deprecated addresses. Note that the case of * !ip6_use_deprecated is already rejected above. */ if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) NEXT(3); if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) REPLACE(3); /* Rule 4: Prefer home addresses */ /* * XXX: This is a TODO. We should probably merge the MIP6 * case above. */ /* Rule 5: Prefer outgoing interface */ if (!(ND_IFINFO(ifp)->flags & ND6_IFF_NO_PREFER_IFACE)) { if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) NEXT(5); if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) REPLACE(5); } /* * Rule 6: Prefer matching label * Note that best_policy should be non-NULL here. */ if (dst_policy == NULL) dst_policy = lookup_addrsel_policy(dstsock); if (dst_policy->label != ADDR_LABEL_NOTAPP) { new_policy = lookup_addrsel_policy(&ia->ia_addr); if (dst_policy->label == best_policy->label && dst_policy->label != new_policy->label) NEXT(6); if (dst_policy->label != best_policy->label && dst_policy->label == new_policy->label) REPLACE(6); } /* * Rule 7: Prefer public addresses. * We allow users to reverse the logic by configuring * a sysctl variable, so that privacy conscious users can * always prefer temporary addresses. */ if (opts == NULL || opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { prefer_tempaddr = V_ip6_prefer_tempaddr; } else if (opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_NOTPREFER) { prefer_tempaddr = 0; } else prefer_tempaddr = 1; if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && (ia->ia6_flags & IN6_IFF_TEMPORARY)) { if (prefer_tempaddr) REPLACE(7); else NEXT(7); } if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { if (prefer_tempaddr) NEXT(7); else REPLACE(7); } /* * Rule 8: prefer addresses on alive interfaces. * This is a KAME specific rule. */ if ((ia_best->ia_ifp->if_flags & IFF_UP) && !(ia->ia_ifp->if_flags & IFF_UP)) NEXT(8); if (!(ia_best->ia_ifp->if_flags & IFF_UP) && (ia->ia_ifp->if_flags & IFF_UP)) REPLACE(8); /* * Rule 9: prefer address with better virtual status. */ if (ifa_preferred(&ia_best->ia_ifa, &ia->ia_ifa)) REPLACE(9); if (ifa_preferred(&ia->ia_ifa, &ia_best->ia_ifa)) NEXT(9); /* * Rule 10: prefer address with `prefer_source' flag. */ if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0 && (ia->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0) REPLACE(10); if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0 && (ia->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0) NEXT(10); /* * Rule 14: Use longest matching prefix. * Note: in the address selection draft, this rule is * documented as "Rule 8". However, since it is also * documented that this rule can be overridden, we assign * a large number so that it is easy to assign smaller numbers * to more preferred rules. */ new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst); if (best_matchlen < new_matchlen) REPLACE(14); if (new_matchlen < best_matchlen) NEXT(14); /* Rule 15 is reserved. */ /* * Last resort: just keep the current candidate. * Or, do we need more rules? */ continue; replace: ia_best = ia; best_scope = (new_scope >= 0 ? new_scope : in6_addrscope(&ia_best->ia_addr.sin6_addr)); best_policy = (new_policy ? new_policy : lookup_addrsel_policy(&ia_best->ia_addr)); best_matchlen = (new_matchlen >= 0 ? new_matchlen : in6_matchlen(&ia_best->ia_addr.sin6_addr, &dst)); next: continue; out: break; } if ((ia = ia_best) == NULL) { IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); IP6STAT_INC(ip6s_sources_none); return (EADDRNOTAVAIL); } /* * At this point at least one of the addresses belonged to the jail * but it could still be, that we want to further restrict it, e.g. * theoratically IN6_IS_ADDR_LOOPBACK. * It must not be IN6_IS_ADDR_UNSPECIFIED anymore. * prison_local_ip6() will fix an IN6_IS_ADDR_LOOPBACK but should * let all others previously selected pass. * Use tmp to not change ::1 on lo0 to the primary jail address. */ tmp = ia->ia_addr.sin6_addr; if (cred != NULL && prison_local_ip6(cred, &tmp, (inp != NULL && (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) { IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); IP6STAT_INC(ip6s_sources_none); return (EADDRNOTAVAIL); } if (ifpp) *ifpp = ifp; bcopy(&tmp, srcp, sizeof(*srcp)); if (ia->ia_ifp == ifp) IP6STAT_INC(ip6s_sources_sameif[best_scope]); else IP6STAT_INC(ip6s_sources_otherif[best_scope]); if (dst_scope == best_scope) IP6STAT_INC(ip6s_sources_samescope[best_scope]); else IP6STAT_INC(ip6s_sources_otherscope[best_scope]); if (IFA6_IS_DEPRECATED(ia)) IP6STAT_INC(ip6s_sources_deprecated[best_scope]); IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); return (0); +} + +/* + * Select source address based on @inp, @dstsock and @opts. + * Stores selected address to @srcp. If @scope_ambiguous is set, + * embed scope from selected outgoing interface. If @hlim pointer + * is provided, stores calculated hop limit there. + * Returns 0 on success. + */ +int +in6_selectsrc_socket(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, + struct inpcb *inp, struct ucred *cred, int scope_ambiguous, + struct in6_addr *srcp, int *hlim) +{ + struct ifnet *retifp; + uint32_t fibnum; + int error; + + fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB; + retifp = NULL; + + error = in6_selectsrc(fibnum, dstsock, opts, inp, cred, &retifp, srcp); + if (error != 0) + return (error); + + if (hlim != NULL) + *hlim = in6_selecthlim(inp, retifp); + + if (retifp == NULL || scope_ambiguous == 0) + return (0); + + /* + * Application should provide a proper zone ID or the use of + * default zone IDs should be enabled. Unfortunately, some + * applications do not behave as it should, so we need a + * workaround. Even if an appropriate ID is not determined + * (when it's required), if we can determine the outgoing + * interface. determine the zone ID based on the interface. + */ + error = in6_setscope(&dstsock->sin6_addr, retifp, NULL); + + return (error); +} + +/* + * Select source address based on @fibnum, @dst and @scopeid. + * Stores selected address to @srcp. + * Returns 0 on success. + * + * Used by non-socket based consumers (ND code mostly) + */ +int +in6_selectsrc_addr(uint32_t fibnum, const struct in6_addr *dst, + uint32_t scopeid, struct ifnet *ifp, struct in6_addr *srcp, + int *hlim) +{ + struct ifnet *retifp; + struct sockaddr_in6 dst_sa; + int error; + + retifp = ifp; + bzero(&dst_sa, sizeof(dst_sa)); + dst_sa.sin6_family = AF_INET6; + dst_sa.sin6_len = sizeof(dst_sa); + dst_sa.sin6_addr = *dst; + dst_sa.sin6_scope_id = scopeid; + sa6_embedscope(&dst_sa, 0); + + error = in6_selectsrc(fibnum, &dst_sa, NULL, NULL, NULL, &retifp, srcp); + if (hlim != NULL) + *hlim = in6_selecthlim(NULL, retifp); + + return (error); } /* * clone - meaningful only for bsdi and freebsd */ static int selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp, struct rtentry **retrt, int norouteok, u_int fibnum) { int error = 0; struct ifnet *ifp = NULL; struct rtentry *rt = NULL; struct sockaddr_in6 *sin6_next; struct in6_pktinfo *pi = NULL; struct in6_addr *dst = &dstsock->sin6_addr; uint32_t zoneid; #if 0 char ip6buf[INET6_ADDRSTRLEN]; if (dstsock->sin6_addr.s6_addr32[0] == 0 && dstsock->sin6_addr.s6_addr32[1] == 0 && !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { printf("in6_selectroute: strange destination %s\n", ip6_sprintf(ip6buf, &dstsock->sin6_addr)); } else { printf("in6_selectroute: destination = %s%%%d\n", ip6_sprintf(ip6buf, &dstsock->sin6_addr), dstsock->sin6_scope_id); /* for debug */ } #endif /* If the caller specify the outgoing interface explicitly, use it. */ if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { /* XXX boundary check is assumed to be already done. */ ifp = ifnet_byindex(pi->ipi6_ifindex); if (ifp != NULL && (norouteok || retrt == NULL || IN6_IS_ADDR_MULTICAST(dst))) { /* * we do not have to check or get the route for * multicast. */ goto done; } else goto getroute; } /* * If the destination address is a multicast address and the outgoing * interface for the address is specified by the caller, use it. */ if (IN6_IS_ADDR_MULTICAST(dst) && mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { goto done; /* we do not need a route for multicast. */ } /* * If destination address is LLA or link- or node-local multicast, * use it's embedded scope zone id to determine outgoing interface. */ if (IN6_IS_ADDR_MC_LINKLOCAL(dst) || IN6_IS_ADDR_MC_NODELOCAL(dst)) { zoneid = ntohs(in6_getscope(dst)); if (zoneid > 0) { ifp = in6_getlinkifnet(zoneid); goto done; } } getroute: /* * If the next hop address for the packet is specified by the caller, * use it as the gateway. */ if (opts && opts->ip6po_nexthop) { struct route_in6 *ron; sin6_next = satosin6(opts->ip6po_nexthop); if (IN6_IS_ADDR_LINKLOCAL(&sin6_next->sin6_addr)) { /* * Next hop is LLA, thus it should be neighbor. * Determine outgoing interface by zone index. */ zoneid = ntohs(in6_getscope(&sin6_next->sin6_addr)); if (zoneid > 0) { ifp = in6_getlinkifnet(zoneid); goto done; } } ron = &opts->ip6po_nextroute; /* Use a cached route if it exists and is valid. */ if (ron->ro_rt != NULL && ( (ron->ro_rt->rt_flags & RTF_UP) == 0 || ron->ro_dst.sin6_family != AF_INET6 || !IN6_ARE_ADDR_EQUAL(&ron->ro_dst.sin6_addr, &sin6_next->sin6_addr))) RO_RTFREE(ron); if (ron->ro_rt == NULL) { ron->ro_dst = *sin6_next; in6_rtalloc(ron, fibnum); /* multi path case? */ } /* * The node identified by that address must be a * neighbor of the sending host. */ if (ron->ro_rt == NULL || (ron->ro_rt->rt_flags & RTF_GATEWAY) != 0) error = EHOSTUNREACH; goto done; } /* * Use a cached route if it exists and is valid, else try to allocate * a new one. Note that we should check the address family of the * cached destination, in case of sharing the cache with IPv4. */ if (ro) { if (ro->ro_rt && (!(ro->ro_rt->rt_flags & RTF_UP) || ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst))) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)NULL; } if (ro->ro_rt == (struct rtentry *)NULL) { struct sockaddr_in6 *sa6; /* No route yet, so try to acquire one */ bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); sa6 = (struct sockaddr_in6 *)&ro->ro_dst; *sa6 = *dstsock; sa6->sin6_scope_id = 0; #ifdef RADIX_MPATH rtalloc_mpath_fib((struct route *)ro, ntohl(sa6->sin6_addr.s6_addr32[3]), fibnum); #else ro->ro_rt = in6_rtalloc1((struct sockaddr *) &ro->ro_dst, 0, 0UL, fibnum); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); #endif } /* * do not care about the result if we have the nexthop * explicitly specified. */ if (opts && opts->ip6po_nexthop) goto done; if (ro->ro_rt) { ifp = ro->ro_rt->rt_ifp; if (ifp == NULL) { /* can this really happen? */ RTFREE(ro->ro_rt); ro->ro_rt = NULL; } } if (ro->ro_rt == NULL) error = EHOSTUNREACH; rt = ro->ro_rt; /* * Check if the outgoing interface conflicts with * the interface specified by ipi6_ifindex (if specified). * Note that loopback interface is always okay. * (this may happen when we are sending a packet to one of * our own addresses.) */ if (ifp && opts && opts->ip6po_pktinfo && opts->ip6po_pktinfo->ipi6_ifindex) { if (!(ifp->if_flags & IFF_LOOPBACK) && ifp->if_index != opts->ip6po_pktinfo->ipi6_ifindex) { error = EHOSTUNREACH; goto done; } } } done: if (ifp == NULL && rt == NULL) { /* * This can happen if the caller did not pass a cached route * nor any other hints. We treat this case an error. */ error = EHOSTUNREACH; } if (error == EHOSTUNREACH) IP6STAT_INC(ip6s_noroute); if (retifp != NULL) { *retifp = ifp; /* * Adjust the "outgoing" interface. If we're going to loop * the packet back to ourselves, the ifp would be the loopback * interface. However, we'd rather know the interface associated * to the destination address (which should probably be one of * our own addresses.) */ if (rt) { if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) && (rt->rt_gateway->sa_family == AF_LINK)) *retifp = ifnet_byindex(((struct sockaddr_dl *) rt->rt_gateway)->sdl_index); } } if (retrt != NULL) *retrt = rt; /* rt may be NULL */ return (error); } static int in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct ifnet **retifp, struct ifnet *oifp, u_int fibnum) { int error; struct route_in6 sro; struct rtentry *rt = NULL; int rt_flags; KASSERT(retifp != NULL, ("%s: retifp is NULL", __func__)); bzero(&sro, sizeof(sro)); rt_flags = 0; error = selectroute(dstsock, opts, mopts, &sro, retifp, &rt, 1, fibnum); if (rt) rt_flags = rt->rt_flags; if (rt && rt == sro.ro_rt) RTFREE(rt); if (error != 0) { /* Help ND. See oifp comment in in6_selectsrc(). */ if (oifp != NULL && fibnum == RT_DEFAULT_FIB) { *retifp = oifp; error = 0; } return (error); } /* * do not use a rejected or black hole route. * XXX: this check should be done in the L2 output routine. * However, if we skipped this check here, we'd see the following * scenario: * - install a rejected route for a scoped address prefix * (like fe80::/10) * - send a packet to a destination that matches the scoped prefix, * with ambiguity about the scope zone. * - pick the outgoing interface from the route, and disambiguate the * scope zone with the interface. * - ip6_output() would try to get another route with the "new" * destination, which may be valid. * - we'd see no error on output. * Although this may not be very harmful, it should still be confusing. * We thus reject the case here. */ if (rt_flags & (RTF_REJECT | RTF_BLACKHOLE)) { error = (rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); return (error); } return (0); } /* * Public wrapper function to selectroute(). * * XXX-BZ in6_selectroute() should and will grow the FIB argument. The * in6_selectroute_fib() function is only there for backward compat on stable. */ int in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp, struct rtentry **retrt) { return (selectroute(dstsock, opts, mopts, ro, retifp, retrt, 0, RT_DEFAULT_FIB)); } #ifndef BURN_BRIDGES int in6_selectroute_fib(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp, struct rtentry **retrt, u_int fibnum) { return (selectroute(dstsock, opts, mopts, ro, retifp, retrt, 0, fibnum)); } #endif /* * Default hop limit selection. The precedence is as follows: * 1. Hoplimit value specified via ioctl. * 2. (If the outgoing interface is detected) the current * hop limit of the interface specified by router advertisement. * 3. The system default hoplimit. */ int in6_selecthlim(struct inpcb *in6p, struct ifnet *ifp) { if (in6p && in6p->in6p_hops >= 0) return (in6p->in6p_hops); else if (ifp) return (ND_IFINFO(ifp)->chlim); else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { struct nhop6_basic nh6; struct in6_addr dst; uint32_t fibnum, scopeid; int hlim; fibnum = in6p->inp_inc.inc_fibnum; in6_splitscope(&in6p->in6p_faddr, &dst, &scopeid); if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6)==0){ hlim = ND_IFINFO(nh6.nh_ifp)->chlim; return (hlim); } } return (V_ip6_defhlim); } /* * XXX: this is borrowed from in6_pcbbind(). If possible, we should * share this function by all *bsd*... */ int in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred) { struct socket *so = inp->inp_socket; u_int16_t lport = 0; int error, lookupflags = 0; #ifdef INVARIANTS struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; #endif INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); error = prison_local_ip6(cred, laddr, ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)); if (error) return(error); /* XXX: this is redundant when called from in6_pcbbind */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) lookupflags = INPLOOKUP_WILDCARD; inp->inp_flags |= INP_ANONPORT; error = in_pcb_lport(inp, NULL, &lport, cred, lookupflags); if (error != 0) return (error); inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; return (EAGAIN); } return (0); } void addrsel_policy_init(void) { init_policy_queue(); /* initialize the "last resort" policy */ bzero(&V_defaultaddrpolicy, sizeof(V_defaultaddrpolicy)); V_defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; if (!IS_DEFAULT_VNET(curvnet)) return; ADDRSEL_LOCK_INIT(); ADDRSEL_SXLOCK_INIT(); } static struct in6_addrpolicy * lookup_addrsel_policy(struct sockaddr_in6 *key) { struct in6_addrpolicy *match = NULL; ADDRSEL_LOCK(); match = match_addrsel_policy(key); if (match == NULL) match = &V_defaultaddrpolicy; else match->use++; ADDRSEL_UNLOCK(); return (match); } /* * Subroutines to manage the address selection policy table via sysctl. */ struct walkarg { struct sysctl_req *w_req; }; static int in6_src_sysctl(SYSCTL_HANDLER_ARGS); SYSCTL_DECL(_net_inet6_ip6); static SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy, CTLFLAG_RD, in6_src_sysctl, ""); static int in6_src_sysctl(SYSCTL_HANDLER_ARGS) { struct walkarg w; if (req->newptr) return EPERM; bzero(&w, sizeof(w)); w.w_req = req; return (walk_addrsel_policy(dump_addrsel_policyent, &w)); } int in6_src_ioctl(u_long cmd, caddr_t data) { struct in6_addrpolicy ent0; if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) return (EOPNOTSUPP); /* check for safety */ ent0 = *(struct in6_addrpolicy *)data; if (ent0.label == ADDR_LABEL_NOTAPP) return (EINVAL); /* check if the prefix mask is consecutive. */ if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) return (EINVAL); /* clear trailing garbages (if any) of the prefix address. */ IN6_MASK_ADDR(&ent0.addr.sin6_addr, &ent0.addrmask.sin6_addr); ent0.use = 0; switch (cmd) { case SIOCAADDRCTL_POLICY: return (add_addrsel_policyent(&ent0)); case SIOCDADDRCTL_POLICY: return (delete_addrsel_policyent(&ent0)); } return (0); /* XXX: compromise compilers */ } /* * The followings are implementation of the policy table using a * simple tail queue. * XXX such details should be hidden. * XXX implementation using binary tree should be more efficient. */ struct addrsel_policyent { TAILQ_ENTRY(addrsel_policyent) ape_entry; struct in6_addrpolicy ape_policy; }; TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); static VNET_DEFINE(struct addrsel_policyhead, addrsel_policytab); #define V_addrsel_policytab VNET(addrsel_policytab) static void init_policy_queue(void) { TAILQ_INIT(&V_addrsel_policytab); } static int add_addrsel_policyent(struct in6_addrpolicy *newpolicy) { struct addrsel_policyent *new, *pol; new = malloc(sizeof(*new), M_IFADDR, M_WAITOK); ADDRSEL_XLOCK(); ADDRSEL_LOCK(); /* duplication check */ TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr, &pol->ape_policy.addr.sin6_addr) && IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr, &pol->ape_policy.addrmask.sin6_addr)) { ADDRSEL_UNLOCK(); ADDRSEL_XUNLOCK(); free(new, M_IFADDR); return (EEXIST); /* or override it? */ } } bzero(new, sizeof(*new)); /* XXX: should validate entry */ new->ape_policy = *newpolicy; TAILQ_INSERT_TAIL(&V_addrsel_policytab, new, ape_entry); ADDRSEL_UNLOCK(); ADDRSEL_XUNLOCK(); return (0); } static int delete_addrsel_policyent(struct in6_addrpolicy *key) { struct addrsel_policyent *pol; ADDRSEL_XLOCK(); ADDRSEL_LOCK(); /* search for the entry in the table */ TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr, &pol->ape_policy.addr.sin6_addr) && IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr, &pol->ape_policy.addrmask.sin6_addr)) { break; } } if (pol == NULL) { ADDRSEL_UNLOCK(); ADDRSEL_XUNLOCK(); return (ESRCH); } TAILQ_REMOVE(&V_addrsel_policytab, pol, ape_entry); ADDRSEL_UNLOCK(); ADDRSEL_XUNLOCK(); free(pol, M_IFADDR); return (0); } static int walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), void *w) { struct addrsel_policyent *pol; int error = 0; ADDRSEL_SLOCK(); TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { if ((error = (*callback)(&pol->ape_policy, w)) != 0) { ADDRSEL_SUNLOCK(); return (error); } } ADDRSEL_SUNLOCK(); return (error); } static int dump_addrsel_policyent(struct in6_addrpolicy *pol, void *arg) { int error = 0; struct walkarg *w = arg; error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol)); return (error); } static struct in6_addrpolicy * match_addrsel_policy(struct sockaddr_in6 *key) { struct addrsel_policyent *pent; struct in6_addrpolicy *bestpol = NULL, *pol; int matchlen, bestmatchlen = -1; u_char *mp, *ep, *k, *p, m; TAILQ_FOREACH(pent, &V_addrsel_policytab, ape_entry) { matchlen = 0; pol = &pent->ape_policy; mp = (u_char *)&pol->addrmask.sin6_addr; ep = mp + 16; /* XXX: scope field? */ k = (u_char *)&key->sin6_addr; p = (u_char *)&pol->addr.sin6_addr; for (; mp < ep && *mp; mp++, k++, p++) { m = *mp; if ((*k & m) != *p) goto next; /* not match */ if (m == 0xff) /* short cut for a typical case */ matchlen += 8; else { while (m >= 0x80) { matchlen++; m <<= 1; } } } /* matched. check if this is better than the current best. */ if (bestpol == NULL || matchlen > bestmatchlen) { bestpol = pol; bestmatchlen = matchlen; } next: continue; } return (bestpol); } Index: projects/clang380-import/sys/netinet6/ip6_var.h =================================================================== --- projects/clang380-import/sys/netinet6/ip6_var.h (revision 293686) +++ projects/clang380-import/sys/netinet6/ip6_var.h (revision 293687) @@ -1,435 +1,436 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_var.h,v 1.62 2001/05/03 14:51:48 itojun Exp $ */ /*- * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_var.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NETINET6_IP6_VAR_H_ #define _NETINET6_IP6_VAR_H_ /* * IP6 reassembly queue structure. Each fragment * being reassembled is attached to one of these structures. */ struct ip6q { struct ip6asfrag *ip6q_down; struct ip6asfrag *ip6q_up; u_int32_t ip6q_ident; u_int8_t ip6q_nxt; u_int8_t ip6q_ecn; u_int8_t ip6q_ttl; struct in6_addr ip6q_src, ip6q_dst; struct ip6q *ip6q_next; struct ip6q *ip6q_prev; int ip6q_unfrglen; /* len of unfragmentable part */ #ifdef notyet u_char *ip6q_nxtp; #endif int ip6q_nfrag; /* # of fragments */ struct label *ip6q_label; }; struct ip6asfrag { struct ip6asfrag *ip6af_down; struct ip6asfrag *ip6af_up; struct mbuf *ip6af_m; int ip6af_offset; /* offset in ip6af_m to next header */ int ip6af_frglen; /* fragmentable part length */ int ip6af_off; /* fragment offset */ u_int16_t ip6af_mff; /* more fragment bit in frag off */ }; #define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m)) /* * IP6 reinjecting structure. */ struct ip6_direct_ctx { uint32_t ip6dc_nxt; /* next header to process */ uint32_t ip6dc_off; /* offset to next header */ }; /* * Structure attached to inpcb.in6p_moptions and * passed to ip6_output when IPv6 multicast options are in use. * This structure is lazy-allocated. */ struct ip6_moptions { struct ifnet *im6o_multicast_ifp; /* ifp for outgoing multicasts */ u_char im6o_multicast_hlim; /* hoplimit for outgoing multicasts */ u_char im6o_multicast_loop; /* 1 >= hear sends if a member */ u_short im6o_num_memberships; /* no. memberships this socket */ u_short im6o_max_memberships; /* max memberships this socket */ struct in6_multi **im6o_membership; /* group memberships */ struct in6_mfilter *im6o_mfilters; /* source filters */ }; /* * Control options for outgoing packets */ /* Routing header related info */ struct ip6po_rhinfo { struct ip6_rthdr *ip6po_rhi_rthdr; /* Routing header */ struct route_in6 ip6po_rhi_route; /* Route to the 1st hop */ }; #define ip6po_rthdr ip6po_rhinfo.ip6po_rhi_rthdr #define ip6po_route ip6po_rhinfo.ip6po_rhi_route /* Nexthop related info */ struct ip6po_nhinfo { struct sockaddr *ip6po_nhi_nexthop; struct route_in6 ip6po_nhi_route; /* Route to the nexthop */ }; #define ip6po_nexthop ip6po_nhinfo.ip6po_nhi_nexthop #define ip6po_nextroute ip6po_nhinfo.ip6po_nhi_route struct ip6_pktopts { struct mbuf *ip6po_m; /* Pointer to mbuf storing the data */ int ip6po_hlim; /* Hoplimit for outgoing packets */ /* Outgoing IF/address information */ struct in6_pktinfo *ip6po_pktinfo; /* Next-hop address information */ struct ip6po_nhinfo ip6po_nhinfo; struct ip6_hbh *ip6po_hbh; /* Hop-by-Hop options header */ /* Destination options header (before a routing header) */ struct ip6_dest *ip6po_dest1; /* Routing header related info. */ struct ip6po_rhinfo ip6po_rhinfo; /* Destination options header (after a routing header) */ struct ip6_dest *ip6po_dest2; int ip6po_tclass; /* traffic class */ int ip6po_minmtu; /* fragment vs PMTU discovery policy */ #define IP6PO_MINMTU_MCASTONLY -1 /* default; send at min MTU for multicast*/ #define IP6PO_MINMTU_DISABLE 0 /* always perform pmtu disc */ #define IP6PO_MINMTU_ALL 1 /* always send at min MTU */ int ip6po_prefer_tempaddr; /* whether temporary addresses are preferred as source address */ #define IP6PO_TEMPADDR_SYSTEM -1 /* follow the system default */ #define IP6PO_TEMPADDR_NOTPREFER 0 /* not prefer temporary address */ #define IP6PO_TEMPADDR_PREFER 1 /* prefer temporary address */ int ip6po_flags; #if 0 /* parameters in this block is obsolete. do not reuse the values. */ #define IP6PO_REACHCONF 0x01 /* upper-layer reachability confirmation. */ #define IP6PO_MINMTU 0x02 /* use minimum MTU (IPV6_USE_MIN_MTU) */ #endif #define IP6PO_DONTFRAG 0x04 /* disable fragmentation (IPV6_DONTFRAG) */ #define IP6PO_USECOA 0x08 /* use care of address */ }; /* * Control options for incoming packets */ struct ip6stat { uint64_t ip6s_total; /* total packets received */ uint64_t ip6s_tooshort; /* packet too short */ uint64_t ip6s_toosmall; /* not enough data */ uint64_t ip6s_fragments; /* fragments received */ uint64_t ip6s_fragdropped; /* frags dropped(dups, out of space) */ uint64_t ip6s_fragtimeout; /* fragments timed out */ uint64_t ip6s_fragoverflow; /* fragments that exceeded limit */ uint64_t ip6s_forward; /* packets forwarded */ uint64_t ip6s_cantforward; /* packets rcvd for unreachable dest */ uint64_t ip6s_redirectsent; /* packets forwarded on same net */ uint64_t ip6s_delivered; /* datagrams delivered to upper level*/ uint64_t ip6s_localout; /* total ip packets generated here */ uint64_t ip6s_odropped; /* lost packets due to nobufs, etc. */ uint64_t ip6s_reassembled; /* total packets reassembled ok */ uint64_t ip6s_fragmented; /* datagrams successfully fragmented */ uint64_t ip6s_ofragments; /* output fragments created */ uint64_t ip6s_cantfrag; /* don't fragment flag was set, etc. */ uint64_t ip6s_badoptions; /* error in option processing */ uint64_t ip6s_noroute; /* packets discarded due to no route */ uint64_t ip6s_badvers; /* ip6 version != 6 */ uint64_t ip6s_rawout; /* total raw ip packets generated */ uint64_t ip6s_badscope; /* scope error */ uint64_t ip6s_notmember; /* don't join this multicast group */ #define IP6S_HDRCNT 256 /* headers count */ uint64_t ip6s_nxthist[IP6S_HDRCNT]; /* next header history */ uint64_t ip6s_m1; /* one mbuf */ #define IP6S_M2MMAX 32 uint64_t ip6s_m2m[IP6S_M2MMAX]; /* two or more mbuf */ uint64_t ip6s_mext1; /* one ext mbuf */ uint64_t ip6s_mext2m; /* two or more ext mbuf */ uint64_t ip6s_exthdrtoolong; /* ext hdr are not contiguous */ uint64_t ip6s_nogif; /* no match gif found */ uint64_t ip6s_toomanyhdr; /* discarded due to too many headers */ /* * statistics for improvement of the source address selection * algorithm: * XXX: hardcoded 16 = # of ip6 multicast scope types + 1 */ #define IP6S_RULESMAX 16 #define IP6S_SCOPECNT 16 /* number of times that address selection fails */ uint64_t ip6s_sources_none; /* number of times that an address on the outgoing I/F is chosen */ uint64_t ip6s_sources_sameif[IP6S_SCOPECNT]; /* number of times that an address on a non-outgoing I/F is chosen */ uint64_t ip6s_sources_otherif[IP6S_SCOPECNT]; /* * number of times that an address that has the same scope * from the destination is chosen. */ uint64_t ip6s_sources_samescope[IP6S_SCOPECNT]; /* * number of times that an address that has a different scope * from the destination is chosen. */ uint64_t ip6s_sources_otherscope[IP6S_SCOPECNT]; /* number of times that a deprecated address is chosen */ uint64_t ip6s_sources_deprecated[IP6S_SCOPECNT]; /* number of times that each rule of source selection is applied. */ uint64_t ip6s_sources_rule[IP6S_RULESMAX]; }; #ifdef _KERNEL #include VNET_PCPUSTAT_DECLARE(struct ip6stat, ip6stat); #define IP6STAT_ADD(name, val) \ VNET_PCPUSTAT_ADD(struct ip6stat, ip6stat, name, (val)) #define IP6STAT_SUB(name, val) IP6STAT_ADD(name, -(val)) #define IP6STAT_INC(name) IP6STAT_ADD(name, 1) #define IP6STAT_DEC(name) IP6STAT_SUB(name, 1) #endif #ifdef _KERNEL /* flags passed to ip6_output as last parameter */ #define IPV6_UNSPECSRC 0x01 /* allow :: as the source address */ #define IPV6_FORWARDING 0x02 /* most of IPv6 header exists */ #define IPV6_MINMTU 0x04 /* use minimum MTU (IPV6_USE_MIN_MTU) */ #ifdef __NO_STRICT_ALIGNMENT #define IP6_HDR_ALIGNED_P(ip) 1 #else #define IP6_HDR_ALIGNED_P(ip) ((((intptr_t) (ip)) & 3) == 0) #endif VNET_DECLARE(int, ip6_defhlim); /* default hop limit */ VNET_DECLARE(int, ip6_defmcasthlim); /* default multicast hop limit */ VNET_DECLARE(int, ip6_forwarding); /* act as router? */ VNET_DECLARE(int, ip6_use_deprecated); /* allow deprecated addr as source */ VNET_DECLARE(int, ip6_rr_prune); /* router renumbering prefix * walk list every 5 sec. */ VNET_DECLARE(int, ip6_mcast_pmtu); /* enable pMTU discovery for multicast? */ VNET_DECLARE(int, ip6_v6only); #define V_ip6_defhlim VNET(ip6_defhlim) #define V_ip6_defmcasthlim VNET(ip6_defmcasthlim) #define V_ip6_forwarding VNET(ip6_forwarding) #define V_ip6_use_deprecated VNET(ip6_use_deprecated) #define V_ip6_rr_prune VNET(ip6_rr_prune) #define V_ip6_mcast_pmtu VNET(ip6_mcast_pmtu) #define V_ip6_v6only VNET(ip6_v6only) VNET_DECLARE(struct socket *, ip6_mrouter); /* multicast routing daemon */ VNET_DECLARE(int, ip6_sendredirects); /* send IP redirects when forwarding? */ VNET_DECLARE(int, ip6_maxfragpackets); /* Maximum packets in reassembly * queue */ VNET_DECLARE(int, ip6_maxfrags); /* Maximum fragments in reassembly * queue */ VNET_DECLARE(int, ip6_accept_rtadv); /* Acts as a host not a router */ VNET_DECLARE(int, ip6_no_radr); /* No defroute from RA */ VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA * receiving IF. */ VNET_DECLARE(int, ip6_rfc6204w3); /* Accept defroute from RA even when forwarding enabled */ VNET_DECLARE(int, ip6_log_interval); VNET_DECLARE(time_t, ip6_log_time); VNET_DECLARE(int, ip6_hdrnestlimit); /* upper limit of # of extension * headers */ VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */ #define V_ip6_mrouter VNET(ip6_mrouter) #define V_ip6_sendredirects VNET(ip6_sendredirects) #define V_ip6_maxfragpackets VNET(ip6_maxfragpackets) #define V_ip6_maxfrags VNET(ip6_maxfrags) #define V_ip6_accept_rtadv VNET(ip6_accept_rtadv) #define V_ip6_no_radr VNET(ip6_no_radr) #define V_ip6_norbit_raif VNET(ip6_norbit_raif) #define V_ip6_rfc6204w3 VNET(ip6_rfc6204w3) #define V_ip6_log_interval VNET(ip6_log_interval) #define V_ip6_log_time VNET(ip6_log_time) #define V_ip6_hdrnestlimit VNET(ip6_hdrnestlimit) #define V_ip6_dad_count VNET(ip6_dad_count) VNET_DECLARE(int, ip6_auto_flowlabel); VNET_DECLARE(int, ip6_auto_linklocal); #define V_ip6_auto_flowlabel VNET(ip6_auto_flowlabel) #define V_ip6_auto_linklocal VNET(ip6_auto_linklocal) VNET_DECLARE(int, ip6_use_tempaddr); /* Whether to use temporary addresses */ VNET_DECLARE(int, ip6_prefer_tempaddr); /* Whether to prefer temporary * addresses in the source address * selection */ #define V_ip6_use_tempaddr VNET(ip6_use_tempaddr) #define V_ip6_prefer_tempaddr VNET(ip6_prefer_tempaddr) VNET_DECLARE(int, ip6_use_defzone); /* Whether to use the default scope * zone when unspecified */ #define V_ip6_use_defzone VNET(ip6_use_defzone) VNET_DECLARE (struct pfil_head, inet6_pfil_hook); /* packet filter hooks */ #define V_inet6_pfil_hook VNET(inet6_pfil_hook) #ifdef IPSTEALTH VNET_DECLARE(int, ip6stealth); #define V_ip6stealth VNET(ip6stealth) #endif extern struct pr_usrreqs rip6_usrreqs; struct sockopt; struct inpcb; int icmp6_ctloutput(struct socket *, struct sockopt *sopt); struct in6_ifaddr; void ip6_init(void); #ifdef VIMAGE void ip6_destroy(void); #endif int ip6proto_register(short); int ip6proto_unregister(short); void ip6_input(struct mbuf *); void ip6_direct_input(struct mbuf *); void ip6_freepcbopts(struct ip6_pktopts *); int ip6_unknown_opt(u_int8_t *, struct mbuf *, int); char * ip6_get_prevhdr(const struct mbuf *, int); int ip6_nexthdr(const struct mbuf *, int, int, int *); int ip6_lasthdr(const struct mbuf *, int, int, int *); extern int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *); int ip6_process_hopopts(struct mbuf *, u_int8_t *, int, u_int32_t *, u_int32_t *); struct mbuf **ip6_savecontrol_v4(struct inpcb *, struct mbuf *, struct mbuf **, int *); void ip6_savecontrol(struct inpcb *, struct mbuf *, struct mbuf **); void ip6_notify_pmtu(struct inpcb *, struct sockaddr_in6 *, u_int32_t); int ip6_sysctl(int *, u_int, void *, size_t *, void *, size_t); void ip6_forward(struct mbuf *, int); void ip6_mloopback(struct ifnet *, const struct mbuf *); int ip6_output(struct mbuf *, struct ip6_pktopts *, struct route_in6 *, int, struct ip6_moptions *, struct ifnet **, struct inpcb *); int ip6_ctloutput(struct socket *, struct sockopt *); int ip6_raw_ctloutput(struct socket *, struct sockopt *); void ip6_initpktopts(struct ip6_pktopts *); int ip6_setpktopts(struct mbuf *, struct ip6_pktopts *, struct ip6_pktopts *, struct ucred *, int); void ip6_clearpktopts(struct ip6_pktopts *, int); struct ip6_pktopts *ip6_copypktopts(struct ip6_pktopts *, int); int ip6_optlen(struct inpcb *); int ip6_deletefraghdr(struct mbuf *, int, int); int ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int, uint32_t); int route6_input(struct mbuf **, int *, int); void frag6_init(void); int frag6_input(struct mbuf **, int *, int); void frag6_slowtimo(void); void frag6_drain(void); void rip6_init(void); int rip6_input(struct mbuf **, int *, int); void rip6_ctlinput(int, struct sockaddr *, void *); int rip6_ctloutput(struct socket *, struct sockopt *); int rip6_output(struct mbuf *, struct socket *, ...); int rip6_usrreq(struct socket *, int, struct mbuf *, struct mbuf *, struct mbuf *, struct thread *); int dest6_input(struct mbuf **, int *, int); int none_input(struct mbuf **, int *, int); -int in6_selectsrc(struct sockaddr_in6 *, struct ip6_pktopts *, - struct inpcb *inp, struct ucred *cred, - struct ifnet **, struct in6_addr *); +int in6_selectsrc_socket(struct sockaddr_in6 *, struct ip6_pktopts *, + struct inpcb *, struct ucred *, int, struct in6_addr *, int *); +int in6_selectsrc_addr(uint32_t, const struct in6_addr *, + uint32_t, struct ifnet *, struct in6_addr *, int *); int in6_selectroute(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, struct rtentry **); int in6_selectroute_fib(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, struct rtentry **, u_int); u_int32_t ip6_randomid(void); u_int32_t ip6_randomflowlabel(void); void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset); #endif /* _KERNEL */ #endif /* !_NETINET6_IP6_VAR_H_ */ Index: projects/clang380-import/sys/netinet6/nd6_nbr.c =================================================================== --- projects/clang380-import/sys/netinet6/nd6_nbr.c (revision 293686) +++ projects/clang380-import/sys/netinet6/nd6_nbr.c (revision 293687) @@ -1,1592 +1,1583 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: nd6_nbr.c,v 1.86 2002/01/21 02:33:04 jinmei Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #define SDL(s) ((struct sockaddr_dl *)s) struct dadq; static struct dadq *nd6_dad_find(struct ifaddr *, struct nd_opt_nonce *); static void nd6_dad_add(struct dadq *dp); static void nd6_dad_del(struct dadq *dp); static void nd6_dad_rele(struct dadq *); static void nd6_dad_starttimer(struct dadq *, int, int); static void nd6_dad_stoptimer(struct dadq *); static void nd6_dad_timer(struct dadq *); static void nd6_dad_duplicated(struct ifaddr *, struct dadq *); static void nd6_dad_ns_output(struct dadq *); static void nd6_dad_ns_input(struct ifaddr *, struct nd_opt_nonce *); static void nd6_dad_na_input(struct ifaddr *); static void nd6_na_output_fib(struct ifnet *, const struct in6_addr *, const struct in6_addr *, u_long, int, struct sockaddr *, u_int); static void nd6_ns_output_fib(struct ifnet *, const struct in6_addr *, const struct in6_addr *, const struct in6_addr *, uint8_t *, u_int); static VNET_DEFINE(int, dad_enhanced) = 1; #define V_dad_enhanced VNET(dad_enhanced) SYSCTL_DECL(_net_inet6_ip6); SYSCTL_INT(_net_inet6_ip6, OID_AUTO, dad_enhanced, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dad_enhanced), 0, "Enable Enhanced DAD, which adds a random nonce to NS messages for DAD."); static VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to transmit DAD packet */ #define V_dad_maxtry VNET(dad_maxtry) /* * Input a Neighbor Solicitation Message. * * Based on RFC 2461 * Based on RFC 2462 (duplicate address detection) */ void nd6_ns_input(struct mbuf *m, int off, int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_neighbor_solicit *nd_ns; struct in6_addr saddr6 = ip6->ip6_src; struct in6_addr daddr6 = ip6->ip6_dst; struct in6_addr taddr6; struct in6_addr myaddr6; char *lladdr = NULL; struct ifaddr *ifa = NULL; int lladdrlen = 0; int anycast = 0, proxy = 0, tentative = 0; int tlladdr; int rflag; union nd_opts ndopts; struct sockaddr_dl proxydl; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; rflag = (V_ip6_forwarding) ? ND_NA_FLAG_ROUTER : 0; if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && V_ip6_norbit_raif) rflag = 0; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_ns, struct nd_neighbor_solicit *, m, off, icmp6len); if (nd_ns == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif ip6 = mtod(m, struct ip6_hdr *); /* adjust pointer for safety */ taddr6 = nd_ns->nd_ns_target; if (in6_setscope(&taddr6, ifp, NULL) != 0) goto bad; if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "nd6_ns_input: invalid hlim (%d) from %s to %s on %s\n", ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); goto bad; } if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) { /* dst has to be a solicited node multicast address. */ if (daddr6.s6_addr16[0] == IPV6_ADDR_INT16_MLL && /* don't check ifindex portion */ daddr6.s6_addr32[1] == 0 && daddr6.s6_addr32[2] == IPV6_ADDR_INT32_ONE && daddr6.s6_addr8[12] == 0xff) { ; /* good */ } else { nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet " "(wrong ip6 dst)\n")); goto bad; } } else if (!V_nd6_onlink_ns_rfc4861) { struct sockaddr_in6 src_sa6; /* * According to recent IETF discussions, it is not a good idea * to accept a NS from an address which would not be deemed * to be a neighbor otherwise. This point is expected to be * clarified in future revisions of the specification. */ bzero(&src_sa6, sizeof(src_sa6)); src_sa6.sin6_family = AF_INET6; src_sa6.sin6_len = sizeof(src_sa6); src_sa6.sin6_addr = saddr6; if (nd6_is_addr_neighbor(&src_sa6, ifp) == 0) { nd6log((LOG_INFO, "nd6_ns_input: " "NS packet from non-neighbor\n")); goto bad; } } if (IN6_IS_ADDR_MULTICAST(&taddr6)) { nd6log((LOG_INFO, "nd6_ns_input: bad NS target (multicast)\n")); goto bad; } icmp6len -= sizeof(*nd_ns); nd6_option_init(nd_ns + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "nd6_ns_input: invalid ND option, ignored\n")); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_src_lladdr) { lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1); lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; } if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && lladdr) { nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet " "(link-layer address option)\n")); goto bad; } /* * Attaching target link-layer address to the NA? * (RFC 2461 7.2.4) * * NS IP dst is unicast/anycast MUST NOT add * NS IP dst is solicited-node multicast MUST add * * In implementation, we add target link-layer address by default. * We do not add one in MUST NOT cases. */ if (!IN6_IS_ADDR_MULTICAST(&daddr6)) tlladdr = 0; else tlladdr = 1; /* * Target address (taddr6) must be either: * (1) Valid unicast/anycast address for my receiving interface, * (2) Unicast address for which I'm offering proxy service, or * (3) "tentative" address on which DAD is being performed. */ /* (1) and (3) check. */ if (ifp->if_carp) ifa = (*carp_iamatch6_p)(ifp, &taddr6); else ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); /* (2) check. */ if (ifa == NULL) { struct sockaddr_dl rt_gateway; struct rt_addrinfo info; struct sockaddr_in6 dst6; bzero(&dst6, sizeof(dst6)); dst6.sin6_len = sizeof(struct sockaddr_in6); dst6.sin6_family = AF_INET6; dst6.sin6_addr = taddr6; bzero(&rt_gateway, sizeof(rt_gateway)); rt_gateway.sdl_len = sizeof(rt_gateway); bzero(&info, sizeof(info)); info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway; /* Always use the default FIB. */ if (rib_lookup_info(RT_DEFAULT_FIB, (struct sockaddr *)&dst6, 0, 0, &info) == 0) { if ((info.rti_flags & RTF_ANNOUNCE) != 0 && rt_gateway.sdl_family == AF_LINK) { /* * proxy NDP for single entry */ proxydl = *SDL(&rt_gateway); ifa = (struct ifaddr *)in6ifa_ifpforlinklocal( ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); if (ifa) proxy = 1; } } } if (ifa == NULL) { /* * We've got an NS packet, and we don't have that adddress * assigned for us. We MUST silently ignore it. * See RFC2461 7.2.3. */ goto freeit; } myaddr6 = *IFA_IN6(ifa); anycast = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST; tentative = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DUPLICATED) goto freeit; if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "nd6_ns_input: lladdrlen mismatch for %s " "(if %d, NS packet %d)\n", ip6_sprintf(ip6bufs, &taddr6), ifp->if_addrlen, lladdrlen - 2)); goto bad; } if (IN6_ARE_ADDR_EQUAL(&myaddr6, &saddr6)) { nd6log((LOG_INFO, "nd6_ns_input: duplicate IP6 address %s\n", ip6_sprintf(ip6bufs, &saddr6))); goto freeit; } /* * We have neighbor solicitation packet, with target address equals to * one of my tentative address. * * src addr how to process? * --- --- * multicast of course, invalid (rejected in ip6_input) * unicast somebody is doing address resolution -> ignore * unspec dup address detection * * The processing is defined in RFC 2462. */ if (tentative) { /* * If source address is unspecified address, it is for * duplicate address detection. * * If not, the packet is for addess resolution; * silently ignore it. */ if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) nd6_dad_ns_input(ifa, ndopts.nd_opts_nonce); goto freeit; } /* * If the source address is unspecified address, entries must not * be created or updated. * It looks that sender is performing DAD. Output NA toward * all-node multicast address, to tell the sender that I'm using * the address. * S bit ("solicited") must be zero. */ if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) { struct in6_addr in6_all; in6_all = in6addr_linklocal_allnodes; if (in6_setscope(&in6_all, ifp, NULL) != 0) goto bad; nd6_na_output_fib(ifp, &in6_all, &taddr6, ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | rflag, tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL, M_GETFIB(m)); goto freeit; } nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_NEIGHBOR_SOLICIT, 0); nd6_na_output_fib(ifp, &saddr6, &taddr6, ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | rflag | ND_NA_FLAG_SOLICITED, tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL, M_GETFIB(m)); freeit: if (ifa != NULL) ifa_free(ifa); m_freem(m); return; bad: nd6log((LOG_ERR, "nd6_ns_input: src=%s\n", ip6_sprintf(ip6bufs, &saddr6))); nd6log((LOG_ERR, "nd6_ns_input: dst=%s\n", ip6_sprintf(ip6bufs, &daddr6))); nd6log((LOG_ERR, "nd6_ns_input: tgt=%s\n", ip6_sprintf(ip6bufs, &taddr6))); ICMP6STAT_INC(icp6s_badns); if (ifa != NULL) ifa_free(ifa); m_freem(m); } /* * Output a Neighbor Solicitation Message. Caller specifies: * - ICMP6 header source IP6 address * - ND6 header target IP6 address * - ND6 header source datalink address * * Based on RFC 2461 * Based on RFC 2462 (duplicate address detection) * * ln - for source address determination * nonce - If non-NULL, NS is used for duplicate address detection and * the value (length is ND_OPT_NONCE_LEN) is used as a random nonce. */ static void nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *saddr6, const struct in6_addr *daddr6, const struct in6_addr *taddr6, uint8_t *nonce, u_int fibnum) { struct mbuf *m; struct m_tag *mtag; struct ip6_hdr *ip6; struct nd_neighbor_solicit *nd_ns; struct ip6_moptions im6o; int icmp6len; int maxlen; caddr_t mac; if (IN6_IS_ADDR_MULTICAST(taddr6)) return; /* estimate the size of message */ maxlen = sizeof(*ip6) + sizeof(*nd_ns); maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7; KASSERT(max_linkhdr + maxlen <= MCLBYTES, ( "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)", __func__, max_linkhdr, maxlen, MCLBYTES)); if (max_linkhdr + maxlen > MHLEN) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; M_SETFIB(m, fibnum); if (daddr6 == NULL || IN6_IS_ADDR_MULTICAST(daddr6)) { m->m_flags |= M_MCAST; im6o.im6o_multicast_ifp = ifp; im6o.im6o_multicast_hlim = 255; im6o.im6o_multicast_loop = 0; } icmp6len = sizeof(*nd_ns); m->m_pkthdr.len = m->m_len = sizeof(*ip6) + icmp6len; m->m_data += max_linkhdr; /* or M_ALIGN() equivalent? */ /* fill neighbor solicitation packet */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; /* ip6->ip6_plen will be set later */ ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; if (daddr6) ip6->ip6_dst = *daddr6; else { ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL; ip6->ip6_dst.s6_addr16[1] = 0; ip6->ip6_dst.s6_addr32[1] = 0; ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_ONE; ip6->ip6_dst.s6_addr32[3] = taddr6->s6_addr32[3]; ip6->ip6_dst.s6_addr8[12] = 0xff; if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0) goto bad; } if (nonce == NULL) { struct ifaddr *ifa = NULL; /* * RFC2461 7.2.2: * "If the source address of the packet prompting the * solicitation is the same as one of the addresses assigned * to the outgoing interface, that address SHOULD be placed * in the IP Source Address of the outgoing solicitation. * Otherwise, any one of the addresses assigned to the * interface should be used." * * We use the source address for the prompting packet * (saddr6), if saddr6 belongs to the outgoing interface. * Otherwise, we perform the source address selection as usual. */ if (saddr6 != NULL) ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, saddr6); if (ifa != NULL) { /* ip6_src set already. */ ip6->ip6_src = *saddr6; ifa_free(ifa); } else { int error; - struct sockaddr_in6 dst_sa; - struct in6_addr src_in; - struct ifnet *oifp; + struct in6_addr dst6, src6; + uint32_t scopeid; - bzero(&dst_sa, sizeof(dst_sa)); - dst_sa.sin6_family = AF_INET6; - dst_sa.sin6_len = sizeof(dst_sa); - dst_sa.sin6_addr = ip6->ip6_dst; - - oifp = ifp; - error = in6_selectsrc(&dst_sa, NULL, - NULL, NULL, &oifp, &src_in); + in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid); + error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, + scopeid, ifp, &src6, NULL); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "%s: source can't be " "determined: dst=%s, error=%d\n", __func__, - ip6_sprintf(ip6buf, &dst_sa.sin6_addr), + ip6_sprintf(ip6buf, &dst6), error)); goto bad; } - ip6->ip6_src = src_in; + ip6->ip6_src = src6; } } else { /* * Source address for DAD packet must always be IPv6 * unspecified address. (0::0) * We actually don't have to 0-clear the address (we did it * above), but we do so here explicitly to make the intention * clearer. */ bzero(&ip6->ip6_src, sizeof(ip6->ip6_src)); } nd_ns = (struct nd_neighbor_solicit *)(ip6 + 1); nd_ns->nd_ns_type = ND_NEIGHBOR_SOLICIT; nd_ns->nd_ns_code = 0; nd_ns->nd_ns_reserved = 0; nd_ns->nd_ns_target = *taddr6; in6_clearscope(&nd_ns->nd_ns_target); /* XXX */ /* * Add source link-layer address option. * * spec implementation * --- --- * DAD packet MUST NOT do not add the option * there's no link layer address: * impossible do not add the option * there's link layer address: * Multicast NS MUST add one add the option * Unicast NS SHOULD add one add the option */ if (nonce == NULL && (mac = nd6_ifptomac(ifp))) { int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen; struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1); /* 8 byte alignments... */ optlen = (optlen + 7) & ~7; m->m_pkthdr.len += optlen; m->m_len += optlen; icmp6len += optlen; bzero((caddr_t)nd_opt, optlen); nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; nd_opt->nd_opt_len = optlen >> 3; bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen); } /* * Add a Nonce option (RFC 3971) to detect looped back NS messages. * This behavior is documented as Enhanced Duplicate Address * Detection in RFC 7527. * net.inet6.ip6.dad_enhanced=0 disables this. */ if (V_dad_enhanced != 0 && nonce != NULL) { int optlen = sizeof(struct nd_opt_hdr) + ND_OPT_NONCE_LEN; struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1); /* 8-byte alignment is required. */ optlen = (optlen + 7) & ~7; m->m_pkthdr.len += optlen; m->m_len += optlen; icmp6len += optlen; bzero((caddr_t)nd_opt, optlen); nd_opt->nd_opt_type = ND_OPT_NONCE; nd_opt->nd_opt_len = optlen >> 3; bcopy(nonce, (caddr_t)(nd_opt + 1), ND_OPT_NONCE_LEN); } ip6->ip6_plen = htons((u_short)icmp6len); nd_ns->nd_ns_cksum = 0; nd_ns->nd_ns_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), icmp6len); if (send_sendso_input_hook != NULL) { mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto bad; *(unsigned short *)(mtag + 1) = nd_ns->nd_ns_type; m_tag_prepend(m, mtag); } ip6_output(m, NULL, NULL, (nonce != NULL) ? IPV6_UNSPECSRC : 0, &im6o, NULL, NULL); icmp6_ifstat_inc(ifp, ifs6_out_msg); icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit); ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]); return; bad: m_freem(m); return; } #ifndef BURN_BRIDGES void nd6_ns_output(struct ifnet *ifp, const struct in6_addr *saddr6, const struct in6_addr *daddr6, const struct in6_addr *taddr6,uint8_t *nonce) { nd6_ns_output_fib(ifp, saddr6, daddr6, taddr6, nonce, RT_DEFAULT_FIB); } #endif /* * Neighbor advertisement input handling. * * Based on RFC 2461 * Based on RFC 2462 (duplicate address detection) * * the following items are not implemented yet: * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD) * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD) */ void nd6_na_input(struct mbuf *m, int off, int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_neighbor_advert *nd_na; struct in6_addr daddr6 = ip6->ip6_dst; struct in6_addr taddr6; int flags; int is_router; int is_solicited; int is_override; char *lladdr = NULL; int lladdrlen = 0; int checklink = 0; struct ifaddr *ifa; struct llentry *ln = NULL; union nd_opts ndopts; struct mbuf *chain = NULL; struct sockaddr_in6 sin6; u_char linkhdr[LLE_MAX_LINKHDR]; size_t linkhdrsize; int lladdr_off; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "nd6_na_input: invalid hlim (%d) from %s to %s on %s\n", ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); goto bad; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_na = (struct nd_neighbor_advert *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_na, struct nd_neighbor_advert *, m, off, icmp6len); if (nd_na == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif flags = nd_na->nd_na_flags_reserved; is_router = ((flags & ND_NA_FLAG_ROUTER) != 0); is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0); is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0); memset(&sin6, 0, sizeof(sin6)); taddr6 = nd_na->nd_na_target; if (in6_setscope(&taddr6, ifp, NULL)) goto bad; /* XXX: impossible */ if (IN6_IS_ADDR_MULTICAST(&taddr6)) { nd6log((LOG_ERR, "nd6_na_input: invalid target address %s\n", ip6_sprintf(ip6bufs, &taddr6))); goto bad; } if (IN6_IS_ADDR_MULTICAST(&daddr6)) if (is_solicited) { nd6log((LOG_ERR, "nd6_na_input: a solicited adv is multicasted\n")); goto bad; } icmp6len -= sizeof(*nd_na); nd6_option_init(nd_na + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "nd6_na_input: invalid ND option, ignored\n")); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_tgt_lladdr) { lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1); lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; } /* * This effectively disables the DAD check on a non-master CARP * address. */ if (ifp->if_carp) ifa = (*carp_iamatch6_p)(ifp, &taddr6); else ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); /* * Target address matches one of my interface address. * * If my address is tentative, this means that there's somebody * already using the same address as mine. This indicates DAD failure. * This is defined in RFC 2462. * * Otherwise, process as defined in RFC 2461. */ if (ifa && (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE)) { nd6_dad_na_input(ifa); ifa_free(ifa); goto freeit; } /* Just for safety, maybe unnecessary. */ if (ifa) { ifa_free(ifa); log(LOG_ERR, "nd6_na_input: duplicate IP6 address %s\n", ip6_sprintf(ip6bufs, &taddr6)); goto freeit; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "nd6_na_input: lladdrlen mismatch for %s " "(if %d, NA packet %d)\n", ip6_sprintf(ip6bufs, &taddr6), ifp->if_addrlen, lladdrlen - 2)); goto bad; } /* * If no neighbor cache entry is found, NA SHOULD silently be * discarded. */ IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(&taddr6, LLE_EXCLUSIVE, ifp); IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) { goto freeit; } if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* * If the link-layer has address, and no lladdr option came, * discard the packet. */ if (ifp->if_addrlen && lladdr == NULL) { goto freeit; } /* * Record link-layer address, and update the state. */ linkhdrsize = sizeof(linkhdr); if (lltable_calc_llheader(ifp, AF_INET6, lladdr, linkhdr, &linkhdrsize, &lladdr_off) != 0) return; if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize, lladdr_off) == 0) { ln = NULL; goto freeit; } EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); if (is_solicited) nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE); else nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); if ((ln->ln_router = is_router) != 0) { /* * This means a router's state has changed from * non-reachable to probably reachable, and might * affect the status of associated prefixes.. */ checklink = 1; } } else { int llchange; /* * Check if the link-layer address has changed or not. */ if (lladdr == NULL) llchange = 0; else { if (ln->la_flags & LLE_VALID) { if (bcmp(lladdr, ln->ll_addr, ifp->if_addrlen)) llchange = 1; else llchange = 0; } else llchange = 1; } /* * This is VERY complex. Look at it with care. * * override solicit lladdr llchange action * (L: record lladdr) * * 0 0 n -- (2c) * 0 0 y n (2b) L * 0 0 y y (1) REACHABLE->STALE * 0 1 n -- (2c) *->REACHABLE * 0 1 y n (2b) L *->REACHABLE * 0 1 y y (1) REACHABLE->STALE * 1 0 n -- (2a) * 1 0 y n (2a) L * 1 0 y y (2a) L *->STALE * 1 1 n -- (2a) *->REACHABLE * 1 1 y n (2a) L *->REACHABLE * 1 1 y y (2a) L *->REACHABLE */ if (!is_override && (lladdr != NULL && llchange)) { /* (1) */ /* * If state is REACHABLE, make it STALE. * no other updates should be done. */ if (ln->ln_state == ND6_LLINFO_REACHABLE) nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); goto freeit; } else if (is_override /* (2a) */ || (!is_override && (lladdr != NULL && !llchange)) /* (2b) */ || lladdr == NULL) { /* (2c) */ /* * Update link-local address, if any. */ if (lladdr != NULL) { linkhdrsize = sizeof(linkhdr); if (lltable_calc_llheader(ifp, AF_INET6, lladdr, linkhdr, &linkhdrsize, &lladdr_off) != 0) goto freeit; if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize, lladdr_off) == 0) { ln = NULL; goto freeit; } EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); } /* * If solicited, make the state REACHABLE. * If not solicited and the link-layer address was * changed, make it STALE. */ if (is_solicited) nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE); else { if (lladdr != NULL && llchange) nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); } } if (ln->ln_router && !is_router) { /* * The peer dropped the router flag. * Remove the sender from the Default Router List and * update the Destination Cache entries. */ struct nd_defrouter *dr; struct in6_addr *in6; struct ifnet *nd6_ifp; in6 = &ln->r_l3addr.addr6; /* * Lock to protect the default router list. * XXX: this might be unnecessary, since this function * is only called under the network software interrupt * context. However, we keep it just for safety. */ nd6_ifp = lltable_get_ifp(ln->lle_tbl); dr = defrouter_lookup(in6, nd6_ifp); if (dr) defrtrlist_del(dr); else if (ND_IFINFO(nd6_ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* * Even if the neighbor is not in the default * router list, the neighbor may be used * as a next hop for some destinations * (e.g. redirect case). So we must * call rt6_flush explicitly. */ rt6_flush(&ip6->ip6_src, ifp); } } ln->ln_router = is_router; } /* XXX - QL * Does this matter? * rt->rt_flags &= ~RTF_REJECT; */ ln->la_asked = 0; if (ln->la_hold != NULL) nd6_grab_holdchain(ln, &chain, &sin6); freeit: if (ln != NULL) LLE_WUNLOCK(ln); if (chain != NULL) nd6_flush_holdchain(ifp, ifp, chain, &sin6); if (checklink) pfxlist_onlink_check(); m_freem(m); return; bad: if (ln != NULL) LLE_WUNLOCK(ln); ICMP6STAT_INC(icp6s_badna); m_freem(m); } /* * Neighbor advertisement output handling. * * Based on RFC 2461 * * the following items are not implemented yet: * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD) * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD) * * tlladdr - 1 if include target link-layer address * sdl0 - sockaddr_dl (= proxy NA) or NULL */ static void nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0, const struct in6_addr *taddr6, u_long flags, int tlladdr, struct sockaddr *sdl0, u_int fibnum) { struct mbuf *m; struct m_tag *mtag; - struct ifnet *oifp; struct ip6_hdr *ip6; struct nd_neighbor_advert *nd_na; struct ip6_moptions im6o; - struct in6_addr src, daddr6; - struct sockaddr_in6 dst_sa; + struct in6_addr daddr6, dst6, src6; + uint32_t scopeid; + int icmp6len, maxlen, error; caddr_t mac = NULL; daddr6 = *daddr6_0; /* make a local copy for modification */ /* estimate the size of message */ maxlen = sizeof(*ip6) + sizeof(*nd_na); maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7; KASSERT(max_linkhdr + maxlen <= MCLBYTES, ( "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)", __func__, max_linkhdr, maxlen, MCLBYTES)); if (max_linkhdr + maxlen > MHLEN) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; M_SETFIB(m, fibnum); if (IN6_IS_ADDR_MULTICAST(&daddr6)) { m->m_flags |= M_MCAST; im6o.im6o_multicast_ifp = ifp; im6o.im6o_multicast_hlim = 255; im6o.im6o_multicast_loop = 0; } icmp6len = sizeof(*nd_na); m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + icmp6len; m->m_data += max_linkhdr; /* or M_ALIGN() equivalent? */ /* fill neighbor advertisement packet */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; if (IN6_IS_ADDR_UNSPECIFIED(&daddr6)) { /* reply to DAD */ daddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; daddr6.s6_addr16[1] = 0; daddr6.s6_addr32[1] = 0; daddr6.s6_addr32[2] = 0; daddr6.s6_addr32[3] = IPV6_ADDR_INT32_ONE; if (in6_setscope(&daddr6, ifp, NULL)) goto bad; flags &= ~ND_NA_FLAG_SOLICITED; } ip6->ip6_dst = daddr6; - bzero(&dst_sa, sizeof(struct sockaddr_in6)); - dst_sa.sin6_family = AF_INET6; - dst_sa.sin6_len = sizeof(struct sockaddr_in6); - dst_sa.sin6_addr = daddr6; /* * Select a source whose scope is the same as that of the dest. */ - oifp = ifp; - error = in6_selectsrc(&dst_sa, NULL, NULL, NULL, &oifp, &src); + in6_splitscope(&daddr6, &dst6, &scopeid); + error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, + scopeid, ifp, &src6, NULL); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "nd6_na_output: source can't be " "determined: dst=%s, error=%d\n", - ip6_sprintf(ip6buf, &dst_sa.sin6_addr), error)); + ip6_sprintf(ip6buf, &daddr6), error)); goto bad; } - ip6->ip6_src = src; + ip6->ip6_src = src6; nd_na = (struct nd_neighbor_advert *)(ip6 + 1); nd_na->nd_na_type = ND_NEIGHBOR_ADVERT; nd_na->nd_na_code = 0; nd_na->nd_na_target = *taddr6; in6_clearscope(&nd_na->nd_na_target); /* XXX */ /* * "tlladdr" indicates NS's condition for adding tlladdr or not. * see nd6_ns_input() for details. * Basically, if NS packet is sent to unicast/anycast addr, * target lladdr option SHOULD NOT be included. */ if (tlladdr) { /* * sdl0 != NULL indicates proxy NA. If we do proxy, use * lladdr in sdl0. If we are not proxying (sending NA for * my address) use lladdr configured for the interface. */ if (sdl0 == NULL) { if (ifp->if_carp) mac = (*carp_macmatch6_p)(ifp, m, taddr6); if (mac == NULL) mac = nd6_ifptomac(ifp); } else if (sdl0->sa_family == AF_LINK) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)sdl0; if (sdl->sdl_alen == ifp->if_addrlen) mac = LLADDR(sdl); } } if (tlladdr && mac) { int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen; struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_na + 1); /* roundup to 8 bytes alignment! */ optlen = (optlen + 7) & ~7; m->m_pkthdr.len += optlen; m->m_len += optlen; icmp6len += optlen; bzero((caddr_t)nd_opt, optlen); nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = optlen >> 3; bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen); } else flags &= ~ND_NA_FLAG_OVERRIDE; ip6->ip6_plen = htons((u_short)icmp6len); nd_na->nd_na_flags_reserved = flags; nd_na->nd_na_cksum = 0; nd_na->nd_na_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), icmp6len); if (send_sendso_input_hook != NULL) { mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto bad; *(unsigned short *)(mtag + 1) = nd_na->nd_na_type; m_tag_prepend(m, mtag); } ip6_output(m, NULL, NULL, 0, &im6o, NULL, NULL); icmp6_ifstat_inc(ifp, ifs6_out_msg); icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert); ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_ADVERT]); return; bad: m_freem(m); return; } #ifndef BURN_BRIDGES void nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0, const struct in6_addr *taddr6, u_long flags, int tlladdr, struct sockaddr *sdl0) { nd6_na_output_fib(ifp, daddr6_0, taddr6, flags, tlladdr, sdl0, RT_DEFAULT_FIB); } #endif caddr_t nd6_ifptomac(struct ifnet *ifp) { switch (ifp->if_type) { case IFT_ARCNET: case IFT_ETHER: case IFT_FDDI: case IFT_IEEE1394: case IFT_L2VLAN: case IFT_IEEE80211: case IFT_INFINIBAND: case IFT_BRIDGE: case IFT_ISO88025: return IF_LLADDR(ifp); default: return NULL; } } struct dadq { TAILQ_ENTRY(dadq) dad_list; struct ifaddr *dad_ifa; int dad_count; /* max NS to send */ int dad_ns_tcount; /* # of trials to send NS */ int dad_ns_ocount; /* NS sent so far */ int dad_ns_icount; int dad_na_icount; int dad_ns_lcount; /* looped back NS */ int dad_loopbackprobe; /* probing state for loopback detection */ struct callout dad_timer_ch; struct vnet *dad_vnet; u_int dad_refcnt; #define ND_OPT_NONCE_LEN32 \ ((ND_OPT_NONCE_LEN + sizeof(uint32_t) - 1)/sizeof(uint32_t)) uint32_t dad_nonce[ND_OPT_NONCE_LEN32]; }; static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq); static VNET_DEFINE(struct rwlock, dad_rwlock); #define V_dadq VNET(dadq) #define V_dad_rwlock VNET(dad_rwlock) #define DADQ_RLOCK() rw_rlock(&V_dad_rwlock) #define DADQ_RUNLOCK() rw_runlock(&V_dad_rwlock) #define DADQ_WLOCK() rw_wlock(&V_dad_rwlock) #define DADQ_WUNLOCK() rw_wunlock(&V_dad_rwlock) static void nd6_dad_add(struct dadq *dp) { DADQ_WLOCK(); TAILQ_INSERT_TAIL(&V_dadq, dp, dad_list); DADQ_WUNLOCK(); } static void nd6_dad_del(struct dadq *dp) { DADQ_WLOCK(); TAILQ_REMOVE(&V_dadq, dp, dad_list); DADQ_WUNLOCK(); nd6_dad_rele(dp); } static struct dadq * nd6_dad_find(struct ifaddr *ifa, struct nd_opt_nonce *n) { struct dadq *dp; DADQ_RLOCK(); TAILQ_FOREACH(dp, &V_dadq, dad_list) { if (dp->dad_ifa != ifa) continue; /* * Skip if the nonce matches the received one. * +2 in the length is required because of type and * length fields are included in a header. */ if (n != NULL && n->nd_opt_nonce_len == (ND_OPT_NONCE_LEN + 2) / 8 && memcmp(&n->nd_opt_nonce[0], &dp->dad_nonce[0], ND_OPT_NONCE_LEN) == 0) { dp->dad_ns_lcount++; continue; } refcount_acquire(&dp->dad_refcnt); break; } DADQ_RUNLOCK(); return (dp); } static void nd6_dad_starttimer(struct dadq *dp, int ticks, int send_ns) { if (send_ns != 0) nd6_dad_ns_output(dp); callout_reset(&dp->dad_timer_ch, ticks, (void (*)(void *))nd6_dad_timer, (void *)dp); } static void nd6_dad_stoptimer(struct dadq *dp) { callout_drain(&dp->dad_timer_ch); } static void nd6_dad_rele(struct dadq *dp) { if (refcount_release(&dp->dad_refcnt)) { ifa_free(dp->dad_ifa); free(dp, M_IP6NDP); } } void nd6_dad_init(void) { rw_init(&V_dad_rwlock, "nd6 DAD queue"); TAILQ_INIT(&V_dadq); } /* * Start Duplicate Address Detection (DAD) for specified interface address. */ void nd6_dad_start(struct ifaddr *ifa, int delay) { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct dadq *dp; char ip6buf[INET6_ADDRSTRLEN]; int send_ns; /* * If we don't need DAD, don't do it. * There are several cases: * - DAD is disabled (ip6_dad_count == 0) * - the interface address is anycast */ if (!(ia->ia6_flags & IN6_IFF_TENTATIVE)) { log(LOG_DEBUG, "nd6_dad_start: called with non-tentative address " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); return; } if (ia->ia6_flags & IN6_IFF_ANYCAST) { ia->ia6_flags &= ~IN6_IFF_TENTATIVE; return; } if (!V_ip6_dad_count) { ia->ia6_flags &= ~IN6_IFF_TENTATIVE; return; } if (ifa->ifa_ifp == NULL) panic("nd6_dad_start: ifa->ifa_ifp == NULL"); if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_NO_DAD) { ia->ia6_flags &= ~IN6_IFF_TENTATIVE; return; } if (!(ifa->ifa_ifp->if_flags & IFF_UP) || !(ifa->ifa_ifp->if_drv_flags & IFF_DRV_RUNNING) || (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)) { ia->ia6_flags |= IN6_IFF_TENTATIVE; return; } if ((dp = nd6_dad_find(ifa, NULL)) != NULL) { /* * DAD already in progress. Let the existing entry * to finish it. */ return; } dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT | M_ZERO); if (dp == NULL) { log(LOG_ERR, "nd6_dad_start: memory allocation failed for " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); return; } callout_init(&dp->dad_timer_ch, 0); #ifdef VIMAGE dp->dad_vnet = curvnet; #endif nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); /* * Send NS packet for DAD, ip6_dad_count times. * Note that we must delay the first transmission, if this is the * first packet to be sent from the interface after interface * (re)initialization. */ dp->dad_ifa = ifa; ifa_ref(dp->dad_ifa); dp->dad_count = V_ip6_dad_count; dp->dad_ns_icount = dp->dad_na_icount = 0; dp->dad_ns_ocount = dp->dad_ns_tcount = 0; dp->dad_ns_lcount = dp->dad_loopbackprobe = 0; refcount_init(&dp->dad_refcnt, 1); nd6_dad_add(dp); send_ns = 0; if (delay == 0) { send_ns = 1; delay = (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000; } nd6_dad_starttimer(dp, delay, send_ns); } /* * terminate DAD unconditionally. used for address removals. */ void nd6_dad_stop(struct ifaddr *ifa) { struct dadq *dp; dp = nd6_dad_find(ifa, NULL); if (!dp) { /* DAD wasn't started yet */ return; } nd6_dad_stoptimer(dp); /* * The DAD queue entry may have been removed by nd6_dad_timer() while * we were waiting for it to stop, so re-do the lookup. */ nd6_dad_rele(dp); if (nd6_dad_find(ifa, NULL) == NULL) return; nd6_dad_del(dp); nd6_dad_rele(dp); } static void nd6_dad_timer(struct dadq *dp) { CURVNET_SET(dp->dad_vnet); struct ifaddr *ifa = dp->dad_ifa; struct ifnet *ifp = dp->dad_ifa->ifa_ifp; struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; char ip6buf[INET6_ADDRSTRLEN]; /* Sanity check */ if (ia == NULL) { log(LOG_ERR, "nd6_dad_timer: called with null parameter\n"); goto err; } if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) { /* Do not need DAD for ifdisabled interface. */ log(LOG_ERR, "nd6_dad_timer: cancel DAD on %s because of " "ND6_IFF_IFDISABLED.\n", ifp->if_xname); goto err; } if (ia->ia6_flags & IN6_IFF_DUPLICATED) { log(LOG_ERR, "nd6_dad_timer: called with duplicated address " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); goto err; } if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) { log(LOG_ERR, "nd6_dad_timer: called with non-tentative address " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); goto err; } /* Stop DAD if the interface is down even after dad_maxtry attempts. */ if ((dp->dad_ns_tcount > V_dad_maxtry) && (((ifp->if_flags & IFF_UP) == 0) || ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))) { nd6log((LOG_INFO, "%s: could not run DAD " "because the interface was down or not running.\n", if_name(ifa->ifa_ifp))); goto err; } /* Need more checks? */ if (dp->dad_ns_ocount < dp->dad_count) { /* * We have more NS to go. Send NS packet for DAD. */ nd6_dad_starttimer(dp, (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, 1); goto done; } else { /* * We have transmitted sufficient number of DAD packets. * See what we've got. */ if (dp->dad_ns_icount > 0 || dp->dad_na_icount > 0) /* We've seen NS or NA, means DAD has failed. */ nd6_dad_duplicated(ifa, dp); else if (V_dad_enhanced != 0 && dp->dad_ns_lcount > 0 && dp->dad_ns_lcount > dp->dad_loopbackprobe) { /* * Sec. 4.1 in RFC 7527 requires transmission of * additional probes until the loopback condition * becomes clear when a looped back probe is detected. */ log(LOG_ERR, "%s: a looped back NS message is " "detected during DAD for %s. " "Another DAD probes are being sent.\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, IFA_IN6(ifa))); dp->dad_loopbackprobe = dp->dad_ns_lcount; /* * Send an NS immediately and increase dad_count by * V_nd6_mmaxtries - 1. */ dp->dad_count = dp->dad_ns_ocount + V_nd6_mmaxtries - 1; nd6_dad_starttimer(dp, (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, 1); goto done; } else { /* * We are done with DAD. No NA came, no NS came. * No duplicate address found. Check IFDISABLED flag * again in case that it is changed between the * beginning of this function and here. */ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) == 0) ia->ia6_flags &= ~IN6_IFF_TENTATIVE; nd6log((LOG_DEBUG, "%s: DAD complete for %s - no duplicates found\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); if (dp->dad_ns_lcount > 0) log(LOG_ERR, "%s: DAD completed while " "a looped back NS message is detected " "during DAD for %s.\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, IFA_IN6(ifa))); } } err: nd6_dad_del(dp); done: CURVNET_RESTORE(); } static void nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp) { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct ifnet *ifp; char ip6buf[INET6_ADDRSTRLEN]; log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: " "NS in/out/loopback=%d/%d/%d, NA in=%d\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_ns_lcount, dp->dad_na_icount); ia->ia6_flags &= ~IN6_IFF_TENTATIVE; ia->ia6_flags |= IN6_IFF_DUPLICATED; ifp = ifa->ifa_ifp; log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n", if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)); log(LOG_ERR, "%s: manual intervention required\n", if_name(ifp)); /* * If the address is a link-local address formed from an interface * identifier based on the hardware address which is supposed to be * uniquely assigned (e.g., EUI-64 for an Ethernet interface), IP * operation on the interface SHOULD be disabled. * [RFC 4862, Section 5.4.5] */ if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) { struct in6_addr in6; /* * To avoid over-reaction, we only apply this logic when we are * very sure that hardware addresses are supposed to be unique. */ switch (ifp->if_type) { case IFT_ETHER: case IFT_FDDI: case IFT_ATM: case IFT_IEEE1394: case IFT_IEEE80211: case IFT_INFINIBAND: in6 = ia->ia_addr.sin6_addr; if (in6_get_hw_ifid(ifp, &in6) == 0 && IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &in6)) { ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED; log(LOG_ERR, "%s: possible hardware address " "duplication detected, disable IPv6\n", if_name(ifp)); } break; } } } static void nd6_dad_ns_output(struct dadq *dp) { struct in6_ifaddr *ia = (struct in6_ifaddr *)dp->dad_ifa; struct ifnet *ifp = dp->dad_ifa->ifa_ifp; int i; dp->dad_ns_tcount++; if ((ifp->if_flags & IFF_UP) == 0) { return; } if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { return; } dp->dad_ns_ocount++; if (V_dad_enhanced != 0) { for (i = 0; i < ND_OPT_NONCE_LEN32; i++) dp->dad_nonce[i] = arc4random(); /* * XXXHRS: Note that in the case that * DupAddrDetectTransmits > 1, multiple NS messages with * different nonces can be looped back in an unexpected * order. The current implementation recognizes only * the latest nonce on the sender side. Practically it * should work well in almost all cases. */ } nd6_ns_output(ifp, NULL, NULL, &ia->ia_addr.sin6_addr, (uint8_t *)&dp->dad_nonce[0]); } static void nd6_dad_ns_input(struct ifaddr *ifa, struct nd_opt_nonce *ndopt_nonce) { struct in6_ifaddr *ia; struct ifnet *ifp; const struct in6_addr *taddr6; struct dadq *dp; if (ifa == NULL) panic("ifa == NULL in nd6_dad_ns_input"); ia = (struct in6_ifaddr *)ifa; ifp = ifa->ifa_ifp; taddr6 = &ia->ia_addr.sin6_addr; /* Ignore Nonce option when Enhanced DAD is disabled. */ if (V_dad_enhanced == 0) ndopt_nonce = NULL; dp = nd6_dad_find(ifa, ndopt_nonce); if (dp == NULL) return; dp->dad_ns_icount++; nd6_dad_rele(dp); } static void nd6_dad_na_input(struct ifaddr *ifa) { struct dadq *dp; if (ifa == NULL) panic("ifa == NULL in nd6_dad_na_input"); dp = nd6_dad_find(ifa, NULL); if (dp != NULL) { dp->dad_na_icount++; nd6_dad_rele(dp); } } Index: projects/clang380-import/sys/netinet6/raw_ip6.c =================================================================== --- projects/clang380-import/sys/netinet6/raw_ip6.c (revision 293686) +++ projects/clang380-import/sys/netinet6/raw_ip6.c (revision 293687) @@ -1,924 +1,905 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)raw_ip.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ipsec.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #endif /* IPSEC */ #include #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) /* * Raw interface to IP6 protocol. */ VNET_DECLARE(struct inpcbhead, ripcb); VNET_DECLARE(struct inpcbinfo, ripcbinfo); #define V_ripcb VNET(ripcb) #define V_ripcbinfo VNET(ripcbinfo) extern u_long rip_sendspace; extern u_long rip_recvspace; VNET_PCPUSTAT_DEFINE(struct rip6stat, rip6stat); VNET_PCPUSTAT_SYSINIT(rip6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(rip6stat); #endif /* VIMAGE */ /* * Hooks for multicast routing. They all default to NULL, so leave them not * initialized and rely on BSS being set to 0. */ /* * The socket used to communicate with the multicast routing daemon. */ VNET_DEFINE(struct socket *, ip6_mrouter); /* * The various mrouter functions. */ int (*ip6_mrouter_set)(struct socket *, struct sockopt *); int (*ip6_mrouter_get)(struct socket *, struct sockopt *); int (*ip6_mrouter_done)(void); int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *); int (*mrt6_ioctl)(u_long, caddr_t); /* * Setup generic address and protocol structures for raw_input routine, then * pass them along with mbuf chain. */ int rip6_input(struct mbuf **mp, int *offp, int proto) { struct ifnet *ifp; struct mbuf *m = *mp; register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); register struct inpcb *in6p; struct inpcb *last = 0; struct mbuf *opts = NULL; struct sockaddr_in6 fromsa; RIP6STAT_INC(rip6s_ipackets); init_sin6(&fromsa, m); /* general init */ ifp = m->m_pkthdr.rcvif; INP_INFO_RLOCK(&V_ripcbinfo); LIST_FOREACH(in6p, &V_ripcb, inp_list) { /* XXX inp locking */ if ((in6p->inp_vflag & INP_IPV6) == 0) continue; if (in6p->inp_ip_p && in6p->inp_ip_p != proto) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; if (jailed_without_vnet(in6p->inp_cred)) { /* * Allow raw socket in jail to receive multicast; * assume process had PRIV_NETINET_RAW at attach, * and fall through into normal filter path if so. */ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && prison_check_ip6(in6p->inp_cred, &ip6->ip6_dst) != 0) continue; } INP_RLOCK(in6p); if (in6p->in6p_cksum != -1) { RIP6STAT_INC(rip6s_isum); if (in6_cksum(m, proto, *offp, m->m_pkthdr.len - *offp)) { INP_RUNLOCK(in6p); RIP6STAT_INC(rip6s_badsum); continue; } } /* * If this raw socket has multicast state, and we * have received a multicast, check if this socket * should receive it, as multicast filtering is now * the responsibility of the transport layer. */ if (in6p->in6p_moptions && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { /* * If the incoming datagram is for MLD, allow it * through unconditionally to the raw socket. * * Use the M_RTALERT_MLD flag to check for MLD * traffic without having to inspect the mbuf chain * more deeply, as all MLDv1/v2 host messages MUST * contain the Router Alert option. * * In the case of MLDv1, we may not have explicitly * joined the group, and may have set IFF_ALLMULTI * on the interface. im6o_mc_filter() may discard * control traffic we actually need to see. * * Userland multicast routing daemons should continue * filter the control traffic appropriately. */ int blocked; blocked = MCAST_PASS; if ((m->m_flags & M_RTALERT_MLD) == 0) { struct sockaddr_in6 mcaddr; bzero(&mcaddr, sizeof(struct sockaddr_in6)); mcaddr.sin6_len = sizeof(struct sockaddr_in6); mcaddr.sin6_family = AF_INET6; mcaddr.sin6_addr = ip6->ip6_dst; blocked = im6o_mc_filter(in6p->in6p_moptions, ifp, (struct sockaddr *)&mcaddr, (struct sockaddr *)&fromsa); } if (blocked != MCAST_PASS) { IP6STAT_INC(ip6s_notmember); INP_RUNLOCK(in6p); continue; } } if (last != NULL) { struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); #ifdef IPSEC /* * Check AH/ESP integrity. */ if (n && ipsec6_in_reject(n, last)) { m_freem(n); /* Do not inject data into pcb. */ } else #endif /* IPSEC */ if (n) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, n, &opts); /* strip intermediate headers */ m_adj(n, *offp); if (sbappendaddr(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, n, opts) == 0) { m_freem(n); if (opts) m_freem(opts); RIP6STAT_INC(rip6s_fullsock); } else sorwakeup(last->inp_socket); opts = NULL; } INP_RUNLOCK(last); } last = in6p; } INP_INFO_RUNLOCK(&V_ripcbinfo); #ifdef IPSEC /* * Check AH/ESP integrity. */ if ((last != NULL) && ipsec6_in_reject(m, last)) { m_freem(m); IP6STAT_DEC(ip6s_delivered); /* Do not inject data into pcb. */ INP_RUNLOCK(last); } else #endif /* IPSEC */ if (last != NULL) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, m, &opts); /* Strip intermediate headers. */ m_adj(m, *offp); if (sbappendaddr(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { m_freem(m); if (opts) m_freem(opts); RIP6STAT_INC(rip6s_fullsock); } else sorwakeup(last->inp_socket); INP_RUNLOCK(last); } else { RIP6STAT_INC(rip6s_nosock); if (m->m_flags & M_MCAST) RIP6STAT_INC(rip6s_nosockmcast); if (proto == IPPROTO_NONE) m_freem(m); else { char *prvnxtp = ip6_get_prevhdr(m, *offp); /* XXX */ icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, prvnxtp - mtod(m, char *)); } IP6STAT_DEC(ip6s_delivered); } return (IPPROTO_DONE); } void rip6_ctlinput(int cmd, struct sockaddr *sa, void *d) { struct ip6_hdr *ip6; struct mbuf *m; int off = 0; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; void *cmdarg; struct inpcb *(*notify)(struct inpcb *, int) = in6_rtchange; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; if (PRC_IS_REDIRECT(cmd)) notify = in6_rtchange, d = NULL; else if (cmd == PRC_HOSTDEAD) d = NULL; else if (inet6ctlerrmap[cmd] == 0) return; /* * If the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; cmdarg = ip6cp->ip6c_cmdarg; sa6_src = ip6cp->ip6c_src; } else { m = NULL; ip6 = NULL; cmdarg = NULL; sa6_src = &sa6_any; } (void) in6_pcbnotify(&V_ripcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); } /* * Generate IPv6 header and pass packet to ip6_output. Tack on options user * may have setup with control call. */ int rip6_output(struct mbuf *m, struct socket *so, ...) { struct mbuf *control; struct m_tag *mtag; struct sockaddr_in6 *dstsock; struct in6_addr *dst; struct ip6_hdr *ip6; struct inpcb *in6p; u_int plen = m->m_pkthdr.len; int error = 0; struct ip6_pktopts opt, *optp; struct ifnet *oifp = NULL; int type = 0, code = 0; /* for ICMPv6 output statistics only */ int scope_ambiguous = 0; int use_defzone = 0; + int hlim = 0; struct in6_addr in6a; va_list ap; va_start(ap, so); dstsock = va_arg(ap, struct sockaddr_in6 *); control = va_arg(ap, struct mbuf *); va_end(ap); in6p = sotoinpcb(so); INP_WLOCK(in6p); dst = &dstsock->sin6_addr; if (control != NULL) { if ((error = ip6_setpktopts(control, &opt, in6p->in6p_outputopts, so->so_cred, so->so_proto->pr_protocol)) != 0) { goto bad; } optp = &opt; } else optp = in6p->in6p_outputopts; /* * Check and convert scope zone ID into internal form. * * XXX: we may still need to determine the zone later. */ if (!(so->so_state & SS_ISCONNECTED)) { if (!optp || !optp->ip6po_pktinfo || !optp->ip6po_pktinfo->ipi6_ifindex) use_defzone = V_ip6_use_defzone; if (dstsock->sin6_scope_id == 0 && !use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(dstsock, use_defzone)) != 0) goto bad; } /* * For an ICMPv6 packet, we should know its type and code to update * statistics. */ if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { struct icmp6_hdr *icmp6; if (m->m_len < sizeof(struct icmp6_hdr) && (m = m_pullup(m, sizeof(struct icmp6_hdr))) == NULL) { error = ENOBUFS; goto bad; } icmp6 = mtod(m, struct icmp6_hdr *); type = icmp6->icmp6_type; code = icmp6->icmp6_code; } M_PREPEND(m, sizeof(*ip6), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto bad; } ip6 = mtod(m, struct ip6_hdr *); /* * Source address selection. */ - error = in6_selectsrc(dstsock, optp, in6p, so->so_cred, - &oifp, &in6a); + error = in6_selectsrc_socket(dstsock, optp, in6p, so->so_cred, + scope_ambiguous, &in6a, &hlim); + if (error) goto bad; error = prison_check_ip6(in6p->inp_cred, &in6a); if (error != 0) goto bad; ip6->ip6_src = in6a; - if (oifp && scope_ambiguous) { - /* - * Application should provide a proper zone ID or the use of - * default zone IDs should be enabled. Unfortunately, some - * applications do not behave as it should, so we need a - * workaround. Even if an appropriate ID is not determined - * (when it's required), if we can determine the outgoing - * interface. determine the zone ID based on the interface. - */ - error = in6_setscope(&dstsock->sin6_addr, oifp, NULL); - if (error != 0) - goto bad; - } ip6->ip6_dst = dstsock->sin6_addr; /* * Fill in the rest of the IPv6 header fields. */ ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (in6p->inp_flow & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); /* * ip6_plen will be filled in ip6_output, so not fill it here. */ ip6->ip6_nxt = in6p->inp_ip_p; - ip6->ip6_hlim = in6_selecthlim(in6p, oifp); + ip6->ip6_hlim = hlim; if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 || in6p->in6p_cksum != -1) { struct mbuf *n; int off; u_int16_t *p; /* Compute checksum. */ if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) off = offsetof(struct icmp6_hdr, icmp6_cksum); else off = in6p->in6p_cksum; if (plen < off + 1) { error = EINVAL; goto bad; } off += sizeof(struct ip6_hdr); n = m; while (n && n->m_len <= off) { off -= n->m_len; n = n->m_next; } if (!n) goto bad; p = (u_int16_t *)(mtod(n, caddr_t) + off); *p = 0; *p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen); } /* * Send RA/RS messages to user land for protection, before sending * them to rtadvd/rtsol. */ if ((send_sendso_input_hook != NULL) && so->so_proto->pr_protocol == IPPROTO_ICMPV6) { switch (type) { case ND_ROUTER_ADVERT: case ND_ROUTER_SOLICIT: mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto bad; m_tag_prepend(m, mtag); } } error = ip6_output(m, optp, NULL, 0, in6p->in6p_moptions, &oifp, in6p); if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (oifp) icmp6_ifoutstat_inc(oifp, type, code); ICMP6STAT_INC(icp6s_outhist[type]); } else RIP6STAT_INC(rip6s_opackets); goto freectl; bad: if (m) m_freem(m); freectl: if (control != NULL) { ip6_clearpktopts(&opt, -1); m_freem(control); } INP_WUNLOCK(in6p); return (error); } /* * Raw IPv6 socket option processing. */ int rip6_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp; int error; if (sopt->sopt_level == IPPROTO_ICMPV6) /* * XXX: is it better to call icmp6_ctloutput() directly * from protosw? */ return (icmp6_ctloutput(so, sopt)); else if (sopt->sopt_level != IPPROTO_IPV6) { if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_SETFIB) { inp = sotoinpcb(so); INP_WLOCK(inp); inp->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(inp); return (0); } return (EINVAL); } error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case MRT6_INIT: case MRT6_DONE: case MRT6_ADD_MIF: case MRT6_DEL_MIF: case MRT6_ADD_MFC: case MRT6_DEL_MFC: case MRT6_PIM: error = ip6_mrouter_get ? ip6_mrouter_get(so, sopt) : EOPNOTSUPP; break; case IPV6_CHECKSUM: error = ip6_raw_ctloutput(so, sopt); break; default: error = ip6_ctloutput(so, sopt); break; } break; case SOPT_SET: switch (sopt->sopt_name) { case MRT6_INIT: case MRT6_DONE: case MRT6_ADD_MIF: case MRT6_DEL_MIF: case MRT6_ADD_MFC: case MRT6_DEL_MFC: case MRT6_PIM: error = ip6_mrouter_set ? ip6_mrouter_set(so, sopt) : EOPNOTSUPP; break; case IPV6_CHECKSUM: error = ip6_raw_ctloutput(so, sopt); break; default: error = ip6_ctloutput(so, sopt); break; } break; } return (error); } static int rip6_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct icmp6_filter *filter; int error; inp = sotoinpcb(so); KASSERT(inp == NULL, ("rip6_attach: inp != NULL")); error = priv_check(td, PRIV_NETINET_RAW); if (error) return (error); error = soreserve(so, rip_sendspace, rip_recvspace); if (error) return (error); filter = malloc(sizeof(struct icmp6_filter), M_PCB, M_NOWAIT); if (filter == NULL) return (ENOMEM); INP_INFO_WLOCK(&V_ripcbinfo); error = in_pcballoc(so, &V_ripcbinfo); if (error) { INP_INFO_WUNLOCK(&V_ripcbinfo); free(filter, M_PCB); return (error); } inp = (struct inpcb *)so->so_pcb; INP_INFO_WUNLOCK(&V_ripcbinfo); inp->inp_vflag |= INP_IPV6; inp->inp_ip_p = (long)proto; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; inp->in6p_icmp6filt = filter; ICMP6_FILTER_SETPASSALL(inp->in6p_icmp6filt); INP_WUNLOCK(inp); return (0); } static void rip6_detach(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_detach: inp == NULL")); if (so == V_ip6_mrouter && ip6_mrouter_done) ip6_mrouter_done(); /* xxx: RSVP */ INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); free(inp->in6p_icmp6filt, M_PCB); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); } /* XXXRW: This can't ever be called. */ static void rip6_abort(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_abort: inp == NULL")); soisdisconnected(so); } static void rip6_close(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_close: inp == NULL")); soisdisconnected(so); } static int rip6_disconnect(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_disconnect: inp == NULL")); if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); inp->in6p_faddr = in6addr_any; rip6_abort(so); return (0); } static int rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct ifaddr *ifa = NULL; int error = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_bind: inp == NULL")); if (nam->sa_len != sizeof(*addr)) return (EINVAL); if ((error = prison_check_ip6(td->td_ucred, &addr->sin6_addr)) != 0) return (error); if (TAILQ_EMPTY(&V_ifnet) || addr->sin6_family != AF_INET6) return (EADDRNOTAVAIL); if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0) return (error); if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) && (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == NULL) return (EADDRNOTAVAIL); if (ifa != NULL && ((struct in6_ifaddr *)ifa)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { ifa_free(ifa); return (EADDRNOTAVAIL); } if (ifa != NULL) ifa_free(ifa); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); inp->in6p_laddr = addr->sin6_addr; INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct in6_addr in6a; - struct ifnet *ifp = NULL; int error = 0, scope_ambiguous = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_connect: inp == NULL")); if (nam->sa_len != sizeof(*addr)) return (EINVAL); if (TAILQ_EMPTY(&V_ifnet)) return (EADDRNOTAVAIL); if (addr->sin6_family != AF_INET6) return (EAFNOSUPPORT); /* * Application should provide a proper zone ID or the use of default * zone IDs should be enabled. Unfortunately, some applications do * not behave as it should, so we need a workaround. Even if an * appropriate ID is not determined, we'll see if we can determine * the outgoing interface. If we can, determine the zone ID based on * the interface below. */ if (addr->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0) return (error); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); /* Source address selection. XXX: need pcblookup? */ - error = in6_selectsrc(addr, inp->in6p_outputopts, - inp, so->so_cred, &ifp, &in6a); + error = in6_selectsrc_socket(addr, inp->in6p_outputopts, + inp, so->so_cred, scope_ambiguous, &in6a, NULL); if (error) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (error); } - /* XXX: see above */ - if (ifp && scope_ambiguous && - (error = in6_setscope(&addr->sin6_addr, ifp, NULL)) != 0) { - INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_ripcbinfo); - return (error); - } inp->in6p_faddr = addr->sin6_addr; inp->in6p_laddr = in6a; soisconnected(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip6_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return (0); } static int rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 tmp; struct sockaddr_in6 *dst; int ret; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_send: inp == NULL")); /* Always copy sockaddr to avoid overwrites. */ /* Unlocked read. */ if (so->so_state & SS_ISCONNECTED) { if (nam) { m_freem(m); return (EISCONN); } /* XXX */ bzero(&tmp, sizeof(tmp)); tmp.sin6_family = AF_INET6; tmp.sin6_len = sizeof(struct sockaddr_in6); INP_RLOCK(inp); bcopy(&inp->in6p_faddr, &tmp.sin6_addr, sizeof(struct in6_addr)); INP_RUNLOCK(inp); dst = &tmp; } else { if (nam == NULL) { m_freem(m); return (ENOTCONN); } if (nam->sa_len != sizeof(struct sockaddr_in6)) { m_freem(m); return (EINVAL); } tmp = *(struct sockaddr_in6 *)nam; dst = &tmp; if (dst->sin6_family == AF_UNSPEC) { /* * XXX: we allow this case for backward * compatibility to buggy applications that * rely on old (and wrong) kernel behavior. */ log(LOG_INFO, "rip6 SEND: address family is " "unspec. Assume AF_INET6\n"); dst->sin6_family = AF_INET6; } else if (dst->sin6_family != AF_INET6) { m_freem(m); return(EAFNOSUPPORT); } } ret = rip6_output(m, so, dst, control); return (ret); } struct pr_usrreqs rip6_usrreqs = { .pru_abort = rip6_abort, .pru_attach = rip6_attach, .pru_bind = rip6_bind, .pru_connect = rip6_connect, .pru_control = in6_control, .pru_detach = rip6_detach, .pru_disconnect = rip6_disconnect, .pru_peeraddr = in6_getpeeraddr, .pru_send = rip6_send, .pru_shutdown = rip6_shutdown, .pru_sockaddr = in6_getsockaddr, .pru_close = rip6_close, }; Index: projects/clang380-import/sys/netinet6/udp6_usrreq.c =================================================================== --- projects/clang380-import/sys/netinet6/udp6_usrreq.c (revision 293686) +++ projects/clang380-import/sys/netinet6/udp6_usrreq.c (revision 293687) @@ -1,1285 +1,1279 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * Copyright (c) 2010-2011 Juniper Networks, Inc. * Copyright (c) 2014 Kevin Lo * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $ * $KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipfw.h" #include "opt_ipsec.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #endif /* IPSEC */ #include /* * UDP protocol implementation. * Per RFC 768, August, 1980. */ extern struct protosw inetsw[]; static void udp6_detach(struct socket *so); static int udp6_append(struct inpcb *inp, struct mbuf *n, int off, struct sockaddr_in6 *fromsa) { struct socket *so; struct mbuf *opts; struct udpcb *up; INP_LOCK_ASSERT(inp); /* * Engage the tunneling protocol. */ up = intoudpcb(inp); if (up->u_tun_func != NULL) { in_pcbref(inp); INP_RUNLOCK(inp); (*up->u_tun_func)(n, off, inp, (struct sockaddr *)fromsa, up->u_tun_ctx); INP_RLOCK(inp); return (in_pcbrele_rlocked(inp)); } #ifdef IPSEC /* Check AH/ESP integrity. */ if (ipsec6_in_reject(n, inp)) { m_freem(n); return (0); } #endif /* IPSEC */ #ifdef MAC if (mac_inpcb_check_deliver(inp, n) != 0) { m_freem(n); return (0); } #endif opts = NULL; if (inp->inp_flags & INP_CONTROLOPTS || inp->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(inp, n, &opts); m_adj(n, off + sizeof(struct udphdr)); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_rcv); if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)fromsa, n, opts) == 0) { SOCKBUF_UNLOCK(&so->so_rcv); m_freem(n); if (opts) m_freem(opts); UDPSTAT_INC(udps_fullsock); } else sorwakeup_locked(so); return (0); } int udp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ifnet *ifp; struct ip6_hdr *ip6; struct udphdr *uh; struct inpcb *inp; struct inpcbinfo *pcbinfo; struct udpcb *up; int off = *offp; int cscov_partial; int plen, ulen; struct sockaddr_in6 fromsa; struct m_tag *fwd_tag; uint16_t uh_sum; uint8_t nxt; ifp = m->m_pkthdr.rcvif; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE); ip6 = mtod(m, struct ip6_hdr *); uh = (struct udphdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(*uh)); if (!uh) return (IPPROTO_DONE); #endif UDPSTAT_INC(udps_ipackets); /* * Destination port of 0 is illegal, based on RFC768. */ if (uh->uh_dport == 0) goto badunlocked; plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6); ulen = ntohs((u_short)uh->uh_ulen); nxt = proto; cscov_partial = (nxt == IPPROTO_UDPLITE) ? 1 : 0; if (nxt == IPPROTO_UDPLITE) { /* Zero means checksum over the complete packet. */ if (ulen == 0) ulen = plen; if (ulen == plen) cscov_partial = 0; if ((ulen < sizeof(struct udphdr)) || (ulen > plen)) { /* XXX: What is the right UDPLite MIB counter? */ goto badunlocked; } if (uh->uh_sum == 0) { /* XXX: What is the right UDPLite MIB counter? */ goto badunlocked; } } else { if ((ulen < sizeof(struct udphdr)) || (plen != ulen)) { UDPSTAT_INC(udps_badlen); goto badunlocked; } if (uh->uh_sum == 0) { UDPSTAT_INC(udps_nosum); goto badunlocked; } } if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) && !cscov_partial) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) uh_sum = m->m_pkthdr.csum_data; else uh_sum = in6_cksum_pseudo(ip6, ulen, nxt, m->m_pkthdr.csum_data); uh_sum ^= 0xffff; } else uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen); if (uh_sum != 0) { UDPSTAT_INC(udps_badsum); goto badunlocked; } /* * Construct sockaddr format source address. */ init_sin6(&fromsa, m); fromsa.sin6_port = uh->uh_sport; pcbinfo = udp_get_inpcbinfo(nxt); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct inpcb *last; struct inpcbhead *pcblist; struct ip6_moptions *imo; INP_INFO_RLOCK(pcbinfo); /* * In the event that laddr should be set to the link-local * address (this happens in RIPng), the multicast address * specified in the received packet will not match laddr. To * handle this situation, matching is relaxed if the * receiving interface is the same as one specified in the * socket and if the destination multicast address matches * one of the multicast groups specified in the socket. */ /* * KAME note: traditionally we dropped udpiphdr from mbuf * here. We need udphdr for IPsec processing so we do that * later. */ pcblist = udp_get_pcblist(nxt); last = NULL; LIST_FOREACH(inp, pcblist, inp_list) { if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (inp->inp_lport != uh->uh_dport) continue; if (inp->inp_fport != 0 && inp->inp_fport != uh->uh_sport) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst)) continue; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src) || inp->inp_fport != uh->uh_sport) continue; } /* * XXXRW: Because we weren't holding either the inpcb * or the hash lock when we checked for a match * before, we should probably recheck now that the * inpcb lock is (supposed to be) held. */ /* * Handle socket delivery policy for any-source * and source-specific multicast. [RFC3678] */ imo = inp->in6p_moptions; if (imo && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct sockaddr_in6 mcaddr; int blocked; INP_RLOCK(inp); bzero(&mcaddr, sizeof(struct sockaddr_in6)); mcaddr.sin6_len = sizeof(struct sockaddr_in6); mcaddr.sin6_family = AF_INET6; mcaddr.sin6_addr = ip6->ip6_dst; blocked = im6o_mc_filter(imo, ifp, (struct sockaddr *)&mcaddr, (struct sockaddr *)&fromsa); if (blocked != MCAST_PASS) { if (blocked == MCAST_NOTGMEMBER) IP6STAT_INC(ip6s_notmember); if (blocked == MCAST_NOTSMEMBER || blocked == MCAST_MUTED) UDPSTAT_INC(udps_filtermcast); INP_RUNLOCK(inp); /* XXX */ continue; } INP_RUNLOCK(inp); } if (last != NULL) { struct mbuf *n; if ((n = m_copy(m, 0, M_COPYALL)) != NULL) { INP_RLOCK(last); UDP_PROBE(receive, NULL, last, ip6, last, uh); if (udp6_append(last, n, off, &fromsa)) goto inp_lost; INP_RUNLOCK(last); } } last = inp; /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR * socket options set. This heuristic avoids * searching through all pcbs in the common case of a * non-shared port. It assumes that an application * will never clear these options after setting them. */ if ((last->inp_socket->so_options & (SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } if (last == NULL) { /* * No matching pcb found; discard datagram. (No need * to send an ICMP Port Unreachable for a broadcast * or multicast datgram.) */ UDPSTAT_INC(udps_noport); UDPSTAT_INC(udps_noportmcast); goto badheadlocked; } INP_RLOCK(last); INP_INFO_RUNLOCK(pcbinfo); UDP_PROBE(receive, NULL, last, ip6, last, uh); if (udp6_append(last, m, off, &fromsa) == 0) INP_RUNLOCK(last); inp_lost: return (IPPROTO_DONE); } /* * Locate pcb for datagram. */ /* * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { struct sockaddr_in6 *next_hop6; next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1); /* * Transparently forwarded. Pretend to be the destination. * Already got one like this? */ inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (!inp) { /* * It's new. Try to find the ambushing socket. * Because we've rewritten the destination address, * any hardware-generated hash is ignored. */ inp = in6_pcblookup(pcbinfo, &ip6->ip6_src, uh->uh_sport, &next_hop6->sin6_addr, next_hop6->sin6_port ? htons(next_hop6->sin6_port) : uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif); } /* Remove the tag from the packet. We don't need it anymore. */ m_tag_delete(m, fwd_tag); m->m_flags &= ~M_IP6_NEXTHOP; } else inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (inp == NULL) { if (udp_log_in_vain) { char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; log(LOG_INFO, "Connection attempt to UDP [%s]:%d from [%s]:%d\n", ip6_sprintf(ip6bufd, &ip6->ip6_dst), ntohs(uh->uh_dport), ip6_sprintf(ip6bufs, &ip6->ip6_src), ntohs(uh->uh_sport)); } UDPSTAT_INC(udps_noport); if (m->m_flags & M_MCAST) { printf("UDP6: M_MCAST is set in a unicast packet.\n"); UDPSTAT_INC(udps_noportmcast); goto badunlocked; } if (V_udp_blackhole) goto badunlocked; if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0) goto badunlocked; icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0); return (IPPROTO_DONE); } INP_RLOCK_ASSERT(inp); up = intoudpcb(inp); if (cscov_partial) { if (up->u_rxcslen == 0 || up->u_rxcslen > ulen) { INP_RUNLOCK(inp); m_freem(m); return (IPPROTO_DONE); } } UDP_PROBE(receive, NULL, inp, ip6, inp, uh); if (udp6_append(inp, m, off, &fromsa) == 0) INP_RUNLOCK(inp); return (IPPROTO_DONE); badheadlocked: INP_INFO_RUNLOCK(pcbinfo); badunlocked: if (m) m_freem(m); return (IPPROTO_DONE); } static void udp6_common_ctlinput(int cmd, struct sockaddr *sa, void *d, struct inpcbinfo *pcbinfo) { struct udphdr uh; struct ip6_hdr *ip6; struct mbuf *m; int off = 0; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; void *cmdarg; struct inpcb *(*notify)(struct inpcb *, int) = udp_notify; struct udp_portonly { u_int16_t uh_sport; u_int16_t uh_dport; } *uhp; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; if (PRC_IS_REDIRECT(cmd)) notify = in6_rtchange, d = NULL; else if (cmd == PRC_HOSTDEAD) d = NULL; else if (inet6ctlerrmap[cmd] == 0) return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; cmdarg = ip6cp->ip6c_cmdarg; sa6_src = ip6cp->ip6c_src; } else { m = NULL; ip6 = NULL; cmdarg = NULL; sa6_src = &sa6_any; } if (ip6) { /* * XXX: We assume that when IPV6 is non NULL, * M and OFF are valid. */ /* Check if we can safely examine src and dst ports. */ if (m->m_pkthdr.len < off + sizeof(*uhp)) return; bzero(&uh, sizeof(uh)); m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh); (void)in6_pcbnotify(pcbinfo, sa, uh.uh_dport, (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd, cmdarg, notify); } else (void)in6_pcbnotify(pcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); } void udp6_ctlinput(int cmd, struct sockaddr *sa, void *d) { return (udp6_common_ctlinput(cmd, sa, d, &V_udbinfo)); } void udplite6_ctlinput(int cmd, struct sockaddr *sa, void *d) { return (udp6_common_ctlinput(cmd, sa, d, &V_ulitecbinfo)); } static int udp6_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in6 addrs[2]; struct inpcb *inp; int error; error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); if (req->newlen != sizeof(addrs)) return (EINVAL); if (req->oldlen != sizeof(struct xucred)) return (EINVAL); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 || (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) { return (error); } inp = in6_pcblookup(&V_udbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { INP_RLOCK_ASSERT(inp); if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseesocket(req->td->td_ucred, inp->inp_socket); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection"); static int udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, struct mbuf *control, struct thread *td) { u_int32_t ulen = m->m_pkthdr.len; u_int32_t plen = sizeof(struct udphdr) + ulen; struct ip6_hdr *ip6; struct udphdr *udp6; struct in6_addr *laddr, *faddr, in6a; struct sockaddr_in6 *sin6 = NULL; - struct ifnet *oifp = NULL; int cscov_partial = 0; int scope_ambiguous = 0; u_short fport; int error = 0; uint8_t nxt; uint16_t cscov = 0; struct ip6_pktopts *optp, opt; int af = AF_INET6, hlen = sizeof(struct ip6_hdr); int flags; struct sockaddr_in6 tmp; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); if (addr6) { /* addr6 has been validated in udp6_send(). */ sin6 = (struct sockaddr_in6 *)addr6; /* protect *sin6 from overwrites */ tmp = *sin6; sin6 = &tmp; /* * Application should provide a proper zone ID or the use of * default zone IDs should be enabled. Unfortunately, some * applications do not behave as it should, so we need a * workaround. Even if an appropriate ID is not determined, * we'll see if we can determine the outgoing interface. If we * can, determine the zone ID based on the interface below. */ if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) return (error); } nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; if (control) { if ((error = ip6_setpktopts(control, &opt, inp->in6p_outputopts, td->td_ucred, nxt)) != 0) goto release; optp = &opt; } else optp = inp->in6p_outputopts; if (sin6) { faddr = &sin6->sin6_addr; /* * Since we saw no essential reason for calling in_pcbconnect, * we get rid of such kind of logic, and call in6_selectsrc * and in6_pcbsetport in order to fill in the local address * and the local port. */ if (sin6->sin6_port == 0) { error = EADDRNOTAVAIL; goto release; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { /* how about ::ffff:0.0.0.0 case? */ error = EISCONN; goto release; } fport = sin6->sin6_port; /* allow 0 port */ if (IN6_IS_ADDR_V4MAPPED(faddr)) { if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { /* * I believe we should explicitly discard the * packet when mapped addresses are disabled, * rather than send the packet as an IPv6 one. * If we chose the latter approach, the packet * might be sent out on the wire based on the * default route, the situation which we'd * probably want to avoid. * (20010421 jinmei@kame.net) */ error = EINVAL; goto release; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) { /* * when remote addr is an IPv4-mapped address, * local addr should not be an IPv6 address, * since you cannot determine how to map IPv6 * source address to IPv4. */ error = EINVAL; goto release; } af = AF_INET; } if (!IN6_IS_ADDR_V4MAPPED(faddr)) { - error = in6_selectsrc(sin6, optp, inp, - td->td_ucred, &oifp, &in6a); + error = in6_selectsrc_socket(sin6, optp, inp, + td->td_ucred, scope_ambiguous, &in6a, NULL); if (error) goto release; - if (oifp && scope_ambiguous && - (error = in6_setscope(&sin6->sin6_addr, - oifp, NULL))) { - goto release; - } laddr = &in6a; } else laddr = &inp->in6p_laddr; /* XXX */ if (laddr == NULL) { if (error == 0) error = EADDRNOTAVAIL; goto release; } if (inp->inp_lport == 0 && (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0) { /* Undo an address bind that may have occurred. */ inp->in6p_laddr = in6addr_any; goto release; } } else { if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = ENOTCONN; goto release; } if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) { if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { /* * XXX: this case would happen when the * application sets the V6ONLY flag after * connecting the foreign address. * Such applications should be fixed, * so we bark here. */ log(LOG_INFO, "udp6_output: IPV6_V6ONLY " "option was set for a connected socket\n"); error = EINVAL; goto release; } else af = AF_INET; } laddr = &inp->in6p_laddr; faddr = &inp->in6p_faddr; fport = inp->inp_fport; } if (af == AF_INET) hlen = sizeof(struct ip); /* * Calculate data length and get a mbuf * for UDP and IP6 headers. */ M_PREPEND(m, hlen + sizeof(struct udphdr), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto release; } /* * Stuff checksum and output datagram. */ udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen); udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */ udp6->uh_dport = fport; if (nxt == IPPROTO_UDPLITE) { struct udpcb *up; up = intoudpcb(inp); cscov = up->u_txcslen; if (cscov >= plen) cscov = 0; udp6->uh_ulen = htons(cscov); /* * For UDP-Lite, checksum coverage length of zero means * the entire UDPLite packet is covered by the checksum. */ cscov_partial = (cscov == 0) ? 0 : 1; } else if (plen <= 0xffff) udp6->uh_ulen = htons((u_short)plen); else udp6->uh_ulen = 0; udp6->uh_sum = 0; switch (af) { case AF_INET6: ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_plen = htons((u_short)plen); ip6->ip6_nxt = nxt; ip6->ip6_hlim = in6_selecthlim(inp, NULL); ip6->ip6_src = *laddr; ip6->ip6_dst = *faddr; if (cscov_partial) { if ((udp6->uh_sum = in6_cksum_partial(m, nxt, sizeof(struct ip6_hdr), plen, cscov)) == 0) udp6->uh_sum = 0xffff; } else { udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0); m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); } #ifdef RSS { uint32_t hash_val, hash_type; uint8_t pr; pr = inp->inp_socket->so_proto->pr_protocol; /* * Calculate an appropriate RSS hash for UDP and * UDP Lite. * * The called function will take care of figuring out * whether a 2-tuple or 4-tuple hash is required based * on the currently configured scheme. * * Later later on connected socket values should be * cached in the inpcb and reused, rather than constantly * re-calculating it. * * UDP Lite is a different protocol number and will * likely end up being hashed as a 2-tuple until * RSS / NICs grow UDP Lite protocol awareness. */ if (rss_proto_software_hash_v6(faddr, laddr, fport, inp->inp_lport, pr, &hash_val, &hash_type) == 0) { m->m_pkthdr.flowid = hash_val; M_HASHTYPE_SET(m, hash_type); } } #endif flags = 0; #ifdef RSS /* * Don't override with the inp cached flowid. * * Until the whole UDP path is vetted, it may actually * be incorrect. */ flags |= IP_NODEFAULTFLOWID; #endif UDP_PROBE(send, NULL, inp, ip6, inp, udp6); UDPSTAT_INC(udps_opackets); error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions, NULL, inp); break; case AF_INET: error = EAFNOSUPPORT; goto release; } goto releaseopt; release: m_freem(m); releaseopt: if (control) { ip6_clearpktopts(&opt, -1); m_freem(control); } return (error); } static void udp6_abort(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_abort: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (*pru->pru_abort)(so); return; } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp6_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp == NULL, ("udp6_attach: inp != NULL")); if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = soreserve(so, udp_sendspace, udp_recvspace); if (error) return (error); } INP_INFO_WLOCK(pcbinfo); error = in_pcballoc(so, pcbinfo); if (error) { INP_INFO_WUNLOCK(pcbinfo); return (error); } inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) inp->inp_vflag |= INP_IPV4; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; /* just to be sure */ /* * XXX: ugly!! * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = V_ip_defttl; error = udp_newudpcb(inp); if (error) { in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(pcbinfo); return (error); } INP_WUNLOCK(inp); INP_INFO_WUNLOCK(pcbinfo); return (0); } static int udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_bind: inp == NULL")); INP_WLOCK(inp); INP_HASH_WLOCK(pcbinfo); inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { struct sockaddr_in6 *sin6_p; sin6_p = (struct sockaddr_in6 *)nam; if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) inp->inp_vflag |= INP_IPV4; #ifdef INET else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6_p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; error = in_pcbbind(inp, (struct sockaddr *)&sin, td->td_ucred); goto out; } #endif } error = in6_pcbbind(inp, nam, td->td_ucred); #ifdef INET out: #endif INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); } static void udp6_close(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_close: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (*pru->pru_disconnect)(so); return; } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; struct sockaddr_in6 *sin6; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); sin6 = (struct sockaddr_in6 *)nam; KASSERT(inp != NULL, ("udp6_connect: inp == NULL")); /* * XXXRW: Need to clarify locking of v4/v6 flags. */ INP_WLOCK(inp); #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { struct sockaddr_in sin; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { error = EINVAL; goto out; } if (inp->inp_faddr.s_addr != INADDR_ANY) { error = EISCONN; goto out; } in6_sin6_2_sin(&sin, sin6); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; error = prison_remote_ip4(td->td_ucred, &sin.sin_addr); if (error != 0) goto out; INP_HASH_WLOCK(pcbinfo); error = in_pcbconnect(inp, (struct sockaddr *)&sin, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); if (error == 0) soisconnected(so); goto out; } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = EISCONN; goto out; } inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr); if (error != 0) goto out; INP_HASH_WLOCK(pcbinfo); error = in6_pcbconnect(inp, nam, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); if (error == 0) soisconnected(so); out: INP_WUNLOCK(inp); return (error); } static void udp6_detach(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; struct udpcb *up; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_detach: inp == NULL")); INP_INFO_WLOCK(pcbinfo); INP_WLOCK(inp); up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(pcbinfo); udp_discardcb(up); } static int udp6_disconnect(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (void)(*pru->pru_disconnect)(so); return (0); } #endif if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = ENOTCONN; goto out; } INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; /* XXX */ SOCK_UNLOCK(so); out: INP_WUNLOCK(inp); return (0); } static int udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error = 0; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_send: inp == NULL")); INP_WLOCK(inp); if (addr) { if (addr->sa_len != sizeof(struct sockaddr_in6)) { error = EINVAL; goto bad; } if (addr->sa_family != AF_INET6) { error = EAFNOSUPPORT; goto bad; } } #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { int hasv4addr; struct sockaddr_in6 *sin6 = 0; if (addr == 0) hasv4addr = (inp->inp_vflag & INP_IPV4); else { sin6 = (struct sockaddr_in6 *)addr; hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ? 1 : 0; } if (hasv4addr) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; /* * XXXRW: We release UDP-layer locks before calling * udp_send() in order to avoid recursion. However, * this does mean there is a short window where inp's * fields are unstable. Could this lead to a * potential race in which the factors causing us to * select the UDPv4 output routine are invalidated? */ INP_WUNLOCK(inp); if (sin6) in6_sin6_2_sin_in_sock(addr); pru = inetsw[ip_protox[nxt]].pr_usrreqs; /* addr will just be freed in sendit(). */ return ((*pru->pru_send)(so, flags, m, addr, control, td)); } } #endif #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif INP_HASH_WLOCK(pcbinfo); error = udp6_output(inp, m, addr, control, td); INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); bad: INP_WUNLOCK(inp); m_freem(m); return (error); } struct pr_usrreqs udp6_usrreqs = { .pru_abort = udp6_abort, .pru_attach = udp6_attach, .pru_bind = udp6_bind, .pru_connect = udp6_connect, .pru_control = in6_control, .pru_detach = udp6_detach, .pru_disconnect = udp6_disconnect, .pru_peeraddr = in6_mapped_peeraddr, .pru_send = udp6_send, .pru_shutdown = udp_shutdown, .pru_sockaddr = in6_mapped_sockaddr, .pru_soreceive = soreceive_dgram, .pru_sosend = sosend_dgram, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = udp6_close }; Index: projects/clang380-import/sys/netpfil/ipfw/ip_fw_table.c =================================================================== --- projects/clang380-import/sys/netpfil/ipfw/ip_fw_table.c (revision 293686) +++ projects/clang380-import/sys/netpfil/ipfw/ip_fw_table.c (revision 293687) @@ -1,3552 +1,3552 @@ /*- * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko. * Copyright (c) 2014 Yandex LLC * Copyright (c) 2014 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Lookup table support for ipfw. * * This file contains handlers for all generic tables' operations: * add/del/flush entries, list/dump tables etc.. * * Table data modification is protected by both UH and runtime lock * while reading configuration/data is protected by UH lock. * * Lookup algorithms for all table types are located in ip_fw_table_algo.c */ #include "opt_ipfw.h" #include #include #include #include #include #include #include #include #include #include #include /* ip_fw.h requires IFNAMSIZ */ #include #include /* struct ipfw_rule_ref */ #include #include #include /* * Table has the following `type` concepts: * * `no.type` represents lookup key type (addr, ifp, uid, etc..) * vmask represents bitmask of table values which are present at the moment. * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old * single-value-for-all approach. */ struct table_config { struct named_object no; uint8_t tflags; /* type flags */ uint8_t locked; /* 1 if locked from changes */ uint8_t linked; /* 1 if already linked */ uint8_t ochanged; /* used by set swapping */ uint8_t vshared; /* 1 if using shared value array */ uint8_t spare[3]; uint32_t count; /* Number of records */ uint32_t limit; /* Max number of records */ uint32_t vmask; /* bitmask with supported values */ uint32_t ocount; /* used by set swapping */ uint64_t gencnt; /* generation count */ char tablename[64]; /* table name */ struct table_algo *ta; /* Callbacks for given algo */ void *astate; /* algorithm state */ struct table_info ti_copy; /* data to put to table_info */ struct namedobj_instance *vi; }; static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti, struct table_config **tc); static struct table_config *find_table(struct namedobj_instance *ni, struct tid_info *ti); static struct table_config *alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags); static void free_table_config(struct namedobj_instance *ni, struct table_config *tc); static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int ref); static void link_table(struct ip_fw_chain *ch, struct table_config *tc); static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc); static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti, struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc); #define OP_ADD 1 #define OP_DEL 0 static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh, struct sockopt_data *sd); static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc, ipfw_xtable_info *i); static int dump_table_tentry(void *e, void *arg); static int dump_table_xentry(void *e, void *arg); static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a, struct tid_info *b); static int check_table_name(const char *name); static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts, struct table_config *tc, struct table_info *ti, uint32_t count); static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti); static struct table_algo *find_table_algo(struct tables_config *tableconf, struct tid_info *ti, char *name); static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti); static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti); #define CHAIN_TO_NI(chain) (CHAIN_TO_TCFG(chain)->namehash) #define KIDX_TO_TI(ch, k) (&(((struct table_info *)(ch)->tablestate)[k])) #define TA_BUF_SZ 128 /* On-stack buffer for add/delete state */ void rollback_toperation_state(struct ip_fw_chain *ch, void *object) { struct tables_config *tcfg; struct op_state *os; tcfg = CHAIN_TO_TCFG(ch); TAILQ_FOREACH(os, &tcfg->state_list, next) os->func(object, os); } void add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts) { struct tables_config *tcfg; tcfg = CHAIN_TO_TCFG(ch); TAILQ_INSERT_HEAD(&tcfg->state_list, &ts->opstate, next); } void del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts) { struct tables_config *tcfg; tcfg = CHAIN_TO_TCFG(ch); TAILQ_REMOVE(&tcfg->state_list, &ts->opstate, next); } void tc_ref(struct table_config *tc) { tc->no.refcnt++; } void tc_unref(struct table_config *tc) { tc->no.refcnt--; } static struct table_value * get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx) { struct table_value *pval; pval = (struct table_value *)ch->valuestate; return (&pval[kidx]); } /* * Checks if we're able to insert/update entry @tei into table * w.r.t @tc limits. * May alter @tei to indicate insertion error / insert * options. * * Returns 0 if operation can be performed/ */ static int check_table_limit(struct table_config *tc, struct tentry_info *tei) { if (tc->limit == 0 || tc->count < tc->limit) return (0); if ((tei->flags & TEI_FLAGS_UPDATE) == 0) { /* Notify userland on error cause */ tei->flags |= TEI_FLAGS_LIMIT; return (EFBIG); } /* * We have UPDATE flag set. * Permit updating record (if found), * but restrict adding new one since we've * already hit the limit. */ tei->flags |= TEI_FLAGS_DONTADD; return (0); } /* * Convert algorithm callback return code into * one of pre-defined states known by userland. */ static void store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num) { int flag; flag = 0; switch (error) { case 0: if (op == OP_ADD && num != 0) flag = TEI_FLAGS_ADDED; if (op == OP_DEL) flag = TEI_FLAGS_DELETED; break; case ENOENT: flag = TEI_FLAGS_NOTFOUND; break; case EEXIST: flag = TEI_FLAGS_EXISTS; break; default: flag = TEI_FLAGS_ERROR; } tei->flags |= flag; } /* * Creates and references table with default parameters. * Saves table config, algo and allocated kidx info @ptc, @pta and * @pkidx if non-zero. * Used for table auto-creation to support old binaries. * * Returns 0 on success. */ static int create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti, uint16_t *pkidx) { ipfw_xtable_info xi; int error; memset(&xi, 0, sizeof(xi)); /* Set default value mask for legacy clients */ xi.vmask = IPFW_VTYPE_LEGACY; error = create_table_internal(ch, ti, NULL, &xi, pkidx, 1); if (error != 0) return (error); return (0); } /* * Find and reference existing table optionally * creating new one. * * Saves found table config into @ptc. * Note function may drop/acquire UH_WLOCK. * Returns 0 if table was found/created and referenced * or non-zero return code. */ static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti, struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc) { struct namedobj_instance *ni; struct table_config *tc; uint16_t kidx; int error; IPFW_UH_WLOCK_ASSERT(ch); ni = CHAIN_TO_NI(ch); tc = NULL; if ((tc = find_table(ni, ti)) != NULL) { /* check table type */ if (tc->no.subtype != ti->type) return (EINVAL); if (tc->locked != 0) return (EACCES); /* Try to exit early on limit hit */ if (op == OP_ADD && count == 1 && check_table_limit(tc, tei) != 0) return (EFBIG); /* Reference and return */ tc->no.refcnt++; *ptc = tc; return (0); } if (op == OP_DEL) return (ESRCH); /* Compability mode: create new table for old clients */ if ((tei->flags & TEI_FLAGS_COMPAT) == 0) return (ESRCH); IPFW_UH_WUNLOCK(ch); error = create_table_compat(ch, ti, &kidx); IPFW_UH_WLOCK(ch); if (error != 0) return (error); tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx); KASSERT(tc != NULL, ("create_table_compat returned bad idx %d", kidx)); /* OK, now we've got referenced table. */ *ptc = tc; return (0); } /* * Rolls back already @added to @tc entries using state array @ta_buf_m. * Assume the following layout: * 1) ADD state (ta_buf_m[0] ... t_buf_m[added - 1]) for handling update cases * 2) DEL state (ta_buf_m[count[ ... t_buf_m[count + added - 1]) * for storing deleted state */ static void rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc, struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m, uint32_t count, uint32_t added) { struct table_algo *ta; struct tentry_info *ptei; caddr_t v, vv; size_t ta_buf_sz; int error, i; uint32_t num; IPFW_UH_WLOCK_ASSERT(ch); ta = tc->ta; ta_buf_sz = ta->ta_buf_size; v = ta_buf_m; vv = v + count * ta_buf_sz; for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) { ptei = &tei[i]; if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) { /* * We have old value stored by previous * call in @ptei->value. Do add once again * to restore it. */ error = ta->add(tc->astate, tinfo, ptei, v, &num); KASSERT(error == 0, ("rollback UPDATE fail")); KASSERT(num == 0, ("rollback UPDATE fail2")); continue; } error = ta->prepare_del(ch, ptei, vv); KASSERT(error == 0, ("pre-rollback INSERT failed")); error = ta->del(tc->astate, tinfo, ptei, vv, &num); KASSERT(error == 0, ("rollback INSERT failed")); tc->count -= num; } } /* * Prepares add/del state for all @count entries in @tei. * Uses either stack buffer (@ta_buf) or allocates a new one. * Stores pointer to allocated buffer back to @ta_buf. * * Returns 0 on success. */ static int prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta, struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf) { caddr_t ta_buf_m, v; size_t ta_buf_sz, sz; struct tentry_info *ptei; int error, i; error = 0; ta_buf_sz = ta->ta_buf_size; if (count == 1) { /* Sigle add/delete, use on-stack buffer */ memset(*ta_buf, 0, TA_BUF_SZ); ta_buf_m = *ta_buf; } else { /* * Multiple adds/deletes, allocate larger buffer * * Note we need 2xcount buffer for add case: * we have hold both ADD state * and DELETE state (this may be needed * if we need to rollback all changes) */ sz = count * ta_buf_sz; ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP, M_WAITOK | M_ZERO); } v = ta_buf_m; for (i = 0; i < count; i++, v += ta_buf_sz) { ptei = &tei[i]; error = (op == OP_ADD) ? ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v); /* * Some syntax error (incorrect mask, or address, or * anything). Return error regardless of atomicity * settings. */ if (error != 0) break; } *ta_buf = ta_buf_m; return (error); } /* * Flushes allocated state for each @count entries in @tei. * Frees @ta_buf_m if differs from stack buffer @ta_buf. */ static void flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta, struct tentry_info *tei, uint32_t count, int rollback, caddr_t ta_buf_m, caddr_t ta_buf) { caddr_t v; struct tentry_info *ptei; size_t ta_buf_sz; int i; ta_buf_sz = ta->ta_buf_size; /* Run cleaning callback anyway */ v = ta_buf_m; for (i = 0; i < count; i++, v += ta_buf_sz) { ptei = &tei[i]; ta->flush_entry(ch, ptei, v); if (ptei->ptv != NULL) { free(ptei->ptv, M_IPFW); ptei->ptv = NULL; } } /* Clean up "deleted" state in case of rollback */ if (rollback != 0) { v = ta_buf_m + count * ta_buf_sz; for (i = 0; i < count; i++, v += ta_buf_sz) ta->flush_entry(ch, &tei[i], v); } if (ta_buf_m != ta_buf) free(ta_buf_m, M_TEMP); } static void rollback_add_entry(void *object, struct op_state *_state) { struct ip_fw_chain *ch; struct tableop_state *ts; ts = (struct tableop_state *)_state; if (ts->tc != object && ts->ch != object) return; ch = ts->ch; IPFW_UH_WLOCK_ASSERT(ch); /* Call specifid unlockers */ rollback_table_values(ts); /* Indicate we've called */ ts->modified = 1; } /* * Adds/updates one or more entries in table @ti. * * Function may drop/reacquire UH wlock multiple times due to * items alloc, algorithm callbacks (check_space), value linkage * (new values, value storage realloc), etc.. * Other processes like other adds (which may involve storage resize), * table swaps (which changes table data and may change algo type), * table modify (which may change value mask) may be executed * simultaneously so we need to deal with it. * * The following approach was implemented: * we have per-chain linked list, protected with UH lock. * add_table_entry prepares special on-stack structure wthich is passed * to its descendants. Users add this structure to this list before unlock. * After performing needed operations and acquiring UH lock back, each user * checks if structure has changed. If true, it rolls local state back and * returns without error to the caller. * add_table_entry() on its own checks if structure has changed and restarts * its operation from the beginning (goto restart). * * Functions which are modifying fields of interest (currently * resize_shared_value_storage() and swap_tables() ) * traverses given list while holding UH lock immediately before * performing their operations calling function provided be list entry * ( currently rollback_add_entry ) which performs rollback for all necessary * state and sets appropriate values in structure indicating rollback * has happened. * * Algo interaction: * Function references @ti first to ensure table won't * disappear or change its type. * After that, prepare_add callback is called for each @tei entry. * Next, we try to add each entry under UH+WHLOCK * using add() callback. * Finally, we free all state by calling flush_entry callback * for each @tei. * * Returns 0 on success. */ int add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, struct tentry_info *tei, uint8_t flags, uint32_t count) { struct table_config *tc; struct table_algo *ta; uint16_t kidx; int error, first_error, i, rollback; uint32_t num, numadd; struct tentry_info *ptei; struct tableop_state ts; char ta_buf[TA_BUF_SZ]; caddr_t ta_buf_m, v; memset(&ts, 0, sizeof(ts)); ta = NULL; IPFW_UH_WLOCK(ch); /* * Find and reference existing table. */ restart: if (ts.modified != 0) { IPFW_UH_WUNLOCK(ch); flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf); memset(&ts, 0, sizeof(ts)); ta = NULL; IPFW_UH_WLOCK(ch); } error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc); if (error != 0) { IPFW_UH_WUNLOCK(ch); return (error); } ta = tc->ta; /* Fill in tablestate */ ts.ch = ch; ts.opstate.func = rollback_add_entry; ts.tc = tc; ts.vshared = tc->vshared; ts.vmask = tc->vmask; ts.ta = ta; ts.tei = tei; ts.count = count; rollback = 0; add_toperation_state(ch, &ts); IPFW_UH_WUNLOCK(ch); /* Allocate memory and prepare record(s) */ /* Pass stack buffer by default */ ta_buf_m = ta_buf; error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m); IPFW_UH_WLOCK(ch); del_toperation_state(ch, &ts); /* Drop reference we've used in first search */ tc->no.refcnt--; /* Check prepare_batch_buffer() error */ if (error != 0) goto cleanup; /* * Check if table swap has happened. * (so table algo might be changed). * Restart operation to achieve consistent behavior. */ if (ts.modified != 0) goto restart; /* * Link all values values to shared/per-table value array. * * May release/reacquire UH_WLOCK. */ error = ipfw_link_table_values(ch, &ts); if (error != 0) goto cleanup; if (ts.modified != 0) goto restart; /* * Ensure we are able to add all entries without additional * memory allocations. May release/reacquire UH_WLOCK. */ kidx = tc->no.kidx; error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count); if (error != 0) goto cleanup; if (ts.modified != 0) goto restart; /* We've got valid table in @tc. Let's try to add data */ kidx = tc->no.kidx; ta = tc->ta; numadd = 0; first_error = 0; IPFW_WLOCK(ch); v = ta_buf_m; for (i = 0; i < count; i++, v += ta->ta_buf_size) { ptei = &tei[i]; num = 0; /* check limit before adding */ if ((error = check_table_limit(tc, ptei)) == 0) { error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v, &num); /* Set status flag to inform userland */ store_tei_result(ptei, OP_ADD, error, num); } if (error == 0) { /* Update number of records to ease limit checking */ tc->count += num; numadd += num; continue; } if (first_error == 0) first_error = error; /* * Some error have happened. Check our atomicity * settings: continue if atomicity is not required, * rollback changes otherwise. */ if ((flags & IPFW_CTF_ATOMIC) == 0) continue; rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx), tei, ta_buf_m, count, i); rollback = 1; break; } IPFW_WUNLOCK(ch); ipfw_garbage_table_values(ch, tc, tei, count, rollback); /* Permit post-add algorithm grow/rehash. */ if (numadd != 0) check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0); /* Return first error to user, if any */ error = first_error; cleanup: IPFW_UH_WUNLOCK(ch); flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf); return (error); } /* * Deletes one or more entries in table @ti. * * Returns 0 on success. */ int del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, struct tentry_info *tei, uint8_t flags, uint32_t count) { struct table_config *tc; struct table_algo *ta; struct tentry_info *ptei; uint16_t kidx; int error, first_error, i; uint32_t num, numdel; char ta_buf[TA_BUF_SZ]; caddr_t ta_buf_m, v; /* * Find and reference existing table. */ IPFW_UH_WLOCK(ch); error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc); if (error != 0) { IPFW_UH_WUNLOCK(ch); return (error); } ta = tc->ta; IPFW_UH_WUNLOCK(ch); /* Allocate memory and prepare record(s) */ /* Pass stack buffer by default */ ta_buf_m = ta_buf; error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m); if (error != 0) goto cleanup; IPFW_UH_WLOCK(ch); /* Drop reference we've used in first search */ tc->no.refcnt--; /* * Check if table algo is still the same. * (changed ta may be the result of table swap). */ if (ta != tc->ta) { IPFW_UH_WUNLOCK(ch); error = EINVAL; goto cleanup; } kidx = tc->no.kidx; numdel = 0; first_error = 0; IPFW_WLOCK(ch); v = ta_buf_m; for (i = 0; i < count; i++, v += ta->ta_buf_size) { ptei = &tei[i]; num = 0; error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v, &num); /* Save state for userland */ store_tei_result(ptei, OP_DEL, error, num); if (error != 0 && first_error == 0) first_error = error; tc->count -= num; numdel += num; } IPFW_WUNLOCK(ch); /* Unlink non-used values */ ipfw_garbage_table_values(ch, tc, tei, count, 0); if (numdel != 0) { /* Run post-del hook to permit shrinking */ check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0); } IPFW_UH_WUNLOCK(ch); /* Return first error to user, if any */ error = first_error; cleanup: flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf); return (error); } /* * Ensure that table @tc has enough space to add @count entries without * need for reallocation. * * Callbacks order: * 0) need_modify() (UH_WLOCK) - checks if @count items can be added w/o resize. * * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags. * 2) prepare_modifyt (UH_WLOCK) - copy old data into new storage * 3) modify (UH_WLOCK + WLOCK) - switch pointers * 4) flush_modify (UH_WLOCK) - free state, if needed * * Returns 0 on success. */ static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts, struct table_config *tc, struct table_info *ti, uint32_t count) { struct table_algo *ta; uint64_t pflags; char ta_buf[TA_BUF_SZ]; int error; IPFW_UH_WLOCK_ASSERT(ch); error = 0; ta = tc->ta; if (ta->need_modify == NULL) return (0); /* Acquire reference not to loose @tc between locks/unlocks */ tc->no.refcnt++; /* * TODO: think about avoiding race between large add/large delete * operation on algorithm which implements shrinking along with * growing. */ while (true) { pflags = 0; if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) { error = 0; break; } /* We have to shrink/grow table */ if (ts != NULL) add_toperation_state(ch, ts); IPFW_UH_WUNLOCK(ch); memset(&ta_buf, 0, sizeof(ta_buf)); error = ta->prepare_mod(ta_buf, &pflags); IPFW_UH_WLOCK(ch); if (ts != NULL) del_toperation_state(ch, ts); if (error != 0) break; if (ts != NULL && ts->modified != 0) { /* * Swap operation has happened * so we're currently operating on other * table data. Stop doing this. */ ta->flush_mod(ta_buf); break; } /* Check if we still need to alter table */ ti = KIDX_TO_TI(ch, tc->no.kidx); if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) { IPFW_UH_WUNLOCK(ch); /* * Other thread has already performed resize. * Flush our state and return. */ ta->flush_mod(ta_buf); break; } error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags); if (error == 0) { /* Do actual modification */ IPFW_WLOCK(ch); ta->modify(tc->astate, ti, ta_buf, pflags); IPFW_WUNLOCK(ch); } /* Anyway, flush data and retry */ ta->flush_mod(ta_buf); } tc->no.refcnt--; return (error); } /* * Adds or deletes record in table. * Data layout (v0): * Request: [ ip_fw3_opheader ipfw_table_xentry ] * * Returns 0 on success */ static int manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_table_xentry *xent; struct tentry_info tei; struct tid_info ti; struct table_value v; int error, hdrlen, read; hdrlen = offsetof(ipfw_table_xentry, k); /* Check minimum header size */ if (sd->valsize < (sizeof(*op3) + hdrlen)) return (EINVAL); read = sizeof(ip_fw3_opheader); /* Check if xentry len field is valid */ xent = (ipfw_table_xentry *)(op3 + 1); if (xent->len < hdrlen || xent->len + read > sd->valsize) return (EINVAL); memset(&tei, 0, sizeof(tei)); tei.paddr = &xent->k; tei.masklen = xent->masklen; ipfw_import_table_value_legacy(xent->value, &v); tei.pvalue = &v; /* Old requests compability */ tei.flags = TEI_FLAGS_COMPAT; if (xent->type == IPFW_TABLE_ADDR) { if (xent->len - hdrlen == sizeof(in_addr_t)) tei.subtype = AF_INET; else tei.subtype = AF_INET6; } memset(&ti, 0, sizeof(ti)); ti.uidx = xent->tbl; ti.type = xent->type; error = (op3->opcode == IP_FW_TABLE_XADD) ? add_table_entry(ch, &ti, &tei, 0, 1) : del_table_entry(ch, &ti, &tei, 0, 1); return (error); } /* * Adds or deletes record in table. * Data layout (v1)(current): * Request: [ ipfw_obj_header * ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ] * ] * * Returns 0 on success */ static int manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_tentry *tent, *ptent; ipfw_obj_ctlv *ctlv; ipfw_obj_header *oh; struct tentry_info *ptei, tei, *tei_buf; struct tid_info ti; int error, i, kidx, read; /* Check minimum header size */ if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv))) return (EINVAL); /* Check if passed data is too long */ if (sd->valsize != sd->kavail) return (EINVAL); oh = (ipfw_obj_header *)sd->kbuf; /* Basic length checks for TLVs */ if (oh->ntlv.head.length != sizeof(oh->ntlv)) return (EINVAL); read = sizeof(*oh); ctlv = (ipfw_obj_ctlv *)(oh + 1); if (ctlv->head.length + read != sd->valsize) return (EINVAL); read += sizeof(*ctlv); tent = (ipfw_obj_tentry *)(ctlv + 1); if (ctlv->count * sizeof(*tent) + read != sd->valsize) return (EINVAL); if (ctlv->count == 0) return (0); /* * Mark entire buffer as "read". * This instructs sopt api write it back * after function return. */ ipfw_get_sopt_header(sd, sd->valsize); /* Perform basic checks for each entry */ ptent = tent; kidx = tent->idx; for (i = 0; i < ctlv->count; i++, ptent++) { if (ptent->head.length != sizeof(*ptent)) return (EINVAL); if (ptent->idx != kidx) return (ENOTSUP); } /* Convert data into kernel request objects */ objheader_to_ti(oh, &ti); ti.type = oh->ntlv.type; ti.uidx = kidx; /* Use on-stack buffer for single add/del */ if (ctlv->count == 1) { memset(&tei, 0, sizeof(tei)); tei_buf = &tei; } else tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP, M_WAITOK | M_ZERO); ptei = tei_buf; ptent = tent; for (i = 0; i < ctlv->count; i++, ptent++, ptei++) { ptei->paddr = &ptent->k; ptei->subtype = ptent->subtype; ptei->masklen = ptent->masklen; if (ptent->head.flags & IPFW_TF_UPDATE) ptei->flags |= TEI_FLAGS_UPDATE; ipfw_import_table_value_v1(&ptent->v.value); ptei->pvalue = (struct table_value *)&ptent->v.value; } error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ? add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) : del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count); /* Translate result back to userland */ ptei = tei_buf; ptent = tent; for (i = 0; i < ctlv->count; i++, ptent++, ptei++) { if (ptei->flags & TEI_FLAGS_ADDED) ptent->result = IPFW_TR_ADDED; else if (ptei->flags & TEI_FLAGS_DELETED) ptent->result = IPFW_TR_DELETED; else if (ptei->flags & TEI_FLAGS_UPDATED) ptent->result = IPFW_TR_UPDATED; else if (ptei->flags & TEI_FLAGS_LIMIT) ptent->result = IPFW_TR_LIMIT; else if (ptei->flags & TEI_FLAGS_ERROR) ptent->result = IPFW_TR_ERROR; else if (ptei->flags & TEI_FLAGS_NOTFOUND) ptent->result = IPFW_TR_NOTFOUND; else if (ptei->flags & TEI_FLAGS_EXISTS) ptent->result = IPFW_TR_EXISTS; ipfw_export_table_value_v1(ptei->pvalue, &ptent->v.value); } if (tei_buf != &tei) free(tei_buf, M_TEMP); return (error); } /* * Looks up an entry in given table. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_obj_tentry ] * Reply: [ ipfw_obj_header ipfw_obj_tentry ] * * Returns 0 on success */ static int find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_tentry *tent; ipfw_obj_header *oh; struct tid_info ti; struct table_config *tc; struct table_algo *ta; struct table_info *kti; struct namedobj_instance *ni; int error; size_t sz; /* Check minimum header size */ sz = sizeof(*oh) + sizeof(*tent); if (sd->valsize != sz) return (EINVAL); oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); tent = (ipfw_obj_tentry *)(oh + 1); /* Basic length checks for TLVs */ if (oh->ntlv.head.length != sizeof(oh->ntlv)) return (EINVAL); objheader_to_ti(oh, &ti); ti.type = oh->ntlv.type; ti.uidx = tent->idx; IPFW_UH_RLOCK(ch); ni = CHAIN_TO_NI(ch); /* * Find existing table and check its type . */ ta = NULL; if ((tc = find_table(ni, &ti)) == NULL) { IPFW_UH_RUNLOCK(ch); return (ESRCH); } /* check table type */ if (tc->no.subtype != ti.type) { IPFW_UH_RUNLOCK(ch); return (EINVAL); } kti = KIDX_TO_TI(ch, tc->no.kidx); ta = tc->ta; if (ta->find_tentry == NULL) return (ENOTSUP); error = ta->find_tentry(tc->astate, kti, tent); IPFW_UH_RUNLOCK(ch); return (error); } /* * Flushes all entries or destroys given table. * Data layout (v0)(current): * Request: [ ipfw_obj_header ] * * Returns 0 on success */ static int flush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { int error; struct _ipfw_obj_header *oh; struct tid_info ti; if (sd->valsize != sizeof(*oh)) return (EINVAL); oh = (struct _ipfw_obj_header *)op3; objheader_to_ti(oh, &ti); if (op3->opcode == IP_FW_TABLE_XDESTROY) error = destroy_table(ch, &ti); else if (op3->opcode == IP_FW_TABLE_XFLUSH) error = flush_table(ch, &ti); else return (ENOTSUP); return (error); } static void restart_flush(void *object, struct op_state *_state) { struct tableop_state *ts; ts = (struct tableop_state *)_state; if (ts->tc != object) return; /* Indicate we've called */ ts->modified = 1; } /* * Flushes given table. * * Function create new table instance with the same * parameters, swaps it with old one and * flushes state without holding runtime WLOCK. * * Returns 0 on success. */ int flush_table(struct ip_fw_chain *ch, struct tid_info *ti) { struct namedobj_instance *ni; struct table_config *tc; struct table_algo *ta; struct table_info ti_old, ti_new, *tablestate; void *astate_old, *astate_new; char algostate[64], *pstate; struct tableop_state ts; int error, need_gc; uint16_t kidx; uint8_t tflags; /* * Stage 1: save table algoritm. * Reference found table to ensure it won't disappear. */ IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); if ((tc = find_table(ni, ti)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } need_gc = 0; astate_new = NULL; memset(&ti_new, 0, sizeof(ti_new)); restart: /* Set up swap handler */ memset(&ts, 0, sizeof(ts)); ts.opstate.func = restart_flush; ts.tc = tc; ta = tc->ta; /* Do not flush readonly tables */ if ((ta->flags & TA_FLAG_READONLY) != 0) { IPFW_UH_WUNLOCK(ch); return (EACCES); } /* Save startup algo parameters */ if (ta->print_config != NULL) { ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx), algostate, sizeof(algostate)); pstate = algostate; } else pstate = NULL; tflags = tc->tflags; tc->no.refcnt++; add_toperation_state(ch, &ts); IPFW_UH_WUNLOCK(ch); /* * Stage 1.5: if this is not the first attempt, destroy previous state */ if (need_gc != 0) { ta->destroy(astate_new, &ti_new); need_gc = 0; } /* * Stage 2: allocate new table instance using same algo. */ memset(&ti_new, 0, sizeof(struct table_info)); error = ta->init(ch, &astate_new, &ti_new, pstate, tflags); /* * Stage 3: swap old state pointers with newly-allocated ones. * Decrease refcount. */ IPFW_UH_WLOCK(ch); tc->no.refcnt--; del_toperation_state(ch, &ts); if (error != 0) { IPFW_UH_WUNLOCK(ch); return (error); } /* * Restart operation if table swap has happened: * even if algo may be the same, algo init parameters * may change. Restart operation instead of doing * complex checks. */ if (ts.modified != 0) { /* Delay destroying data since we're holding UH lock */ need_gc = 1; goto restart; } ni = CHAIN_TO_NI(ch); kidx = tc->no.kidx; tablestate = (struct table_info *)ch->tablestate; IPFW_WLOCK(ch); ti_old = tablestate[kidx]; tablestate[kidx] = ti_new; IPFW_WUNLOCK(ch); astate_old = tc->astate; tc->astate = astate_new; tc->ti_copy = ti_new; tc->count = 0; /* Notify algo on real @ti address */ if (ta->change_ti != NULL) ta->change_ti(tc->astate, &tablestate[kidx]); /* * Stage 4: unref values. */ ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old); IPFW_UH_WUNLOCK(ch); /* * Stage 5: perform real flush/destroy. */ ta->destroy(astate_old, &ti_old); return (0); } /* * Swaps two tables. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_obj_ntlv ] * * Returns 0 on success */ static int swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { int error; struct _ipfw_obj_header *oh; struct tid_info ti_a, ti_b; if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv)) return (EINVAL); oh = (struct _ipfw_obj_header *)op3; ntlv_to_ti(&oh->ntlv, &ti_a); ntlv_to_ti((ipfw_obj_ntlv *)(oh + 1), &ti_b); error = swap_tables(ch, &ti_a, &ti_b); return (error); } /* * Swaps two tables of the same type/valtype. * * Checks if tables are compatible and limits * permits swap, than actually perform swap. * * Each table consists of 2 different parts: * config: * @tc (with name, set, kidx) and rule bindings, which is "stable". * number of items * table algo * runtime: * runtime data @ti (ch->tablestate) * runtime cache in @tc * algo-specific data (@tc->astate) * * So we switch: * all runtime data * number of items * table algo * * After that we call @ti change handler for each table. * * Note that referencing @tc won't protect tc->ta from change. * XXX: Do we need to restrict swap between locked tables? * XXX: Do we need to exchange ftype? * * Returns 0 on success. */ static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a, struct tid_info *b) { struct namedobj_instance *ni; struct table_config *tc_a, *tc_b; struct table_algo *ta; struct table_info ti, *tablestate; void *astate; uint32_t count; /* * Stage 1: find both tables and ensure they are of * the same type. */ IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); if ((tc_a = find_table(ni, a)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } if ((tc_b = find_table(ni, b)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } /* It is very easy to swap between the same table */ if (tc_a == tc_b) { IPFW_UH_WUNLOCK(ch); return (0); } /* Check type and value are the same */ if (tc_a->no.subtype!=tc_b->no.subtype || tc_a->tflags!=tc_b->tflags) { IPFW_UH_WUNLOCK(ch); return (EINVAL); } /* Check limits before swap */ if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) || (tc_b->limit != 0 && tc_a->count > tc_b->limit)) { IPFW_UH_WUNLOCK(ch); return (EFBIG); } /* Check if one of the tables is readonly */ if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) { IPFW_UH_WUNLOCK(ch); return (EACCES); } /* Notify we're going to swap */ rollback_toperation_state(ch, tc_a); rollback_toperation_state(ch, tc_b); /* Everything is fine, prepare to swap */ tablestate = (struct table_info *)ch->tablestate; ti = tablestate[tc_a->no.kidx]; ta = tc_a->ta; astate = tc_a->astate; count = tc_a->count; IPFW_WLOCK(ch); /* a <- b */ tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx]; tc_a->ta = tc_b->ta; tc_a->astate = tc_b->astate; tc_a->count = tc_b->count; /* b <- a */ tablestate[tc_b->no.kidx] = ti; tc_b->ta = ta; tc_b->astate = astate; tc_b->count = count; IPFW_WUNLOCK(ch); /* Ensure tc.ti copies are in sync */ tc_a->ti_copy = tablestate[tc_a->no.kidx]; tc_b->ti_copy = tablestate[tc_b->no.kidx]; /* Notify both tables on @ti change */ if (tc_a->ta->change_ti != NULL) tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]); if (tc_b->ta->change_ti != NULL) tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]); IPFW_UH_WUNLOCK(ch); return (0); } /* * Destroys table specified by @ti. * Data layout (v0)(current): * Request: [ ip_fw3_opheader ] * * Returns 0 on success */ static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti) { struct namedobj_instance *ni; struct table_config *tc; IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); if ((tc = find_table(ni, ti)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } /* Do not permit destroying referenced tables */ if (tc->no.refcnt > 0) { IPFW_UH_WUNLOCK(ch); return (EBUSY); } IPFW_WLOCK(ch); unlink_table(ch, tc); IPFW_WUNLOCK(ch); /* Free obj index */ if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0) printf("Error unlinking kidx %d from table %s\n", tc->no.kidx, tc->tablename); /* Unref values used in tables while holding UH lock */ ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy); IPFW_UH_WUNLOCK(ch); free_table_config(ni, tc); return (0); } static uint32_t roundup2p(uint32_t v) { v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; v++; return (v); } /* * Grow tables index. * * Returns 0 on success. */ int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables) { unsigned int ntables_old, tbl; struct namedobj_instance *ni; void *new_idx, *old_tablestate, *tablestate; struct table_info *ti; struct table_config *tc; int i, new_blocks; /* Check new value for validity */ if (ntables == 0) return (EINVAL); if (ntables > IPFW_TABLES_MAX) ntables = IPFW_TABLES_MAX; /* Alight to nearest power of 2 */ ntables = (unsigned int)roundup2p(ntables); /* Allocate new pointers */ tablestate = malloc(ntables * sizeof(struct table_info), M_IPFW, M_WAITOK | M_ZERO); ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks); IPFW_UH_WLOCK(ch); tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables; ni = CHAIN_TO_NI(ch); /* Temporary restrict decreasing max_tables */ if (ntables < V_fw_tables_max) { /* * FIXME: Check if we really can shrink */ IPFW_UH_WUNLOCK(ch); return (EINVAL); } /* Copy table info/indices */ memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl); ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks); IPFW_WLOCK(ch); /* Change pointers */ old_tablestate = ch->tablestate; ch->tablestate = tablestate; ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks); ntables_old = V_fw_tables_max; V_fw_tables_max = ntables; IPFW_WUNLOCK(ch); /* Notify all consumers that their @ti pointer has changed */ ti = (struct table_info *)ch->tablestate; for (i = 0; i < tbl; i++, ti++) { if (ti->lookup == NULL) continue; tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i); if (tc == NULL || tc->ta->change_ti == NULL) continue; tc->ta->change_ti(tc->astate, ti); } IPFW_UH_WUNLOCK(ch); /* Free old pointers */ free(old_tablestate, M_IPFW); ipfw_objhash_bitmap_free(new_idx, new_blocks); return (0); } /* * Switch between "set 0" and "rule's set" table binding, * Check all ruleset bindings and permits changing * IFF each binding has both rule AND table in default set (set 0). * * Returns 0 on success. */ int ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets) { struct namedobj_instance *ni; struct named_object *no; struct ip_fw *rule; ipfw_insn *cmd; int cmdlen, i, l; uint16_t kidx; IPFW_UH_WLOCK(ch); if (V_fw_tables_sets == sets) { IPFW_UH_WUNLOCK(ch); return (0); } ni = CHAIN_TO_NI(ch); /* * Scan all rules and examine tables opcodes. */ for (i = 0; i < ch->n_rules; i++) { rule = ch->map[i]; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (classify_opcode_kidx(cmd, &kidx) != 0) continue; no = ipfw_objhash_lookup_kidx(ni, kidx); /* Check if both table object and rule has the set 0 */ if (no->set != 0 || rule->set != 0) { IPFW_UH_WUNLOCK(ch); return (EBUSY); } } } V_fw_tables_sets = sets; IPFW_UH_WUNLOCK(ch); return (0); } /* * Lookup an IP @addr in table @tbl. * Stores found value in @val. * * Returns 1 if @addr was found. */ int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint32_t *val) { struct table_info *ti; ti = KIDX_TO_TI(ch, tbl); return (ti->lookup(ti, &addr, sizeof(in_addr_t), val)); } /* * Lookup an arbtrary key @paddr of legth @plen in table @tbl. * Stores found value in @val. * * Returns 1 if key was found. */ int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, void *paddr, uint32_t *val) { struct table_info *ti; ti = KIDX_TO_TI(ch, tbl); return (ti->lookup(ti, paddr, plen, val)); } /* * Info/List/dump support for tables. * */ /* * High-level 'get' cmds sysctl handlers */ /* * Lists all tables currently available in kernel. * Data layout (v0)(current): * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ] * * Returns 0 on success */ static int list_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_lheader *olh; int error; olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); if (olh == NULL) return (EINVAL); if (sd->valsize < olh->size) return (EINVAL); IPFW_UH_RLOCK(ch); error = export_tables(ch, olh, sd); IPFW_UH_RUNLOCK(ch); return (error); } /* * Store table info to buffer provided by @sd. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_xtable_info(empty)] * Reply: [ ipfw_obj_header ipfw_xtable_info ] * * Returns 0 on success. */ static int describe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_header *oh; struct table_config *tc; struct tid_info ti; size_t sz; sz = sizeof(*oh) + sizeof(ipfw_xtable_info); oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); if (oh == NULL) return (EINVAL); objheader_to_ti(oh, &ti); IPFW_UH_RLOCK(ch); if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { IPFW_UH_RUNLOCK(ch); return (ESRCH); } export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1)); IPFW_UH_RUNLOCK(ch); return (0); } /* * Modifies existing table. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_xtable_info ] * * Returns 0 on success */ static int modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_header *oh; ipfw_xtable_info *i; char *tname; struct tid_info ti; struct namedobj_instance *ni; struct table_config *tc; if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info)) return (EINVAL); oh = (struct _ipfw_obj_header *)sd->kbuf; i = (ipfw_xtable_info *)(oh + 1); /* * Verify user-supplied strings. * Check for null-terminated/zero-length strings/ */ tname = oh->ntlv.name; if (check_table_name(tname) != 0) return (EINVAL); objheader_to_ti(oh, &ti); ti.type = i->type; IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); if ((tc = find_table(ni, &ti)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } /* Do not support any modifications for readonly tables */ if ((tc->ta->flags & TA_FLAG_READONLY) != 0) { IPFW_UH_WUNLOCK(ch); return (EACCES); } if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0) tc->limit = i->limit; if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0) tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0); IPFW_UH_WUNLOCK(ch); return (0); } /* * Creates new table. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_xtable_info ] * * Returns 0 on success */ static int create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_header *oh; ipfw_xtable_info *i; char *tname, *aname; struct tid_info ti; struct namedobj_instance *ni; if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info)) return (EINVAL); oh = (struct _ipfw_obj_header *)sd->kbuf; i = (ipfw_xtable_info *)(oh + 1); /* * Verify user-supplied strings. * Check for null-terminated/zero-length strings/ */ tname = oh->ntlv.name; aname = i->algoname; if (check_table_name(tname) != 0 || strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname)) return (EINVAL); if (aname[0] == '\0') { /* Use default algorithm */ aname = NULL; } objheader_to_ti(oh, &ti); ti.type = i->type; ni = CHAIN_TO_NI(ch); IPFW_UH_RLOCK(ch); if (find_table(ni, &ti) != NULL) { IPFW_UH_RUNLOCK(ch); return (EEXIST); } IPFW_UH_RUNLOCK(ch); return (create_table_internal(ch, &ti, aname, i, NULL, 0)); } /* * Creates new table based on @ti and @aname. * * Relies on table name checking inside find_name_tlv() * Assume @aname to be checked and valid. * Stores allocated table kidx inside @pkidx (if non-NULL). * Reference created table if @compat is non-zero. * * Returns 0 on success. */ static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int compat) { struct namedobj_instance *ni; struct table_config *tc, *tc_new, *tmp; struct table_algo *ta; uint16_t kidx; ni = CHAIN_TO_NI(ch); ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname); if (ta == NULL) return (ENOTSUP); tc = alloc_table_config(ch, ti, ta, aname, i->tflags); if (tc == NULL) return (ENOMEM); tc->vmask = i->vmask; tc->limit = i->limit; if (ta->flags & TA_FLAG_READONLY) tc->locked = 1; else tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0; IPFW_UH_WLOCK(ch); /* Check if table has been already created */ tc_new = find_table(ni, ti); if (tc_new != NULL) { /* * Compat: do not fail if we're * requesting to create existing table * which has the same type */ if (compat == 0 || tc_new->no.subtype != tc->no.subtype) { IPFW_UH_WUNLOCK(ch); free_table_config(ni, tc); return (EEXIST); } /* Exchange tc and tc_new for proper refcounting & freeing */ tmp = tc; tc = tc_new; tc_new = tmp; } else { /* New table */ if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) { IPFW_UH_WUNLOCK(ch); printf("Unable to allocate table index." " Consider increasing net.inet.ip.fw.tables_max"); free_table_config(ni, tc); return (EBUSY); } tc->no.kidx = kidx; tc->no.etlv = IPFW_TLV_TBL_NAME; IPFW_WLOCK(ch); link_table(ch, tc); IPFW_WUNLOCK(ch); } if (compat != 0) tc->no.refcnt++; if (pkidx != NULL) *pkidx = tc->no.kidx; IPFW_UH_WUNLOCK(ch); if (tc_new != NULL) free_table_config(ni, tc_new); return (0); } static void ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti) { memset(ti, 0, sizeof(struct tid_info)); ti->set = ntlv->set; ti->uidx = ntlv->idx; ti->tlvs = ntlv; ti->tlen = ntlv->head.length; } static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti) { ntlv_to_ti(&oh->ntlv, ti); } struct namedobj_instance * ipfw_get_table_objhash(struct ip_fw_chain *ch) { return (CHAIN_TO_NI(ch)); } /* * Exports basic table info as name TLV. * Used inside dump_static_rules() to provide info * about all tables referenced by current ruleset. * * Returns 0 on success. */ int ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx, struct sockopt_data *sd) { struct namedobj_instance *ni; struct named_object *no; ipfw_obj_ntlv *ntlv; ni = CHAIN_TO_NI(ch); no = ipfw_objhash_lookup_kidx(ni, kidx); KASSERT(no != NULL, ("invalid table kidx passed")); ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv)); if (ntlv == NULL) return (ENOMEM); ntlv->head.type = IPFW_TLV_TBL_NAME; ntlv->head.length = sizeof(*ntlv); ntlv->idx = no->kidx; strlcpy(ntlv->name, no->name, sizeof(ntlv->name)); return (0); } struct dump_args { struct ip_fw_chain *ch; struct table_info *ti; struct table_config *tc; struct sockopt_data *sd; uint32_t cnt; uint16_t uidx; int error; uint32_t size; ipfw_table_entry *ent; ta_foreach_f *f; void *farg; ipfw_obj_tentry tent; }; static int count_ext_entries(void *e, void *arg) { struct dump_args *da; da = (struct dump_args *)arg; da->cnt++; return (0); } /* * Gets number of items from table either using * internal counter or calling algo callback for * externally-managed tables. * * Returns number of records. */ static uint32_t table_get_count(struct ip_fw_chain *ch, struct table_config *tc) { struct table_info *ti; struct table_algo *ta; struct dump_args da; ti = KIDX_TO_TI(ch, tc->no.kidx); ta = tc->ta; /* Use internal counter for self-managed tables */ if ((ta->flags & TA_FLAG_READONLY) == 0) return (tc->count); /* Use callback to quickly get number of items */ if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0) return (ta->get_count(tc->astate, ti)); /* Count number of iterms ourselves */ memset(&da, 0, sizeof(da)); ta->foreach(tc->astate, ti, count_ext_entries, &da); return (da.cnt); } /* * Exports table @tc info into standard ipfw_xtable_info format. */ static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc, ipfw_xtable_info *i) { struct table_info *ti; struct table_algo *ta; i->type = tc->no.subtype; i->tflags = tc->tflags; i->vmask = tc->vmask; i->set = tc->no.set; i->kidx = tc->no.kidx; i->refcnt = tc->no.refcnt; i->count = table_get_count(ch, tc); i->limit = tc->limit; i->flags |= (tc->locked != 0) ? IPFW_TGFLAGS_LOCKED : 0; - i->size = tc->count * sizeof(ipfw_obj_tentry); + i->size = i->count * sizeof(ipfw_obj_tentry); i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info); strlcpy(i->tablename, tc->tablename, sizeof(i->tablename)); ti = KIDX_TO_TI(ch, tc->no.kidx); ta = tc->ta; if (ta->print_config != NULL) { /* Use algo function to print table config to string */ ta->print_config(tc->astate, ti, i->algoname, sizeof(i->algoname)); } else strlcpy(i->algoname, ta->name, sizeof(i->algoname)); /* Dump algo-specific data, if possible */ if (ta->dump_tinfo != NULL) { ta->dump_tinfo(tc->astate, ti, &i->ta_info); i->ta_info.flags |= IPFW_TATFLAGS_DATA; } } struct dump_table_args { struct ip_fw_chain *ch; struct sockopt_data *sd; }; static void export_table_internal(struct namedobj_instance *ni, struct named_object *no, void *arg) { ipfw_xtable_info *i; struct dump_table_args *dta; dta = (struct dump_table_args *)arg; i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i)); KASSERT(i != 0, ("previously checked buffer is not enough")); export_table_info(dta->ch, (struct table_config *)no, i); } /* * Export all tables as ipfw_xtable_info structures to * storage provided by @sd. * * If supplied buffer is too small, fills in required size * and returns ENOMEM. * Returns 0 on success. */ static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh, struct sockopt_data *sd) { uint32_t size; uint32_t count; struct dump_table_args dta; count = ipfw_objhash_count(CHAIN_TO_NI(ch)); size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader); /* Fill in header regadless of buffer size */ olh->count = count; olh->objsize = sizeof(ipfw_xtable_info); if (size > olh->size) { olh->size = size; return (ENOMEM); } olh->size = size; dta.ch = ch; dta.sd = sd; ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta); return (0); } /* * Dumps all table data * Data layout (v1)(current): * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ] * * Returns 0 on success */ static int dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_header *oh; ipfw_xtable_info *i; struct tid_info ti; struct table_config *tc; struct table_algo *ta; struct dump_args da; uint32_t sz; sz = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info); oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); if (oh == NULL) return (EINVAL); i = (ipfw_xtable_info *)(oh + 1); objheader_to_ti(oh, &ti); IPFW_UH_RLOCK(ch); if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { IPFW_UH_RUNLOCK(ch); return (ESRCH); } export_table_info(ch, tc, i); if (sd->valsize < i->size) { /* * Submitted buffer size is not enough. * WE've already filled in @i structure with * relevant table info including size, so we * can return. Buffer will be flushed automatically. */ IPFW_UH_RUNLOCK(ch); return (ENOMEM); } /* * Do the actual dump in eXtended format */ memset(&da, 0, sizeof(da)); da.ch = ch; da.ti = KIDX_TO_TI(ch, tc->no.kidx); da.tc = tc; da.sd = sd; ta = tc->ta; ta->foreach(tc->astate, da.ti, dump_table_tentry, &da); IPFW_UH_RUNLOCK(ch); return (da.error); } /* * Dumps all table data * Data layout (version 0)(legacy): * Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE() * Reply: [ ipfw_xtable ipfw_table_xentry x N ] * * Returns 0 on success */ static int dump_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_xtable *xtbl; struct tid_info ti; struct table_config *tc; struct table_algo *ta; struct dump_args da; size_t sz, count; xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable)); if (xtbl == NULL) return (EINVAL); memset(&ti, 0, sizeof(ti)); ti.uidx = xtbl->tbl; IPFW_UH_RLOCK(ch); if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { IPFW_UH_RUNLOCK(ch); return (0); } count = table_get_count(ch, tc); sz = count * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable); xtbl->cnt = count; xtbl->size = sz; xtbl->type = tc->no.subtype; xtbl->tbl = ti.uidx; if (sd->valsize < sz) { /* * Submitted buffer size is not enough. * WE've already filled in @i structure with * relevant table info including size, so we * can return. Buffer will be flushed automatically. */ IPFW_UH_RUNLOCK(ch); return (ENOMEM); } /* Do the actual dump in eXtended format */ memset(&da, 0, sizeof(da)); da.ch = ch; da.ti = KIDX_TO_TI(ch, tc->no.kidx); da.tc = tc; da.sd = sd; ta = tc->ta; ta->foreach(tc->astate, da.ti, dump_table_xentry, &da); IPFW_UH_RUNLOCK(ch); return (0); } /* * Legacy function to retrieve number of items in table. */ static int get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { uint32_t *tbl; struct tid_info ti; size_t sz; int error; sz = sizeof(*op3) + sizeof(uint32_t); op3 = (ip_fw3_opheader *)ipfw_get_sopt_header(sd, sz); if (op3 == NULL) return (EINVAL); tbl = (uint32_t *)(op3 + 1); memset(&ti, 0, sizeof(ti)); ti.uidx = *tbl; IPFW_UH_RLOCK(ch); error = ipfw_count_xtable(ch, &ti, tbl); IPFW_UH_RUNLOCK(ch); return (error); } /* * Legacy IP_FW_TABLE_GETSIZE handler */ int ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) { struct table_config *tc; if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) return (ESRCH); *cnt = table_get_count(ch, tc); return (0); } /* * Legacy IP_FW_TABLE_XGETSIZE handler */ int ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) { struct table_config *tc; uint32_t count; if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) { *cnt = 0; return (0); /* 'table all list' requires success */ } count = table_get_count(ch, tc); *cnt = count * sizeof(ipfw_table_xentry); if (count > 0) *cnt += sizeof(ipfw_xtable); return (0); } static int dump_table_entry(void *e, void *arg) { struct dump_args *da; struct table_config *tc; struct table_algo *ta; ipfw_table_entry *ent; struct table_value *pval; int error; da = (struct dump_args *)arg; tc = da->tc; ta = tc->ta; /* Out of memory, returning */ if (da->cnt == da->size) return (1); ent = da->ent++; ent->tbl = da->uidx; da->cnt++; error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent); if (error != 0) return (error); ent->addr = da->tent.k.addr.s_addr; ent->masklen = da->tent.masklen; pval = get_table_value(da->ch, da->tc, da->tent.v.kidx); ent->value = ipfw_export_table_value_legacy(pval); return (0); } /* * Dumps table in pre-8.1 legacy format. */ int ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti, ipfw_table *tbl) { struct table_config *tc; struct table_algo *ta; struct dump_args da; tbl->cnt = 0; if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) return (0); /* XXX: We should return ESRCH */ ta = tc->ta; /* This dump format supports IPv4 only */ if (tc->no.subtype != IPFW_TABLE_ADDR) return (0); memset(&da, 0, sizeof(da)); da.ch = ch; da.ti = KIDX_TO_TI(ch, tc->no.kidx); da.tc = tc; da.ent = &tbl->ent[0]; da.size = tbl->size; tbl->cnt = 0; ta->foreach(tc->astate, da.ti, dump_table_entry, &da); tbl->cnt = da.cnt; return (0); } /* * Dumps table entry in eXtended format (v1)(current). */ static int dump_table_tentry(void *e, void *arg) { struct dump_args *da; struct table_config *tc; struct table_algo *ta; struct table_value *pval; ipfw_obj_tentry *tent; int error; da = (struct dump_args *)arg; tc = da->tc; ta = tc->ta; tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent)); /* Out of memory, returning */ if (tent == NULL) { da->error = ENOMEM; return (1); } tent->head.length = sizeof(ipfw_obj_tentry); tent->idx = da->uidx; error = ta->dump_tentry(tc->astate, da->ti, e, tent); if (error != 0) return (error); pval = get_table_value(da->ch, da->tc, tent->v.kidx); ipfw_export_table_value_v1(pval, &tent->v.value); return (0); } /* * Dumps table entry in eXtended format (v0). */ static int dump_table_xentry(void *e, void *arg) { struct dump_args *da; struct table_config *tc; struct table_algo *ta; ipfw_table_xentry *xent; ipfw_obj_tentry *tent; struct table_value *pval; int error; da = (struct dump_args *)arg; tc = da->tc; ta = tc->ta; xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent)); /* Out of memory, returning */ if (xent == NULL) return (1); xent->len = sizeof(ipfw_table_xentry); xent->tbl = da->uidx; memset(&da->tent, 0, sizeof(da->tent)); tent = &da->tent; error = ta->dump_tentry(tc->astate, da->ti, e, tent); if (error != 0) return (error); /* Convert current format to previous one */ xent->masklen = tent->masklen; pval = get_table_value(da->ch, da->tc, da->tent.v.kidx); xent->value = ipfw_export_table_value_legacy(pval); /* Apply some hacks */ if (tc->no.subtype == IPFW_TABLE_ADDR && tent->subtype == AF_INET) { xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr; xent->flags = IPFW_TCF_INET; } else memcpy(&xent->k, &tent->k, sizeof(xent->k)); return (0); } /* * Helper function to export table algo data * to tentry format before calling user function. * * Returns 0 on success. */ static int prepare_table_tentry(void *e, void *arg) { struct dump_args *da; struct table_config *tc; struct table_algo *ta; int error; da = (struct dump_args *)arg; tc = da->tc; ta = tc->ta; error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent); if (error != 0) return (error); da->f(&da->tent, da->farg); return (0); } /* * Allow external consumers to read table entries in standard format. */ int ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx, ta_foreach_f *f, void *arg) { struct namedobj_instance *ni; struct table_config *tc; struct table_algo *ta; struct dump_args da; ni = CHAIN_TO_NI(ch); tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx); if (tc == NULL) return (ESRCH); ta = tc->ta; memset(&da, 0, sizeof(da)); da.ch = ch; da.ti = KIDX_TO_TI(ch, tc->no.kidx); da.tc = tc; da.f = f; da.farg = arg; ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da); return (0); } /* * Table algorithms */ /* * Finds algoritm by index, table type or supplied name. * * Returns pointer to algo or NULL. */ static struct table_algo * find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name) { int i, l; struct table_algo *ta; if (ti->type > IPFW_TABLE_MAXTYPE) return (NULL); /* Search by index */ if (ti->atype != 0) { if (ti->atype > tcfg->algo_count) return (NULL); return (tcfg->algo[ti->atype]); } if (name == NULL) { /* Return default algorithm for given type if set */ return (tcfg->def_algo[ti->type]); } /* Search by name */ /* TODO: better search */ for (i = 1; i <= tcfg->algo_count; i++) { ta = tcfg->algo[i]; /* * One can supply additional algorithm * parameters so we compare only the first word * of supplied name: * 'addr:chash hsize=32' * '^^^^^^^^^' * */ l = strlen(ta->name); if (strncmp(name, ta->name, l) != 0) continue; if (name[l] != '\0' && name[l] != ' ') continue; /* Check if we're requesting proper table type */ if (ti->type != 0 && ti->type != ta->type) return (NULL); return (ta); } return (NULL); } /* * Register new table algo @ta. * Stores algo id inside @idx. * * Returns 0 on success. */ int ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size, int *idx) { struct tables_config *tcfg; struct table_algo *ta_new; size_t sz; if (size > sizeof(struct table_algo)) return (EINVAL); /* Check for the required on-stack size for add/del */ sz = roundup2(ta->ta_buf_size, sizeof(void *)); if (sz > TA_BUF_SZ) return (EINVAL); KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE")); /* Copy algorithm data to stable storage. */ ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO); memcpy(ta_new, ta, size); tcfg = CHAIN_TO_TCFG(ch); KASSERT(tcfg->algo_count < 255, ("Increase algo array size")); tcfg->algo[++tcfg->algo_count] = ta_new; ta_new->idx = tcfg->algo_count; /* Set algorithm as default one for given type */ if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 && tcfg->def_algo[ta_new->type] == NULL) tcfg->def_algo[ta_new->type] = ta_new; *idx = ta_new->idx; return (0); } /* * Unregisters table algo using @idx as id. * XXX: It is NOT safe to call this function in any place * other than ipfw instance destroy handler. */ void ipfw_del_table_algo(struct ip_fw_chain *ch, int idx) { struct tables_config *tcfg; struct table_algo *ta; tcfg = CHAIN_TO_TCFG(ch); KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d", idx, tcfg->algo_count)); ta = tcfg->algo[idx]; KASSERT(ta != NULL, ("algo idx %d is NULL", idx)); if (tcfg->def_algo[ta->type] == ta) tcfg->def_algo[ta->type] = NULL; free(ta, M_IPFW); } /* * Lists all table algorithms currently available. * Data layout (v0)(current): * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ] * * Returns 0 on success */ static int list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_lheader *olh; struct tables_config *tcfg; ipfw_ta_info *i; struct table_algo *ta; uint32_t count, n, size; olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); if (olh == NULL) return (EINVAL); if (sd->valsize < olh->size) return (EINVAL); IPFW_UH_RLOCK(ch); tcfg = CHAIN_TO_TCFG(ch); count = tcfg->algo_count; size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader); /* Fill in header regadless of buffer size */ olh->count = count; olh->objsize = sizeof(ipfw_ta_info); if (size > olh->size) { olh->size = size; IPFW_UH_RUNLOCK(ch); return (ENOMEM); } olh->size = size; for (n = 1; n <= count; n++) { i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i)); KASSERT(i != 0, ("previously checked buffer is not enough")); ta = tcfg->algo[n]; strlcpy(i->algoname, ta->name, sizeof(i->algoname)); i->type = ta->type; i->refcnt = ta->refcnt; } IPFW_UH_RUNLOCK(ch); return (0); } static int classify_srcdst(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { /* Basic IPv4/IPv6 or u32 lookups */ *puidx = cmd->arg1; /* Assume ADDR by default */ *ptype = IPFW_TABLE_ADDR; int v; if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) { /* * generic lookup. The key must be * in 32bit big-endian format. */ v = ((ipfw_insn_u32 *)cmd)->d[1]; switch (v) { case 0: case 1: /* IPv4 src/dst */ break; case 2: case 3: /* src/dst port */ *ptype = IPFW_TABLE_NUMBER; break; case 4: /* uid/gid */ *ptype = IPFW_TABLE_NUMBER; break; case 5: /* jid */ *ptype = IPFW_TABLE_NUMBER; break; case 6: /* dscp */ *ptype = IPFW_TABLE_NUMBER; break; } } return (0); } static int classify_via(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { ipfw_insn_if *cmdif; /* Interface table, possibly */ cmdif = (ipfw_insn_if *)cmd; if (cmdif->name[0] != '\1') return (1); *ptype = IPFW_TABLE_INTERFACE; *puidx = cmdif->p.kidx; return (0); } static int classify_flow(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { *puidx = cmd->arg1; *ptype = IPFW_TABLE_FLOW; return (0); } static void update_arg1(ipfw_insn *cmd, uint16_t idx) { cmd->arg1 = idx; } static void update_via(ipfw_insn *cmd, uint16_t idx) { ipfw_insn_if *cmdif; cmdif = (ipfw_insn_if *)cmd; cmdif->p.kidx = idx; } static int table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, struct named_object **pno) { struct table_config *tc; int error; IPFW_UH_WLOCK_ASSERT(ch); error = find_table_err(CHAIN_TO_NI(ch), ti, &tc); if (error != 0) return (error); *pno = &tc->no; return (0); } /* XXX: sets-sets! */ static struct named_object * table_findbykidx(struct ip_fw_chain *ch, uint16_t idx) { struct namedobj_instance *ni; struct table_config *tc; IPFW_UH_WLOCK_ASSERT(ch); ni = CHAIN_TO_NI(ch); tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, idx); KASSERT(tc != NULL, ("Table with index %d not found", idx)); return (&tc->no); } static struct opcode_obj_rewrite opcodes[] = { { O_IP_SRC_LOOKUP, IPFW_TLV_TBL_NAME, classify_srcdst, update_arg1, table_findbyname, table_findbykidx, create_table_compat }, { O_IP_DST_LOOKUP, IPFW_TLV_TBL_NAME, classify_srcdst, update_arg1, table_findbyname, table_findbykidx, create_table_compat }, { O_IP_FLOW_LOOKUP, IPFW_TLV_TBL_NAME, classify_flow, update_arg1, table_findbyname, table_findbykidx, create_table_compat }, { O_XMIT, IPFW_TLV_TBL_NAME, classify_via, update_via, table_findbyname, table_findbykidx, create_table_compat }, { O_RECV, IPFW_TLV_TBL_NAME, classify_via, update_via, table_findbyname, table_findbykidx, create_table_compat }, { O_VIA, IPFW_TLV_TBL_NAME, classify_via, update_via, table_findbyname, table_findbykidx, create_table_compat }, }; /* * Checks table name for validity. * Enforce basic length checks, the rest * should be done in userland. * * Returns 0 if name is considered valid. */ static int check_table_name(const char *name) { /* * TODO: do some more complicated checks */ return (ipfw_check_object_name_generic(name)); } /* * Find tablename TLV by @uid. * Check @tlvs for valid data inside. * * Returns pointer to found TLV or NULL. */ static ipfw_obj_ntlv * find_name_tlv(void *tlvs, int len, uint16_t uidx) { ipfw_obj_ntlv *ntlv; uintptr_t pa, pe; int l; pa = (uintptr_t)tlvs; pe = pa + len; l = 0; for (; pa < pe; pa += l) { ntlv = (ipfw_obj_ntlv *)pa; l = ntlv->head.length; if (l != sizeof(*ntlv)) return (NULL); if (ntlv->head.type != IPFW_TLV_TBL_NAME) continue; if (ntlv->idx != uidx) continue; if (check_table_name(ntlv->name) != 0) return (NULL); return (ntlv); } return (NULL); } /* * Finds table config based on either legacy index * or name in ntlv. * Note @ti structure contains unchecked data from userland. * * Returns 0 in success and fills in @tc with found config */ static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti, struct table_config **tc) { char *name, bname[16]; struct named_object *no; ipfw_obj_ntlv *ntlv; uint32_t set; if (ti->tlvs != NULL) { ntlv = find_name_tlv(ti->tlvs, ti->tlen, ti->uidx); if (ntlv == NULL) return (EINVAL); name = ntlv->name; /* * Use set provided by @ti instead of @ntlv one. * This is needed due to different sets behavior * controlled by V_fw_tables_sets. */ set = ti->set; } else { snprintf(bname, sizeof(bname), "%d", ti->uidx); name = bname; set = 0; } no = ipfw_objhash_lookup_name(ni, set, name); *tc = (struct table_config *)no; return (0); } /* * Finds table config based on either legacy index * or name in ntlv. * Note @ti structure contains unchecked data from userland. * * Returns pointer to table_config or NULL. */ static struct table_config * find_table(struct namedobj_instance *ni, struct tid_info *ti) { struct table_config *tc; if (find_table_err(ni, ti, &tc) != 0) return (NULL); return (tc); } /* * Allocate new table config structure using * specified @algo and @aname. * * Returns pointer to config or NULL. */ static struct table_config * alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti, struct table_algo *ta, char *aname, uint8_t tflags) { char *name, bname[16]; struct table_config *tc; int error; ipfw_obj_ntlv *ntlv; uint32_t set; if (ti->tlvs != NULL) { ntlv = find_name_tlv(ti->tlvs, ti->tlen, ti->uidx); if (ntlv == NULL) return (NULL); name = ntlv->name; set = ntlv->set; } else { /* Compat part: convert number to string representation */ snprintf(bname, sizeof(bname), "%d", ti->uidx); name = bname; set = 0; } tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO); tc->no.name = tc->tablename; tc->no.subtype = ta->type; tc->no.set = set; tc->tflags = tflags; tc->ta = ta; strlcpy(tc->tablename, name, sizeof(tc->tablename)); /* Set "shared" value type by default */ tc->vshared = 1; /* Preallocate data structures for new tables */ error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags); if (error != 0) { free(tc, M_IPFW); return (NULL); } return (tc); } /* * Destroys table state and config. */ static void free_table_config(struct namedobj_instance *ni, struct table_config *tc) { KASSERT(tc->linked == 0, ("free() on linked config")); /* UH lock MUST NOT be held */ /* * We're using ta without any locking/referencing. * TODO: fix this if we're going to use unloadable algos. */ tc->ta->destroy(tc->astate, &tc->ti_copy); free(tc, M_IPFW); } /* * Links @tc to @chain table named instance. * Sets appropriate type/states in @chain table info. */ static void link_table(struct ip_fw_chain *ch, struct table_config *tc) { struct namedobj_instance *ni; struct table_info *ti; uint16_t kidx; IPFW_UH_WLOCK_ASSERT(ch); IPFW_WLOCK_ASSERT(ch); ni = CHAIN_TO_NI(ch); kidx = tc->no.kidx; ipfw_objhash_add(ni, &tc->no); ti = KIDX_TO_TI(ch, kidx); *ti = tc->ti_copy; /* Notify algo on real @ti address */ if (tc->ta->change_ti != NULL) tc->ta->change_ti(tc->astate, ti); tc->linked = 1; tc->ta->refcnt++; } /* * Unlinks @tc from @chain table named instance. * Zeroes states in @chain and stores them in @tc. */ static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc) { struct namedobj_instance *ni; struct table_info *ti; uint16_t kidx; IPFW_UH_WLOCK_ASSERT(ch); IPFW_WLOCK_ASSERT(ch); ni = CHAIN_TO_NI(ch); kidx = tc->no.kidx; /* Clear state. @ti copy is already saved inside @tc */ ipfw_objhash_del(ni, &tc->no); ti = KIDX_TO_TI(ch, kidx); memset(ti, 0, sizeof(struct table_info)); tc->linked = 0; tc->ta->refcnt--; /* Notify algo on real @ti address */ if (tc->ta->change_ti != NULL) tc->ta->change_ti(tc->astate, NULL); } struct swap_table_args { int set; int new_set; int mv; }; /* * Change set for each matching table. * * Ensure we dispatch each table once by setting/checking ochange * fields. */ static void swap_table_set(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct table_config *tc; struct swap_table_args *sta; tc = (struct table_config *)no; sta = (struct swap_table_args *)arg; if (no->set != sta->set && (no->set != sta->new_set || sta->mv != 0)) return; if (tc->ochanged != 0) return; tc->ochanged = 1; ipfw_objhash_del(ni, no); if (no->set == sta->set) no->set = sta->new_set; else no->set = sta->set; ipfw_objhash_add(ni, no); } /* * Cleans up ochange field for all tables. */ static void clean_table_set_data(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct table_config *tc; struct swap_table_args *sta; tc = (struct table_config *)no; sta = (struct swap_table_args *)arg; tc->ochanged = 0; } /* * Swaps tables within two sets. */ void ipfw_swap_tables_sets(struct ip_fw_chain *ch, uint32_t set, uint32_t new_set, int mv) { struct swap_table_args sta; IPFW_UH_WLOCK_ASSERT(ch); sta.set = set; sta.new_set = new_set; sta.mv = mv; ipfw_objhash_foreach(CHAIN_TO_NI(ch), swap_table_set, &sta); ipfw_objhash_foreach(CHAIN_TO_NI(ch), clean_table_set_data, &sta); } /* * Move all tables which are reference by rules in @rr to set @new_set. * Makes sure that all relevant tables are referenced ONLLY by given rules. * * Retuns 0 on success, */ int ipfw_move_tables_sets(struct ip_fw_chain *ch, ipfw_range_tlv *rt, uint32_t new_set) { struct ip_fw *rule; struct table_config *tc; struct named_object *no; struct namedobj_instance *ni; int bad, i, l, cmdlen; uint16_t kidx; ipfw_insn *cmd; IPFW_UH_WLOCK_ASSERT(ch); ni = CHAIN_TO_NI(ch); /* Stage 1: count number of references by given rules */ for (i = 0; i < ch->n_rules - 1; i++) { rule = ch->map[i]; if (ipfw_match_range(rule, rt) == 0) continue; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (classify_opcode_kidx(cmd, &kidx) != 0) continue; no = ipfw_objhash_lookup_kidx(ni, kidx); KASSERT(no != NULL, ("objhash lookup failed on index %d", kidx)); tc = (struct table_config *)no; tc->ocount++; } } /* Stage 2: verify "ownership" */ bad = 0; for (i = 0; i < ch->n_rules - 1; i++) { rule = ch->map[i]; if (ipfw_match_range(rule, rt) == 0) continue; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (classify_opcode_kidx(cmd, &kidx) != 0) continue; no = ipfw_objhash_lookup_kidx(ni, kidx); KASSERT(no != NULL, ("objhash lookup failed on index %d", kidx)); tc = (struct table_config *)no; if (tc->no.refcnt != tc->ocount) { /* * Number of references differ: * Other rule(s) are holding reference to given * table, so it is not possible to change its set. * * Note that refcnt may account * references to some going-to-be-added rules. * Since we don't know their numbers (and event * if they will be added) it is perfectly OK * to return error here. */ bad = 1; break; } } if (bad != 0) break; } /* Stage 3: change set or cleanup */ for (i = 0; i < ch->n_rules - 1; i++) { rule = ch->map[i]; if (ipfw_match_range(rule, rt) == 0) continue; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (classify_opcode_kidx(cmd, &kidx) != 0) continue; no = ipfw_objhash_lookup_kidx(ni, kidx); KASSERT(no != NULL, ("objhash lookup failed on index %d", kidx)); tc = (struct table_config *)no; tc->ocount = 0; if (bad != 0) continue; /* Actually change set. */ ipfw_objhash_del(ni, no); no->set = new_set; ipfw_objhash_add(ni, no); } } return (bad); } /* * Finds and bumps refcount for objects referenced by given @rule. * Auto-creates non-existing tables. * Fills in @oib array with userland/kernel indexes. * * Returns 0 on success. */ static int ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule, struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti) { int cmdlen, error, l, numnew; ipfw_insn *cmd; struct obj_idx *pidx; int found, unresolved; pidx = oib; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; error = 0; numnew = 0; found = 0; unresolved = 0; IPFW_UH_WLOCK(ch); /* Increase refcount on each existing referenced table. */ for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); error = ref_opcode_object(ch, cmd, ti, pidx, &found, &unresolved); if (error != 0) break; if (found || unresolved) { pidx->off = rule->cmd_len - l; pidx++; } /* * Compability stuff for old clients: * prepare to manually create non-existing objects. */ if (unresolved) numnew++; } if (error != 0) { /* Unref everything we have already done */ unref_oib_objects(ch, rule->cmd, oib, pidx); IPFW_UH_WUNLOCK(ch); return (error); } IPFW_UH_WUNLOCK(ch); /* Perform auto-creation for non-existing objects */ if (numnew != 0) error = create_objects_compat(ch, rule->cmd, oib, pidx, ti); /* Calculate real number of dynamic objects */ ci->object_opcodes = (uint16_t)(pidx - oib); return (error); } /* * Checks is opcode is referencing table of appropriate type. * Adds reference count for found table if true. * Rewrites user-supplied opcode values with kernel ones. * * Returns 0 on success and appropriate error code otherwise. */ int ipfw_rewrite_rule_uidx(struct ip_fw_chain *chain, struct rule_check_info *ci) { int error; ipfw_insn *cmd; uint8_t type; struct obj_idx *p, *pidx_first, *pidx_last; struct tid_info ti; /* * Prepare an array for storing opcode indices. * Use stack allocation by default. */ if (ci->object_opcodes <= (sizeof(ci->obuf)/sizeof(ci->obuf[0]))) { /* Stack */ pidx_first = ci->obuf; } else pidx_first = malloc(ci->object_opcodes * sizeof(struct obj_idx), M_IPFW, M_WAITOK | M_ZERO); error = 0; type = 0; memset(&ti, 0, sizeof(ti)); /* * Use default set for looking up tables (old way) or * use set rule is assigned to (new way). */ ti.set = (V_fw_tables_sets != 0) ? ci->krule->set : 0; if (ci->ctlv != NULL) { ti.tlvs = (void *)(ci->ctlv + 1); ti.tlen = ci->ctlv->head.length - sizeof(ipfw_obj_ctlv); } /* Reference all used tables and other objects */ error = ref_rule_objects(chain, ci->krule, ci, pidx_first, &ti); if (error != 0) goto free; /* * Note that ref_rule_objects() might have updated ci->object_opcodes * to reflect actual number of object opcodes. */ /* Perform rule rewrite */ p = pidx_first; pidx_last = pidx_first + ci->object_opcodes; for (p = pidx_first; p < pidx_last; p++) { cmd = ci->krule->cmd + p->off; update_opcode_kidx(cmd, p->kidx); } free: if (pidx_first != ci->obuf) free(pidx_first, M_IPFW); return (error); } static struct ipfw_sopt_handler scodes[] = { { IP_FW_TABLE_XCREATE, 0, HDIR_SET, create_table }, { IP_FW_TABLE_XDESTROY, 0, HDIR_SET, flush_table_v0 }, { IP_FW_TABLE_XFLUSH, 0, HDIR_SET, flush_table_v0 }, { IP_FW_TABLE_XMODIFY, 0, HDIR_BOTH, modify_table }, { IP_FW_TABLE_XINFO, 0, HDIR_GET, describe_table }, { IP_FW_TABLES_XLIST, 0, HDIR_GET, list_tables }, { IP_FW_TABLE_XLIST, 0, HDIR_GET, dump_table_v0 }, { IP_FW_TABLE_XLIST, 1, HDIR_GET, dump_table_v1 }, { IP_FW_TABLE_XADD, 0, HDIR_BOTH, manage_table_ent_v0 }, { IP_FW_TABLE_XADD, 1, HDIR_BOTH, manage_table_ent_v1 }, { IP_FW_TABLE_XDEL, 0, HDIR_BOTH, manage_table_ent_v0 }, { IP_FW_TABLE_XDEL, 1, HDIR_BOTH, manage_table_ent_v1 }, { IP_FW_TABLE_XFIND, 0, HDIR_GET, find_table_entry }, { IP_FW_TABLE_XSWAP, 0, HDIR_SET, swap_table }, { IP_FW_TABLES_ALIST, 0, HDIR_GET, list_table_algo }, { IP_FW_TABLE_XGETSIZE, 0, HDIR_GET, get_table_size }, }; static void destroy_table_locked(struct namedobj_instance *ni, struct named_object *no, void *arg) { unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no); if (ipfw_objhash_free_idx(ni, no->kidx) != 0) printf("Error unlinking kidx %d from table %s\n", no->kidx, no->name); free_table_config(ni, (struct table_config *)no); } /* * Shuts tables module down. */ void ipfw_destroy_tables(struct ip_fw_chain *ch, int last) { IPFW_DEL_SOPT_HANDLER(last, scodes); IPFW_DEL_OBJ_REWRITER(last, opcodes); /* Remove all tables from working set */ IPFW_UH_WLOCK(ch); IPFW_WLOCK(ch); ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch); IPFW_WUNLOCK(ch); IPFW_UH_WUNLOCK(ch); /* Free pointers itself */ free(ch->tablestate, M_IPFW); ipfw_table_value_destroy(ch, last); ipfw_table_algo_destroy(ch); ipfw_objhash_destroy(CHAIN_TO_NI(ch)); free(CHAIN_TO_TCFG(ch), M_IPFW); } /* * Starts tables module. */ int ipfw_init_tables(struct ip_fw_chain *ch, int first) { struct tables_config *tcfg; /* Allocate pointers */ ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info), M_IPFW, M_WAITOK | M_ZERO); tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO); tcfg->namehash = ipfw_objhash_create(V_fw_tables_max); ch->tblcfg = tcfg; ipfw_table_value_init(ch, first); ipfw_table_algo_init(ch); IPFW_ADD_OBJ_REWRITER(first, opcodes); IPFW_ADD_SOPT_HANDLER(first, scodes); return (0); } Index: projects/clang380-import/sys/netpfil/ipfw/ip_fw_table_algo.c =================================================================== --- projects/clang380-import/sys/netpfil/ipfw/ip_fw_table_algo.c (revision 293686) +++ projects/clang380-import/sys/netpfil/ipfw/ip_fw_table_algo.c (revision 293687) @@ -1,4082 +1,4106 @@ /*- * Copyright (c) 2014 Yandex LLC * Copyright (c) 2014 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Lookup table algorithms. * */ #include "opt_ipfw.h" #include "opt_inet.h" #ifndef INET #error IPFIREWALL requires INET. #endif /* INET */ #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include /* ip_fw.h requires IFNAMSIZ */ #include #include #include +#include #include /* struct ipfw_rule_ref */ #include +#include #include #include /* * IPFW table lookup algorithms. * * What is needed to add another table algo? * * Algo init: * * struct table_algo has to be filled with: * name: "type:algoname" format, e.g. "addr:radix". Currently * there are the following types: "addr", "iface", "number" and "flow". * type: one of IPFW_TABLE_* types * flags: one or more TA_FLAGS_* * ta_buf_size: size of structure used to store add/del item state. * Needs to be less than TA_BUF_SZ. * callbacks: see below for description. * * ipfw_add_table_algo / ipfw_del_table_algo has to be called * * Callbacks description: * * -init: request to initialize new table instance. * typedef int (ta_init)(struct ip_fw_chain *ch, void **ta_state, * struct table_info *ti, char *data, uint8_t tflags); * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success. * * Allocate all structures needed for normal operations. * * Caller may want to parse @data for some algo-specific * options provided by userland. * * Caller may want to save configuration state pointer to @ta_state * * Caller needs to save desired runtime structure pointer(s) * inside @ti fields. Note that it is not correct to save * @ti pointer at this moment. Use -change_ti hook for that. * * Caller has to fill in ti->lookup to appropriate function * pointer. * * * * -destroy: request to destroy table instance. * typedef void (ta_destroy)(void *ta_state, struct table_info *ti); * MANDATORY, unlocked. (M_WAITOK). * * Frees all table entries and all tables structures allocated by -init. * * * * -prepare_add: request to allocate state for adding new entry. * typedef int (ta_prepare_add)(struct ip_fw_chain *ch, struct tentry_info *tei, * void *ta_buf); * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success. * * Allocates state and fills it in with all necessary data (EXCEPT value) * from @tei to minimize operations needed to be done under WLOCK. * "value" field has to be copied to new entry in @add callback. * Buffer ta_buf of size ta->ta_buf_sz may be used to store * allocated state. * * * * -prepare_del: request to set state for deleting existing entry. * typedef int (ta_prepare_del)(struct ip_fw_chain *ch, struct tentry_info *tei, * void *ta_buf); * MANDATORY, locked, UH. (M_NOWAIT). Returns 0 on success. * * Buffer ta_buf of size ta->ta_buf_sz may be used to store * allocated state. Caller should use on-stack ta_buf allocation * instead of doing malloc(). * * * * -add: request to insert new entry into runtime/config structures. * typedef int (ta_add)(void *ta_state, struct table_info *ti, * struct tentry_info *tei, void *ta_buf, uint32_t *pnum); * MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success. * * Insert new entry using previously-allocated state in @ta_buf. * * @tei may have the following flags: * TEI_FLAGS_UPDATE: request to add or update entry. * TEI_FLAGS_DONTADD: request to update (but not add) entry. * * Caller is required to do the following: * copy real entry value from @tei * entry added: return 0, set 1 to @pnum * entry updated: return 0, store 0 to @pnum, store old value in @tei, * add TEI_FLAGS_UPDATED flag to @tei. * entry exists: return EEXIST * entry not found: return ENOENT * other error: return non-zero error code. * * * * -del: request to delete existing entry from runtime/config structures. * typedef int (ta_del)(void *ta_state, struct table_info *ti, * struct tentry_info *tei, void *ta_buf, uint32_t *pnum); * MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success. * * Delete entry using previously set up in @ta_buf. * * Caller is required to do the following: * entry deleted: return 0, set 1 to @pnum, store old value in @tei. * entry not found: return ENOENT * other error: return non-zero error code. * * * * -flush_entry: flush entry state created by -prepare_add / -del / others * typedef void (ta_flush_entry)(struct ip_fw_chain *ch, * struct tentry_info *tei, void *ta_buf); * MANDATORY, may be locked. (M_NOWAIT). * * Delete state allocated by: * -prepare_add (-add returned EEXIST|UPDATED) * -prepare_del (if any) * -del * * Caller is required to handle empty @ta_buf correctly. * * * -find_tentry: finds entry specified by key @tei * typedef int ta_find_tentry(void *ta_state, struct table_info *ti, * ipfw_obj_tentry *tent); * OPTIONAL, locked (UH). (M_NOWAIT). Returns 0 on success. * * Finds entry specified by given key. * * Caller is requred to do the following: * entry found: returns 0, export entry to @tent * entry not found: returns ENOENT * * * -need_modify: checks if @ti has enough space to hold another @count items. * typedef int (ta_need_modify)(void *ta_state, struct table_info *ti, * uint32_t count, uint64_t *pflags); * OPTIONAL, locked (UH). (M_NOWAIT). Returns 0 if has. * * Checks if given table has enough space to add @count items without * resize. Caller may use @pflags to store desired modification data. * * * * -prepare_mod: allocate structures for table modification. * typedef int (ta_prepare_mod)(void *ta_buf, uint64_t *pflags); * OPTIONAL(need_modify), unlocked. (M_WAITOK). Returns 0 on success. * * Allocate all needed state for table modification. Caller * should use `struct mod_item` to store new state in @ta_buf. * Up to TA_BUF_SZ (128 bytes) can be stored in @ta_buf. * * * * -fill_mod: copy some data to new state/ * typedef int (ta_fill_mod)(void *ta_state, struct table_info *ti, * void *ta_buf, uint64_t *pflags); * OPTIONAL(need_modify), locked (UH). (M_NOWAIT). Returns 0 on success. * * Copy as much data as we can to minimize changes under WLOCK. * For example, array can be merged inside this callback. * * * * -modify: perform final modification. * typedef void (ta_modify)(void *ta_state, struct table_info *ti, * void *ta_buf, uint64_t pflags); * OPTIONAL(need_modify), locked (UH+WLOCK). (M_NOWAIT). * * Performs all changes necessary to switch to new structures. * * Caller should save old pointers to @ta_buf storage. * * * * -flush_mod: flush table modification state. * typedef void (ta_flush_mod)(void *ta_buf); * OPTIONAL(need_modify), unlocked. (M_WAITOK). * * Performs flush for the following: * - prepare_mod (modification was not necessary) * - modify (for the old state) * * * * -change_gi: monitor table info pointer changes * typedef void (ta_change_ti)(void *ta_state, struct table_info *ti); * OPTIONAL, locked (UH). (M_NOWAIT). * * Called on @ti pointer changed. Called immediately after -init * to set initial state. * * * * -foreach: calls @f for each table entry * typedef void ta_foreach(void *ta_state, struct table_info *ti, * ta_foreach_f *f, void *arg); * MANDATORY, locked(UH). (M_NOWAIT). * * Runs callback with specified argument for each table entry, * Typically used for dumping table entries. * * * * -dump_tentry: dump table entry in current @tentry format. * typedef int ta_dump_tentry(void *ta_state, struct table_info *ti, void *e, * ipfw_obj_tentry *tent); * MANDATORY, locked(UH). (M_NOWAIT). Returns 0 on success. * * Dumps entry @e to @tent. * * * -print_config: prints custom algoritm options into buffer. * typedef void (ta_print_config)(void *ta_state, struct table_info *ti, * char *buf, size_t bufsize); * OPTIONAL. locked(UH). (M_NOWAIT). * * Prints custom algorithm options in the format suitable to pass * back to -init callback. * * * * -dump_tinfo: dumps algo-specific info. * typedef void ta_dump_tinfo(void *ta_state, struct table_info *ti, * ipfw_ta_tinfo *tinfo); * OPTIONAL. locked(UH). (M_NOWAIT). * * Dumps options like items size/hash size, etc. */ MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables"); /* * Utility structures/functions common to more than one algo */ struct mod_item { void *main_ptr; size_t size; void *main_ptr6; size_t size6; }; static int badd(const void *key, void *item, void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *)); static int bdel(const void *key, void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *)); /* * ADDR implementation using radix * */ /* * The radix code expects addr and mask to be array of bytes, * with the first byte being the length of the array. rn_inithead * is called with the offset in bits of the lookup key within the * array. If we use a sockaddr_in as the underlying type, * sin_len is conveniently located at offset 0, sin_addr is at * offset 4 and normally aligned. * But for portability, let's avoid assumption and make the code explicit */ #define KEY_LEN(v) *((uint8_t *)&(v)) /* * Do not require radix to compare more than actual IPv4/IPv6 address */ #define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t)) #define KEY_LEN_INET6 (offsetof(struct sa_in6, sin6_addr) + sizeof(struct in6_addr)) #define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr)) #define OFF_LEN_INET6 (8 * offsetof(struct sa_in6, sin6_addr)) struct radix_addr_entry { struct radix_node rn[2]; struct sockaddr_in addr; uint32_t value; uint8_t masklen; }; struct sa_in6 { uint8_t sin6_len; uint8_t sin6_family; uint8_t pad[2]; struct in6_addr sin6_addr; }; struct radix_addr_xentry { struct radix_node rn[2]; struct sa_in6 addr6; uint32_t value; uint8_t masklen; }; struct radix_cfg { struct radix_node_head *head4; struct radix_node_head *head6; size_t count4; size_t count6; }; struct ta_buf_radix { void *ent_ptr; struct sockaddr *addr_ptr; struct sockaddr *mask_ptr; union { struct { struct sockaddr_in sa; struct sockaddr_in ma; } a4; struct { struct sa_in6 sa; struct sa_in6 ma; } a6; } addr; }; static int ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); static int ta_init_radix(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags); static int flush_radix_entry(struct radix_node *rn, void *arg); static void ta_destroy_radix(void *ta_state, struct table_info *ti); static void ta_dump_radix_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo); static int ta_dump_radix_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent); static int ta_find_radix_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent); static void ta_foreach_radix(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg); static void tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa, struct sockaddr *ma, int *set_mask); static int ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_add_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static int ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_del_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static void ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_need_modify_radix(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags); static int ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct radix_node_head *rnh; if (keylen == sizeof(in_addr_t)) { struct radix_addr_entry *ent; struct sockaddr_in sa; KEY_LEN(sa) = KEY_LEN_INET; sa.sin_addr.s_addr = *((in_addr_t *)key); rnh = (struct radix_node_head *)ti->state; ent = (struct radix_addr_entry *)(rnh->rnh_matchaddr(&sa, rnh)); if (ent != NULL) { *val = ent->value; return (1); } } else { struct radix_addr_xentry *xent; struct sa_in6 sa6; KEY_LEN(sa6) = KEY_LEN_INET6; memcpy(&sa6.sin6_addr, key, sizeof(struct in6_addr)); rnh = (struct radix_node_head *)ti->xstate; xent = (struct radix_addr_xentry *)(rnh->rnh_matchaddr(&sa6, rnh)); if (xent != NULL) { *val = xent->value; return (1); } } return (0); } /* * New table */ static int ta_init_radix(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags) { struct radix_cfg *cfg; if (!rn_inithead(&ti->state, OFF_LEN_INET)) return (ENOMEM); if (!rn_inithead(&ti->xstate, OFF_LEN_INET6)) { rn_detachhead(&ti->state); return (ENOMEM); } cfg = malloc(sizeof(struct radix_cfg), M_IPFW, M_WAITOK | M_ZERO); *ta_state = cfg; ti->lookup = ta_lookup_radix; return (0); } static int flush_radix_entry(struct radix_node *rn, void *arg) { struct radix_node_head * const rnh = arg; struct radix_addr_entry *ent; ent = (struct radix_addr_entry *) rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); if (ent != NULL) free(ent, M_IPFW_TBL); return (0); } static void ta_destroy_radix(void *ta_state, struct table_info *ti) { struct radix_cfg *cfg; struct radix_node_head *rnh; cfg = (struct radix_cfg *)ta_state; rnh = (struct radix_node_head *)(ti->state); rnh->rnh_walktree(rnh, flush_radix_entry, rnh); rn_detachhead(&ti->state); rnh = (struct radix_node_head *)(ti->xstate); rnh->rnh_walktree(rnh, flush_radix_entry, rnh); rn_detachhead(&ti->xstate); free(cfg, M_IPFW); } /* * Provide algo-specific table info */ static void ta_dump_radix_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) { struct radix_cfg *cfg; cfg = (struct radix_cfg *)ta_state; tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM; tinfo->taclass4 = IPFW_TACLASS_RADIX; tinfo->count4 = cfg->count4; tinfo->itemsize4 = sizeof(struct radix_addr_entry); tinfo->taclass6 = IPFW_TACLASS_RADIX; tinfo->count6 = cfg->count6; tinfo->itemsize6 = sizeof(struct radix_addr_xentry); } static int ta_dump_radix_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct radix_addr_entry *n; #ifdef INET6 struct radix_addr_xentry *xn; #endif n = (struct radix_addr_entry *)e; /* Guess IPv4/IPv6 radix by sockaddr family */ if (n->addr.sin_family == AF_INET) { tent->k.addr.s_addr = n->addr.sin_addr.s_addr; tent->masklen = n->masklen; tent->subtype = AF_INET; tent->v.kidx = n->value; #ifdef INET6 } else { xn = (struct radix_addr_xentry *)e; memcpy(&tent->k, &xn->addr6.sin6_addr, sizeof(struct in6_addr)); tent->masklen = xn->masklen; tent->subtype = AF_INET6; tent->v.kidx = xn->value; #endif } return (0); } static int ta_find_radix_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent) { struct radix_node_head *rnh; void *e; e = NULL; if (tent->subtype == AF_INET) { struct sockaddr_in sa; KEY_LEN(sa) = KEY_LEN_INET; sa.sin_addr.s_addr = tent->k.addr.s_addr; rnh = (struct radix_node_head *)ti->state; e = rnh->rnh_matchaddr(&sa, rnh); } else { struct sa_in6 sa6; KEY_LEN(sa6) = KEY_LEN_INET6; memcpy(&sa6.sin6_addr, &tent->k.addr6, sizeof(struct in6_addr)); rnh = (struct radix_node_head *)ti->xstate; e = rnh->rnh_matchaddr(&sa6, rnh); } if (e != NULL) { ta_dump_radix_tentry(ta_state, ti, e, tent); return (0); } return (ENOENT); } static void ta_foreach_radix(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg) { struct radix_node_head *rnh; rnh = (struct radix_node_head *)(ti->state); rnh->rnh_walktree(rnh, (walktree_f_t *)f, arg); rnh = (struct radix_node_head *)(ti->xstate); rnh->rnh_walktree(rnh, (walktree_f_t *)f, arg); } #ifdef INET6 static inline void ipv6_writemask(struct in6_addr *addr6, uint8_t mask); static inline void ipv6_writemask(struct in6_addr *addr6, uint8_t mask) { uint32_t *cp; for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32) *cp++ = 0xFFFFFFFF; *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0); } #endif static void tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa, struct sockaddr *ma, int *set_mask) { int mlen; #ifdef INET struct sockaddr_in *addr, *mask; #endif #ifdef INET6 struct sa_in6 *addr6, *mask6; #endif in_addr_t a4; mlen = tei->masklen; if (tei->subtype == AF_INET) { #ifdef INET addr = (struct sockaddr_in *)sa; mask = (struct sockaddr_in *)ma; /* Set 'total' structure length */ KEY_LEN(*addr) = KEY_LEN_INET; KEY_LEN(*mask) = KEY_LEN_INET; addr->sin_family = AF_INET; mask->sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); a4 = *((in_addr_t *)tei->paddr); addr->sin_addr.s_addr = a4 & mask->sin_addr.s_addr; if (mlen != 32) *set_mask = 1; else *set_mask = 0; #endif #ifdef INET6 } else if (tei->subtype == AF_INET6) { /* IPv6 case */ addr6 = (struct sa_in6 *)sa; mask6 = (struct sa_in6 *)ma; /* Set 'total' structure length */ KEY_LEN(*addr6) = KEY_LEN_INET6; KEY_LEN(*mask6) = KEY_LEN_INET6; addr6->sin6_family = AF_INET6; ipv6_writemask(&mask6->sin6_addr, mlen); memcpy(&addr6->sin6_addr, tei->paddr, sizeof(struct in6_addr)); APPLY_MASK(&addr6->sin6_addr, &mask6->sin6_addr); if (mlen != 128) *set_mask = 1; else *set_mask = 0; #endif } } static int ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_radix *tb; struct radix_addr_entry *ent; #ifdef INET6 struct radix_addr_xentry *xent; #endif struct sockaddr *addr, *mask; int mlen, set_mask; tb = (struct ta_buf_radix *)ta_buf; mlen = tei->masklen; set_mask = 0; if (tei->subtype == AF_INET) { #ifdef INET if (mlen > 32) return (EINVAL); ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); ent->masklen = mlen; addr = (struct sockaddr *)&ent->addr; mask = (struct sockaddr *)&tb->addr.a4.ma; tb->ent_ptr = ent; #endif #ifdef INET6 } else if (tei->subtype == AF_INET6) { /* IPv6 case */ if (mlen > 128) return (EINVAL); xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO); xent->masklen = mlen; addr = (struct sockaddr *)&xent->addr6; mask = (struct sockaddr *)&tb->addr.a6.ma; tb->ent_ptr = xent; #endif } else { /* Unknown CIDR type */ return (EINVAL); } tei_to_sockaddr_ent(tei, addr, mask, &set_mask); /* Set pointers */ tb->addr_ptr = addr; if (set_mask != 0) tb->mask_ptr = mask; return (0); } static int ta_add_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct radix_cfg *cfg; struct radix_node_head *rnh; struct radix_node *rn; struct ta_buf_radix *tb; uint32_t *old_value, value; cfg = (struct radix_cfg *)ta_state; tb = (struct ta_buf_radix *)ta_buf; /* Save current entry value from @tei */ if (tei->subtype == AF_INET) { rnh = ti->state; ((struct radix_addr_entry *)tb->ent_ptr)->value = tei->value; } else { rnh = ti->xstate; ((struct radix_addr_xentry *)tb->ent_ptr)->value = tei->value; } /* Search for an entry first */ rn = rnh->rnh_lookup(tb->addr_ptr, tb->mask_ptr, rnh); if (rn != NULL) { if ((tei->flags & TEI_FLAGS_UPDATE) == 0) return (EEXIST); /* Record already exists. Update value if we're asked to */ if (tei->subtype == AF_INET) old_value = &((struct radix_addr_entry *)rn)->value; else old_value = &((struct radix_addr_xentry *)rn)->value; value = *old_value; *old_value = tei->value; tei->value = value; /* Indicate that update has happened instead of addition */ tei->flags |= TEI_FLAGS_UPDATED; *pnum = 0; return (0); } if ((tei->flags & TEI_FLAGS_DONTADD) != 0) return (EFBIG); rn = rnh->rnh_addaddr(tb->addr_ptr, tb->mask_ptr, rnh, tb->ent_ptr); if (rn == NULL) { /* Unknown error */ return (EINVAL); } if (tei->subtype == AF_INET) cfg->count4++; else cfg->count6++; tb->ent_ptr = NULL; *pnum = 1; return (0); } static int ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_radix *tb; struct sockaddr *addr, *mask; int mlen, set_mask; tb = (struct ta_buf_radix *)ta_buf; mlen = tei->masklen; set_mask = 0; if (tei->subtype == AF_INET) { if (mlen > 32) return (EINVAL); addr = (struct sockaddr *)&tb->addr.a4.sa; mask = (struct sockaddr *)&tb->addr.a4.ma; #ifdef INET6 } else if (tei->subtype == AF_INET6) { if (mlen > 128) return (EINVAL); addr = (struct sockaddr *)&tb->addr.a6.sa; mask = (struct sockaddr *)&tb->addr.a6.ma; #endif } else return (EINVAL); tei_to_sockaddr_ent(tei, addr, mask, &set_mask); tb->addr_ptr = addr; if (set_mask != 0) tb->mask_ptr = mask; return (0); } static int ta_del_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct radix_cfg *cfg; struct radix_node_head *rnh; struct radix_node *rn; struct ta_buf_radix *tb; cfg = (struct radix_cfg *)ta_state; tb = (struct ta_buf_radix *)ta_buf; if (tei->subtype == AF_INET) rnh = ti->state; else rnh = ti->xstate; rn = rnh->rnh_deladdr(tb->addr_ptr, tb->mask_ptr, rnh); if (rn == NULL) return (ENOENT); /* Save entry value to @tei */ if (tei->subtype == AF_INET) tei->value = ((struct radix_addr_entry *)rn)->value; else tei->value = ((struct radix_addr_xentry *)rn)->value; tb->ent_ptr = rn; if (tei->subtype == AF_INET) cfg->count4--; else cfg->count6--; *pnum = 1; return (0); } static void ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_radix *tb; tb = (struct ta_buf_radix *)ta_buf; if (tb->ent_ptr != NULL) free(tb->ent_ptr, M_IPFW_TBL); } static int ta_need_modify_radix(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags) { /* * radix does not require additional memory allocations * other than nodes itself. Adding new masks to the tree do * but we don't have any API to call (and we don't known which * sizes do we need). */ return (0); } struct table_algo addr_radix = { .name = "addr:radix", .type = IPFW_TABLE_ADDR, .flags = TA_FLAG_DEFAULT, .ta_buf_size = sizeof(struct ta_buf_radix), .init = ta_init_radix, .destroy = ta_destroy_radix, .prepare_add = ta_prepare_add_radix, .prepare_del = ta_prepare_del_radix, .add = ta_add_radix, .del = ta_del_radix, .flush_entry = ta_flush_radix_entry, .foreach = ta_foreach_radix, .dump_tentry = ta_dump_radix_tentry, .find_tentry = ta_find_radix_tentry, .dump_tinfo = ta_dump_radix_tinfo, .need_modify = ta_need_modify_radix, }; /* * addr:hash cmds * * * ti->data: * [inv.mask4][inv.mask6][log2hsize4][log2hsize6] * [ 8][ 8[ 8][ 8] * * inv.mask4: 32 - mask * inv.mask6: * 1) _slow lookup: mask * 2) _aligned: (128 - mask) / 8 * 3) _64: 8 * * * pflags: * [v4=1/v6=0][hsize] * [ 32][ 32] */ struct chashentry; SLIST_HEAD(chashbhead, chashentry); struct chash_cfg { struct chashbhead *head4; struct chashbhead *head6; size_t size4; size_t size6; size_t items4; size_t items6; uint8_t mask4; uint8_t mask6; }; struct chashentry { SLIST_ENTRY(chashentry) next; uint32_t value; uint32_t type; union { uint32_t a4; /* Host format */ struct in6_addr a6; /* Network format */ } a; }; struct ta_buf_chash { void *ent_ptr; struct chashentry ent; }; #ifdef INET static __inline uint32_t hash_ip(uint32_t addr, int hsize); #endif #ifdef INET6 static __inline uint32_t hash_ip6(struct in6_addr *addr6, int hsize); static __inline uint16_t hash_ip64(struct in6_addr *addr6, int hsize); static __inline uint32_t hash_ip6_slow(struct in6_addr *addr6, void *key, int mask, int hsize); static __inline uint32_t hash_ip6_al(struct in6_addr *addr6, void *key, int mask, int hsize); #endif static int ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); static int ta_lookup_chash_aligned(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); static int ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); static int chash_parse_opts(struct chash_cfg *cfg, char *data); static void ta_print_chash_config(void *ta_state, struct table_info *ti, char *buf, size_t bufsize); static int ta_log2(uint32_t v); static int ta_init_chash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags); static void ta_destroy_chash(void *ta_state, struct table_info *ti); static void ta_dump_chash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo); static int ta_dump_chash_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent); static uint32_t hash_ent(struct chashentry *ent, int af, int mlen, uint32_t size); static int tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent); static int ta_find_chash_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent); static void ta_foreach_chash(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg); static int ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_add_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static int ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_del_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static void ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_need_modify_chash(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags); static int ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags); static int ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags); static void ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags); static void ta_flush_mod_chash(void *ta_buf); #ifdef INET static __inline uint32_t hash_ip(uint32_t addr, int hsize) { return (addr % (hsize - 1)); } #endif #ifdef INET6 static __inline uint32_t hash_ip6(struct in6_addr *addr6, int hsize) { uint32_t i; i = addr6->s6_addr32[0] ^ addr6->s6_addr32[1] ^ addr6->s6_addr32[2] ^ addr6->s6_addr32[3]; return (i % (hsize - 1)); } static __inline uint16_t hash_ip64(struct in6_addr *addr6, int hsize) { uint32_t i; i = addr6->s6_addr32[0] ^ addr6->s6_addr32[1]; return (i % (hsize - 1)); } static __inline uint32_t hash_ip6_slow(struct in6_addr *addr6, void *key, int mask, int hsize) { struct in6_addr mask6; ipv6_writemask(&mask6, mask); memcpy(addr6, key, sizeof(struct in6_addr)); APPLY_MASK(addr6, &mask6); return (hash_ip6(addr6, hsize)); } static __inline uint32_t hash_ip6_al(struct in6_addr *addr6, void *key, int mask, int hsize) { uint64_t *paddr; paddr = (uint64_t *)addr6; *paddr = 0; *(paddr + 1) = 0; memcpy(addr6, key, mask); return (hash_ip6(addr6, hsize)); } #endif static int ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct chashbhead *head; struct chashentry *ent; uint16_t hash, hsize; uint8_t imask; if (keylen == sizeof(in_addr_t)) { #ifdef INET head = (struct chashbhead *)ti->state; imask = ti->data >> 24; hsize = 1 << ((ti->data & 0xFFFF) >> 8); uint32_t a; a = ntohl(*((in_addr_t *)key)); a = a >> imask; hash = hash_ip(a, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (ent->a.a4 == a) { *val = ent->value; return (1); } } #endif } else { #ifdef INET6 /* IPv6: worst scenario: non-round mask */ struct in6_addr addr6; head = (struct chashbhead *)ti->xstate; imask = (ti->data & 0xFF0000) >> 16; hsize = 1 << (ti->data & 0xFF); hash = hash_ip6_slow(&addr6, key, imask, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (memcmp(&ent->a.a6, &addr6, 16) == 0) { *val = ent->value; return (1); } } #endif } return (0); } static int ta_lookup_chash_aligned(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct chashbhead *head; struct chashentry *ent; uint16_t hash, hsize; uint8_t imask; if (keylen == sizeof(in_addr_t)) { #ifdef INET head = (struct chashbhead *)ti->state; imask = ti->data >> 24; hsize = 1 << ((ti->data & 0xFFFF) >> 8); uint32_t a; a = ntohl(*((in_addr_t *)key)); a = a >> imask; hash = hash_ip(a, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (ent->a.a4 == a) { *val = ent->value; return (1); } } #endif } else { #ifdef INET6 /* IPv6: aligned to 8bit mask */ struct in6_addr addr6; uint64_t *paddr, *ptmp; head = (struct chashbhead *)ti->xstate; imask = (ti->data & 0xFF0000) >> 16; hsize = 1 << (ti->data & 0xFF); hash = hash_ip6_al(&addr6, key, imask, hsize); paddr = (uint64_t *)&addr6; SLIST_FOREACH(ent, &head[hash], next) { ptmp = (uint64_t *)&ent->a.a6; if (paddr[0] == ptmp[0] && paddr[1] == ptmp[1]) { *val = ent->value; return (1); } } #endif } return (0); } static int ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct chashbhead *head; struct chashentry *ent; uint16_t hash, hsize; uint8_t imask; if (keylen == sizeof(in_addr_t)) { #ifdef INET head = (struct chashbhead *)ti->state; imask = ti->data >> 24; hsize = 1 << ((ti->data & 0xFFFF) >> 8); uint32_t a; a = ntohl(*((in_addr_t *)key)); a = a >> imask; hash = hash_ip(a, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (ent->a.a4 == a) { *val = ent->value; return (1); } } #endif } else { #ifdef INET6 /* IPv6: /64 */ uint64_t a6, *paddr; head = (struct chashbhead *)ti->xstate; paddr = (uint64_t *)key; hsize = 1 << (ti->data & 0xFF); a6 = *paddr; hash = hash_ip64((struct in6_addr *)key, hsize); SLIST_FOREACH(ent, &head[hash], next) { paddr = (uint64_t *)&ent->a.a6; if (a6 == *paddr) { *val = ent->value; return (1); } } #endif } return (0); } static int chash_parse_opts(struct chash_cfg *cfg, char *data) { char *pdel, *pend, *s; int mask4, mask6; mask4 = cfg->mask4; mask6 = cfg->mask6; if (data == NULL) return (0); if ((pdel = strchr(data, ' ')) == NULL) return (0); while (*pdel == ' ') pdel++; if (strncmp(pdel, "masks=", 6) != 0) return (EINVAL); if ((s = strchr(pdel, ' ')) != NULL) *s++ = '\0'; pdel += 6; /* Need /XX[,/YY] */ if (*pdel++ != '/') return (EINVAL); mask4 = strtol(pdel, &pend, 10); if (*pend == ',') { /* ,/YY */ pdel = pend + 1; if (*pdel++ != '/') return (EINVAL); mask6 = strtol(pdel, &pend, 10); if (*pend != '\0') return (EINVAL); } else if (*pend != '\0') return (EINVAL); if (mask4 < 0 || mask4 > 32 || mask6 < 0 || mask6 > 128) return (EINVAL); cfg->mask4 = mask4; cfg->mask6 = mask6; return (0); } static void ta_print_chash_config(void *ta_state, struct table_info *ti, char *buf, size_t bufsize) { struct chash_cfg *cfg; cfg = (struct chash_cfg *)ta_state; if (cfg->mask4 != 32 || cfg->mask6 != 128) snprintf(buf, bufsize, "%s masks=/%d,/%d", "addr:hash", cfg->mask4, cfg->mask6); else snprintf(buf, bufsize, "%s", "addr:hash"); } static int ta_log2(uint32_t v) { uint32_t r; r = 0; while (v >>= 1) r++; return (r); } /* * New table. * We assume 'data' to be either NULL or the following format: * 'addr:hash [masks=/32[,/128]]' */ static int ta_init_chash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags) { int error, i; uint32_t hsize; struct chash_cfg *cfg; cfg = malloc(sizeof(struct chash_cfg), M_IPFW, M_WAITOK | M_ZERO); cfg->mask4 = 32; cfg->mask6 = 128; if ((error = chash_parse_opts(cfg, data)) != 0) { free(cfg, M_IPFW); return (error); } cfg->size4 = 128; cfg->size6 = 128; cfg->head4 = malloc(sizeof(struct chashbhead) * cfg->size4, M_IPFW, M_WAITOK | M_ZERO); cfg->head6 = malloc(sizeof(struct chashbhead) * cfg->size6, M_IPFW, M_WAITOK | M_ZERO); for (i = 0; i < cfg->size4; i++) SLIST_INIT(&cfg->head4[i]); for (i = 0; i < cfg->size6; i++) SLIST_INIT(&cfg->head6[i]); *ta_state = cfg; ti->state = cfg->head4; ti->xstate = cfg->head6; /* Store data depending on v6 mask length */ hsize = ta_log2(cfg->size4) << 8 | ta_log2(cfg->size6); if (cfg->mask6 == 64) { ti->data = (32 - cfg->mask4) << 24 | (128 - cfg->mask6) << 16| hsize; ti->lookup = ta_lookup_chash_64; } else if ((cfg->mask6 % 8) == 0) { ti->data = (32 - cfg->mask4) << 24 | cfg->mask6 << 13 | hsize; ti->lookup = ta_lookup_chash_aligned; } else { /* don't do that! */ ti->data = (32 - cfg->mask4) << 24 | cfg->mask6 << 16 | hsize; ti->lookup = ta_lookup_chash_slow; } return (0); } static void ta_destroy_chash(void *ta_state, struct table_info *ti) { struct chash_cfg *cfg; struct chashentry *ent, *ent_next; int i; cfg = (struct chash_cfg *)ta_state; for (i = 0; i < cfg->size4; i++) SLIST_FOREACH_SAFE(ent, &cfg->head4[i], next, ent_next) free(ent, M_IPFW_TBL); for (i = 0; i < cfg->size6; i++) SLIST_FOREACH_SAFE(ent, &cfg->head6[i], next, ent_next) free(ent, M_IPFW_TBL); free(cfg->head4, M_IPFW); free(cfg->head6, M_IPFW); free(cfg, M_IPFW); } static void ta_dump_chash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) { struct chash_cfg *cfg; cfg = (struct chash_cfg *)ta_state; tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM; tinfo->taclass4 = IPFW_TACLASS_HASH; tinfo->size4 = cfg->size4; tinfo->count4 = cfg->items4; tinfo->itemsize4 = sizeof(struct chashentry); tinfo->taclass6 = IPFW_TACLASS_HASH; tinfo->size6 = cfg->size6; tinfo->count6 = cfg->items6; tinfo->itemsize6 = sizeof(struct chashentry); } static int ta_dump_chash_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct chash_cfg *cfg; struct chashentry *ent; cfg = (struct chash_cfg *)ta_state; ent = (struct chashentry *)e; if (ent->type == AF_INET) { tent->k.addr.s_addr = htonl(ent->a.a4 << (32 - cfg->mask4)); tent->masklen = cfg->mask4; tent->subtype = AF_INET; tent->v.kidx = ent->value; #ifdef INET6 } else { memcpy(&tent->k, &ent->a.a6, sizeof(struct in6_addr)); tent->masklen = cfg->mask6; tent->subtype = AF_INET6; tent->v.kidx = ent->value; #endif } return (0); } static uint32_t hash_ent(struct chashentry *ent, int af, int mlen, uint32_t size) { uint32_t hash; hash = 0; if (af == AF_INET) { #ifdef INET hash = hash_ip(ent->a.a4, size); #endif } else { #ifdef INET6 if (mlen == 64) hash = hash_ip64(&ent->a.a6, size); else hash = hash_ip6(&ent->a.a6, size); #endif } return (hash); } static int tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent) { int mlen; #ifdef INET6 struct in6_addr mask6; #endif mlen = tei->masklen; if (tei->subtype == AF_INET) { #ifdef INET if (mlen > 32) return (EINVAL); ent->type = AF_INET; /* Calculate masked address */ ent->a.a4 = ntohl(*((in_addr_t *)tei->paddr)) >> (32 - mlen); #endif #ifdef INET6 } else if (tei->subtype == AF_INET6) { /* IPv6 case */ if (mlen > 128) return (EINVAL); ent->type = AF_INET6; ipv6_writemask(&mask6, mlen); memcpy(&ent->a.a6, tei->paddr, sizeof(struct in6_addr)); APPLY_MASK(&ent->a.a6, &mask6); #endif } else { /* Unknown CIDR type */ return (EINVAL); } return (0); } static int ta_find_chash_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent) { struct chash_cfg *cfg; struct chashbhead *head; struct chashentry ent, *tmp; struct tentry_info tei; int error; uint32_t hash; cfg = (struct chash_cfg *)ta_state; memset(&ent, 0, sizeof(ent)); memset(&tei, 0, sizeof(tei)); if (tent->subtype == AF_INET) { tei.paddr = &tent->k.addr; tei.masklen = cfg->mask4; tei.subtype = AF_INET; if ((error = tei_to_chash_ent(&tei, &ent)) != 0) return (error); head = cfg->head4; hash = hash_ent(&ent, AF_INET, cfg->mask4, cfg->size4); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (tmp->a.a4 != ent.a.a4) continue; ta_dump_chash_tentry(ta_state, ti, tmp, tent); return (0); } } else { tei.paddr = &tent->k.addr6; tei.masklen = cfg->mask6; tei.subtype = AF_INET6; if ((error = tei_to_chash_ent(&tei, &ent)) != 0) return (error); head = cfg->head6; hash = hash_ent(&ent, AF_INET6, cfg->mask6, cfg->size6); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (memcmp(&tmp->a.a6, &ent.a.a6, 16) != 0) continue; ta_dump_chash_tentry(ta_state, ti, tmp, tent); return (0); } } return (ENOENT); } static void ta_foreach_chash(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg) { struct chash_cfg *cfg; struct chashentry *ent, *ent_next; int i; cfg = (struct chash_cfg *)ta_state; for (i = 0; i < cfg->size4; i++) SLIST_FOREACH_SAFE(ent, &cfg->head4[i], next, ent_next) f(ent, arg); for (i = 0; i < cfg->size6; i++) SLIST_FOREACH_SAFE(ent, &cfg->head6[i], next, ent_next) f(ent, arg); } static int ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_chash *tb; struct chashentry *ent; int error; tb = (struct ta_buf_chash *)ta_buf; ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); error = tei_to_chash_ent(tei, ent); if (error != 0) { free(ent, M_IPFW_TBL); return (error); } tb->ent_ptr = ent; return (0); } static int ta_add_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct chash_cfg *cfg; struct chashbhead *head; struct chashentry *ent, *tmp; struct ta_buf_chash *tb; int exists; uint32_t hash, value; cfg = (struct chash_cfg *)ta_state; tb = (struct ta_buf_chash *)ta_buf; ent = (struct chashentry *)tb->ent_ptr; hash = 0; exists = 0; /* Read current value from @tei */ ent->value = tei->value; /* Read cuurrent value */ if (tei->subtype == AF_INET) { if (tei->masklen != cfg->mask4) return (EINVAL); head = cfg->head4; hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (tmp->a.a4 == ent->a.a4) { exists = 1; break; } } } else { if (tei->masklen != cfg->mask6) return (EINVAL); head = cfg->head6; hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (memcmp(&tmp->a.a6, &ent->a.a6, 16) == 0) { exists = 1; break; } } } if (exists == 1) { if ((tei->flags & TEI_FLAGS_UPDATE) == 0) return (EEXIST); /* Record already exists. Update value if we're asked to */ value = tmp->value; tmp->value = tei->value; tei->value = value; /* Indicate that update has happened instead of addition */ tei->flags |= TEI_FLAGS_UPDATED; *pnum = 0; } else { if ((tei->flags & TEI_FLAGS_DONTADD) != 0) return (EFBIG); SLIST_INSERT_HEAD(&head[hash], ent, next); tb->ent_ptr = NULL; *pnum = 1; /* Update counters */ if (tei->subtype == AF_INET) cfg->items4++; else cfg->items6++; } return (0); } static int ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_chash *tb; tb = (struct ta_buf_chash *)ta_buf; return (tei_to_chash_ent(tei, &tb->ent)); } static int ta_del_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct chash_cfg *cfg; struct chashbhead *head; struct chashentry *tmp, *tmp_next, *ent; struct ta_buf_chash *tb; uint32_t hash; cfg = (struct chash_cfg *)ta_state; tb = (struct ta_buf_chash *)ta_buf; ent = &tb->ent; if (tei->subtype == AF_INET) { if (tei->masklen != cfg->mask4) return (EINVAL); head = cfg->head4; hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4); SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) { if (tmp->a.a4 != ent->a.a4) continue; SLIST_REMOVE(&head[hash], tmp, chashentry, next); cfg->items4--; tb->ent_ptr = tmp; tei->value = tmp->value; *pnum = 1; return (0); } } else { if (tei->masklen != cfg->mask6) return (EINVAL); head = cfg->head6; hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6); SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) { if (memcmp(&tmp->a.a6, &ent->a.a6, 16) != 0) continue; SLIST_REMOVE(&head[hash], tmp, chashentry, next); cfg->items6--; tb->ent_ptr = tmp; tei->value = tmp->value; *pnum = 1; return (0); } } return (ENOENT); } static void ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_chash *tb; tb = (struct ta_buf_chash *)ta_buf; if (tb->ent_ptr != NULL) free(tb->ent_ptr, M_IPFW_TBL); } /* * Hash growing callbacks. */ static int ta_need_modify_chash(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags) { struct chash_cfg *cfg; uint64_t data; /* * Since we don't know exact number of IPv4/IPv6 records in @count, * ignore non-zero @count value at all. Check current hash sizes * and return appropriate data. */ cfg = (struct chash_cfg *)ta_state; data = 0; if (cfg->items4 > cfg->size4 && cfg->size4 < 65536) data |= (cfg->size4 * 2) << 16; if (cfg->items6 > cfg->size6 && cfg->size6 < 65536) data |= cfg->size6 * 2; if (data != 0) { *pflags = data; return (1); } return (0); } /* * Allocate new, larger chash. */ static int ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags) { struct mod_item *mi; struct chashbhead *head; int i; mi = (struct mod_item *)ta_buf; memset(mi, 0, sizeof(struct mod_item)); mi->size = (*pflags >> 16) & 0xFFFF; mi->size6 = *pflags & 0xFFFF; if (mi->size > 0) { head = malloc(sizeof(struct chashbhead) * mi->size, M_IPFW, M_WAITOK | M_ZERO); for (i = 0; i < mi->size; i++) SLIST_INIT(&head[i]); mi->main_ptr = head; } if (mi->size6 > 0) { head = malloc(sizeof(struct chashbhead) * mi->size6, M_IPFW, M_WAITOK | M_ZERO); for (i = 0; i < mi->size6; i++) SLIST_INIT(&head[i]); mi->main_ptr6 = head; } return (0); } /* * Copy data from old runtime array to new one. */ static int ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags) { /* In is not possible to do rehash if we're not holidng WLOCK. */ return (0); } /* * Switch old & new arrays. */ static void ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags) { struct mod_item *mi; struct chash_cfg *cfg; struct chashbhead *old_head, *new_head; struct chashentry *ent, *ent_next; int af, i, mlen; uint32_t nhash; size_t old_size, new_size; mi = (struct mod_item *)ta_buf; cfg = (struct chash_cfg *)ta_state; /* Check which hash we need to grow and do we still need that */ if (mi->size > 0 && cfg->size4 < mi->size) { new_head = (struct chashbhead *)mi->main_ptr; new_size = mi->size; old_size = cfg->size4; old_head = ti->state; mlen = cfg->mask4; af = AF_INET; for (i = 0; i < old_size; i++) { SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { nhash = hash_ent(ent, af, mlen, new_size); SLIST_INSERT_HEAD(&new_head[nhash], ent, next); } } ti->state = new_head; cfg->head4 = new_head; cfg->size4 = mi->size; mi->main_ptr = old_head; } if (mi->size6 > 0 && cfg->size6 < mi->size6) { new_head = (struct chashbhead *)mi->main_ptr6; new_size = mi->size6; old_size = cfg->size6; old_head = ti->xstate; mlen = cfg->mask6; af = AF_INET6; for (i = 0; i < old_size; i++) { SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { nhash = hash_ent(ent, af, mlen, new_size); SLIST_INSERT_HEAD(&new_head[nhash], ent, next); } } ti->xstate = new_head; cfg->head6 = new_head; cfg->size6 = mi->size6; mi->main_ptr6 = old_head; } /* Update lower 32 bits with new values */ ti->data &= 0xFFFFFFFF00000000; ti->data |= ta_log2(cfg->size4) << 8 | ta_log2(cfg->size6); } /* * Free unneded array. */ static void ta_flush_mod_chash(void *ta_buf) { struct mod_item *mi; mi = (struct mod_item *)ta_buf; if (mi->main_ptr != NULL) free(mi->main_ptr, M_IPFW); if (mi->main_ptr6 != NULL) free(mi->main_ptr6, M_IPFW); } struct table_algo addr_hash = { .name = "addr:hash", .type = IPFW_TABLE_ADDR, .ta_buf_size = sizeof(struct ta_buf_chash), .init = ta_init_chash, .destroy = ta_destroy_chash, .prepare_add = ta_prepare_add_chash, .prepare_del = ta_prepare_del_chash, .add = ta_add_chash, .del = ta_del_chash, .flush_entry = ta_flush_chash_entry, .foreach = ta_foreach_chash, .dump_tentry = ta_dump_chash_tentry, .find_tentry = ta_find_chash_tentry, .print_config = ta_print_chash_config, .dump_tinfo = ta_dump_chash_tinfo, .need_modify = ta_need_modify_chash, .prepare_mod = ta_prepare_mod_chash, .fill_mod = ta_fill_mod_chash, .modify = ta_modify_chash, .flush_mod = ta_flush_mod_chash, }; /* * Iface table cmds. * * Implementation: * * Runtime part: * - sorted array of "struct ifidx" pointed by ti->state. * Array is allocated with rounding up to IFIDX_CHUNK. Only existing * interfaces are stored in array, however its allocated size is * sufficient to hold all table records if needed. * - current array size is stored in ti->data * * Table data: * - "struct iftable_cfg" is allocated to store table state (ta_state). * - All table records are stored inside namedobj instance. * */ struct ifidx { uint16_t kidx; uint16_t spare; uint32_t value; }; #define DEFAULT_IFIDX_SIZE 64 struct iftable_cfg; struct ifentry { struct named_object no; struct ipfw_ifc ic; struct iftable_cfg *icfg; uint32_t value; int linked; }; struct iftable_cfg { struct namedobj_instance *ii; struct ip_fw_chain *ch; struct table_info *ti; void *main_ptr; size_t size; /* Number of items allocated in array */ size_t count; /* Number of all items */ size_t used; /* Number of items _active_ now */ }; struct ta_buf_ifidx { struct ifentry *ife; uint32_t value; }; int compare_ifidx(const void *k, const void *v); static struct ifidx * ifidx_find(struct table_info *ti, void *key); static int ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); static int ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags); static void ta_change_ti_ifidx(void *ta_state, struct table_info *ti); static void destroy_ifidx_locked(struct namedobj_instance *ii, struct named_object *no, void *arg); static void ta_destroy_ifidx(void *ta_state, struct table_info *ti); static void ta_dump_ifidx_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo); static int ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_add_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static int ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_del_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static void ta_flush_ifidx_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static void if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex); static int ta_need_modify_ifidx(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags); static int ta_prepare_mod_ifidx(void *ta_buf, uint64_t *pflags); static int ta_fill_mod_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags); static void ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags); static void ta_flush_mod_ifidx(void *ta_buf); static int ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent); static int ta_find_ifidx_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent); static void foreach_ifidx(struct namedobj_instance *ii, struct named_object *no, void *arg); static void ta_foreach_ifidx(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg); int compare_ifidx(const void *k, const void *v) { const struct ifidx *ifidx; uint16_t key; key = *((const uint16_t *)k); ifidx = (const struct ifidx *)v; if (key < ifidx->kidx) return (-1); else if (key > ifidx->kidx) return (1); return (0); } /* * Adds item @item with key @key into ascending-sorted array @base. * Assumes @base has enough additional storage. * * Returns 1 on success, 0 on duplicate key. */ static int badd(const void *key, void *item, void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *)) { int min, max, mid, shift, res; caddr_t paddr; if (nmemb == 0) { memcpy(base, item, size); return (1); } /* Binary search */ min = 0; max = nmemb - 1; mid = 0; while (min <= max) { mid = (min + max) / 2; res = compar(key, (const void *)((caddr_t)base + mid * size)); if (res == 0) return (0); if (res > 0) min = mid + 1; else max = mid - 1; } /* Item not found. */ res = compar(key, (const void *)((caddr_t)base + mid * size)); if (res > 0) shift = mid + 1; else shift = mid; paddr = (caddr_t)base + shift * size; if (nmemb > shift) memmove(paddr + size, paddr, (nmemb - shift) * size); memcpy(paddr, item, size); return (1); } /* * Deletes item with key @key from ascending-sorted array @base. * * Returns 1 on success, 0 for non-existent key. */ static int bdel(const void *key, void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *)) { caddr_t item; size_t sz; item = (caddr_t)bsearch(key, base, nmemb, size, compar); if (item == NULL) return (0); sz = (caddr_t)base + nmemb * size - item; if (sz > 0) memmove(item, item + size, sz); return (1); } static struct ifidx * ifidx_find(struct table_info *ti, void *key) { struct ifidx *ifi; ifi = bsearch(key, ti->state, ti->data, sizeof(struct ifidx), compare_ifidx); return (ifi); } static int ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct ifidx *ifi; ifi = ifidx_find(ti, key); if (ifi != NULL) { *val = ifi->value; return (1); } return (0); } static int ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags) { struct iftable_cfg *icfg; icfg = malloc(sizeof(struct iftable_cfg), M_IPFW, M_WAITOK | M_ZERO); icfg->ii = ipfw_objhash_create(DEFAULT_IFIDX_SIZE); icfg->size = DEFAULT_IFIDX_SIZE; icfg->main_ptr = malloc(sizeof(struct ifidx) * icfg->size, M_IPFW, M_WAITOK | M_ZERO); icfg->ch = ch; *ta_state = icfg; ti->state = icfg->main_ptr; ti->lookup = ta_lookup_ifidx; return (0); } /* * Handle tableinfo @ti pointer change (on table array resize). */ static void ta_change_ti_ifidx(void *ta_state, struct table_info *ti) { struct iftable_cfg *icfg; icfg = (struct iftable_cfg *)ta_state; icfg->ti = ti; } static void destroy_ifidx_locked(struct namedobj_instance *ii, struct named_object *no, void *arg) { struct ifentry *ife; struct ip_fw_chain *ch; ch = (struct ip_fw_chain *)arg; ife = (struct ifentry *)no; ipfw_iface_del_notify(ch, &ife->ic); ipfw_iface_unref(ch, &ife->ic); free(ife, M_IPFW_TBL); } /* * Destroys table @ti */ static void ta_destroy_ifidx(void *ta_state, struct table_info *ti) { struct iftable_cfg *icfg; struct ip_fw_chain *ch; icfg = (struct iftable_cfg *)ta_state; ch = icfg->ch; if (icfg->main_ptr != NULL) free(icfg->main_ptr, M_IPFW); IPFW_UH_WLOCK(ch); ipfw_objhash_foreach(icfg->ii, destroy_ifidx_locked, ch); IPFW_UH_WUNLOCK(ch); ipfw_objhash_destroy(icfg->ii); free(icfg, M_IPFW); } /* * Provide algo-specific table info */ static void ta_dump_ifidx_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) { struct iftable_cfg *cfg; cfg = (struct iftable_cfg *)ta_state; tinfo->taclass4 = IPFW_TACLASS_ARRAY; tinfo->size4 = cfg->size; tinfo->count4 = cfg->used; tinfo->itemsize4 = sizeof(struct ifidx); } /* * Prepare state to add to the table: * allocate ifentry and reference needed interface. */ static int ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_ifidx *tb; char *ifname; struct ifentry *ife; tb = (struct ta_buf_ifidx *)ta_buf; /* Check if string is terminated */ ifname = (char *)tei->paddr; if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) return (EINVAL); ife = malloc(sizeof(struct ifentry), M_IPFW_TBL, M_WAITOK | M_ZERO); ife->ic.cb = if_notifier; ife->ic.cbdata = ife; if (ipfw_iface_ref(ch, ifname, &ife->ic) != 0) { free(ife, M_IPFW_TBL); return (EINVAL); } /* Use ipfw_iface 'ifname' field as stable storage */ ife->no.name = ife->ic.iface->ifname; tb->ife = ife; return (0); } static int ta_add_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct iftable_cfg *icfg; struct ifentry *ife, *tmp; struct ta_buf_ifidx *tb; struct ipfw_iface *iif; struct ifidx *ifi; char *ifname; uint32_t value; tb = (struct ta_buf_ifidx *)ta_buf; ifname = (char *)tei->paddr; icfg = (struct iftable_cfg *)ta_state; ife = tb->ife; ife->icfg = icfg; ife->value = tei->value; tmp = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); if (tmp != NULL) { if ((tei->flags & TEI_FLAGS_UPDATE) == 0) return (EEXIST); /* Exchange values in @tmp and @tei */ value = tmp->value; tmp->value = tei->value; tei->value = value; iif = tmp->ic.iface; if (iif->resolved != 0) { /* We have to update runtime value, too */ ifi = ifidx_find(ti, &iif->ifindex); ifi->value = ife->value; } /* Indicate that update has happened instead of addition */ tei->flags |= TEI_FLAGS_UPDATED; *pnum = 0; return (0); } if ((tei->flags & TEI_FLAGS_DONTADD) != 0) return (EFBIG); /* Link to internal list */ ipfw_objhash_add(icfg->ii, &ife->no); /* Link notifier (possible running its callback) */ ipfw_iface_add_notify(icfg->ch, &ife->ic); icfg->count++; tb->ife = NULL; *pnum = 1; return (0); } /* * Prepare to delete key from table. * Do basic interface name checks. */ static int ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_ifidx *tb; char *ifname; tb = (struct ta_buf_ifidx *)ta_buf; /* Check if string is terminated */ ifname = (char *)tei->paddr; if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) return (EINVAL); return (0); } /* * Remove key from both configuration list and * runtime array. Removed interface notification. */ static int ta_del_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct iftable_cfg *icfg; struct ifentry *ife; struct ta_buf_ifidx *tb; char *ifname; uint16_t ifindex; int res; tb = (struct ta_buf_ifidx *)ta_buf; ifname = (char *)tei->paddr; icfg = (struct iftable_cfg *)ta_state; ife = tb->ife; ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); if (ife == NULL) return (ENOENT); if (ife->linked != 0) { /* We have to remove item from runtime */ ifindex = ife->ic.iface->ifindex; res = bdel(&ifindex, icfg->main_ptr, icfg->used, sizeof(struct ifidx), compare_ifidx); KASSERT(res == 1, ("index %d does not exist", ifindex)); icfg->used--; ti->data = icfg->used; ife->linked = 0; } /* Unlink from local list */ ipfw_objhash_del(icfg->ii, &ife->no); /* Unlink notifier and deref */ ipfw_iface_del_notify(icfg->ch, &ife->ic); ipfw_iface_unref(icfg->ch, &ife->ic); icfg->count--; tei->value = ife->value; tb->ife = ife; *pnum = 1; return (0); } /* * Flush deleted entry. * Drops interface reference and frees entry. */ static void ta_flush_ifidx_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_ifidx *tb; tb = (struct ta_buf_ifidx *)ta_buf; if (tb->ife != NULL) free(tb->ife, M_IPFW_TBL); } /* * Handle interface announce/withdrawal for particular table. * Every real runtime array modification happens here. */ static void if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex) { struct ifentry *ife; struct ifidx ifi; struct iftable_cfg *icfg; struct table_info *ti; int res; ife = (struct ifentry *)cbdata; icfg = ife->icfg; ti = icfg->ti; KASSERT(ti != NULL, ("ti=NULL, check change_ti handler")); if (ife->linked == 0 && ifindex != 0) { /* Interface announce */ ifi.kidx = ifindex; ifi.spare = 0; ifi.value = ife->value; res = badd(&ifindex, &ifi, icfg->main_ptr, icfg->used, sizeof(struct ifidx), compare_ifidx); KASSERT(res == 1, ("index %d already exists", ifindex)); icfg->used++; ti->data = icfg->used; ife->linked = 1; } else if (ife->linked != 0 && ifindex == 0) { /* Interface withdrawal */ ifindex = ife->ic.iface->ifindex; res = bdel(&ifindex, icfg->main_ptr, icfg->used, sizeof(struct ifidx), compare_ifidx); KASSERT(res == 1, ("index %d does not exist", ifindex)); icfg->used--; ti->data = icfg->used; ife->linked = 0; } } /* * Table growing callbacks. */ static int ta_need_modify_ifidx(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags) { struct iftable_cfg *cfg; uint32_t size; cfg = (struct iftable_cfg *)ta_state; size = cfg->size; while (size < cfg->count + count) size *= 2; if (size != cfg->size) { *pflags = size; return (1); } return (0); } /* * Allocate ned, larger runtime ifidx array. */ static int ta_prepare_mod_ifidx(void *ta_buf, uint64_t *pflags) { struct mod_item *mi; mi = (struct mod_item *)ta_buf; memset(mi, 0, sizeof(struct mod_item)); mi->size = *pflags; mi->main_ptr = malloc(sizeof(struct ifidx) * mi->size, M_IPFW, M_WAITOK | M_ZERO); return (0); } /* * Copy data from old runtime array to new one. */ static int ta_fill_mod_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags) { struct mod_item *mi; struct iftable_cfg *icfg; mi = (struct mod_item *)ta_buf; icfg = (struct iftable_cfg *)ta_state; /* Check if we still need to grow array */ if (icfg->size >= mi->size) { *pflags = 0; return (0); } memcpy(mi->main_ptr, icfg->main_ptr, icfg->used * sizeof(struct ifidx)); return (0); } /* * Switch old & new arrays. */ static void ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags) { struct mod_item *mi; struct iftable_cfg *icfg; void *old_ptr; mi = (struct mod_item *)ta_buf; icfg = (struct iftable_cfg *)ta_state; old_ptr = icfg->main_ptr; icfg->main_ptr = mi->main_ptr; icfg->size = mi->size; ti->state = icfg->main_ptr; mi->main_ptr = old_ptr; } /* * Free unneded array. */ static void ta_flush_mod_ifidx(void *ta_buf) { struct mod_item *mi; mi = (struct mod_item *)ta_buf; if (mi->main_ptr != NULL) free(mi->main_ptr, M_IPFW); } static int ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct ifentry *ife; ife = (struct ifentry *)e; tent->masklen = 8 * IF_NAMESIZE; memcpy(&tent->k, ife->no.name, IF_NAMESIZE); tent->v.kidx = ife->value; return (0); } static int ta_find_ifidx_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent) { struct iftable_cfg *icfg; struct ifentry *ife; char *ifname; icfg = (struct iftable_cfg *)ta_state; ifname = tent->k.iface; if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) return (EINVAL); ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); if (ife != NULL) { ta_dump_ifidx_tentry(ta_state, ti, ife, tent); return (0); } return (ENOENT); } struct wa_ifidx { ta_foreach_f *f; void *arg; }; static void foreach_ifidx(struct namedobj_instance *ii, struct named_object *no, void *arg) { struct ifentry *ife; struct wa_ifidx *wa; ife = (struct ifentry *)no; wa = (struct wa_ifidx *)arg; wa->f(ife, wa->arg); } static void ta_foreach_ifidx(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg) { struct iftable_cfg *icfg; struct wa_ifidx wa; icfg = (struct iftable_cfg *)ta_state; wa.f = f; wa.arg = arg; ipfw_objhash_foreach(icfg->ii, foreach_ifidx, &wa); } struct table_algo iface_idx = { .name = "iface:array", .type = IPFW_TABLE_INTERFACE, .flags = TA_FLAG_DEFAULT, .ta_buf_size = sizeof(struct ta_buf_ifidx), .init = ta_init_ifidx, .destroy = ta_destroy_ifidx, .prepare_add = ta_prepare_add_ifidx, .prepare_del = ta_prepare_del_ifidx, .add = ta_add_ifidx, .del = ta_del_ifidx, .flush_entry = ta_flush_ifidx_entry, .foreach = ta_foreach_ifidx, .dump_tentry = ta_dump_ifidx_tentry, .find_tentry = ta_find_ifidx_tentry, .dump_tinfo = ta_dump_ifidx_tinfo, .need_modify = ta_need_modify_ifidx, .prepare_mod = ta_prepare_mod_ifidx, .fill_mod = ta_fill_mod_ifidx, .modify = ta_modify_ifidx, .flush_mod = ta_flush_mod_ifidx, .change_ti = ta_change_ti_ifidx, }; /* * Number array cmds. * * Implementation: * * Runtime part: * - sorted array of "struct numarray" pointed by ti->state. * Array is allocated with rounding up to NUMARRAY_CHUNK. * - current array size is stored in ti->data * */ struct numarray { uint32_t number; uint32_t value; }; struct numarray_cfg { void *main_ptr; size_t size; /* Number of items allocated in array */ size_t used; /* Number of items _active_ now */ }; struct ta_buf_numarray { struct numarray na; }; int compare_numarray(const void *k, const void *v); static struct numarray *numarray_find(struct table_info *ti, void *key); static int ta_lookup_numarray(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); static int ta_init_numarray(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags); static void ta_destroy_numarray(void *ta_state, struct table_info *ti); static void ta_dump_numarray_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo); static int ta_prepare_add_numarray(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_add_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static int ta_del_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static void ta_flush_numarray_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_need_modify_numarray(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags); static int ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags); static int ta_fill_mod_numarray(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags); static void ta_modify_numarray(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags); static void ta_flush_mod_numarray(void *ta_buf); static int ta_dump_numarray_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent); static int ta_find_numarray_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent); static void ta_foreach_numarray(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg); int compare_numarray(const void *k, const void *v) { const struct numarray *na; uint32_t key; key = *((const uint32_t *)k); na = (const struct numarray *)v; if (key < na->number) return (-1); else if (key > na->number) return (1); return (0); } static struct numarray * numarray_find(struct table_info *ti, void *key) { struct numarray *ri; ri = bsearch(key, ti->state, ti->data, sizeof(struct numarray), compare_ifidx); return (ri); } static int ta_lookup_numarray(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct numarray *ri; ri = numarray_find(ti, key); if (ri != NULL) { *val = ri->value; return (1); } return (0); } static int ta_init_numarray(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags) { struct numarray_cfg *cfg; cfg = malloc(sizeof(*cfg), M_IPFW, M_WAITOK | M_ZERO); cfg->size = 16; cfg->main_ptr = malloc(sizeof(struct numarray) * cfg->size, M_IPFW, M_WAITOK | M_ZERO); *ta_state = cfg; ti->state = cfg->main_ptr; ti->lookup = ta_lookup_numarray; return (0); } /* * Destroys table @ti */ static void ta_destroy_numarray(void *ta_state, struct table_info *ti) { struct numarray_cfg *cfg; cfg = (struct numarray_cfg *)ta_state; if (cfg->main_ptr != NULL) free(cfg->main_ptr, M_IPFW); free(cfg, M_IPFW); } /* * Provide algo-specific table info */ static void ta_dump_numarray_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) { struct numarray_cfg *cfg; cfg = (struct numarray_cfg *)ta_state; tinfo->taclass4 = IPFW_TACLASS_ARRAY; tinfo->size4 = cfg->size; tinfo->count4 = cfg->used; tinfo->itemsize4 = sizeof(struct numarray); } /* * Prepare for addition/deletion to an array. */ static int ta_prepare_add_numarray(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_numarray *tb; tb = (struct ta_buf_numarray *)ta_buf; tb->na.number = *((uint32_t *)tei->paddr); return (0); } static int ta_add_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct numarray_cfg *cfg; struct ta_buf_numarray *tb; struct numarray *ri; int res; uint32_t value; tb = (struct ta_buf_numarray *)ta_buf; cfg = (struct numarray_cfg *)ta_state; /* Read current value from @tei */ tb->na.value = tei->value; ri = numarray_find(ti, &tb->na.number); if (ri != NULL) { if ((tei->flags & TEI_FLAGS_UPDATE) == 0) return (EEXIST); /* Exchange values between ri and @tei */ value = ri->value; ri->value = tei->value; tei->value = value; /* Indicate that update has happened instead of addition */ tei->flags |= TEI_FLAGS_UPDATED; *pnum = 0; return (0); } if ((tei->flags & TEI_FLAGS_DONTADD) != 0) return (EFBIG); res = badd(&tb->na.number, &tb->na, cfg->main_ptr, cfg->used, sizeof(struct numarray), compare_numarray); KASSERT(res == 1, ("number %d already exists", tb->na.number)); cfg->used++; ti->data = cfg->used; *pnum = 1; return (0); } /* * Remove key from both configuration list and * runtime array. Removed interface notification. */ static int ta_del_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct numarray_cfg *cfg; struct ta_buf_numarray *tb; struct numarray *ri; int res; tb = (struct ta_buf_numarray *)ta_buf; cfg = (struct numarray_cfg *)ta_state; ri = numarray_find(ti, &tb->na.number); if (ri == NULL) return (ENOENT); tei->value = ri->value; res = bdel(&tb->na.number, cfg->main_ptr, cfg->used, sizeof(struct numarray), compare_numarray); KASSERT(res == 1, ("number %u does not exist", tb->na.number)); cfg->used--; ti->data = cfg->used; *pnum = 1; return (0); } static void ta_flush_numarray_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { /* We don't have any state, do nothing */ } /* * Table growing callbacks. */ static int ta_need_modify_numarray(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags) { struct numarray_cfg *cfg; size_t size; cfg = (struct numarray_cfg *)ta_state; size = cfg->size; while (size < cfg->used + count) size *= 2; if (size != cfg->size) { *pflags = size; return (1); } return (0); } /* * Allocate new, larger runtime array. */ static int ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags) { struct mod_item *mi; mi = (struct mod_item *)ta_buf; memset(mi, 0, sizeof(struct mod_item)); mi->size = *pflags; mi->main_ptr = malloc(sizeof(struct numarray) * mi->size, M_IPFW, M_WAITOK | M_ZERO); return (0); } /* * Copy data from old runtime array to new one. */ static int ta_fill_mod_numarray(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags) { struct mod_item *mi; struct numarray_cfg *cfg; mi = (struct mod_item *)ta_buf; cfg = (struct numarray_cfg *)ta_state; /* Check if we still need to grow array */ if (cfg->size >= mi->size) { *pflags = 0; return (0); } memcpy(mi->main_ptr, cfg->main_ptr, cfg->used * sizeof(struct numarray)); return (0); } /* * Switch old & new arrays. */ static void ta_modify_numarray(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags) { struct mod_item *mi; struct numarray_cfg *cfg; void *old_ptr; mi = (struct mod_item *)ta_buf; cfg = (struct numarray_cfg *)ta_state; old_ptr = cfg->main_ptr; cfg->main_ptr = mi->main_ptr; cfg->size = mi->size; ti->state = cfg->main_ptr; mi->main_ptr = old_ptr; } /* * Free unneded array. */ static void ta_flush_mod_numarray(void *ta_buf) { struct mod_item *mi; mi = (struct mod_item *)ta_buf; if (mi->main_ptr != NULL) free(mi->main_ptr, M_IPFW); } static int ta_dump_numarray_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct numarray *na; na = (struct numarray *)e; tent->k.key = na->number; tent->v.kidx = na->value; return (0); } static int ta_find_numarray_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent) { struct numarray_cfg *cfg; struct numarray *ri; cfg = (struct numarray_cfg *)ta_state; ri = numarray_find(ti, &tent->k.key); if (ri != NULL) { ta_dump_numarray_tentry(ta_state, ti, ri, tent); return (0); } return (ENOENT); } static void ta_foreach_numarray(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg) { struct numarray_cfg *cfg; struct numarray *array; int i; cfg = (struct numarray_cfg *)ta_state; array = cfg->main_ptr; for (i = 0; i < cfg->used; i++) f(&array[i], arg); } struct table_algo number_array = { .name = "number:array", .type = IPFW_TABLE_NUMBER, .ta_buf_size = sizeof(struct ta_buf_numarray), .init = ta_init_numarray, .destroy = ta_destroy_numarray, .prepare_add = ta_prepare_add_numarray, .prepare_del = ta_prepare_add_numarray, .add = ta_add_numarray, .del = ta_del_numarray, .flush_entry = ta_flush_numarray_entry, .foreach = ta_foreach_numarray, .dump_tentry = ta_dump_numarray_tentry, .find_tentry = ta_find_numarray_tentry, .dump_tinfo = ta_dump_numarray_tinfo, .need_modify = ta_need_modify_numarray, .prepare_mod = ta_prepare_mod_numarray, .fill_mod = ta_fill_mod_numarray, .modify = ta_modify_numarray, .flush_mod = ta_flush_mod_numarray, }; /* * flow:hash cmds * * * ti->data: * [inv.mask4][inv.mask6][log2hsize4][log2hsize6] * [ 8][ 8[ 8][ 8] * * inv.mask4: 32 - mask * inv.mask6: * 1) _slow lookup: mask * 2) _aligned: (128 - mask) / 8 * 3) _64: 8 * * * pflags: * [hsize4][hsize6] * [ 16][ 16] */ struct fhashentry; SLIST_HEAD(fhashbhead, fhashentry); struct fhashentry { SLIST_ENTRY(fhashentry) next; uint8_t af; uint8_t proto; uint16_t spare0; uint16_t dport; uint16_t sport; uint32_t value; uint32_t spare1; }; struct fhashentry4 { struct fhashentry e; struct in_addr dip; struct in_addr sip; }; struct fhashentry6 { struct fhashentry e; struct in6_addr dip6; struct in6_addr sip6; }; struct fhash_cfg { struct fhashbhead *head; size_t size; size_t items; struct fhashentry4 fe4; struct fhashentry6 fe6; }; struct ta_buf_fhash { void *ent_ptr; struct fhashentry6 fe6; }; static __inline int cmp_flow_ent(struct fhashentry *a, struct fhashentry *b, size_t sz); static __inline uint32_t hash_flow4(struct fhashentry4 *f, int hsize); static __inline uint32_t hash_flow6(struct fhashentry6 *f, int hsize); static uint32_t hash_flow_ent(struct fhashentry *ent, uint32_t size); static int ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); static int ta_init_fhash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags); static void ta_destroy_fhash(void *ta_state, struct table_info *ti); static void ta_dump_fhash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo); static int ta_dump_fhash_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent); static int tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent); static int ta_find_fhash_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent); static void ta_foreach_fhash(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg); static int ta_prepare_add_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_add_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static int ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_del_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static void ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_need_modify_fhash(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags); static int ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags); static int ta_fill_mod_fhash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags); static void ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags); static void ta_flush_mod_fhash(void *ta_buf); static __inline int cmp_flow_ent(struct fhashentry *a, struct fhashentry *b, size_t sz) { uint64_t *ka, *kb; ka = (uint64_t *)(&a->next + 1); kb = (uint64_t *)(&b->next + 1); if (*ka == *kb && (memcmp(a + 1, b + 1, sz) == 0)) return (1); return (0); } static __inline uint32_t hash_flow4(struct fhashentry4 *f, int hsize) { uint32_t i; i = (f->dip.s_addr) ^ (f->sip.s_addr) ^ (f->e.dport) ^ (f->e.sport); return (i % (hsize - 1)); } static __inline uint32_t hash_flow6(struct fhashentry6 *f, int hsize) { uint32_t i; i = (f->dip6.__u6_addr.__u6_addr32[2]) ^ (f->dip6.__u6_addr.__u6_addr32[3]) ^ (f->sip6.__u6_addr.__u6_addr32[2]) ^ (f->sip6.__u6_addr.__u6_addr32[3]) ^ (f->e.dport) ^ (f->e.sport); return (i % (hsize - 1)); } static uint32_t hash_flow_ent(struct fhashentry *ent, uint32_t size) { uint32_t hash; if (ent->af == AF_INET) { hash = hash_flow4((struct fhashentry4 *)ent, size); } else { hash = hash_flow6((struct fhashentry6 *)ent, size); } return (hash); } static int ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct fhashbhead *head; struct fhashentry *ent; struct fhashentry4 *m4; struct ipfw_flow_id *id; uint16_t hash, hsize; id = (struct ipfw_flow_id *)key; head = (struct fhashbhead *)ti->state; hsize = ti->data; m4 = (struct fhashentry4 *)ti->xstate; if (id->addr_type == 4) { struct fhashentry4 f; /* Copy hash mask */ f = *m4; f.dip.s_addr &= id->dst_ip; f.sip.s_addr &= id->src_ip; f.e.dport &= id->dst_port; f.e.sport &= id->src_port; f.e.proto &= id->proto; hash = hash_flow4(&f, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (cmp_flow_ent(ent, &f.e, 2 * 4) != 0) { *val = ent->value; return (1); } } } else if (id->addr_type == 6) { struct fhashentry6 f; uint64_t *fp, *idp; /* Copy hash mask */ f = *((struct fhashentry6 *)(m4 + 1)); /* Handle lack of __u6_addr.__u6_addr64 */ fp = (uint64_t *)&f.dip6; idp = (uint64_t *)&id->dst_ip6; /* src IPv6 is stored after dst IPv6 */ *fp++ &= *idp++; *fp++ &= *idp++; *fp++ &= *idp++; *fp &= *idp; f.e.dport &= id->dst_port; f.e.sport &= id->src_port; f.e.proto &= id->proto; hash = hash_flow6(&f, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (cmp_flow_ent(ent, &f.e, 2 * 16) != 0) { *val = ent->value; return (1); } } } return (0); } /* * New table. */ static int ta_init_fhash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags) { int i; struct fhash_cfg *cfg; struct fhashentry4 *fe4; struct fhashentry6 *fe6; cfg = malloc(sizeof(struct fhash_cfg), M_IPFW, M_WAITOK | M_ZERO); cfg->size = 512; cfg->head = malloc(sizeof(struct fhashbhead) * cfg->size, M_IPFW, M_WAITOK | M_ZERO); for (i = 0; i < cfg->size; i++) SLIST_INIT(&cfg->head[i]); /* Fill in fe masks based on @tflags */ fe4 = &cfg->fe4; fe6 = &cfg->fe6; if (tflags & IPFW_TFFLAG_SRCIP) { memset(&fe4->sip, 0xFF, sizeof(fe4->sip)); memset(&fe6->sip6, 0xFF, sizeof(fe6->sip6)); } if (tflags & IPFW_TFFLAG_DSTIP) { memset(&fe4->dip, 0xFF, sizeof(fe4->dip)); memset(&fe6->dip6, 0xFF, sizeof(fe6->dip6)); } if (tflags & IPFW_TFFLAG_SRCPORT) { memset(&fe4->e.sport, 0xFF, sizeof(fe4->e.sport)); memset(&fe6->e.sport, 0xFF, sizeof(fe6->e.sport)); } if (tflags & IPFW_TFFLAG_DSTPORT) { memset(&fe4->e.dport, 0xFF, sizeof(fe4->e.dport)); memset(&fe6->e.dport, 0xFF, sizeof(fe6->e.dport)); } if (tflags & IPFW_TFFLAG_PROTO) { memset(&fe4->e.proto, 0xFF, sizeof(fe4->e.proto)); memset(&fe6->e.proto, 0xFF, sizeof(fe6->e.proto)); } fe4->e.af = AF_INET; fe6->e.af = AF_INET6; *ta_state = cfg; ti->state = cfg->head; ti->xstate = &cfg->fe4; ti->data = cfg->size; ti->lookup = ta_lookup_fhash; return (0); } static void ta_destroy_fhash(void *ta_state, struct table_info *ti) { struct fhash_cfg *cfg; struct fhashentry *ent, *ent_next; int i; cfg = (struct fhash_cfg *)ta_state; for (i = 0; i < cfg->size; i++) SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next) free(ent, M_IPFW_TBL); free(cfg->head, M_IPFW); free(cfg, M_IPFW); } /* * Provide algo-specific table info */ static void ta_dump_fhash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) { struct fhash_cfg *cfg; cfg = (struct fhash_cfg *)ta_state; tinfo->flags = IPFW_TATFLAGS_AFITEM; tinfo->taclass4 = IPFW_TACLASS_HASH; tinfo->size4 = cfg->size; tinfo->count4 = cfg->items; tinfo->itemsize4 = sizeof(struct fhashentry4); tinfo->itemsize6 = sizeof(struct fhashentry6); } static int ta_dump_fhash_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct fhash_cfg *cfg; struct fhashentry *ent; struct fhashentry4 *fe4; #ifdef INET6 struct fhashentry6 *fe6; #endif struct tflow_entry *tfe; cfg = (struct fhash_cfg *)ta_state; ent = (struct fhashentry *)e; tfe = &tent->k.flow; tfe->af = ent->af; tfe->proto = ent->proto; tfe->dport = htons(ent->dport); tfe->sport = htons(ent->sport); tent->v.kidx = ent->value; tent->subtype = ent->af; if (ent->af == AF_INET) { fe4 = (struct fhashentry4 *)ent; tfe->a.a4.sip.s_addr = htonl(fe4->sip.s_addr); tfe->a.a4.dip.s_addr = htonl(fe4->dip.s_addr); tent->masklen = 32; #ifdef INET6 } else { fe6 = (struct fhashentry6 *)ent; tfe->a.a6.sip6 = fe6->sip6; tfe->a.a6.dip6 = fe6->dip6; tent->masklen = 128; #endif } return (0); } static int tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent) { #ifdef INET struct fhashentry4 *fe4; #endif #ifdef INET6 struct fhashentry6 *fe6; #endif struct tflow_entry *tfe; tfe = (struct tflow_entry *)tei->paddr; ent->af = tei->subtype; ent->proto = tfe->proto; ent->dport = ntohs(tfe->dport); ent->sport = ntohs(tfe->sport); if (tei->subtype == AF_INET) { #ifdef INET fe4 = (struct fhashentry4 *)ent; fe4->sip.s_addr = ntohl(tfe->a.a4.sip.s_addr); fe4->dip.s_addr = ntohl(tfe->a.a4.dip.s_addr); #endif #ifdef INET6 } else if (tei->subtype == AF_INET6) { fe6 = (struct fhashentry6 *)ent; fe6->sip6 = tfe->a.a6.sip6; fe6->dip6 = tfe->a.a6.dip6; #endif } else { /* Unknown CIDR type */ return (EINVAL); } return (0); } static int ta_find_fhash_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent) { struct fhash_cfg *cfg; struct fhashbhead *head; struct fhashentry *ent, *tmp; struct fhashentry6 fe6; struct tentry_info tei; int error; uint32_t hash; size_t sz; cfg = (struct fhash_cfg *)ta_state; ent = &fe6.e; memset(&fe6, 0, sizeof(fe6)); memset(&tei, 0, sizeof(tei)); tei.paddr = &tent->k.flow; tei.subtype = tent->subtype; if ((error = tei_to_fhash_ent(&tei, ent)) != 0) return (error); head = cfg->head; hash = hash_flow_ent(ent, cfg->size); if (tei.subtype == AF_INET) sz = 2 * sizeof(struct in_addr); else sz = 2 * sizeof(struct in6_addr); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (cmp_flow_ent(tmp, ent, sz) != 0) { ta_dump_fhash_tentry(ta_state, ti, tmp, tent); return (0); } } return (ENOENT); } static void ta_foreach_fhash(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg) { struct fhash_cfg *cfg; struct fhashentry *ent, *ent_next; int i; cfg = (struct fhash_cfg *)ta_state; for (i = 0; i < cfg->size; i++) SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next) f(ent, arg); } static int ta_prepare_add_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_fhash *tb; struct fhashentry *ent; size_t sz; int error; tb = (struct ta_buf_fhash *)ta_buf; if (tei->subtype == AF_INET) sz = sizeof(struct fhashentry4); else if (tei->subtype == AF_INET6) sz = sizeof(struct fhashentry6); else return (EINVAL); ent = malloc(sz, M_IPFW_TBL, M_WAITOK | M_ZERO); error = tei_to_fhash_ent(tei, ent); if (error != 0) { free(ent, M_IPFW_TBL); return (error); } tb->ent_ptr = ent; return (0); } static int ta_add_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct fhash_cfg *cfg; struct fhashbhead *head; struct fhashentry *ent, *tmp; struct ta_buf_fhash *tb; int exists; uint32_t hash, value; size_t sz; cfg = (struct fhash_cfg *)ta_state; tb = (struct ta_buf_fhash *)ta_buf; ent = (struct fhashentry *)tb->ent_ptr; exists = 0; /* Read current value from @tei */ ent->value = tei->value; head = cfg->head; hash = hash_flow_ent(ent, cfg->size); if (tei->subtype == AF_INET) sz = 2 * sizeof(struct in_addr); else sz = 2 * sizeof(struct in6_addr); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (cmp_flow_ent(tmp, ent, sz) != 0) { exists = 1; break; } } if (exists == 1) { if ((tei->flags & TEI_FLAGS_UPDATE) == 0) return (EEXIST); /* Record already exists. Update value if we're asked to */ /* Exchange values between tmp and @tei */ value = tmp->value; tmp->value = tei->value; tei->value = value; /* Indicate that update has happened instead of addition */ tei->flags |= TEI_FLAGS_UPDATED; *pnum = 0; } else { if ((tei->flags & TEI_FLAGS_DONTADD) != 0) return (EFBIG); SLIST_INSERT_HEAD(&head[hash], ent, next); tb->ent_ptr = NULL; *pnum = 1; /* Update counters and check if we need to grow hash */ cfg->items++; } return (0); } static int ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_fhash *tb; tb = (struct ta_buf_fhash *)ta_buf; return (tei_to_fhash_ent(tei, &tb->fe6.e)); } static int ta_del_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct fhash_cfg *cfg; struct fhashbhead *head; struct fhashentry *ent, *tmp; struct ta_buf_fhash *tb; uint32_t hash; size_t sz; cfg = (struct fhash_cfg *)ta_state; tb = (struct ta_buf_fhash *)ta_buf; ent = &tb->fe6.e; head = cfg->head; hash = hash_flow_ent(ent, cfg->size); if (tei->subtype == AF_INET) sz = 2 * sizeof(struct in_addr); else sz = 2 * sizeof(struct in6_addr); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (cmp_flow_ent(tmp, ent, sz) == 0) continue; SLIST_REMOVE(&head[hash], tmp, fhashentry, next); tei->value = tmp->value; *pnum = 1; cfg->items--; tb->ent_ptr = tmp; return (0); } return (ENOENT); } static void ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_fhash *tb; tb = (struct ta_buf_fhash *)ta_buf; if (tb->ent_ptr != NULL) free(tb->ent_ptr, M_IPFW_TBL); } /* * Hash growing callbacks. */ static int ta_need_modify_fhash(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags) { struct fhash_cfg *cfg; cfg = (struct fhash_cfg *)ta_state; if (cfg->items > cfg->size && cfg->size < 65536) { *pflags = cfg->size * 2; return (1); } return (0); } /* * Allocate new, larger fhash. */ static int ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags) { struct mod_item *mi; struct fhashbhead *head; int i; mi = (struct mod_item *)ta_buf; memset(mi, 0, sizeof(struct mod_item)); mi->size = *pflags; head = malloc(sizeof(struct fhashbhead) * mi->size, M_IPFW, M_WAITOK | M_ZERO); for (i = 0; i < mi->size; i++) SLIST_INIT(&head[i]); mi->main_ptr = head; return (0); } /* * Copy data from old runtime array to new one. */ static int ta_fill_mod_fhash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags) { /* In is not possible to do rehash if we're not holidng WLOCK. */ return (0); } /* * Switch old & new arrays. */ static void ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags) { struct mod_item *mi; struct fhash_cfg *cfg; struct fhashbhead *old_head, *new_head; struct fhashentry *ent, *ent_next; int i; uint32_t nhash; size_t old_size; mi = (struct mod_item *)ta_buf; cfg = (struct fhash_cfg *)ta_state; old_size = cfg->size; old_head = ti->state; new_head = (struct fhashbhead *)mi->main_ptr; for (i = 0; i < old_size; i++) { SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { nhash = hash_flow_ent(ent, mi->size); SLIST_INSERT_HEAD(&new_head[nhash], ent, next); } } ti->state = new_head; ti->data = mi->size; cfg->head = new_head; cfg->size = mi->size; mi->main_ptr = old_head; } /* * Free unneded array. */ static void ta_flush_mod_fhash(void *ta_buf) { struct mod_item *mi; mi = (struct mod_item *)ta_buf; if (mi->main_ptr != NULL) free(mi->main_ptr, M_IPFW); } struct table_algo flow_hash = { .name = "flow:hash", .type = IPFW_TABLE_FLOW, .flags = TA_FLAG_DEFAULT, .ta_buf_size = sizeof(struct ta_buf_fhash), .init = ta_init_fhash, .destroy = ta_destroy_fhash, .prepare_add = ta_prepare_add_fhash, .prepare_del = ta_prepare_del_fhash, .add = ta_add_fhash, .del = ta_del_fhash, .flush_entry = ta_flush_fhash_entry, .foreach = ta_foreach_fhash, .dump_tentry = ta_dump_fhash_tentry, .find_tentry = ta_find_fhash_tentry, .dump_tinfo = ta_dump_fhash_tinfo, .need_modify = ta_need_modify_fhash, .prepare_mod = ta_prepare_mod_fhash, .fill_mod = ta_fill_mod_fhash, .modify = ta_modify_fhash, .flush_mod = ta_flush_mod_fhash, }; /* * Kernel fibs bindings. * * Implementation: * * Runtime part: * - fully relies on route API * - fib number is stored in ti->data * */ -static struct rtentry *lookup_kfib(void *key, int keylen, int fib); static int ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); static int kfib_parse_opts(int *pfib, char *data); static void ta_print_kfib_config(void *ta_state, struct table_info *ti, char *buf, size_t bufsize); static int ta_init_kfib(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags); static void ta_destroy_kfib(void *ta_state, struct table_info *ti); static void ta_dump_kfib_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo); static int contigmask(uint8_t *p, int len); static int ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent); +static int ta_dump_kfib_tentry_int(struct sockaddr *paddr, + struct sockaddr *pmask, ipfw_obj_tentry *tent); static int ta_find_kfib_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent); static void ta_foreach_kfib(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg); -static struct rtentry * -lookup_kfib(void *key, int keylen, int fib) -{ - struct sockaddr *s; - if (keylen == 4) { - struct sockaddr_in sin; - bzero(&sin, sizeof(sin)); - sin.sin_len = sizeof(struct sockaddr_in); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = *(in_addr_t *)key; - s = (struct sockaddr *)&sin; - } else { - struct sockaddr_in6 sin6; - bzero(&sin6, sizeof(sin6)); - sin6.sin6_len = sizeof(struct sockaddr_in6); - sin6.sin6_family = AF_INET6; - sin6.sin6_addr = *(struct in6_addr *)key; - s = (struct sockaddr *)&sin6; - } - - return (rtalloc1_fib(s, 0, 0, fib)); -} - static int ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { - struct rtentry *rte; +#ifdef INET + struct nhop4_basic nh4; + struct in_addr in; +#endif +#ifdef INET6 + struct nhop6_basic nh6; +#endif + int error; - if ((rte = lookup_kfib(key, keylen, ti->data)) == NULL) + error = ENOENT; +#ifdef INET + if (keylen == 4) { + in.s_addr = *(in_addr_t *)key; + error = fib4_lookup_nh_basic(ti->data, + in, 0, 0, &nh4); + } +#endif +#ifdef INET6 + if (keylen == 6) + error = fib6_lookup_nh_basic(ti->data, + (struct in6_addr *)key, 0, 0, 0, &nh6); +#endif + + if (error != 0) return (0); *val = 0; - RTFREE_LOCKED(rte); return (1); } /* Parse 'fib=%d' */ static int kfib_parse_opts(int *pfib, char *data) { char *pdel, *pend, *s; int fibnum; if (data == NULL) return (0); if ((pdel = strchr(data, ' ')) == NULL) return (0); while (*pdel == ' ') pdel++; if (strncmp(pdel, "fib=", 4) != 0) return (EINVAL); if ((s = strchr(pdel, ' ')) != NULL) *s++ = '\0'; pdel += 4; /* Need \d+ */ fibnum = strtol(pdel, &pend, 10); if (*pend != '\0') return (EINVAL); *pfib = fibnum; return (0); } static void ta_print_kfib_config(void *ta_state, struct table_info *ti, char *buf, size_t bufsize) { if (ti->data != 0) snprintf(buf, bufsize, "%s fib=%lu", "addr:kfib", ti->data); else snprintf(buf, bufsize, "%s", "addr:kfib"); } static int ta_init_kfib(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags) { int error, fibnum; fibnum = 0; if ((error = kfib_parse_opts(&fibnum, data)) != 0) return (error); if (fibnum >= rt_numfibs) return (E2BIG); ti->data = fibnum; ti->lookup = ta_lookup_kfib; return (0); } /* * Destroys table @ti */ static void ta_destroy_kfib(void *ta_state, struct table_info *ti) { } /* * Provide algo-specific table info */ static void ta_dump_kfib_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) { tinfo->flags = IPFW_TATFLAGS_AFDATA; tinfo->taclass4 = IPFW_TACLASS_RADIX; tinfo->count4 = 0; tinfo->itemsize4 = sizeof(struct rtentry); tinfo->taclass6 = IPFW_TACLASS_RADIX; tinfo->count6 = 0; tinfo->itemsize6 = sizeof(struct rtentry); } static int contigmask(uint8_t *p, int len) { int i, n; for (i = 0; i < len ; i++) if ( (p[i/8] & (1 << (7 - (i%8)))) == 0) /* first bit unset */ break; for (n= i + 1; n < len; n++) if ( (p[n/8] & (1 << (7 - (n % 8)))) != 0) return (-1); /* mask not contiguous */ return (i); } static int ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct rtentry *rte; + + rte = (struct rtentry *)e; + + return ta_dump_kfib_tentry_int(rt_key(rte), rt_mask(rte), tent); +} + +static int +ta_dump_kfib_tentry_int(struct sockaddr *paddr, struct sockaddr *pmask, + ipfw_obj_tentry *tent) +{ #ifdef INET struct sockaddr_in *addr, *mask; #endif #ifdef INET6 struct sockaddr_in6 *addr6, *mask6; #endif int len; - rte = (struct rtentry *)e; - addr = (struct sockaddr_in *)rt_key(rte); - mask = (struct sockaddr_in *)rt_mask(rte); len = 0; /* Guess IPv4/IPv6 radix by sockaddr family */ #ifdef INET - if (addr->sin_family == AF_INET) { + if (paddr->sa_family == AF_INET) { + addr = (struct sockaddr_in *)paddr; + mask = (struct sockaddr_in *)pmask; tent->k.addr.s_addr = addr->sin_addr.s_addr; len = 32; if (mask != NULL) len = contigmask((uint8_t *)&mask->sin_addr, 32); if (len == -1) len = 0; tent->masklen = len; tent->subtype = AF_INET; tent->v.kidx = 0; /* Do we need to put GW here? */ } #endif #ifdef INET6 - if (addr->sin_family == AF_INET6) { - addr6 = (struct sockaddr_in6 *)addr; - mask6 = (struct sockaddr_in6 *)mask; + if (paddr->sa_family == AF_INET6) { + addr6 = (struct sockaddr_in6 *)paddr; + mask6 = (struct sockaddr_in6 *)pmask; memcpy(&tent->k, &addr6->sin6_addr, sizeof(struct in6_addr)); len = 128; if (mask6 != NULL) len = contigmask((uint8_t *)&mask6->sin6_addr, 128); if (len == -1) len = 0; tent->masklen = len; tent->subtype = AF_INET6; tent->v.kidx = 0; } #endif return (0); } static int ta_find_kfib_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent) { - struct rtentry *rte; - void *key; - int keylen; + struct rt_addrinfo info; + struct sockaddr_in6 key6, dst6, mask6; + struct sockaddr *dst, *key, *mask; + /* Prepare sockaddr for prefix/mask and info */ + bzero(&dst6, sizeof(dst6)); + dst6.sin6_len = sizeof(dst6); + dst = (struct sockaddr *)&dst6; + bzero(&mask6, sizeof(mask6)); + mask6.sin6_len = sizeof(mask6); + mask = (struct sockaddr *)&mask6; + + bzero(&info, sizeof(info)); + info.rti_info[RTAX_DST] = dst; + info.rti_info[RTAX_NETMASK] = mask; + + /* Prepare the lookup key */ + bzero(&key6, sizeof(key6)); + key6.sin6_family = tent->subtype; + key = (struct sockaddr *)&key6; + if (tent->subtype == AF_INET) { - key = &tent->k.addr; - keylen = sizeof(struct in_addr); + ((struct sockaddr_in *)&key6)->sin_addr = tent->k.addr; + key6.sin6_len = sizeof(struct sockaddr_in); } else { - key = &tent->k.addr6; - keylen = sizeof(struct in6_addr); + key6.sin6_addr = tent->k.addr6; + key6.sin6_len = sizeof(struct sockaddr_in6); } - if ((rte = lookup_kfib(key, keylen, ti->data)) == NULL) - return (0); + if (rib_lookup_info(ti->data, key, 0, 0, &info) != 0) + return (ENOENT); + if ((info.rti_addrs & RTA_NETMASK) == 0) + mask = NULL; - if (rte != NULL) { - ta_dump_kfib_tentry(ta_state, ti, rte, tent); - RTFREE_LOCKED(rte); - return (0); - } + ta_dump_kfib_tentry_int(dst, mask, tent); - return (ENOENT); + return (0); } static void ta_foreach_kfib(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg) { struct radix_node_head *rnh; int error; rnh = rt_tables_get_rnh(ti->data, AF_INET); if (rnh != NULL) { RADIX_NODE_HEAD_RLOCK(rnh); error = rnh->rnh_walktree(rnh, (walktree_f_t *)f, arg); RADIX_NODE_HEAD_RUNLOCK(rnh); } rnh = rt_tables_get_rnh(ti->data, AF_INET6); if (rnh != NULL) { RADIX_NODE_HEAD_RLOCK(rnh); error = rnh->rnh_walktree(rnh, (walktree_f_t *)f, arg); RADIX_NODE_HEAD_RUNLOCK(rnh); } } struct table_algo addr_kfib = { .name = "addr:kfib", .type = IPFW_TABLE_ADDR, .flags = TA_FLAG_READONLY, .ta_buf_size = 0, .init = ta_init_kfib, .destroy = ta_destroy_kfib, .foreach = ta_foreach_kfib, .dump_tentry = ta_dump_kfib_tentry, .find_tentry = ta_find_kfib_tentry, .dump_tinfo = ta_dump_kfib_tinfo, .print_config = ta_print_kfib_config, }; void ipfw_table_algo_init(struct ip_fw_chain *ch) { size_t sz; /* * Register all algorithms presented here. */ sz = sizeof(struct table_algo); ipfw_add_table_algo(ch, &addr_radix, sz, &addr_radix.idx); ipfw_add_table_algo(ch, &addr_hash, sz, &addr_hash.idx); ipfw_add_table_algo(ch, &iface_idx, sz, &iface_idx.idx); ipfw_add_table_algo(ch, &number_array, sz, &number_array.idx); ipfw_add_table_algo(ch, &flow_hash, sz, &flow_hash.idx); ipfw_add_table_algo(ch, &addr_kfib, sz, &addr_kfib.idx); } void ipfw_table_algo_destroy(struct ip_fw_chain *ch) { ipfw_del_table_algo(ch, addr_radix.idx); ipfw_del_table_algo(ch, addr_hash.idx); ipfw_del_table_algo(ch, iface_idx.idx); ipfw_del_table_algo(ch, number_array.idx); ipfw_del_table_algo(ch, flow_hash.idx); ipfw_del_table_algo(ch, addr_kfib.idx); } Index: projects/clang380-import/sys/netpfil/pf/pf_if.c =================================================================== --- projects/clang380-import/sys/netpfil/pf/pf_if.c (revision 293686) +++ projects/clang380-import/sys/netpfil/pf/pf_if.c (revision 293687) @@ -1,862 +1,863 @@ /*- * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2003 Cedric Berger * Copyright (c) 2005 Henning Brauer * Copyright (c) 2005 Ryan McBride * Copyright (c) 2012 Gleb Smirnoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $OpenBSD: pf_if.c,v 1.54 2008/06/14 16:55:28 mk Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include +#include #include #include #include #include #include #include #include #include #include VNET_DEFINE(struct pfi_kif *, pfi_all); static VNET_DEFINE(long, pfi_update); #define V_pfi_update VNET(pfi_update) #define PFI_BUFFER_MAX 0x10000 static VNET_DEFINE(struct pfr_addr *, pfi_buffer); static VNET_DEFINE(int, pfi_buffer_cnt); static VNET_DEFINE(int, pfi_buffer_max); #define V_pfi_buffer VNET(pfi_buffer) #define V_pfi_buffer_cnt VNET(pfi_buffer_cnt) #define V_pfi_buffer_max VNET(pfi_buffer_max) eventhandler_tag pfi_attach_cookie; eventhandler_tag pfi_detach_cookie; eventhandler_tag pfi_attach_group_cookie; eventhandler_tag pfi_change_group_cookie; eventhandler_tag pfi_detach_group_cookie; eventhandler_tag pfi_ifaddr_event_cookie; static void pfi_attach_ifnet(struct ifnet *); static void pfi_attach_ifgroup(struct ifg_group *); static void pfi_kif_update(struct pfi_kif *); static void pfi_dynaddr_update(struct pfi_dynaddr *dyn); static void pfi_table_update(struct pfr_ktable *, struct pfi_kif *, int, int); static void pfi_instance_add(struct ifnet *, int, int); static void pfi_address_add(struct sockaddr *, int, int); static int pfi_if_compare(struct pfi_kif *, struct pfi_kif *); static int pfi_skip_if(const char *, struct pfi_kif *); static int pfi_unmask(void *); static void pfi_attach_ifnet_event(void * __unused, struct ifnet *); static void pfi_detach_ifnet_event(void * __unused, struct ifnet *); static void pfi_attach_group_event(void *, struct ifg_group *); static void pfi_change_group_event(void *, char *); static void pfi_detach_group_event(void *, struct ifg_group *); static void pfi_ifaddr_event(void * __unused, struct ifnet *); RB_HEAD(pfi_ifhead, pfi_kif); static RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); static RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); static VNET_DEFINE(struct pfi_ifhead, pfi_ifs); #define V_pfi_ifs VNET(pfi_ifs) #define PFI_BUFFER_MAX 0x10000 MALLOC_DEFINE(PFI_MTYPE, "pf_ifnet", "pf(4) interface database"); LIST_HEAD(pfi_list, pfi_kif); static VNET_DEFINE(struct pfi_list, pfi_unlinked_kifs); #define V_pfi_unlinked_kifs VNET(pfi_unlinked_kifs) static struct mtx pfi_unlnkdkifs_mtx; MTX_SYSINIT(pfi_unlnkdkifs_mtx, &pfi_unlnkdkifs_mtx, "pf unlinked interfaces", MTX_DEF); void pfi_initialize(void) { struct ifg_group *ifg; struct ifnet *ifp; struct pfi_kif *kif; V_pfi_buffer_max = 64; V_pfi_buffer = malloc(V_pfi_buffer_max * sizeof(*V_pfi_buffer), PFI_MTYPE, M_WAITOK); kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); PF_RULES_WLOCK(); V_pfi_all = pfi_kif_attach(kif, IFG_ALL); PF_RULES_WUNLOCK(); IFNET_RLOCK(); TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) pfi_attach_ifgroup(ifg); TAILQ_FOREACH(ifp, &V_ifnet, if_link) pfi_attach_ifnet(ifp); IFNET_RUNLOCK(); pfi_attach_cookie = EVENTHANDLER_REGISTER(ifnet_arrival_event, pfi_attach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); pfi_detach_cookie = EVENTHANDLER_REGISTER(ifnet_departure_event, pfi_detach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); pfi_attach_group_cookie = EVENTHANDLER_REGISTER(group_attach_event, pfi_attach_group_event, curvnet, EVENTHANDLER_PRI_ANY); pfi_change_group_cookie = EVENTHANDLER_REGISTER(group_change_event, pfi_change_group_event, curvnet, EVENTHANDLER_PRI_ANY); pfi_detach_group_cookie = EVENTHANDLER_REGISTER(group_detach_event, pfi_detach_group_event, curvnet, EVENTHANDLER_PRI_ANY); pfi_ifaddr_event_cookie = EVENTHANDLER_REGISTER(ifaddr_event, pfi_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY); } void pfi_cleanup(void) { struct pfi_kif *p; EVENTHANDLER_DEREGISTER(ifnet_arrival_event, pfi_attach_cookie); EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfi_detach_cookie); EVENTHANDLER_DEREGISTER(group_attach_event, pfi_attach_group_cookie); EVENTHANDLER_DEREGISTER(group_change_event, pfi_change_group_cookie); EVENTHANDLER_DEREGISTER(group_detach_event, pfi_detach_group_cookie); EVENTHANDLER_DEREGISTER(ifaddr_event, pfi_ifaddr_event_cookie); V_pfi_all = NULL; while ((p = RB_MIN(pfi_ifhead, &V_pfi_ifs))) { RB_REMOVE(pfi_ifhead, &V_pfi_ifs, p); free(p, PFI_MTYPE); } while ((p = LIST_FIRST(&V_pfi_unlinked_kifs))) { LIST_REMOVE(p, pfik_list); free(p, PFI_MTYPE); } free(V_pfi_buffer, PFI_MTYPE); } struct pfi_kif * pfi_kif_find(const char *kif_name) { struct pfi_kif_cmp s; PF_RULES_ASSERT(); bzero(&s, sizeof(s)); strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name)); return (RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&s)); } struct pfi_kif * pfi_kif_attach(struct pfi_kif *kif, const char *kif_name) { struct pfi_kif *kif1; PF_RULES_WASSERT(); KASSERT(kif != NULL, ("%s: null kif", __func__)); kif1 = pfi_kif_find(kif_name); if (kif1 != NULL) { free(kif, PFI_MTYPE); return (kif1); } bzero(kif, sizeof(*kif)); strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name)); /* * It seems that the value of time_second is in unintialzied state * when pf sets interface statistics clear time in boot phase if pf * was statically linked to kernel. Instead of setting the bogus * time value have pfi_get_ifaces handle this case. In * pfi_get_ifaces it uses time_second if it sees the time is 0. */ kif->pfik_tzero = time_second > 1 ? time_second : 0; TAILQ_INIT(&kif->pfik_dynaddrs); RB_INSERT(pfi_ifhead, &V_pfi_ifs, kif); return (kif); } void pfi_kif_ref(struct pfi_kif *kif) { PF_RULES_WASSERT(); kif->pfik_rulerefs++; } void pfi_kif_unref(struct pfi_kif *kif) { PF_RULES_WASSERT(); KASSERT(kif->pfik_rulerefs > 0, ("%s: %p has zero refs", __func__, kif)); kif->pfik_rulerefs--; if (kif->pfik_rulerefs > 0) return; /* kif referencing an existing ifnet or group should exist. */ if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == V_pfi_all) return; RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif); kif->pfik_flags |= PFI_IFLAG_REFS; mtx_lock(&pfi_unlnkdkifs_mtx); LIST_INSERT_HEAD(&V_pfi_unlinked_kifs, kif, pfik_list); mtx_unlock(&pfi_unlnkdkifs_mtx); } void pfi_kif_purge(void) { struct pfi_kif *kif, *kif1; /* * Do naive mark-and-sweep garbage collecting of old kifs. * Reference flag is raised by pf_purge_expired_states(). */ mtx_lock(&pfi_unlnkdkifs_mtx); LIST_FOREACH_SAFE(kif, &V_pfi_unlinked_kifs, pfik_list, kif1) { if (!(kif->pfik_flags & PFI_IFLAG_REFS)) { LIST_REMOVE(kif, pfik_list); free(kif, PFI_MTYPE); } else kif->pfik_flags &= ~PFI_IFLAG_REFS; } mtx_unlock(&pfi_unlnkdkifs_mtx); } int pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif) { struct ifg_list *p; if (rule_kif == NULL || rule_kif == packet_kif) return (1); if (rule_kif->pfik_group != NULL) /* XXXGL: locking? */ TAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next) if (p->ifgl_group == rule_kif->pfik_group) return (1); return (0); } static void pfi_attach_ifnet(struct ifnet *ifp) { struct pfi_kif *kif; kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); PF_RULES_WLOCK(); V_pfi_update++; kif = pfi_kif_attach(kif, ifp->if_xname); kif->pfik_ifp = ifp; ifp->if_pf_kif = kif; pfi_kif_update(kif); PF_RULES_WUNLOCK(); } static void pfi_attach_ifgroup(struct ifg_group *ifg) { struct pfi_kif *kif; kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); PF_RULES_WLOCK(); V_pfi_update++; kif = pfi_kif_attach(kif, ifg->ifg_group); kif->pfik_group = ifg; ifg->ifg_pf_kif = kif; PF_RULES_WUNLOCK(); } int pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: switch (dyn->pfid_acnt4) { case 0: return (0); case 1: return (PF_MATCHA(0, &dyn->pfid_addr4, &dyn->pfid_mask4, a, AF_INET)); default: return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); } break; #endif /* INET */ #ifdef INET6 case AF_INET6: switch (dyn->pfid_acnt6) { case 0: return (0); case 1: return (PF_MATCHA(0, &dyn->pfid_addr6, &dyn->pfid_mask6, a, AF_INET6)); default: return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); } break; #endif /* INET6 */ default: return (0); } } int pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) { struct pfi_dynaddr *dyn; char tblname[PF_TABLE_NAME_SIZE]; struct pf_ruleset *ruleset = NULL; struct pfi_kif *kif; int rv = 0; PF_RULES_WASSERT(); KASSERT(aw->type == PF_ADDR_DYNIFTL, ("%s: type %u", __func__, aw->type)); KASSERT(aw->p.dyn == NULL, ("%s: dyn is %p", __func__, aw->p.dyn)); if ((dyn = malloc(sizeof(*dyn), PFI_MTYPE, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT)) == NULL) { free(dyn, PFI_MTYPE); return (ENOMEM); } if (!strcmp(aw->v.ifname, "self")) dyn->pfid_kif = pfi_kif_attach(kif, IFG_ALL); else dyn->pfid_kif = pfi_kif_attach(kif, aw->v.ifname); pfi_kif_ref(dyn->pfid_kif); dyn->pfid_net = pfi_unmask(&aw->v.a.mask); if (af == AF_INET && dyn->pfid_net == 32) dyn->pfid_net = 128; strlcpy(tblname, aw->v.ifname, sizeof(tblname)); if (aw->iflags & PFI_AFLAG_NETWORK) strlcat(tblname, ":network", sizeof(tblname)); if (aw->iflags & PFI_AFLAG_BROADCAST) strlcat(tblname, ":broadcast", sizeof(tblname)); if (aw->iflags & PFI_AFLAG_PEER) strlcat(tblname, ":peer", sizeof(tblname)); if (aw->iflags & PFI_AFLAG_NOALIAS) strlcat(tblname, ":0", sizeof(tblname)); if (dyn->pfid_net != 128) snprintf(tblname + strlen(tblname), sizeof(tblname) - strlen(tblname), "/%d", dyn->pfid_net); if ((ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR)) == NULL) { rv = ENOMEM; goto _bad; } if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) { rv = ENOMEM; goto _bad; } dyn->pfid_kt->pfrkt_flags |= PFR_TFLAG_ACTIVE; dyn->pfid_iflags = aw->iflags; dyn->pfid_af = af; TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry); aw->p.dyn = dyn; pfi_kif_update(dyn->pfid_kif); return (0); _bad: if (dyn->pfid_kt != NULL) pfr_detach_table(dyn->pfid_kt); if (ruleset != NULL) pf_remove_if_empty_ruleset(ruleset); if (dyn->pfid_kif != NULL) pfi_kif_unref(dyn->pfid_kif); free(dyn, PFI_MTYPE); return (rv); } static void pfi_kif_update(struct pfi_kif *kif) { struct ifg_list *ifgl; struct pfi_dynaddr *p; PF_RULES_WASSERT(); /* update all dynaddr */ TAILQ_FOREACH(p, &kif->pfik_dynaddrs, entry) pfi_dynaddr_update(p); /* again for all groups kif is member of */ if (kif->pfik_ifp != NULL) { IF_ADDR_RLOCK(kif->pfik_ifp); TAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next) pfi_kif_update((struct pfi_kif *) ifgl->ifgl_group->ifg_pf_kif); IF_ADDR_RUNLOCK(kif->pfik_ifp); } } static void pfi_dynaddr_update(struct pfi_dynaddr *dyn) { struct pfi_kif *kif; struct pfr_ktable *kt; PF_RULES_WASSERT(); KASSERT(dyn && dyn->pfid_kif && dyn->pfid_kt, ("%s: bad argument", __func__)); kif = dyn->pfid_kif; kt = dyn->pfid_kt; if (kt->pfrkt_larg != V_pfi_update) { /* this table needs to be brought up-to-date */ pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags); kt->pfrkt_larg = V_pfi_update; } pfr_dynaddr_update(kt, dyn); } static void pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) { int e, size2 = 0; struct ifg_member *ifgm; V_pfi_buffer_cnt = 0; if (kif->pfik_ifp != NULL) pfi_instance_add(kif->pfik_ifp, net, flags); else if (kif->pfik_group != NULL) { IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next) pfi_instance_add(ifgm->ifgm_ifp, net, flags); IFNET_RUNLOCK_NOSLEEP(); } if ((e = pfr_set_addrs(&kt->pfrkt_t, V_pfi_buffer, V_pfi_buffer_cnt, &size2, NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) printf("%s: cannot set %d new addresses into table %s: %d\n", __func__, V_pfi_buffer_cnt, kt->pfrkt_name, e); } static void pfi_instance_add(struct ifnet *ifp, int net, int flags) { struct ifaddr *ia; int got4 = 0, got6 = 0; int net2, af; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_list) { if (ia->ifa_addr == NULL) continue; af = ia->ifa_addr->sa_family; if (af != AF_INET && af != AF_INET6) continue; /* * XXX: For point-to-point interfaces, (ifname:0) and IPv4, * jump over addresses without a proper route to work * around a problem with ppp not fully removing the * address used during IPCP. */ if ((ifp->if_flags & IFF_POINTOPOINT) && !(ia->ifa_flags & IFA_ROUTE) && (flags & PFI_AFLAG_NOALIAS) && (af == AF_INET)) continue; if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6) continue; if ((flags & PFI_AFLAG_BROADCAST) && !(ifp->if_flags & IFF_BROADCAST)) continue; if ((flags & PFI_AFLAG_PEER) && !(ifp->if_flags & IFF_POINTOPOINT)) continue; if ((flags & PFI_AFLAG_NETWORK) && af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL( &((struct sockaddr_in6 *)ia->ifa_addr)->sin6_addr)) continue; if (flags & PFI_AFLAG_NOALIAS) { if (af == AF_INET && got4) continue; if (af == AF_INET6 && got6) continue; } if (af == AF_INET) got4 = 1; else if (af == AF_INET6) got6 = 1; net2 = net; if (net2 == 128 && (flags & PFI_AFLAG_NETWORK)) { if (af == AF_INET) net2 = pfi_unmask(&((struct sockaddr_in *) ia->ifa_netmask)->sin_addr); else if (af == AF_INET6) net2 = pfi_unmask(&((struct sockaddr_in6 *) ia->ifa_netmask)->sin6_addr); } if (af == AF_INET && net2 > 32) net2 = 32; if (flags & PFI_AFLAG_BROADCAST) pfi_address_add(ia->ifa_broadaddr, af, net2); else if (flags & PFI_AFLAG_PEER) pfi_address_add(ia->ifa_dstaddr, af, net2); else pfi_address_add(ia->ifa_addr, af, net2); } IF_ADDR_RUNLOCK(ifp); } static void pfi_address_add(struct sockaddr *sa, int af, int net) { struct pfr_addr *p; int i; if (V_pfi_buffer_cnt >= V_pfi_buffer_max) { int new_max = V_pfi_buffer_max * 2; if (new_max > PFI_BUFFER_MAX) { printf("%s: address buffer full (%d/%d)\n", __func__, V_pfi_buffer_cnt, PFI_BUFFER_MAX); return; } p = malloc(new_max * sizeof(*V_pfi_buffer), PFI_MTYPE, M_NOWAIT); if (p == NULL) { printf("%s: no memory to grow buffer (%d/%d)\n", __func__, V_pfi_buffer_cnt, PFI_BUFFER_MAX); return; } memcpy(p, V_pfi_buffer, V_pfi_buffer_max * sizeof(*V_pfi_buffer)); /* no need to zero buffer */ free(V_pfi_buffer, PFI_MTYPE); V_pfi_buffer = p; V_pfi_buffer_max = new_max; } if (af == AF_INET && net > 32) net = 128; p = V_pfi_buffer + V_pfi_buffer_cnt++; bzero(p, sizeof(*p)); p->pfra_af = af; p->pfra_net = net; if (af == AF_INET) p->pfra_ip4addr = ((struct sockaddr_in *)sa)->sin_addr; else if (af == AF_INET6) { p->pfra_ip6addr = ((struct sockaddr_in6 *)sa)->sin6_addr; if (IN6_IS_SCOPE_EMBED(&p->pfra_ip6addr)) p->pfra_ip6addr.s6_addr16[1] = 0; } /* mask network address bits */ if (net < 128) ((caddr_t)p)[p->pfra_net/8] &= ~(0xFF >> (p->pfra_net%8)); for (i = (p->pfra_net+7)/8; i < sizeof(p->pfra_u); i++) ((caddr_t)p)[i] = 0; } void pfi_dynaddr_remove(struct pfi_dynaddr *dyn) { KASSERT(dyn->pfid_kif != NULL, ("%s: null pfid_kif", __func__)); KASSERT(dyn->pfid_kt != NULL, ("%s: null pfid_kt", __func__)); TAILQ_REMOVE(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry); pfi_kif_unref(dyn->pfid_kif); pfr_detach_table(dyn->pfid_kt); free(dyn, PFI_MTYPE); } void pfi_dynaddr_copyout(struct pf_addr_wrap *aw) { KASSERT(aw->type == PF_ADDR_DYNIFTL, ("%s: type %u", __func__, aw->type)); if (aw->p.dyn == NULL || aw->p.dyn->pfid_kif == NULL) return; aw->p.dyncnt = aw->p.dyn->pfid_acnt4 + aw->p.dyn->pfid_acnt6; } static int pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q) { return (strncmp(p->pfik_name, q->pfik_name, IFNAMSIZ)); } void pfi_update_status(const char *name, struct pf_status *pfs) { struct pfi_kif *p; struct pfi_kif_cmp key; struct ifg_member p_member, *ifgm; TAILQ_HEAD(, ifg_member) ifg_members; int i, j, k; strlcpy(key.pfik_name, name, sizeof(key.pfik_name)); p = RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&key); if (p == NULL) return; if (p->pfik_group != NULL) { bcopy(&p->pfik_group->ifg_members, &ifg_members, sizeof(ifg_members)); } else { /* build a temporary list for p only */ bzero(&p_member, sizeof(p_member)); p_member.ifgm_ifp = p->pfik_ifp; TAILQ_INIT(&ifg_members); TAILQ_INSERT_TAIL(&ifg_members, &p_member, ifgm_next); } if (pfs) { bzero(pfs->pcounters, sizeof(pfs->pcounters)); bzero(pfs->bcounters, sizeof(pfs->bcounters)); } TAILQ_FOREACH(ifgm, &ifg_members, ifgm_next) { if (ifgm->ifgm_ifp == NULL) continue; p = (struct pfi_kif *)ifgm->ifgm_ifp->if_pf_kif; /* just clear statistics */ if (pfs == NULL) { bzero(p->pfik_packets, sizeof(p->pfik_packets)); bzero(p->pfik_bytes, sizeof(p->pfik_bytes)); p->pfik_tzero = time_second; continue; } for (i = 0; i < 2; i++) for (j = 0; j < 2; j++) for (k = 0; k < 2; k++) { pfs->pcounters[i][j][k] += p->pfik_packets[i][j][k]; pfs->bcounters[i][j] += p->pfik_bytes[i][j][k]; } } } void pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size) { struct pfi_kif *p, *nextp; int n = 0; for (p = RB_MIN(pfi_ifhead, &V_pfi_ifs); p; p = nextp) { nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p); if (pfi_skip_if(name, p)) continue; if (*size <= n++) break; if (!p->pfik_tzero) p->pfik_tzero = time_second; bcopy(p, buf++, sizeof(*buf)); nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p); } *size = n; } static int pfi_skip_if(const char *filter, struct pfi_kif *p) { int n; if (filter == NULL || !*filter) return (0); if (!strcmp(p->pfik_name, filter)) return (0); /* exact match */ n = strlen(filter); if (n < 1 || n >= IFNAMSIZ) return (1); /* sanity check */ if (filter[n-1] >= '0' && filter[n-1] <= '9') return (1); /* only do exact match in that case */ if (strncmp(p->pfik_name, filter, n)) return (1); /* prefix doesn't match */ return (p->pfik_name[n] < '0' || p->pfik_name[n] > '9'); } int pfi_set_flags(const char *name, int flags) { struct pfi_kif *p; RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) { if (pfi_skip_if(name, p)) continue; p->pfik_flags |= flags; } return (0); } int pfi_clear_flags(const char *name, int flags) { struct pfi_kif *p; RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) { if (pfi_skip_if(name, p)) continue; p->pfik_flags &= ~flags; } return (0); } /* from pf_print_state.c */ static int pfi_unmask(void *addr) { struct pf_addr *m = addr; int i = 31, j = 0, b = 0; u_int32_t tmp; while (j < 4 && m->addr32[j] == 0xffffffff) { b += 32; j++; } if (j < 4) { tmp = ntohl(m->addr32[j]); for (i = 31; tmp & (1 << i); --i) b++; } return (b); } static void pfi_attach_ifnet_event(void *arg __unused, struct ifnet *ifp) { CURVNET_SET(ifp->if_vnet); pfi_attach_ifnet(ifp); #ifdef ALTQ PF_RULES_WLOCK(); pf_altq_ifnet_event(ifp, 0); PF_RULES_WUNLOCK(); #endif CURVNET_RESTORE(); } static void pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp) { struct pfi_kif *kif = (struct pfi_kif *)ifp->if_pf_kif; CURVNET_SET(ifp->if_vnet); PF_RULES_WLOCK(); V_pfi_update++; pfi_kif_update(kif); kif->pfik_ifp = NULL; ifp->if_pf_kif = NULL; #ifdef ALTQ pf_altq_ifnet_event(ifp, 1); #endif PF_RULES_WUNLOCK(); CURVNET_RESTORE(); } static void pfi_attach_group_event(void *arg , struct ifg_group *ifg) { CURVNET_SET((struct vnet *)arg); pfi_attach_ifgroup(ifg); CURVNET_RESTORE(); } static void pfi_change_group_event(void *arg, char *gname) { struct pfi_kif *kif; kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); CURVNET_SET((struct vnet *)arg); PF_RULES_WLOCK(); V_pfi_update++; kif = pfi_kif_attach(kif, gname); pfi_kif_update(kif); PF_RULES_WUNLOCK(); CURVNET_RESTORE(); } static void pfi_detach_group_event(void *arg, struct ifg_group *ifg) { struct pfi_kif *kif = (struct pfi_kif *)ifg->ifg_pf_kif; CURVNET_SET((struct vnet *)arg); PF_RULES_WLOCK(); V_pfi_update++; kif->pfik_group = NULL; ifg->ifg_pf_kif = NULL; PF_RULES_WUNLOCK(); CURVNET_RESTORE(); } static void pfi_ifaddr_event(void *arg __unused, struct ifnet *ifp) { CURVNET_SET(ifp->if_vnet); PF_RULES_WLOCK(); if (ifp && ifp->if_pf_kif) { V_pfi_update++; pfi_kif_update(ifp->if_pf_kif); } PF_RULES_WUNLOCK(); CURVNET_RESTORE(); } Index: projects/clang380-import/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c =================================================================== --- projects/clang380-import/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c (revision 293686) +++ projects/clang380-import/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c (revision 293687) @@ -1,1544 +1,1540 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "ipoib.h" static int ipoib_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); #include #include #include #include #include /* For ARPHRD_xxx */ #include #include #include MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); int ipoib_sendq_size = IPOIB_TX_RING_SIZE; int ipoib_recvq_size = IPOIB_RX_RING_SIZE; module_param_named(send_queue_size, ipoib_sendq_size, int, 0444); MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG int ipoib_debug_level = 1; module_param_named(debug_level, ipoib_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); #endif struct ipoib_path_iter { struct ipoib_dev_priv *priv; struct ipoib_path path; }; static const u8 ipv4_bcast_addr[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff }; struct workqueue_struct *ipoib_workqueue; struct ib_sa_client ipoib_sa_client; static void ipoib_add_one(struct ib_device *device); static void ipoib_remove_one(struct ib_device *device); static void ipoib_start(struct ifnet *dev); static int ipoib_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro); static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data); static void ipoib_input(struct ifnet *ifp, struct mbuf *m); #define IPOIB_MTAP(_ifp, _m) \ do { \ if (bpf_peers_present((_ifp)->if_bpf)) { \ M_ASSERTVALID(_m); \ ipoib_mtap_mb((_ifp), (_m)); \ } \ } while (0) /* * This is for clients that have an ipoib_header in the mbuf. */ static void ipoib_mtap_mb(struct ifnet *ifp, struct mbuf *mb) { struct ipoib_header *ih; struct ether_header eh; ih = mtod(mb, struct ipoib_header *); eh.ether_type = ih->proto; bcopy(ih->hwaddr, &eh.ether_dhost, ETHER_ADDR_LEN); bzero(&eh.ether_shost, ETHER_ADDR_LEN); mb->m_data += sizeof(struct ipoib_header); mb->m_len -= sizeof(struct ipoib_header); bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); mb->m_data -= sizeof(struct ipoib_header); mb->m_len += sizeof(struct ipoib_header); } void ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto) { struct ether_header eh; eh.ether_type = proto; bzero(&eh.ether_shost, ETHER_ADDR_LEN); bzero(&eh.ether_dhost, ETHER_ADDR_LEN); bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); } static struct ib_client ipoib_client = { .name = "ipoib", .add = ipoib_add_one, .remove = ipoib_remove_one }; int ipoib_open(struct ipoib_dev_priv *priv) { struct ifnet *dev = priv->dev; ipoib_dbg(priv, "bringing up interface\n"); set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); if (ipoib_pkey_dev_delay_open(priv)) return 0; if (ipoib_ib_dev_open(priv)) goto err_disable; if (ipoib_ib_dev_up(priv)) goto err_stop; if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; /* Bring up any child interfaces too */ mutex_lock(&priv->vlan_mutex); list_for_each_entry(cpriv, &priv->child_intfs, list) if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) == 0) ipoib_open(cpriv); mutex_unlock(&priv->vlan_mutex); } dev->if_drv_flags |= IFF_DRV_RUNNING; dev->if_drv_flags &= ~IFF_DRV_OACTIVE; return 0; err_stop: ipoib_ib_dev_stop(priv, 1); err_disable: clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); return -EINVAL; } static void ipoib_init(void *arg) { struct ifnet *dev; struct ipoib_dev_priv *priv; priv = arg; dev = priv->dev; if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) ipoib_open(priv); queue_work(ipoib_workqueue, &priv->flush_light); } static int ipoib_stop(struct ipoib_dev_priv *priv) { struct ifnet *dev = priv->dev; ipoib_dbg(priv, "stopping interface\n"); clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); ipoib_ib_dev_down(priv, 0); ipoib_ib_dev_stop(priv, 0); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; /* Bring down any child interfaces too */ mutex_lock(&priv->vlan_mutex); list_for_each_entry(cpriv, &priv->child_intfs, list) if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) != 0) ipoib_stop(cpriv); mutex_unlock(&priv->vlan_mutex); } return 0; } int ipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu) { struct ifnet *dev = priv->dev; /* dev->if_mtu > 2K ==> connected mode */ if (ipoib_cm_admin_enabled(priv)) { if (new_mtu > IPOIB_CM_MTU(ipoib_cm_max_mtu(priv))) return -EINVAL; if (new_mtu > priv->mcast_mtu) ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", priv->mcast_mtu); dev->if_mtu = new_mtu; return 0; } if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) return -EINVAL; priv->admin_mtu = new_mtu; dev->if_mtu = min(priv->mcast_mtu, priv->admin_mtu); queue_work(ipoib_workqueue, &priv->flush_light); return 0; } static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ipoib_dev_priv *priv = ifp->if_softc; struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (command) { case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) error = -ipoib_open(priv); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) ipoib_stop(priv); break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifp->if_drv_flags & IFF_DRV_RUNNING) queue_work(ipoib_workqueue, &priv->restart_task); break; case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: { struct sockaddr *sa; sa = (struct sockaddr *) & ifr->ifr_data; bcopy(IF_LLADDR(ifp), (caddr_t) sa->sa_data, INFINIBAND_ALEN); } break; case SIOCSIFMTU: /* * Set the interface MTU. */ error = -ipoib_change_mtu(priv, ifr->ifr_mtu); break; default: error = EINVAL; break; } return (error); } static struct ipoib_path * __path_find(struct ipoib_dev_priv *priv, void *gid) { struct rb_node *n = priv->path_tree.rb_node; struct ipoib_path *path; int ret; while (n) { path = rb_entry(n, struct ipoib_path, rb_node); ret = memcmp(gid, path->pathrec.dgid.raw, sizeof (union ib_gid)); if (ret < 0) n = n->rb_left; else if (ret > 0) n = n->rb_right; else return path; } return NULL; } static int __path_add(struct ipoib_dev_priv *priv, struct ipoib_path *path) { struct rb_node **n = &priv->path_tree.rb_node; struct rb_node *pn = NULL; struct ipoib_path *tpath; int ret; while (*n) { pn = *n; tpath = rb_entry(pn, struct ipoib_path, rb_node); ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw, sizeof (union ib_gid)); if (ret < 0) n = &pn->rb_left; else if (ret > 0) n = &pn->rb_right; else return -EEXIST; } rb_link_node(&path->rb_node, pn, n); rb_insert_color(&path->rb_node, &priv->path_tree); list_add_tail(&path->list, &priv->path_list); return 0; } void ipoib_path_free(struct ipoib_dev_priv *priv, struct ipoib_path *path) { _IF_DRAIN(&path->queue); if (path->ah) ipoib_put_ah(path->ah); if (ipoib_cm_get(path)) ipoib_cm_destroy_tx(ipoib_cm_get(path)); kfree(path); } #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG struct ipoib_path_iter * ipoib_path_iter_init(struct ipoib_dev_priv *priv) { struct ipoib_path_iter *iter; iter = kmalloc(sizeof *iter, GFP_KERNEL); if (!iter) return NULL; iter->priv = priv; memset(iter->path.pathrec.dgid.raw, 0, 16); if (ipoib_path_iter_next(iter)) { kfree(iter); return NULL; } return iter; } int ipoib_path_iter_next(struct ipoib_path_iter *iter) { struct ipoib_dev_priv *priv = iter->priv; struct rb_node *n; struct ipoib_path *path; int ret = 1; spin_lock_irq(&priv->lock); n = rb_first(&priv->path_tree); while (n) { path = rb_entry(n, struct ipoib_path, rb_node); if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw, sizeof (union ib_gid)) < 0) { iter->path = *path; ret = 0; break; } n = rb_next(n); } spin_unlock_irq(&priv->lock); return ret; } void ipoib_path_iter_read(struct ipoib_path_iter *iter, struct ipoib_path *path) { *path = iter->path; } #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ void ipoib_mark_paths_invalid(struct ipoib_dev_priv *priv) { struct ipoib_path *path, *tp; spin_lock_irq(&priv->lock); list_for_each_entry_safe(path, tp, &priv->path_list, list) { ipoib_dbg(priv, "mark path LID 0x%04x GID %16D invalid\n", be16_to_cpu(path->pathrec.dlid), path->pathrec.dgid.raw, ":"); path->valid = 0; } spin_unlock_irq(&priv->lock); } void ipoib_flush_paths(struct ipoib_dev_priv *priv) { struct ipoib_path *path, *tp; LIST_HEAD(remove_list); unsigned long flags; spin_lock_irqsave(&priv->lock, flags); list_splice_init(&priv->path_list, &remove_list); list_for_each_entry(path, &remove_list, list) rb_erase(&path->rb_node, &priv->path_tree); list_for_each_entry_safe(path, tp, &remove_list, list) { if (path->query) ib_sa_cancel_query(path->query_id, path->query); spin_unlock_irqrestore(&priv->lock, flags); wait_for_completion(&path->done); ipoib_path_free(priv, path); spin_lock_irqsave(&priv->lock, flags); } spin_unlock_irqrestore(&priv->lock, flags); } static void path_rec_completion(int status, struct ib_sa_path_rec *pathrec, void *path_ptr) { struct ipoib_path *path = path_ptr; struct ipoib_dev_priv *priv = path->priv; struct ifnet *dev = priv->dev; struct ipoib_ah *ah = NULL; struct ipoib_ah *old_ah = NULL; struct ifqueue mbqueue; struct mbuf *mb; unsigned long flags; if (!status) ipoib_dbg(priv, "PathRec LID 0x%04x for GID %16D\n", be16_to_cpu(pathrec->dlid), pathrec->dgid.raw, ":"); else ipoib_dbg(priv, "PathRec status %d for GID %16D\n", status, path->pathrec.dgid.raw, ":"); bzero(&mbqueue, sizeof(mbqueue)); if (!status) { struct ib_ah_attr av; if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av)) ah = ipoib_create_ah(priv, priv->pd, &av); } spin_lock_irqsave(&priv->lock, flags); if (ah) { path->pathrec = *pathrec; old_ah = path->ah; path->ah = ah; ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n", ah, be16_to_cpu(pathrec->dlid), pathrec->sl); for (;;) { _IF_DEQUEUE(&path->queue, mb); if (mb == NULL) break; _IF_ENQUEUE(&mbqueue, mb); } #ifdef CONFIG_INFINIBAND_IPOIB_CM if (ipoib_cm_enabled(priv, path->hwaddr) && !ipoib_cm_get(path)) ipoib_cm_set(path, ipoib_cm_create_tx(priv, path)); #endif path->valid = 1; } path->query = NULL; complete(&path->done); spin_unlock_irqrestore(&priv->lock, flags); if (old_ah) ipoib_put_ah(old_ah); for (;;) { _IF_DEQUEUE(&mbqueue, mb); if (mb == NULL) break; mb->m_pkthdr.rcvif = dev; if (dev->if_transmit(dev, mb)) ipoib_warn(priv, "dev_queue_xmit failed " "to requeue packet\n"); } } static struct ipoib_path * path_rec_create(struct ipoib_dev_priv *priv, uint8_t *hwaddr) { struct ipoib_path *path; if (!priv->broadcast) return NULL; path = kzalloc(sizeof *path, GFP_ATOMIC); if (!path) return NULL; path->priv = priv; bzero(&path->queue, sizeof(path->queue)); #ifdef CONFIG_INFINIBAND_IPOIB_CM memcpy(&path->hwaddr, hwaddr, INFINIBAND_ALEN); #endif memcpy(path->pathrec.dgid.raw, &hwaddr[4], sizeof (union ib_gid)); path->pathrec.sgid = priv->local_gid; path->pathrec.pkey = cpu_to_be16(priv->pkey); path->pathrec.numb_path = 1; path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class; return path; } static int path_rec_start(struct ipoib_dev_priv *priv, struct ipoib_path *path) { struct ifnet *dev = priv->dev; ib_sa_comp_mask comp_mask = IB_SA_PATH_REC_MTU_SELECTOR | IB_SA_PATH_REC_MTU; struct ib_sa_path_rec p_rec; p_rec = path->pathrec; p_rec.mtu_selector = IB_SA_GT; switch (roundup_pow_of_two(dev->if_mtu + IPOIB_ENCAP_LEN)) { case 512: p_rec.mtu = IB_MTU_256; break; case 1024: p_rec.mtu = IB_MTU_512; break; case 2048: p_rec.mtu = IB_MTU_1024; break; case 4096: p_rec.mtu = IB_MTU_2048; break; default: /* Wildcard everything */ comp_mask = 0; p_rec.mtu = 0; p_rec.mtu_selector = 0; } ipoib_dbg(priv, "Start path record lookup for %16D MTU > %d\n", p_rec.dgid.raw, ":", comp_mask ? ib_mtu_enum_to_int(p_rec.mtu) : 0); init_completion(&path->done); path->query_id = ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port, &p_rec, comp_mask | IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_NUMB_PATH | IB_SA_PATH_REC_TRAFFIC_CLASS | IB_SA_PATH_REC_PKEY, 1000, GFP_ATOMIC, path_rec_completion, path, &path->query); if (path->query_id < 0) { ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id); path->query = NULL; complete(&path->done); return path->query_id; } return 0; } static void ipoib_unicast_send(struct mbuf *mb, struct ipoib_dev_priv *priv, struct ipoib_header *eh) { struct ipoib_path *path; path = __path_find(priv, eh->hwaddr + 4); if (!path || !path->valid) { int new_path = 0; if (!path) { path = path_rec_create(priv, eh->hwaddr); new_path = 1; } if (path) { _IF_ENQUEUE(&path->queue, mb); if (!path->query && path_rec_start(priv, path)) { spin_unlock_irqrestore(&priv->lock, flags); if (new_path) ipoib_path_free(priv, path); return; } else __path_add(priv, path); } else { if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1); m_freem(mb); } return; } if (ipoib_cm_get(path) && ipoib_cm_up(path)) { ipoib_cm_send(priv, mb, ipoib_cm_get(path)); } else if (path->ah) { ipoib_send(priv, mb, path->ah, IPOIB_QPN(eh->hwaddr)); } else if ((path->query || !path_rec_start(priv, path)) && path->queue.ifq_len < IPOIB_MAX_PATH_REC_QUEUE) { _IF_ENQUEUE(&path->queue, mb); } else { if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1); m_freem(mb); } } static int ipoib_send_one(struct ipoib_dev_priv *priv, struct mbuf *mb) { struct ipoib_header *eh; eh = mtod(mb, struct ipoib_header *); if (IPOIB_IS_MULTICAST(eh->hwaddr)) { /* Add in the P_Key for multicast*/ eh->hwaddr[8] = (priv->pkey >> 8) & 0xff; eh->hwaddr[9] = priv->pkey & 0xff; ipoib_mcast_send(priv, eh->hwaddr + 4, mb); } else ipoib_unicast_send(mb, priv, eh); return 0; } static void _ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv) { struct mbuf *mb; if ((dev->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; spin_lock(&priv->lock); while (!IFQ_DRV_IS_EMPTY(&dev->if_snd) && (dev->if_drv_flags & IFF_DRV_OACTIVE) == 0) { IFQ_DRV_DEQUEUE(&dev->if_snd, mb); if (mb == NULL) break; IPOIB_MTAP(dev, mb); ipoib_send_one(priv, mb); } spin_unlock(&priv->lock); } static void ipoib_start(struct ifnet *dev) { _ipoib_start(dev, dev->if_softc); } static void ipoib_vlan_start(struct ifnet *dev) { struct ipoib_dev_priv *priv; struct mbuf *mb; priv = VLAN_COOKIE(dev); if (priv != NULL) return _ipoib_start(dev, priv); while (!IFQ_DRV_IS_EMPTY(&dev->if_snd)) { IFQ_DRV_DEQUEUE(&dev->if_snd, mb); if (mb == NULL) break; m_freem(mb); if_inc_counter(dev, IFCOUNTER_OERRORS, 1); } } int ipoib_dev_init(struct ipoib_dev_priv *priv, struct ib_device *ca, int port) { /* Allocate RX/TX "rings" to hold queued mbs */ priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, GFP_KERNEL); if (!priv->rx_ring) { printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", ca->name, ipoib_recvq_size); goto out; } priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, GFP_KERNEL); if (!priv->tx_ring) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", ca->name, ipoib_sendq_size); goto out_rx_ring_cleanup; } memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring); /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ if (ipoib_ib_dev_init(priv, ca, port)) goto out_tx_ring_cleanup; return 0; out_tx_ring_cleanup: kfree(priv->tx_ring); out_rx_ring_cleanup: kfree(priv->rx_ring); out: return -ENOMEM; } static void ipoib_detach(struct ipoib_dev_priv *priv) { struct ifnet *dev; dev = priv->dev; if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { bpfdetach(dev); if_detach(dev); if_free(dev); } else VLAN_SETCOOKIE(priv->dev, NULL); free(priv, M_TEMP); } void ipoib_dev_cleanup(struct ipoib_dev_priv *priv) { struct ipoib_dev_priv *cpriv, *tcpriv; /* Delete any child interfaces first */ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { ipoib_dev_cleanup(cpriv); ipoib_detach(cpriv); } ipoib_ib_dev_cleanup(priv); kfree(priv->rx_ring); kfree(priv->tx_ring); priv->rx_ring = NULL; priv->tx_ring = NULL; } static volatile int ipoib_unit; static struct ipoib_dev_priv * ipoib_priv_alloc(void) { struct ipoib_dev_priv *priv; priv = malloc(sizeof(struct ipoib_dev_priv), M_TEMP, M_ZERO|M_WAITOK); spin_lock_init(&priv->lock); spin_lock_init(&priv->drain_lock); mutex_init(&priv->vlan_mutex); INIT_LIST_HEAD(&priv->path_list); INIT_LIST_HEAD(&priv->child_intfs); INIT_LIST_HEAD(&priv->dead_ahs); INIT_LIST_HEAD(&priv->multicast_list); INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task); INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); memcpy(priv->broadcastaddr, ipv4_bcast_addr, INFINIBAND_ALEN); return (priv); } struct ipoib_dev_priv * ipoib_intf_alloc(const char *name) { struct ipoib_dev_priv *priv; struct sockaddr_dl *sdl; struct ifnet *dev; priv = ipoib_priv_alloc(); dev = priv->dev = if_alloc(IFT_INFINIBAND); if (!dev) { free(priv, M_TEMP); return NULL; } dev->if_softc = priv; if_initname(dev, name, atomic_fetchadd_int(&ipoib_unit, 1)); dev->if_flags = IFF_BROADCAST | IFF_MULTICAST; dev->if_addrlen = INFINIBAND_ALEN; dev->if_hdrlen = IPOIB_HEADER_LEN; if_attach(dev); dev->if_init = ipoib_init; dev->if_ioctl = ipoib_ioctl; dev->if_start = ipoib_start; dev->if_output = ipoib_output; dev->if_input = ipoib_input; dev->if_resolvemulti = ipoib_resolvemulti; dev->if_baudrate = IF_Gbps(10); dev->if_broadcastaddr = priv->broadcastaddr; dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2; sdl = (struct sockaddr_dl *)dev->if_addr->ifa_addr; sdl->sdl_type = IFT_INFINIBAND; sdl->sdl_alen = dev->if_addrlen; priv->dev = dev; if_link_state_change(dev, LINK_STATE_DOWN); bpfattach(dev, DLT_EN10MB, ETHER_HDR_LEN); return dev->if_softc; } int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca) { struct ib_device_attr *device_attr; int result = -ENOMEM; device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL); if (!device_attr) { printk(KERN_WARNING "%s: allocation of %zu bytes failed\n", hca->name, sizeof *device_attr); return result; } result = ib_query_device(hca, device_attr); if (result) { printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n", hca->name, result); kfree(device_attr); return result; } priv->hca_caps = device_attr->device_cap_flags; kfree(device_attr); priv->dev->if_hwassist = 0; priv->dev->if_capabilities = 0; #ifndef CONFIG_INFINIBAND_IPOIB_CM if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { set_bit(IPOIB_FLAG_CSUM, &priv->flags); priv->dev->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP; priv->dev->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; } #if 0 if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO) { priv->dev->if_capabilities |= IFCAP_TSO4; priv->dev->if_hwassist |= CSUM_TSO; } #endif #endif priv->dev->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_LINKSTATE; priv->dev->if_capenable = priv->dev->if_capabilities; return 0; } static struct ifnet * ipoib_add_port(const char *format, struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; struct ib_port_attr attr; int result = -ENOMEM; priv = ipoib_intf_alloc(format); if (!priv) goto alloc_mem_failed; if (!ib_query_port(hca, port, &attr)) priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); else { printk(KERN_WARNING "%s: ib_query_port %d failed\n", hca->name, port); goto device_init_failed; } /* MTU will be reset when mcast join happens */ priv->dev->if_mtu = IPOIB_UD_MTU(priv->max_ib_mtu); priv->mcast_mtu = priv->admin_mtu = priv->dev->if_mtu; result = ib_query_pkey(hca, port, 0, &priv->pkey); if (result) { printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", hca->name, port, result); goto device_init_failed; } if (ipoib_set_dev_features(priv, hca)) goto device_init_failed; /* * Set the full membership bit, so that we join the right * broadcast group, etc. */ priv->pkey |= 0x8000; priv->broadcastaddr[8] = priv->pkey >> 8; priv->broadcastaddr[9] = priv->pkey & 0xff; result = ib_query_gid(hca, port, 0, &priv->local_gid); if (result) { printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n", hca->name, port, result); goto device_init_failed; } memcpy(IF_LLADDR(priv->dev) + 4, priv->local_gid.raw, sizeof (union ib_gid)); result = ipoib_dev_init(priv, hca, port); if (result < 0) { printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n", hca->name, port, result); goto device_init_failed; } if (ipoib_cm_admin_enabled(priv)) priv->dev->if_mtu = IPOIB_CM_MTU(ipoib_cm_max_mtu(priv)); INIT_IB_EVENT_HANDLER(&priv->event_handler, priv->ca, ipoib_event); result = ib_register_event_handler(&priv->event_handler); if (result < 0) { printk(KERN_WARNING "%s: ib_register_event_handler failed for " "port %d (ret = %d)\n", hca->name, port, result); goto event_failed; } if_printf(priv->dev, "Attached to %s port %d\n", hca->name, port); return priv->dev; event_failed: ipoib_dev_cleanup(priv); device_init_failed: ipoib_detach(priv); alloc_mem_failed: return ERR_PTR(result); } static void ipoib_add_one(struct ib_device *device) { struct list_head *dev_list; struct ifnet *dev; struct ipoib_dev_priv *priv; int s, e, p; if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); if (!dev_list) return; INIT_LIST_HEAD(dev_list); if (device->node_type == RDMA_NODE_IB_SWITCH) { s = 0; e = 0; } else { s = 1; e = device->phys_port_cnt; } for (p = s; p <= e; ++p) { if (rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND) continue; dev = ipoib_add_port("ib", device, p); if (!IS_ERR(dev)) { priv = dev->if_softc; list_add_tail(&priv->list, dev_list); } } ib_set_client_data(device, &ipoib_client, dev_list); } static void ipoib_remove_one(struct ib_device *device) { struct ipoib_dev_priv *priv, *tmp; struct list_head *dev_list; if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; dev_list = ib_get_client_data(device, &ipoib_client); list_for_each_entry_safe(priv, tmp, dev_list, list) { if (rdma_port_get_link_layer(device, priv->port) != IB_LINK_LAYER_INFINIBAND) continue; ipoib_stop(priv); ib_unregister_event_handler(&priv->event_handler); /* dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); */ flush_workqueue(ipoib_workqueue); ipoib_dev_cleanup(priv); ipoib_detach(priv); } kfree(dev_list); } static void ipoib_config_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) { struct ipoib_dev_priv *parent; struct ipoib_dev_priv *priv; struct ifnet *dev; uint16_t pkey; int error; if (ifp->if_type != IFT_INFINIBAND) return; dev = VLAN_DEVAT(ifp, vtag); if (dev == NULL) return; priv = NULL; error = 0; parent = ifp->if_softc; /* We only support 15 bits of pkey. */ if (vtag & 0x8000) return; pkey = vtag | 0x8000; /* Set full membership bit. */ if (pkey == parent->pkey) return; /* Check for dups */ mutex_lock(&parent->vlan_mutex); list_for_each_entry(priv, &parent->child_intfs, list) { if (priv->pkey == pkey) { priv = NULL; error = EBUSY; goto out; } } priv = ipoib_priv_alloc(); priv->dev = dev; priv->max_ib_mtu = parent->max_ib_mtu; priv->mcast_mtu = priv->admin_mtu = parent->dev->if_mtu; set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); error = ipoib_set_dev_features(priv, parent->ca); if (error) goto out; priv->pkey = pkey; priv->broadcastaddr[8] = pkey >> 8; priv->broadcastaddr[9] = pkey & 0xff; dev->if_broadcastaddr = priv->broadcastaddr; error = ipoib_dev_init(priv, parent->ca, parent->port); if (error) goto out; priv->parent = parent->dev; list_add_tail(&priv->list, &parent->child_intfs); VLAN_SETCOOKIE(dev, priv); dev->if_start = ipoib_vlan_start; dev->if_drv_flags &= ~IFF_DRV_RUNNING; dev->if_hdrlen = IPOIB_HEADER_LEN; if (ifp->if_drv_flags & IFF_DRV_RUNNING) ipoib_open(priv); mutex_unlock(&parent->vlan_mutex); return; out: mutex_unlock(&parent->vlan_mutex); if (priv) free(priv, M_TEMP); if (error) ipoib_warn(parent, "failed to initialize subinterface: device %s, port %d vtag 0x%X", parent->ca->name, parent->port, vtag); return; } static void ipoib_unconfig_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) { struct ipoib_dev_priv *parent; struct ipoib_dev_priv *priv; struct ifnet *dev; uint16_t pkey; if (ifp->if_type != IFT_INFINIBAND) return; dev = VLAN_DEVAT(ifp, vtag); if (dev) VLAN_SETCOOKIE(dev, NULL); pkey = vtag | 0x8000; parent = ifp->if_softc; mutex_lock(&parent->vlan_mutex); list_for_each_entry(priv, &parent->child_intfs, list) { if (priv->pkey == pkey) { ipoib_dev_cleanup(priv); list_del(&priv->list); break; } } mutex_unlock(&parent->vlan_mutex); } eventhandler_tag ipoib_vlan_attach; eventhandler_tag ipoib_vlan_detach; static int __init ipoib_init_module(void) { int ret; ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size); ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE); ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE); ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE, IPOIB_MIN_QUEUE_SIZE)); #ifdef CONFIG_INFINIBAND_IPOIB_CM ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); #endif ipoib_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, ipoib_config_vlan, NULL, EVENTHANDLER_PRI_FIRST); ipoib_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, ipoib_unconfig_vlan, NULL, EVENTHANDLER_PRI_FIRST); /* * We create our own workqueue mainly because we want to be * able to flush it when devices are being removed. We can't * use schedule_work()/flush_scheduled_work() because both * unregister_netdev() and linkwatch_event take the rtnl lock, * so flush_scheduled_work() can deadlock during device * removal. */ ipoib_workqueue = create_singlethread_workqueue("ipoib"); if (!ipoib_workqueue) { ret = -ENOMEM; goto err_fs; } ib_sa_register_client(&ipoib_sa_client); ret = ib_register_client(&ipoib_client); if (ret) goto err_sa; return 0; err_sa: ib_sa_unregister_client(&ipoib_sa_client); destroy_workqueue(ipoib_workqueue); err_fs: return ret; } static void __exit ipoib_cleanup_module(void) { EVENTHANDLER_DEREGISTER(vlan_config, ipoib_vlan_attach); EVENTHANDLER_DEREGISTER(vlan_unconfig, ipoib_vlan_detach); ib_unregister_client(&ipoib_client); ib_sa_unregister_client(&ipoib_sa_client); destroy_workqueue(ipoib_workqueue); } /* * Infiniband output routine. */ static int ipoib_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_char edst[INFINIBAND_ALEN]; #if defined(INET) || defined(INET6) struct llentry *lle = NULL; #endif - struct rtentry *rt0 = NULL; struct ipoib_header *eh; int error = 0, is_gw = 0; short type; - if (ro != NULL) { - rt0 = ro->ro_rt; - if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0) - is_gw = 1; - } + if (ro != NULL) + is_gw = (ro->ro_flags & RT_HAS_GW) != 0; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) goto bad; #endif M_PROFILE(m); if (ifp->if_flags & IFF_MONITOR) { error = ENETDOWN; goto bad; } if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) { error = ENETDOWN; goto bad; } switch (dst->sa_family) { #ifdef INET case AF_INET: if (lle != NULL && (lle->la_flags & LLE_VALID)) memcpy(edst, lle->ll_addr, sizeof(edst)); else if (m->m_flags & M_MCAST) ip_ib_mc_map(((struct sockaddr_in *)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst); else error = arpresolve(ifp, is_gw, m, dst, edst, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IP); break; case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_INFINIBAND); switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: type = htons(ETHERTYPE_REVARP); break; case ARPOP_REQUEST: case ARPOP_REPLY: default: type = htons(ETHERTYPE_ARP); break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, edst, INFINIBAND_ALEN); else bcopy(ar_tha(ah), edst, INFINIBAND_ALEN); } break; #endif #ifdef INET6 case AF_INET6: if (lle != NULL && (lle->la_flags & LLE_VALID)) memcpy(edst, lle->ll_addr, sizeof(edst)); else if (m->m_flags & M_MCAST) ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst); else error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL); if (error) return error; type = htons(ETHERTYPE_IPV6); break; #endif default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); error = EAFNOSUPPORT; goto bad; } /* * Add local net header. If no space in first mbuf, * allocate another. */ M_PREPEND(m, IPOIB_HEADER_LEN, M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto bad; } eh = mtod(m, struct ipoib_header *); (void)memcpy(&eh->proto, &type, sizeof(eh->proto)); (void)memcpy(&eh->hwaddr, edst, sizeof (edst)); /* * Queue message on interface, update output statistics if * successful, and start output if interface not yet active. */ return ((ifp->if_transmit)(ifp, m)); bad: if (m != NULL) m_freem(m); return (error); } /* * Upper layer processing for a received Infiniband packet. */ void ipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto) { int isr; #ifdef MAC /* * Tag the mbuf with an appropriate MAC label before any other * consumers can get to it. */ mac_ifnet_create_mbuf(ifp, m); #endif /* Allow monitor mode to claim this frame, after stats are updated. */ if (ifp->if_flags & IFF_MONITOR) { if_printf(ifp, "discard frame at IFF_MONITOR\n"); m_freem(m); return; } /* * Dispatch frame to upper layer. */ switch (proto) { #ifdef INET case ETHERTYPE_IP: isr = NETISR_IP; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) { /* Discard packet if ARP is disabled on interface */ m_freem(m); return; } isr = NETISR_ARP; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; break; #endif default: goto discard; } netisr_dispatch(isr, m); return; discard: m_freem(m); } /* * Process a received Infiniband packet. */ static void ipoib_input(struct ifnet *ifp, struct mbuf *m) { struct ipoib_header *eh; if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); return; } CURVNET_SET_QUIET(ifp->if_vnet); /* Let BPF have it before we strip the header. */ IPOIB_MTAP(ifp, m); eh = mtod(m, struct ipoib_header *); /* * Reset layer specific mbuf flags to avoid confusing upper layers. * Strip off Infiniband header. */ m->m_flags &= ~M_VLANTAG; m_clrprotoflags(m); m_adj(m, IPOIB_HEADER_LEN); if (IPOIB_IS_MULTICAST(eh->hwaddr)) { if (memcmp(eh->hwaddr, ifp->if_broadcastaddr, ifp->if_addrlen) == 0) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } ipoib_demux(ifp, m, ntohs(eh->proto)); CURVNET_RESTORE(); } static int ipoib_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa) { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif u_char *e_addr; switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; e_addr = LLADDR(sdl); if (!IPOIB_IS_MULTICAST(e_addr)) return EADDRNOTAVAIL; *llsa = 0; return 0; #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND); sdl->sdl_alen = INFINIBAND_ALEN; e_addr = LLADDR(sdl); ip_ib_mc_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; /* * An IP6 address of 0 means listen to all * of the multicast address used for IP6. * This has no meaning in ipoib. */ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) return EADDRNOTAVAIL; if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND); sdl->sdl_alen = INFINIBAND_ALEN; e_addr = LLADDR(sdl); ipv6_ib_mc_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif default: return EAFNOSUPPORT; } } module_init(ipoib_init_module); module_exit(ipoib_cleanup_module); static int ipoib_evhand(module_t mod, int event, void *arg) { return (0); } static moduledata_t ipoib_mod = { .name = "ipoib", .evhand = ipoib_evhand, }; DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_SMP, SI_ORDER_ANY); MODULE_DEPEND(ipoib, ibcore, 1, 1, 1); MODULE_DEPEND(ipoib, linuxkpi, 1, 1, 1); Index: projects/clang380-import/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c =================================================================== --- projects/clang380-import/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c (revision 293686) +++ projects/clang380-import/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c (revision 293687) @@ -1,782 +1,782 @@ /* * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "sdp.h" SDP_MODPARAM_INT(rcvbuf_initial_size, 32 * 1024, "Receive buffer initial size in bytes."); SDP_MODPARAM_SINT(rcvbuf_scale, 0x8, "Receive buffer size scale factor."); /* Like tcp_fin - called when SDP_MID_DISCONNECT is received */ static void sdp_handle_disconn(struct sdp_sock *ssk) { sdp_dbg(ssk->socket, "%s\n", __func__); SDP_WLOCK_ASSERT(ssk); if (TCPS_HAVERCVDFIN(ssk->state) == 0) socantrcvmore(ssk->socket); switch (ssk->state) { case TCPS_SYN_RECEIVED: case TCPS_ESTABLISHED: ssk->state = TCPS_CLOSE_WAIT; break; case TCPS_FIN_WAIT_1: /* Received a reply FIN - start Infiniband tear down */ sdp_dbg(ssk->socket, "%s: Starting Infiniband tear down sending DREQ\n", __func__); sdp_cancel_dreq_wait_timeout(ssk); ssk->qp_active = 0; if (ssk->id) { struct rdma_cm_id *id; id = ssk->id; SDP_WUNLOCK(ssk); rdma_disconnect(id); SDP_WLOCK(ssk); } else { sdp_warn(ssk->socket, "%s: ssk->id is NULL\n", __func__); return; } break; case TCPS_TIME_WAIT: /* This is a mutual close situation and we've got the DREQ from the peer before the SDP_MID_DISCONNECT */ break; case TCPS_CLOSED: /* FIN arrived after IB teardown started - do nothing */ sdp_dbg(ssk->socket, "%s: fin in state %s\n", __func__, sdp_state_str(ssk->state)); return; default: sdp_warn(ssk->socket, "%s: FIN in unexpected state. state=%d\n", __func__, ssk->state); break; } } static int sdp_post_recv(struct sdp_sock *ssk) { struct sdp_buf *rx_req; int i, rc; u64 addr; struct ib_device *dev; struct ib_recv_wr rx_wr = { NULL }; struct ib_sge ibsge[SDP_MAX_RECV_SGES]; struct ib_sge *sge = ibsge; struct ib_recv_wr *bad_wr; struct mbuf *mb, *m; struct sdp_bsdh *h; int id = ring_head(ssk->rx_ring); /* Now, allocate and repost recv */ sdp_prf(ssk->socket, mb, "Posting mb"); mb = m_getm2(NULL, ssk->recv_bytes, M_NOWAIT, MT_DATA, M_PKTHDR); if (mb == NULL) { /* Retry so we can't stall out with no memory. */ if (!rx_ring_posted(ssk)) queue_work(rx_comp_wq, &ssk->rx_comp_work); return -1; } for (m = mb; m != NULL; m = m->m_next) { m->m_len = M_SIZE(m); mb->m_pkthdr.len += m->m_len; } h = mtod(mb, struct sdp_bsdh *); rx_req = ssk->rx_ring.buffer + (id & (SDP_RX_SIZE - 1)); rx_req->mb = mb; dev = ssk->ib_device; for (i = 0; mb != NULL; i++, mb = mb->m_next, sge++) { addr = ib_dma_map_single(dev, mb->m_data, mb->m_len, DMA_TO_DEVICE); /* TODO: proper error handling */ BUG_ON(ib_dma_mapping_error(dev, addr)); BUG_ON(i >= SDP_MAX_RECV_SGES); rx_req->mapping[i] = addr; sge->addr = addr; sge->length = mb->m_len; sge->lkey = ssk->sdp_dev->mr->lkey; } rx_wr.next = NULL; rx_wr.wr_id = id | SDP_OP_RECV; rx_wr.sg_list = ibsge; rx_wr.num_sge = i; rc = ib_post_recv(ssk->qp, &rx_wr, &bad_wr); if (unlikely(rc)) { sdp_warn(ssk->socket, "ib_post_recv failed. status %d\n", rc); sdp_cleanup_sdp_buf(ssk, rx_req, DMA_FROM_DEVICE); m_freem(mb); sdp_notify(ssk, ECONNRESET); return -1; } atomic_inc(&ssk->rx_ring.head); SDPSTATS_COUNTER_INC(post_recv); return 0; } static inline int sdp_post_recvs_needed(struct sdp_sock *ssk) { unsigned long bytes_in_process; unsigned long max_bytes; int buffer_size; int posted; if (!ssk->qp_active || !ssk->socket) return 0; posted = rx_ring_posted(ssk); if (posted >= SDP_RX_SIZE) return 0; if (posted < SDP_MIN_TX_CREDITS) return 1; buffer_size = ssk->recv_bytes; max_bytes = max(ssk->socket->so_snd.sb_hiwat, (1 + SDP_MIN_TX_CREDITS) * buffer_size); max_bytes *= rcvbuf_scale; /* * Compute bytes in the receive queue and socket buffer. */ bytes_in_process = (posted - SDP_MIN_TX_CREDITS) * buffer_size; bytes_in_process += sbused(&ssk->socket->so_rcv); return bytes_in_process < max_bytes; } static inline void sdp_post_recvs(struct sdp_sock *ssk) { while (sdp_post_recvs_needed(ssk)) if (sdp_post_recv(ssk)) return; } static inline struct mbuf * sdp_sock_queue_rcv_mb(struct socket *sk, struct mbuf *mb) { struct sdp_sock *ssk = sdp_sk(sk); struct sdp_bsdh *h; h = mtod(mb, struct sdp_bsdh *); #ifdef SDP_ZCOPY SDP_SKB_CB(mb)->seq = rcv_nxt(ssk); if (h->mid == SDP_MID_SRCAVAIL) { struct sdp_srcah *srcah = (struct sdp_srcah *)(h+1); struct rx_srcavail_state *rx_sa; ssk->srcavail_cancel_mseq = 0; ssk->rx_sa = rx_sa = RX_SRCAVAIL_STATE(mb) = kzalloc( sizeof(struct rx_srcavail_state), M_NOWAIT); rx_sa->mseq = ntohl(h->mseq); rx_sa->used = 0; rx_sa->len = mb_len = ntohl(srcah->len); rx_sa->rkey = ntohl(srcah->rkey); rx_sa->vaddr = be64_to_cpu(srcah->vaddr); rx_sa->flags = 0; if (ssk->tx_sa) { sdp_dbg_data(ssk->socket, "got RX SrcAvail while waiting " "for TX SrcAvail. waking up TX SrcAvail" "to be aborted\n"); wake_up(sk->sk_sleep); } atomic_add(mb->len, &ssk->rcv_nxt); sdp_dbg_data(sk, "queueing SrcAvail. mb_len = %d vaddr = %lld\n", mb_len, rx_sa->vaddr); } else #endif { atomic_add(mb->m_pkthdr.len, &ssk->rcv_nxt); } m_adj(mb, SDP_HEAD_SIZE); SOCKBUF_LOCK(&sk->so_rcv); if (unlikely(h->flags & SDP_OOB_PRES)) sdp_urg(ssk, mb); - sbappend_locked(&sk->so_rcv, mb); + sbappend_locked(&sk->so_rcv, mb, 0); sorwakeup_locked(sk); return mb; } static int sdp_get_recv_bytes(struct sdp_sock *ssk, u32 new_size) { return MIN(new_size, SDP_MAX_PACKET); } int sdp_init_buffers(struct sdp_sock *ssk, u32 new_size) { ssk->recv_bytes = sdp_get_recv_bytes(ssk, new_size); sdp_post_recvs(ssk); return 0; } int sdp_resize_buffers(struct sdp_sock *ssk, u32 new_size) { u32 curr_size = ssk->recv_bytes; u32 max_size = SDP_MAX_PACKET; if (new_size > curr_size && new_size <= max_size) { ssk->recv_bytes = sdp_get_recv_bytes(ssk, new_size); return 0; } return -1; } static void sdp_handle_resize_request(struct sdp_sock *ssk, struct sdp_chrecvbuf *buf) { if (sdp_resize_buffers(ssk, ntohl(buf->size)) == 0) ssk->recv_request_head = ring_head(ssk->rx_ring) + 1; else ssk->recv_request_head = ring_tail(ssk->rx_ring); ssk->recv_request = 1; } static void sdp_handle_resize_ack(struct sdp_sock *ssk, struct sdp_chrecvbuf *buf) { u32 new_size = ntohl(buf->size); if (new_size > ssk->xmit_size_goal) ssk->xmit_size_goal = new_size; } static struct mbuf * sdp_recv_completion(struct sdp_sock *ssk, int id) { struct sdp_buf *rx_req; struct ib_device *dev; struct mbuf *mb; if (unlikely(id != ring_tail(ssk->rx_ring))) { printk(KERN_WARNING "Bogus recv completion id %d tail %d\n", id, ring_tail(ssk->rx_ring)); return NULL; } dev = ssk->ib_device; rx_req = &ssk->rx_ring.buffer[id & (SDP_RX_SIZE - 1)]; mb = rx_req->mb; sdp_cleanup_sdp_buf(ssk, rx_req, DMA_FROM_DEVICE); atomic_inc(&ssk->rx_ring.tail); atomic_dec(&ssk->remote_credits); return mb; } /* socket lock should be taken before calling this */ static int sdp_process_rx_ctl_mb(struct sdp_sock *ssk, struct mbuf *mb) { struct sdp_bsdh *h; struct socket *sk; SDP_WLOCK_ASSERT(ssk); sk = ssk->socket; h = mtod(mb, struct sdp_bsdh *); switch (h->mid) { case SDP_MID_DATA: case SDP_MID_SRCAVAIL: sdp_dbg(sk, "DATA after socket rcv was shutdown\n"); /* got data in RCV_SHUTDOWN */ if (ssk->state == TCPS_FIN_WAIT_1) { sdp_dbg(sk, "RX data when state = FIN_WAIT1\n"); sdp_notify(ssk, ECONNRESET); } m_freem(mb); break; #ifdef SDP_ZCOPY case SDP_MID_RDMARDCOMPL: m_freem(mb); break; case SDP_MID_SENDSM: sdp_handle_sendsm(ssk, ntohl(h->mseq_ack)); m_freem(mb); break; case SDP_MID_SRCAVAIL_CANCEL: sdp_dbg_data(sk, "Handling SrcAvailCancel\n"); sdp_prf(sk, NULL, "Handling SrcAvailCancel"); if (ssk->rx_sa) { ssk->srcavail_cancel_mseq = ntohl(h->mseq); ssk->rx_sa->flags |= RX_SA_ABORTED; ssk->rx_sa = NULL; /* TODO: change it into SDP_MID_DATA and get the dirty logic from recvmsg */ } else { sdp_dbg(sk, "Got SrcAvailCancel - " "but no SrcAvail in process\n"); } m_freem(mb); break; case SDP_MID_SINKAVAIL: sdp_dbg_data(sk, "Got SinkAvail - not supported: ignored\n"); sdp_prf(sk, NULL, "Got SinkAvail - not supported: ignored"); /* FALLTHROUGH */ #endif case SDP_MID_ABORT: sdp_dbg_data(sk, "Handling ABORT\n"); sdp_prf(sk, NULL, "Handling ABORT"); sdp_notify(ssk, ECONNRESET); m_freem(mb); break; case SDP_MID_DISCONN: sdp_dbg_data(sk, "Handling DISCONN\n"); sdp_prf(sk, NULL, "Handling DISCONN"); sdp_handle_disconn(ssk); break; case SDP_MID_CHRCVBUF: sdp_dbg_data(sk, "Handling RX CHRCVBUF\n"); sdp_handle_resize_request(ssk, (struct sdp_chrecvbuf *)(h+1)); m_freem(mb); break; case SDP_MID_CHRCVBUF_ACK: sdp_dbg_data(sk, "Handling RX CHRCVBUF_ACK\n"); sdp_handle_resize_ack(ssk, (struct sdp_chrecvbuf *)(h+1)); m_freem(mb); break; default: /* TODO: Handle other messages */ sdp_warn(sk, "SDP: FIXME MID %d\n", h->mid); m_freem(mb); } return 0; } static int sdp_process_rx_mb(struct sdp_sock *ssk, struct mbuf *mb) { struct socket *sk; struct sdp_bsdh *h; unsigned long mseq_ack; int credits_before; h = mtod(mb, struct sdp_bsdh *); sk = ssk->socket; /* * If another thread is in so_pcbfree this may be partially torn * down but no further synchronization is required as the destroying * thread will wait for receive to shutdown before discarding the * socket. */ if (sk == NULL) { m_freem(mb); return 0; } SDPSTATS_HIST_LINEAR(credits_before_update, tx_credits(ssk)); mseq_ack = ntohl(h->mseq_ack); credits_before = tx_credits(ssk); atomic_set(&ssk->tx_ring.credits, mseq_ack - ring_head(ssk->tx_ring) + 1 + ntohs(h->bufs)); if (mseq_ack >= ssk->nagle_last_unacked) ssk->nagle_last_unacked = 0; sdp_prf1(ssk->socket, mb, "RX %s +%d c:%d->%d mseq:%d ack:%d\n", mid2str(h->mid), ntohs(h->bufs), credits_before, tx_credits(ssk), ntohl(h->mseq), ntohl(h->mseq_ack)); if (unlikely(h->mid == SDP_MID_DATA && mb->m_pkthdr.len == SDP_HEAD_SIZE)) { /* Credit update is valid even after RCV_SHUTDOWN */ m_freem(mb); return 0; } if ((h->mid != SDP_MID_DATA && h->mid != SDP_MID_SRCAVAIL) || TCPS_HAVERCVDFIN(ssk->state)) { sdp_prf(sk, NULL, "Control mb - queing to control queue"); #ifdef SDP_ZCOPY if (h->mid == SDP_MID_SRCAVAIL_CANCEL) { sdp_dbg_data(sk, "Got SrcAvailCancel. " "seq: 0x%d seq_ack: 0x%d\n", ntohl(h->mseq), ntohl(h->mseq_ack)); ssk->srcavail_cancel_mseq = ntohl(h->mseq); } if (h->mid == SDP_MID_RDMARDCOMPL) { struct sdp_rrch *rrch = (struct sdp_rrch *)(h+1); sdp_dbg_data(sk, "RdmaRdCompl message arrived\n"); sdp_handle_rdma_read_compl(ssk, ntohl(h->mseq_ack), ntohl(rrch->len)); } #endif mb->m_nextpkt = NULL; if (ssk->rx_ctl_tail) ssk->rx_ctl_tail->m_nextpkt = mb; else ssk->rx_ctl_q = mb; ssk->rx_ctl_tail = mb; return 0; } sdp_prf1(sk, NULL, "queueing %s mb\n", mid2str(h->mid)); mb = sdp_sock_queue_rcv_mb(sk, mb); return 0; } /* called only from irq */ static struct mbuf * sdp_process_rx_wc(struct sdp_sock *ssk, struct ib_wc *wc) { struct mbuf *mb; struct sdp_bsdh *h; struct socket *sk = ssk->socket; int mseq; mb = sdp_recv_completion(ssk, wc->wr_id); if (unlikely(!mb)) return NULL; if (unlikely(wc->status)) { if (ssk->qp_active && sk) { sdp_dbg(sk, "Recv completion with error. " "Status %d, vendor: %d\n", wc->status, wc->vendor_err); sdp_abort(sk); ssk->qp_active = 0; } m_freem(mb); return NULL; } sdp_dbg_data(sk, "Recv completion. ID %d Length %d\n", (int)wc->wr_id, wc->byte_len); if (unlikely(wc->byte_len < sizeof(struct sdp_bsdh))) { sdp_warn(sk, "SDP BUG! byte_len %d < %zd\n", wc->byte_len, sizeof(struct sdp_bsdh)); m_freem(mb); return NULL; } /* Use m_adj to trim the tail of data we didn't use. */ m_adj(mb, -(mb->m_pkthdr.len - wc->byte_len)); h = mtod(mb, struct sdp_bsdh *); SDP_DUMP_PACKET(ssk->socket, "RX", mb, h); ssk->rx_packets++; ssk->rx_bytes += mb->m_pkthdr.len; mseq = ntohl(h->mseq); atomic_set(&ssk->mseq_ack, mseq); if (mseq != (int)wc->wr_id) sdp_warn(sk, "SDP BUG! mseq %d != wrid %d\n", mseq, (int)wc->wr_id); return mb; } /* Wakeup writers if we now have credits. */ static void sdp_bzcopy_write_space(struct sdp_sock *ssk) { struct socket *sk = ssk->socket; if (tx_credits(ssk) >= ssk->min_bufs && sk) sowwakeup(sk); } /* only from interrupt. */ static int sdp_poll_rx_cq(struct sdp_sock *ssk) { struct ib_cq *cq = ssk->rx_ring.cq; struct ib_wc ibwc[SDP_NUM_WC]; int n, i; int wc_processed = 0; struct mbuf *mb; do { n = ib_poll_cq(cq, SDP_NUM_WC, ibwc); for (i = 0; i < n; ++i) { struct ib_wc *wc = &ibwc[i]; BUG_ON(!(wc->wr_id & SDP_OP_RECV)); mb = sdp_process_rx_wc(ssk, wc); if (!mb) continue; sdp_process_rx_mb(ssk, mb); wc_processed++; } } while (n == SDP_NUM_WC); if (wc_processed) sdp_bzcopy_write_space(ssk); return wc_processed; } static void sdp_rx_comp_work(struct work_struct *work) { struct sdp_sock *ssk = container_of(work, struct sdp_sock, rx_comp_work); sdp_prf(ssk->socket, NULL, "%s", __func__); SDP_WLOCK(ssk); if (unlikely(!ssk->qp)) { sdp_prf(ssk->socket, NULL, "qp was destroyed"); goto out; } if (unlikely(!ssk->rx_ring.cq)) { sdp_prf(ssk->socket, NULL, "rx_ring.cq is NULL"); goto out; } if (unlikely(!ssk->poll_cq)) { struct rdma_cm_id *id = ssk->id; if (id && id->qp) rdma_notify(id, IB_EVENT_COMM_EST); goto out; } sdp_do_posts(ssk); out: SDP_WUNLOCK(ssk); } void sdp_do_posts(struct sdp_sock *ssk) { struct socket *sk = ssk->socket; int xmit_poll_force; struct mbuf *mb; SDP_WLOCK_ASSERT(ssk); if (!ssk->qp_active) { sdp_dbg(sk, "QP is deactivated\n"); return; } while ((mb = ssk->rx_ctl_q)) { ssk->rx_ctl_q = mb->m_nextpkt; mb->m_nextpkt = NULL; sdp_process_rx_ctl_mb(ssk, mb); } if (ssk->state == TCPS_TIME_WAIT) return; if (!ssk->rx_ring.cq || !ssk->tx_ring.cq) return; sdp_post_recvs(ssk); if (tx_ring_posted(ssk)) sdp_xmit_poll(ssk, 1); sdp_post_sends(ssk, M_NOWAIT); xmit_poll_force = tx_credits(ssk) < SDP_MIN_TX_CREDITS; if (credit_update_needed(ssk) || xmit_poll_force) { /* if has pending tx because run out of tx_credits - xmit it */ sdp_prf(sk, NULL, "Processing to free pending sends"); sdp_xmit_poll(ssk, xmit_poll_force); sdp_prf(sk, NULL, "Sending credit update"); sdp_post_sends(ssk, M_NOWAIT); } } int sdp_process_rx(struct sdp_sock *ssk) { int wc_processed = 0; int credits_before; if (!rx_ring_trylock(&ssk->rx_ring)) { sdp_dbg(ssk->socket, "ring destroyed. not polling it\n"); return 0; } credits_before = tx_credits(ssk); wc_processed = sdp_poll_rx_cq(ssk); sdp_prf(ssk->socket, NULL, "processed %d", wc_processed); if (wc_processed) { sdp_prf(ssk->socket, NULL, "credits: %d -> %d", credits_before, tx_credits(ssk)); queue_work(rx_comp_wq, &ssk->rx_comp_work); } sdp_arm_rx_cq(ssk); rx_ring_unlock(&ssk->rx_ring); return (wc_processed); } static void sdp_rx_irq(struct ib_cq *cq, void *cq_context) { struct socket *sk = cq_context; struct sdp_sock *ssk = sdp_sk(sk); if (cq != ssk->rx_ring.cq) { sdp_dbg(sk, "cq = %p, ssk->cq = %p\n", cq, ssk->rx_ring.cq); return; } SDPSTATS_COUNTER_INC(rx_int_count); sdp_prf(sk, NULL, "rx irq"); sdp_process_rx(ssk); } static void sdp_rx_ring_purge(struct sdp_sock *ssk) { while (rx_ring_posted(ssk) > 0) { struct mbuf *mb; mb = sdp_recv_completion(ssk, ring_tail(ssk->rx_ring)); if (!mb) break; m_freem(mb); } } void sdp_rx_ring_init(struct sdp_sock *ssk) { ssk->rx_ring.buffer = NULL; ssk->rx_ring.destroyed = 0; rw_init(&ssk->rx_ring.destroyed_lock, "sdp rx lock"); } static void sdp_rx_cq_event_handler(struct ib_event *event, void *data) { } int sdp_rx_ring_create(struct sdp_sock *ssk, struct ib_device *device) { struct ib_cq *rx_cq; int rc = 0; sdp_dbg(ssk->socket, "rx ring created"); INIT_WORK(&ssk->rx_comp_work, sdp_rx_comp_work); atomic_set(&ssk->rx_ring.head, 1); atomic_set(&ssk->rx_ring.tail, 1); ssk->rx_ring.buffer = kmalloc( sizeof *ssk->rx_ring.buffer * SDP_RX_SIZE, GFP_KERNEL); if (!ssk->rx_ring.buffer) { sdp_warn(ssk->socket, "Unable to allocate RX Ring size %zd.\n", sizeof(*ssk->rx_ring.buffer) * SDP_RX_SIZE); return -ENOMEM; } rx_cq = ib_create_cq(device, sdp_rx_irq, sdp_rx_cq_event_handler, ssk->socket, SDP_RX_SIZE, 0); if (IS_ERR(rx_cq)) { rc = PTR_ERR(rx_cq); sdp_warn(ssk->socket, "Unable to allocate RX CQ: %d.\n", rc); goto err_cq; } sdp_sk(ssk->socket)->rx_ring.cq = rx_cq; sdp_arm_rx_cq(ssk); return 0; err_cq: kfree(ssk->rx_ring.buffer); ssk->rx_ring.buffer = NULL; return rc; } void sdp_rx_ring_destroy(struct sdp_sock *ssk) { cancel_work_sync(&ssk->rx_comp_work); rx_ring_destroy_lock(&ssk->rx_ring); if (ssk->rx_ring.buffer) { sdp_rx_ring_purge(ssk); kfree(ssk->rx_ring.buffer); ssk->rx_ring.buffer = NULL; } if (ssk->rx_ring.cq) { if (ib_destroy_cq(ssk->rx_ring.cq)) { sdp_warn(ssk->socket, "destroy cq(%p) failed\n", ssk->rx_ring.cq); } else { ssk->rx_ring.cq = NULL; } } WARN_ON(ring_head(ssk->rx_ring) != ring_tail(ssk->rx_ring)); } Index: projects/clang380-import/sys/powerpc/aim/locore32.S =================================================================== --- projects/clang380-import/sys/powerpc/aim/locore32.S (revision 293686) +++ projects/clang380-import/sys/powerpc/aim/locore32.S (revision 293687) @@ -1,176 +1,115 @@ /* $FreeBSD$ */ -/* $NetBSD: locore.S,v 1.24 2000/05/31 05:09:17 thorpej Exp $ */ /*- - * Copyright (C) 2001 Benno Rice + * Copyright (C) 2010-2016 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/*- - * Copyright (C) 1995, 1996 Wolfgang Solfrank. - * Copyright (C) 1995, 1996 TooLs GmbH. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by TooLs GmbH. - * 4. The name of TooLs GmbH may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ #include "assym.s" #include #include #include #include #include #include "opt_platform.h" /* Locate the per-CPU data structure */ #define GET_CPUINFO(r) \ mfsprg0 r /* * Compiled KERNBASE location and the kernel load address */ .globl kernbase .set kernbase, KERNBASE /* * Globals */ .data .align 3 GLOBAL(__startkernel) .long begin GLOBAL(__endkernel) .long end .align 4 #define TMPSTKSZ 8192 /* 8K temporary stack */ GLOBAL(tmpstk) .space TMPSTKSZ .text .globl btext btext: /* - * This symbol is here for the benefit of kvm_mkdb, and is supposed to - * mark the start of kernel text. + * Main kernel entry point. */ - .globl kernel_text -kernel_text: - -/* - * Startup entry. Note, this must be the first thing in the text - * segment! - */ .text .globl __start __start: /* Figure out where we are */ bl 1f .long _DYNAMIC-. .long _GLOBAL_OFFSET_TABLE_-. .long tmpstk-. 1: mflr %r30 /* Set up temporary stack pointer */ lwz %r1,8(%r30) add %r1,%r1,%r30 addi %r1,%r1,(8+TMPSTKSZ-32) /* Relocate self */ stw %r3,16(%r1) stw %r4,20(%r1) stw %r5,24(%r1) stw %r6,28(%r1) lwz %r3,0(%r30) /* _DYNAMIC in %r3 */ add %r3,%r3,%r30 lwz %r4,4(%r30) /* GOT pointer */ add %r4,%r4,%r30 lwz %r4,4(%r4) /* got[0] is _DYNAMIC link addr */ subf %r4,%r4,%r3 /* subtract to calculate relocbase */ bl elf_reloc_self lwz %r3,16(%r1) lwz %r4,20(%r1) lwz %r5,24(%r1) lwz %r6,28(%r1) /* MD setup */ bl powerpc_init /* Set stack pointer to new value and branch to mi_startup */ mr %r1, %r3 li %r3, 0 stw %r3, 0(%r1) bl mi_startup - /* If mi_startup somehow returns, exit. This would be bad. */ - b OF_exit - -/* - * int setfault() - * - * Similar to setjmp to setup for handling faults on accesses to user memory. - * Any routine using this may only call bcopy, either the form below, - * or the (currently used) C code optimized, so it doesn't use any non-volatile - * registers. - */ - .globl setfault -setfault: - mflr 0 - mfcr 12 - mfsprg 4,0 - lwz 4,TD_PCB(2) /* curthread = r2 */ - stw 3,PCB_ONFAULT(4) - stw 0,0(3) - stw 1,4(3) - stw 2,8(3) - stmw 12,12(3) - xor 3,3,3 - blr + /* mi_startup() does not return */ + b . #include Index: projects/clang380-import/sys/powerpc/aim/locore64.S =================================================================== --- projects/clang380-import/sys/powerpc/aim/locore64.S (revision 293686) +++ projects/clang380-import/sys/powerpc/aim/locore64.S (revision 293687) @@ -1,218 +1,131 @@ /* $FreeBSD$ */ -/* $NetBSD: locore.S,v 1.24 2000/05/31 05:09:17 thorpej Exp $ */ /*- - * Copyright (C) 2001 Benno Rice + * Copyright (C) 2010-2016 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -/*- - * Copyright (C) 1995, 1996 Wolfgang Solfrank. - * Copyright (C) 1995, 1996 TooLs GmbH. - * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by TooLs GmbH. - * 4. The name of TooLs GmbH may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * $FreeBSD$ */ #include "assym.s" #include #include #include #include #include #ifdef _CALL_ELF .abiversion _CALL_ELF #endif -/* Locate the per-CPU data structure */ -#define GET_CPUINFO(r) \ - mfsprg0 r -#define GET_TOCBASE(r) \ - li r,TRAP_TOCBASE; /* Magic address for TOC */ \ - ld r,0(r) - /* Glue for linker script */ .globl kernbase .set kernbase, KERNBASE /* * Globals */ .data .align 3 GLOBAL(__startkernel) .llong begin GLOBAL(__endkernel) .llong end .align 4 #define TMPSTKSZ 16384 /* 16K temporary stack */ GLOBAL(tmpstk) .space TMPSTKSZ TOC_ENTRY(tmpstk) .text .globl btext btext: /* - * This symbol is here for the benefit of kvm_mkdb, and is supposed to - * mark the start of kernel text. - */ - .globl kernel_text -kernel_text: - -/* - * Startup entry. Note, this must be the first thing in the text - * segment! + * Main kernel entry point. * * Calling convention: * r3: Flattened Device Tree pointer (or zero) * r4: ignored * r5: OF client interface pointer (or zero) * r6: Loader metadata pointer (or zero) */ .text ASENTRY_NOPROF(__start) /* Set up the TOC pointer */ b 0f .align 3 0: nop bl 1f .llong __tocbase + 0x8000 - . 1: mflr %r2 ld %r1,0(%r2) add %r2,%r1,%r2 /* Get load offset */ ld %r31,-0x8000(%r2) /* First TOC entry is TOC base */ subf %r31,%r31,%r2 /* Subtract from real TOC base to get base */ /* Set up the stack pointer */ ld %r1,TOC_REF(tmpstk)(%r2) addi %r1,%r1,TMPSTKSZ-96 add %r1,%r1,%r31 /* Relocate kernel */ std %r3,48(%r1) std %r4,56(%r1) std %r5,64(%r1) std %r6,72(%r1) bl 1f .llong _DYNAMIC-. 1: mflr %r3 ld %r4,0(%r3) add %r3,%r4,%r3 mr %r4,%r31 bl elf_reloc_self nop ld %r3,48(%r1) ld %r4,56(%r1) ld %r5,64(%r1) ld %r6,72(%r1) /* Begin CPU init */ mr %r4,%r2 /* Replace ignored r4 with tocbase for trap handlers */ bl powerpc_init nop /* Set stack pointer to new value and branch to mi_startup */ mr %r1, %r3 li %r3, 0 std %r3, 0(%r1) bl mi_startup nop - /* If this returns (it won't), go back to firmware */ - b OF_exit - nop - -/* - * int setfault() - * - * Similar to setjmp to setup for handling faults on accesses to user memory. - * Any routine using this may only call bcopy, either the form below, - * or the (currently used) C code optimized, so it doesn't use any non-volatile - * registers. - */ -ASENTRY_NOPROF(setfault) - mflr 0 - mfcr 12 - mfsprg 4,0 - ld 4,TD_PCB(13) /* curthread = r13 */ - std 3,PCB_ONFAULT(4) - std 0,0(3) - std 1,8(3) - std 2,16(3) - - std %r12,24(%r3) /* Save the non-volatile GP regs. */ - std %r13,24+1*8(%r3) - std %r14,24+2*8(%r3) - std %r15,24+3*8(%r3) - std %r16,24+4*8(%r3) - std %r17,24+5*8(%r3) - std %r18,24+6*8(%r3) - std %r19,24+7*8(%r3) - std %r20,24+8*8(%r3) - std %r21,24+9*8(%r3) - std %r22,24+10*8(%r3) - std %r23,24+11*8(%r3) - std %r24,24+12*8(%r3) - std %r25,24+13*8(%r3) - std %r26,24+14*8(%r3) - std %r27,24+15*8(%r3) - std %r28,24+16*8(%r3) - std %r29,24+17*8(%r3) - std %r30,24+18*8(%r3) - std %r31,24+19*8(%r3) - - xor 3,3,3 - blr + /* Unreachable */ + b . #include Index: projects/clang380-import/sys/powerpc/aim/trap_subr64.S =================================================================== --- projects/clang380-import/sys/powerpc/aim/trap_subr64.S (revision 293686) +++ projects/clang380-import/sys/powerpc/aim/trap_subr64.S (revision 293687) @@ -1,866 +1,869 @@ /* $FreeBSD$ */ /* $NetBSD: trap_subr.S,v 1.20 2002/04/22 23:20:08 kleink Exp $ */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * NOTICE: This is not a standalone file. to use it, #include it in * your port's locore.S, like so: * * #include */ -/* - * Save/restore segment registers - */ +/* Locate the per-CPU data structure */ +#define GET_CPUINFO(r) \ + mfsprg0 r +#define GET_TOCBASE(r) \ + li r,TRAP_TOCBASE; /* Magic address for TOC */ \ + ld r,0(r) /* * Restore SRs for a pmap * * Requires that r28-r31 be scratch, with r28 initialized to the SLB cache */ /* * User SRs are loaded through a pointer to the current pmap. */ restore_usersrs: GET_CPUINFO(%r28) ld %r28,PC_USERSLB(%r28) li %r29, 0 /* Set the counter to zero */ slbia slbmfee %r31,%r29 clrrdi %r31,%r31,28 slbie %r31 1: ld %r31, 0(%r28) /* Load SLB entry pointer */ cmpdi %r31, 0 /* If NULL, stop */ beqlr ld %r30, 0(%r31) /* Load SLBV */ ld %r31, 8(%r31) /* Load SLBE */ or %r31, %r31, %r29 /* Set SLBE slot */ slbmte %r30, %r31 /* Install SLB entry */ addi %r28, %r28, 8 /* Advance pointer */ addi %r29, %r29, 1 b 1b /* Repeat */ /* * Kernel SRs are loaded directly from the PCPU fields */ restore_kernsrs: GET_CPUINFO(%r28) addi %r28,%r28,PC_KERNSLB li %r29, 0 /* Set the counter to zero */ slbia slbmfee %r31,%r29 clrrdi %r31,%r31,28 slbie %r31 1: cmpdi %r29, USER_SLB_SLOT /* Skip the user slot */ beq- 2f ld %r31, 8(%r28) /* Load SLBE */ cmpdi %r31, 0 /* If SLBE is not valid, stop */ beqlr ld %r30, 0(%r28) /* Load SLBV */ slbmte %r30, %r31 /* Install SLB entry */ 2: addi %r28, %r28, 16 /* Advance pointer */ addi %r29, %r29, 1 cmpdi %r29, 64 /* Repeat if we are not at the end */ blt 1b blr /* * FRAME_SETUP assumes: * SPRG1 SP (1) * SPRG3 trap type * savearea r27-r31,DAR,DSISR (DAR & DSISR only for DSI traps) * r28 LR * r29 CR * r30 scratch * r31 scratch * r1 kernel stack * SRR0/1 as at start of trap * * NOTE: SPRG1 is never used while the MMU is on, making it safe to reuse * in any real-mode fault handler, including those handling double faults. */ #define FRAME_SETUP(savearea) \ /* Have to enable translation to allow access of kernel stack: */ \ GET_CPUINFO(%r31); \ mfsrr0 %r30; \ std %r30,(savearea+CPUSAVE_SRR0)(%r31); /* save SRR0 */ \ mfsrr1 %r30; \ std %r30,(savearea+CPUSAVE_SRR1)(%r31); /* save SRR1 */ \ mfsprg1 %r31; /* get saved SP (clears SPRG1) */ \ mfmsr %r30; \ ori %r30,%r30,(PSL_DR|PSL_IR|PSL_RI)@l; /* relocation on */ \ mtmsr %r30; /* stack can now be accessed */ \ isync; \ stdu %r31,-(FRAMELEN+288)(%r1); /* save it in the callframe */ \ std %r0, FRAME_0+48(%r1); /* save r0 in the trapframe */ \ std %r31,FRAME_1+48(%r1); /* save SP " " */ \ std %r2, FRAME_2+48(%r1); /* save r2 " " */ \ std %r28,FRAME_LR+48(%r1); /* save LR " " */ \ std %r29,FRAME_CR+48(%r1); /* save CR " " */ \ GET_CPUINFO(%r2); \ ld %r27,(savearea+CPUSAVE_R27)(%r2); /* get saved r27 */ \ ld %r28,(savearea+CPUSAVE_R28)(%r2); /* get saved r28 */ \ ld %r29,(savearea+CPUSAVE_R29)(%r2); /* get saved r29 */ \ ld %r30,(savearea+CPUSAVE_R30)(%r2); /* get saved r30 */ \ ld %r31,(savearea+CPUSAVE_R31)(%r2); /* get saved r31 */ \ std %r3, FRAME_3+48(%r1); /* save r3-r31 */ \ std %r4, FRAME_4+48(%r1); \ std %r5, FRAME_5+48(%r1); \ std %r6, FRAME_6+48(%r1); \ std %r7, FRAME_7+48(%r1); \ std %r8, FRAME_8+48(%r1); \ std %r9, FRAME_9+48(%r1); \ std %r10, FRAME_10+48(%r1); \ std %r11, FRAME_11+48(%r1); \ std %r12, FRAME_12+48(%r1); \ std %r13, FRAME_13+48(%r1); \ std %r14, FRAME_14+48(%r1); \ std %r15, FRAME_15+48(%r1); \ std %r16, FRAME_16+48(%r1); \ std %r17, FRAME_17+48(%r1); \ std %r18, FRAME_18+48(%r1); \ std %r19, FRAME_19+48(%r1); \ std %r20, FRAME_20+48(%r1); \ std %r21, FRAME_21+48(%r1); \ std %r22, FRAME_22+48(%r1); \ std %r23, FRAME_23+48(%r1); \ std %r24, FRAME_24+48(%r1); \ std %r25, FRAME_25+48(%r1); \ std %r26, FRAME_26+48(%r1); \ std %r27, FRAME_27+48(%r1); \ std %r28, FRAME_28+48(%r1); \ std %r29, FRAME_29+48(%r1); \ std %r30, FRAME_30+48(%r1); \ std %r31, FRAME_31+48(%r1); \ ld %r28,(savearea+CPUSAVE_AIM_DAR)(%r2); /* saved DAR */ \ ld %r29,(savearea+CPUSAVE_AIM_DSISR)(%r2);/* saved DSISR */\ ld %r30,(savearea+CPUSAVE_SRR0)(%r2); /* saved SRR0 */ \ ld %r31,(savearea+CPUSAVE_SRR1)(%r2); /* saved SRR1 */ \ mfxer %r3; \ mfctr %r4; \ mfsprg3 %r5; \ std %r3, FRAME_XER+48(1); /* save xer/ctr/exc */ \ std %r4, FRAME_CTR+48(1); \ std %r5, FRAME_EXC+48(1); \ std %r28,FRAME_AIM_DAR+48(1); \ std %r29,FRAME_AIM_DSISR+48(1); /* save dsisr/srr0/srr1 */ \ std %r30,FRAME_SRR0+48(1); \ std %r31,FRAME_SRR1+48(1); \ ld %r13,PC_CURTHREAD(%r2) /* set kernel curthread */ #define FRAME_LEAVE(savearea) \ /* Disable exceptions: */ \ mfmsr %r2; \ andi. %r2,%r2,~PSL_EE@l; \ mtmsr %r2; \ isync; \ /* Now restore regs: */ \ ld %r2,FRAME_SRR0+48(%r1); \ ld %r3,FRAME_SRR1+48(%r1); \ ld %r4,FRAME_CTR+48(%r1); \ ld %r5,FRAME_XER+48(%r1); \ ld %r6,FRAME_LR+48(%r1); \ GET_CPUINFO(%r7); \ std %r2,(savearea+CPUSAVE_SRR0)(%r7); /* save SRR0 */ \ std %r3,(savearea+CPUSAVE_SRR1)(%r7); /* save SRR1 */ \ ld %r7,FRAME_CR+48(%r1); \ mtctr %r4; \ mtxer %r5; \ mtlr %r6; \ mtsprg2 %r7; /* save cr */ \ ld %r31,FRAME_31+48(%r1); /* restore r0-31 */ \ ld %r30,FRAME_30+48(%r1); \ ld %r29,FRAME_29+48(%r1); \ ld %r28,FRAME_28+48(%r1); \ ld %r27,FRAME_27+48(%r1); \ ld %r26,FRAME_26+48(%r1); \ ld %r25,FRAME_25+48(%r1); \ ld %r24,FRAME_24+48(%r1); \ ld %r23,FRAME_23+48(%r1); \ ld %r22,FRAME_22+48(%r1); \ ld %r21,FRAME_21+48(%r1); \ ld %r20,FRAME_20+48(%r1); \ ld %r19,FRAME_19+48(%r1); \ ld %r18,FRAME_18+48(%r1); \ ld %r17,FRAME_17+48(%r1); \ ld %r16,FRAME_16+48(%r1); \ ld %r15,FRAME_15+48(%r1); \ ld %r14,FRAME_14+48(%r1); \ ld %r13,FRAME_13+48(%r1); \ ld %r12,FRAME_12+48(%r1); \ ld %r11,FRAME_11+48(%r1); \ ld %r10,FRAME_10+48(%r1); \ ld %r9, FRAME_9+48(%r1); \ ld %r8, FRAME_8+48(%r1); \ ld %r7, FRAME_7+48(%r1); \ ld %r6, FRAME_6+48(%r1); \ ld %r5, FRAME_5+48(%r1); \ ld %r4, FRAME_4+48(%r1); \ ld %r3, FRAME_3+48(%r1); \ ld %r2, FRAME_2+48(%r1); \ ld %r0, FRAME_0+48(%r1); \ ld %r1, FRAME_1+48(%r1); \ /* Can't touch %r1 from here on */ \ mtsprg3 %r3; /* save r3 */ \ /* Disable translation, machine check and recoverability: */ \ mfmsr %r3; \ andi. %r3,%r3,~(PSL_DR|PSL_IR|PSL_ME|PSL_RI)@l; \ mtmsr %r3; \ isync; \ /* Decide whether we return to user mode: */ \ GET_CPUINFO(%r3); \ ld %r3,(savearea+CPUSAVE_SRR1)(%r3); \ mtcr %r3; \ bf 17,1f; /* branch if PSL_PR is false */ \ /* Restore user SRs */ \ GET_CPUINFO(%r3); \ std %r27,(savearea+CPUSAVE_R27)(%r3); \ std %r28,(savearea+CPUSAVE_R28)(%r3); \ std %r29,(savearea+CPUSAVE_R29)(%r3); \ std %r30,(savearea+CPUSAVE_R30)(%r3); \ std %r31,(savearea+CPUSAVE_R31)(%r3); \ mflr %r27; /* preserve LR */ \ bl restore_usersrs; /* uses r28-r31 */ \ mtlr %r27; \ ld %r31,(savearea+CPUSAVE_R31)(%r3); \ ld %r30,(savearea+CPUSAVE_R30)(%r3); \ ld %r29,(savearea+CPUSAVE_R29)(%r3); \ ld %r28,(savearea+CPUSAVE_R28)(%r3); \ ld %r27,(savearea+CPUSAVE_R27)(%r3); \ 1: mfsprg2 %r3; /* restore cr */ \ mtcr %r3; \ GET_CPUINFO(%r3); \ ld %r3,(savearea+CPUSAVE_SRR0)(%r3); /* restore srr0 */ \ mtsrr0 %r3; \ GET_CPUINFO(%r3); \ ld %r3,(savearea+CPUSAVE_SRR1)(%r3); /* restore srr1 */ \ mtsrr1 %r3; \ mfsprg3 %r3 /* restore r3 */ #ifdef KDTRACE_HOOKS .data .globl dtrace_invop_calltrap_addr .align 8 .type dtrace_invop_calltrap_addr, @object .size dtrace_invop_calltrap_addr, 8 dtrace_invop_calltrap_addr: .word 0 .word 0 .text #endif /* * Processor reset exception handler. These are typically * the first instructions the processor executes after a * software reset. We do this in two bits so that we are * not still hanging around in the trap handling region * once the MMU is turned on. */ .globl CNAME(rstcode), CNAME(rstcodeend) CNAME(rstcode): /* Explicitly set MSR[SF] */ mfmsr %r9 li %r8,1 insrdi %r9,%r8,1,0 mtmsrd %r9 isync bl 1f .llong cpu_reset 1: mflr %r9 ld %r9,0(%r9) mtlr %r9 blr CNAME(rstcodeend): cpu_reset: GET_TOCBASE(%r2) ld %r1,TOC_REF(tmpstk)(%r2) /* get new SP */ addi %r1,%r1,(TMPSTKSZ-48) bl CNAME(cpudep_ap_early_bootstrap) /* Set PCPU */ nop lis %r3,1@l bl CNAME(pmap_cpu_bootstrap) /* Turn on virtual memory */ nop bl CNAME(cpudep_ap_bootstrap) /* Set up PCPU and stack */ nop mr %r1,%r3 /* Use new stack */ bl CNAME(cpudep_ap_setup) nop GET_CPUINFO(%r5) ld %r3,(PC_RESTORE)(%r5) cmpldi %cr0,%r3,0 beq %cr0,2f nop li %r4,1 b CNAME(longjmp) nop 2: #ifdef SMP bl CNAME(machdep_ap_bootstrap) /* And away! */ nop #endif /* Should not be reached */ 9: b 9b /* * This code gets copied to all the trap vectors * (except ISI/DSI, ALI, and the interrupts). Has to fit in 8 instructions! */ .globl CNAME(trapcode),CNAME(trapcodeend) .p2align 3 CNAME(trapcode): mtsprg1 %r1 /* save SP */ mflr %r1 /* Save the old LR in r1 */ mtsprg2 %r1 /* And then in SPRG2 */ ld %r1,TRAP_GENTRAP(0) mtlr %r1 li %r1, 0xe0 /* How to get the vector from LR */ blrl /* Branch to generictrap */ CNAME(trapcodeend): /* * For SLB misses: do special things for the kernel * * Note: SPRG1 is always safe to overwrite any time the MMU is on, which is * the only time this can be called. */ .globl CNAME(slbtrap),CNAME(slbtrapend) .p2align 3 CNAME(slbtrap): mtsprg1 %r1 /* save SP */ GET_CPUINFO(%r1) std %r2,(PC_SLBSAVE+16)(%r1) mfcr %r2 /* save CR */ std %r2,(PC_SLBSAVE+104)(%r1) mfsrr1 %r2 /* test kernel mode */ mtcr %r2 bf 17,2f /* branch if PSL_PR is false */ /* User mode */ ld %r2,(PC_SLBSAVE+104)(%r1) /* Restore CR */ mtcr %r2 ld %r2,(PC_SLBSAVE+16)(%r1) /* Restore R2 */ mflr %r1 /* Save the old LR in r1 */ mtsprg2 %r1 /* And then in SPRG2 */ /* 52 bytes so far */ bl 1f .llong generictrap 1: mflr %r1 ld %r1,0(%r1) mtlr %r1 li %r1, 0x80 /* How to get the vector from LR */ blrl /* Branch to generictrap */ /* 84 bytes */ 2: mflr %r2 /* Save the old LR in r2 */ nop bl 3f /* Begin dance to jump to kern_slbtrap*/ .llong kern_slbtrap 3: mflr %r1 ld %r1,0(%r1) mtlr %r1 GET_CPUINFO(%r1) blrl /* 124 bytes -- 4 to spare */ CNAME(slbtrapend): kern_slbtrap: std %r2,(PC_SLBSAVE+136)(%r1) /* old LR */ std %r3,(PC_SLBSAVE+24)(%r1) /* save R3 */ /* Check if this needs to be handled as a regular trap (userseg miss) */ mflr %r2 andi. %r2,%r2,0xff80 cmpwi %r2,0x380 bne 1f mfdar %r2 b 2f 1: mfsrr0 %r2 2: /* r2 now contains the fault address */ lis %r3,SEGMENT_MASK@highesta ori %r3,%r3,SEGMENT_MASK@highera sldi %r3,%r3,32 oris %r3,%r3,SEGMENT_MASK@ha ori %r3,%r3,SEGMENT_MASK@l and %r2,%r2,%r3 /* R2 = segment base address */ lis %r3,USER_ADDR@highesta ori %r3,%r3,USER_ADDR@highera sldi %r3,%r3,32 oris %r3,%r3,USER_ADDR@ha ori %r3,%r3,USER_ADDR@l cmpd %r2,%r3 /* Compare fault base to USER_ADDR */ bne 3f /* User seg miss, handle as a regular trap */ ld %r2,(PC_SLBSAVE+104)(%r1) /* Restore CR */ mtcr %r2 ld %r2,(PC_SLBSAVE+16)(%r1) /* Restore R2,R3 */ ld %r3,(PC_SLBSAVE+24)(%r1) ld %r1,(PC_SLBSAVE+136)(%r1) /* Save the old LR in r1 */ mtsprg2 %r1 /* And then in SPRG2 */ li %r1, 0x80 /* How to get the vector from LR */ b generictrap /* Retain old LR using b */ 3: /* Real kernel SLB miss */ std %r0,(PC_SLBSAVE+0)(%r1) /* free all volatile regs */ mfsprg1 %r2 /* Old R1 */ std %r2,(PC_SLBSAVE+8)(%r1) /* R2,R3 already saved */ std %r4,(PC_SLBSAVE+32)(%r1) std %r5,(PC_SLBSAVE+40)(%r1) std %r6,(PC_SLBSAVE+48)(%r1) std %r7,(PC_SLBSAVE+56)(%r1) std %r8,(PC_SLBSAVE+64)(%r1) std %r9,(PC_SLBSAVE+72)(%r1) std %r10,(PC_SLBSAVE+80)(%r1) std %r11,(PC_SLBSAVE+88)(%r1) std %r12,(PC_SLBSAVE+96)(%r1) /* CR already saved */ mfxer %r2 /* save XER */ std %r2,(PC_SLBSAVE+112)(%r1) mflr %r2 /* save LR (SP already saved) */ std %r2,(PC_SLBSAVE+120)(%r1) mfctr %r2 /* save CTR */ std %r2,(PC_SLBSAVE+128)(%r1) /* Call handler */ addi %r1,%r1,PC_SLBSTACK-48+1024 li %r2,~15 and %r1,%r1,%r2 GET_TOCBASE(%r2) mflr %r3 andi. %r3,%r3,0xff80 mfdar %r4 mfsrr0 %r5 bl handle_kernel_slb_spill nop /* Save r28-31, restore r4-r12 */ GET_CPUINFO(%r1) ld %r4,(PC_SLBSAVE+32)(%r1) ld %r5,(PC_SLBSAVE+40)(%r1) ld %r6,(PC_SLBSAVE+48)(%r1) ld %r7,(PC_SLBSAVE+56)(%r1) ld %r8,(PC_SLBSAVE+64)(%r1) ld %r9,(PC_SLBSAVE+72)(%r1) ld %r10,(PC_SLBSAVE+80)(%r1) ld %r11,(PC_SLBSAVE+88)(%r1) ld %r12,(PC_SLBSAVE+96)(%r1) std %r28,(PC_SLBSAVE+64)(%r1) std %r29,(PC_SLBSAVE+72)(%r1) std %r30,(PC_SLBSAVE+80)(%r1) std %r31,(PC_SLBSAVE+88)(%r1) /* Restore kernel mapping */ bl restore_kernsrs /* Restore remaining registers */ ld %r28,(PC_SLBSAVE+64)(%r1) ld %r29,(PC_SLBSAVE+72)(%r1) ld %r30,(PC_SLBSAVE+80)(%r1) ld %r31,(PC_SLBSAVE+88)(%r1) ld %r2,(PC_SLBSAVE+104)(%r1) mtcr %r2 ld %r2,(PC_SLBSAVE+112)(%r1) mtxer %r2 ld %r2,(PC_SLBSAVE+120)(%r1) mtlr %r2 ld %r2,(PC_SLBSAVE+128)(%r1) mtctr %r2 ld %r2,(PC_SLBSAVE+136)(%r1) mtlr %r2 /* Restore r0-r3 */ ld %r0,(PC_SLBSAVE+0)(%r1) ld %r2,(PC_SLBSAVE+16)(%r1) ld %r3,(PC_SLBSAVE+24)(%r1) mfsprg1 %r1 /* Back to whatever we were doing */ rfid /* * For ALI: has to save DSISR and DAR */ .globl CNAME(alitrap),CNAME(aliend) CNAME(alitrap): mtsprg1 %r1 /* save SP */ GET_CPUINFO(%r1) std %r27,(PC_TEMPSAVE+CPUSAVE_R27)(%r1) /* free r27-r31 */ std %r28,(PC_TEMPSAVE+CPUSAVE_R28)(%r1) std %r29,(PC_TEMPSAVE+CPUSAVE_R29)(%r1) std %r30,(PC_TEMPSAVE+CPUSAVE_R30)(%r1) std %r31,(PC_TEMPSAVE+CPUSAVE_R31)(%r1) mfdar %r30 mfdsisr %r31 std %r30,(PC_TEMPSAVE+CPUSAVE_AIM_DAR)(%r1) std %r31,(PC_TEMPSAVE+CPUSAVE_AIM_DSISR)(%r1) mfsprg1 %r1 /* restore SP, in case of branch */ mflr %r28 /* save LR */ mfcr %r29 /* save CR */ /* Begin dance to branch to s_trap in a bit */ b 1f .p2align 3 1: nop bl 1f .llong s_trap 1: mflr %r31 ld %r31,0(%r31) mtlr %r31 /* Put our exception vector in SPRG3 */ li %r31, EXC_ALI mtsprg3 %r31 /* Test whether we already had PR set */ mfsrr1 %r31 mtcr %r31 blrl CNAME(aliend): /* * Similar to the above for DSI * Has to handle standard pagetable spills */ .globl CNAME(dsitrap),CNAME(dsiend) CNAME(dsitrap): mtsprg1 %r1 /* save SP */ GET_CPUINFO(%r1) std %r27,(PC_DISISAVE+CPUSAVE_R27)(%r1) /* free r27-r31 */ std %r28,(PC_DISISAVE+CPUSAVE_R28)(%r1) std %r29,(PC_DISISAVE+CPUSAVE_R29)(%r1) std %r30,(PC_DISISAVE+CPUSAVE_R30)(%r1) std %r31,(PC_DISISAVE+CPUSAVE_R31)(%r1) mfcr %r29 /* save CR */ mfxer %r30 /* save XER */ mtsprg2 %r30 /* in SPRG2 */ mfsrr1 %r31 /* test kernel mode */ mtcr %r31 mflr %r28 /* save LR (SP already saved) */ bl 1f /* Begin branching to disitrap */ .llong disitrap 1: mflr %r1 ld %r1,0(%r1) mtlr %r1 blrl /* Branch to generictrap */ CNAME(dsiend): /* * Preamble code for DSI/ISI traps */ disitrap: /* Write the trap vector to SPRG3 by computing LR & 0xff00 */ mflr %r1 andi. %r1,%r1,0xff00 mtsprg3 %r1 GET_CPUINFO(%r1) ld %r31,(PC_DISISAVE+CPUSAVE_R27)(%r1) std %r31,(PC_TEMPSAVE+CPUSAVE_R27)(%r1) ld %r30,(PC_DISISAVE+CPUSAVE_R28)(%r1) std %r30,(PC_TEMPSAVE+CPUSAVE_R28)(%r1) ld %r31,(PC_DISISAVE+CPUSAVE_R29)(%r1) std %r31,(PC_TEMPSAVE+CPUSAVE_R29)(%r1) ld %r30,(PC_DISISAVE+CPUSAVE_R30)(%r1) std %r30,(PC_TEMPSAVE+CPUSAVE_R30)(%r1) ld %r31,(PC_DISISAVE+CPUSAVE_R31)(%r1) std %r31,(PC_TEMPSAVE+CPUSAVE_R31)(%r1) mfdar %r30 mfdsisr %r31 std %r30,(PC_TEMPSAVE+CPUSAVE_AIM_DAR)(%r1) std %r31,(PC_TEMPSAVE+CPUSAVE_AIM_DSISR)(%r1) #ifdef KDB /* Try to detect a kernel stack overflow */ mfsrr1 %r31 mtcr %r31 bt 17,realtrap /* branch is user mode */ mfsprg1 %r31 /* get old SP */ clrrdi %r31,%r31,12 /* Round SP down to nearest page */ sub. %r30,%r31,%r30 /* SP - DAR */ bge 1f neg %r30,%r30 /* modulo value */ 1: cmpldi %cr0,%r30,4096 /* is DAR within a page of SP? */ bge %cr0,realtrap /* no, too far away. */ /* Now convert this DSI into a DDB trap. */ GET_CPUINFO(%r1) ld %r30,(PC_TEMPSAVE+CPUSAVE_AIM_DAR)(%r1) /* get DAR */ std %r30,(PC_DBSAVE +CPUSAVE_AIM_DAR)(%r1) /* save DAR */ ld %r30,(PC_TEMPSAVE+CPUSAVE_AIM_DSISR)(%r1) /* get DSISR */ std %r30,(PC_DBSAVE +CPUSAVE_AIM_DSISR)(%r1) /* save DSISR */ ld %r31,(PC_DISISAVE+CPUSAVE_R27)(%r1) /* get r27 */ std %r31,(PC_DBSAVE +CPUSAVE_R27)(%r1) /* save r27 */ ld %r30,(PC_DISISAVE+CPUSAVE_R28)(%r1) /* get r28 */ std %r30,(PC_DBSAVE +CPUSAVE_R28)(%r1) /* save r28 */ ld %r31,(PC_DISISAVE+CPUSAVE_R29)(%r1) /* get r29 */ std %r31,(PC_DBSAVE +CPUSAVE_R29)(%r1) /* save r29 */ ld %r30,(PC_DISISAVE+CPUSAVE_R30)(%r1) /* get r30 */ std %r30,(PC_DBSAVE +CPUSAVE_R30)(%r1) /* save r30 */ ld %r31,(PC_DISISAVE+CPUSAVE_R31)(%r1) /* get r31 */ std %r31,(PC_DBSAVE +CPUSAVE_R31)(%r1) /* save r31 */ b dbtrap #endif /* XXX need stack probe here */ realtrap: /* Test whether we already had PR set */ mfsrr1 %r1 mtcr %r1 mfsprg1 %r1 /* restore SP (might have been overwritten) */ bf 17,k_trap /* branch if PSL_PR is false */ GET_CPUINFO(%r1) ld %r1,PC_CURPCB(%r1) mr %r27,%r28 /* Save LR, r29 */ mtsprg2 %r29 bl restore_kernsrs /* enable kernel mapping */ mfsprg2 %r29 mr %r28,%r27 b s_trap /* * generictrap does some standard setup for trap handling to minimize * the code that need be installed in the actual vectors. It expects * the following conditions. * * R1 - Trap vector = LR & (0xff00 | R1) * SPRG1 - Original R1 contents * SPRG2 - Original LR */ .globl CNAME(generictrap) generictrap: /* Save R1 for computing the exception vector */ mtsprg3 %r1 /* Save interesting registers */ GET_CPUINFO(%r1) std %r27,(PC_TEMPSAVE+CPUSAVE_R27)(%r1) /* free r27-r31 */ std %r28,(PC_TEMPSAVE+CPUSAVE_R28)(%r1) std %r29,(PC_TEMPSAVE+CPUSAVE_R29)(%r1) std %r30,(PC_TEMPSAVE+CPUSAVE_R30)(%r1) std %r31,(PC_TEMPSAVE+CPUSAVE_R31)(%r1) mfdar %r30 std %r30,(PC_TEMPSAVE+CPUSAVE_AIM_DAR)(%r1) mfsprg1 %r1 /* restore SP, in case of branch */ mfsprg2 %r28 /* save LR */ mfcr %r29 /* save CR */ /* Compute the exception vector from the link register */ mfsprg3 %r31 ori %r31,%r31,0xff00 mflr %r30 addi %r30,%r30,-4 /* The branch instruction, not the next */ and %r30,%r30,%r31 mtsprg3 %r30 /* Test whether we already had PR set */ mfsrr1 %r31 mtcr %r31 s_trap: bf 17,k_trap /* branch if PSL_PR is false */ GET_CPUINFO(%r1) u_trap: ld %r1,PC_CURPCB(%r1) mr %r27,%r28 /* Save LR, r29 */ mtsprg2 %r29 bl restore_kernsrs /* enable kernel mapping */ mfsprg2 %r29 mr %r28,%r27 /* * Now the common trap catching code. */ k_trap: FRAME_SETUP(PC_TEMPSAVE) /* Call C interrupt dispatcher: */ trapagain: GET_TOCBASE(%r2) addi %r3,%r1,48 bl CNAME(powerpc_interrupt) nop .globl CNAME(trapexit) /* backtrace code sentinel */ CNAME(trapexit): /* Disable interrupts: */ mfmsr %r3 andi. %r3,%r3,~PSL_EE@l mtmsr %r3 isync /* Test AST pending: */ ld %r5,FRAME_SRR1+48(%r1) mtcr %r5 bf 17,1f /* branch if PSL_PR is false */ GET_CPUINFO(%r3) /* get per-CPU pointer */ lwz %r4, TD_FLAGS(%r13) /* get thread flags value */ lis %r5, (TDF_ASTPENDING|TDF_NEEDRESCHED)@h ori %r5,%r5, (TDF_ASTPENDING|TDF_NEEDRESCHED)@l and. %r4,%r4,%r5 beq 1f mfmsr %r3 /* re-enable interrupts */ ori %r3,%r3,PSL_EE@l mtmsr %r3 isync GET_TOCBASE(%r2) addi %r3,%r1,48 bl CNAME(ast) nop .globl CNAME(asttrapexit) /* backtrace code sentinel #2 */ CNAME(asttrapexit): b trapexit /* test ast ret value ? */ 1: FRAME_LEAVE(PC_TEMPSAVE) rfid #if defined(KDB) /* * Deliberate entry to dbtrap */ ASENTRY_NOPROF(breakpoint) mtsprg1 %r1 mfmsr %r3 mtsrr1 %r3 andi. %r3,%r3,~(PSL_EE|PSL_ME)@l mtmsr %r3 /* disable interrupts */ isync GET_CPUINFO(%r3) std %r27,(PC_DBSAVE+CPUSAVE_R27)(%r3) std %r28,(PC_DBSAVE+CPUSAVE_R28)(%r3) std %r29,(PC_DBSAVE+CPUSAVE_R29)(%r3) std %r30,(PC_DBSAVE+CPUSAVE_R30)(%r3) std %r31,(PC_DBSAVE+CPUSAVE_R31)(%r3) mflr %r28 li %r29,EXC_BPT mtlr %r29 mfcr %r29 mtsrr0 %r28 /* * Now the kdb trap catching code. */ dbtrap: /* Write the trap vector to SPRG3 by computing LR & 0xff00 */ mflr %r1 andi. %r1,%r1,0xff00 mtsprg3 %r1 ld %r1,TRAP_TOCBASE(0) /* get new SP */ ld %r1,TOC_REF(tmpstk)(%r1) addi %r1,%r1,(TMPSTKSZ-48) FRAME_SETUP(PC_DBSAVE) /* Call C trap code: */ GET_TOCBASE(%r2) addi %r3,%r1,48 bl CNAME(db_trap_glue) nop or. %r3,%r3,%r3 bne dbleave /* This wasn't for KDB, so switch to real trap: */ ld %r3,FRAME_EXC+48(%r1) /* save exception */ GET_CPUINFO(%r4) std %r3,(PC_DBSAVE+CPUSAVE_R31)(%r4) FRAME_LEAVE(PC_DBSAVE) mtsprg1 %r1 /* prepare for entrance to realtrap */ GET_CPUINFO(%r1) std %r27,(PC_TEMPSAVE+CPUSAVE_R27)(%r1) std %r28,(PC_TEMPSAVE+CPUSAVE_R28)(%r1) std %r29,(PC_TEMPSAVE+CPUSAVE_R29)(%r1) std %r30,(PC_TEMPSAVE+CPUSAVE_R30)(%r1) std %r31,(PC_TEMPSAVE+CPUSAVE_R31)(%r1) mflr %r28 mfcr %r29 ld %r31,(PC_DBSAVE+CPUSAVE_R31)(%r1) mtsprg3 %r31 /* SPRG3 was clobbered by FRAME_LEAVE */ mfsprg1 %r1 b realtrap dbleave: FRAME_LEAVE(PC_DBSAVE) rfid /* * In case of KDB we want a separate trap catcher for it */ .globl CNAME(dblow),CNAME(dbend) CNAME(dblow): mtsprg1 %r1 /* save SP */ mtsprg2 %r29 /* save r29 */ mfcr %r29 /* save CR in r29 */ mfsrr1 %r1 mtcr %r1 bf 17,1f /* branch if privileged */ /* Unprivileged case */ mtcr %r29 /* put the condition register back */ mfsprg2 %r29 /* ... and r29 */ mflr %r1 /* save LR */ mtsprg2 %r1 /* And then in SPRG2 */ ld %r1, TRAP_GENTRAP(0) /* Get branch address */ mtlr %r1 li %r1, 0 /* How to get the vector from LR */ blrl /* Branch to generictrap */ 1: GET_CPUINFO(%r1) std %r27,(PC_DBSAVE+CPUSAVE_R27)(%r1) /* free r27 */ std %r28,(PC_DBSAVE+CPUSAVE_R28)(%r1) /* free r28 */ mfsprg2 %r28 /* r29 holds cr... */ std %r28,(PC_DBSAVE+CPUSAVE_R29)(%r1) /* free r29 */ std %r30,(PC_DBSAVE+CPUSAVE_R30)(%r1) /* free r30 */ std %r31,(PC_DBSAVE+CPUSAVE_R31)(%r1) /* free r31 */ mflr %r28 /* save LR */ bl 9f /* Begin branch */ .llong dbtrap 9: mflr %r1 ld %r1,0(%r1) mtlr %r1 blrl /* Branch to generictrap */ CNAME(dbend): #endif /* KDB */ Index: projects/clang380-import/sys/powerpc/booke/booke_machdep.c =================================================================== --- projects/clang380-import/sys/powerpc/booke/booke_machdep.c (revision 293686) +++ projects/clang380-import/sys/powerpc/booke/booke_machdep.c (revision 293687) @@ -1,411 +1,410 @@ /*- * Copyright (C) 2006-2012 Semihalf * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $ */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_kstack_pages.h" #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(MPC85XX) || defined(QORIQ_DPAA) #include #endif #ifdef DDB #include #endif #ifdef DEBUG #define debugf(fmt, args...) printf(fmt, ##args) #else #define debugf(fmt, args...) #endif -extern unsigned char kernel_text[]; extern unsigned char _etext[]; extern unsigned char _edata[]; extern unsigned char __bss_start[]; extern unsigned char __sbss_start[]; extern unsigned char __sbss_end[]; extern unsigned char _end[]; extern vm_offset_t __endkernel; /* * Bootinfo is passed to us by legacy loaders. Save the address of the * structure to handle backward compatibility. */ uint32_t *bootinfo; void print_kernel_section_addr(void); void print_kenv(void); uintptr_t booke_init(u_long, u_long); void ivor_setup(void); extern void *interrupt_vector_base; extern void *int_critical_input; extern void *int_machine_check; extern void *int_data_storage; extern void *int_instr_storage; extern void *int_external_input; extern void *int_alignment; extern void *int_fpu; extern void *int_program; extern void *int_syscall; extern void *int_decrementer; extern void *int_fixed_interval_timer; extern void *int_watchdog; extern void *int_data_tlb_error; extern void *int_inst_tlb_error; extern void *int_debug; extern void *int_vec; extern void *int_vecast; #ifdef HWPMC_HOOKS extern void *int_performance_counter; #endif #define SET_TRAP(ivor, handler) \ KASSERT(((uintptr_t)(&handler) & ~0xffffUL) == \ ((uintptr_t)(&interrupt_vector_base) & ~0xffffUL), \ ("Handler " #handler " too far from interrupt vector base")); \ mtspr(ivor, (uintptr_t)(&handler) & 0xffffUL); uintptr_t powerpc_init(vm_offset_t fdt, vm_offset_t, vm_offset_t, void *mdp); void booke_cpu_init(void); void booke_cpu_init(void) { cpu_features |= PPC_FEATURE_BOOKE; pmap_mmu_install(MMU_TYPE_BOOKE, BUS_PROBE_GENERIC); } void ivor_setup(void) { mtspr(SPR_IVPR, ((uintptr_t)&interrupt_vector_base) & 0xffff0000); SET_TRAP(SPR_IVOR0, int_critical_input); SET_TRAP(SPR_IVOR1, int_machine_check); SET_TRAP(SPR_IVOR2, int_data_storage); SET_TRAP(SPR_IVOR3, int_instr_storage); SET_TRAP(SPR_IVOR4, int_external_input); SET_TRAP(SPR_IVOR5, int_alignment); SET_TRAP(SPR_IVOR6, int_program); SET_TRAP(SPR_IVOR8, int_syscall); SET_TRAP(SPR_IVOR10, int_decrementer); SET_TRAP(SPR_IVOR11, int_fixed_interval_timer); SET_TRAP(SPR_IVOR12, int_watchdog); SET_TRAP(SPR_IVOR13, int_data_tlb_error); SET_TRAP(SPR_IVOR14, int_inst_tlb_error); SET_TRAP(SPR_IVOR15, int_debug); #ifdef HWPMC_HOOKS SET_TRAP(SPR_IVOR35, int_performance_counter); #endif switch ((mfpvr() >> 16) & 0xffff) { case FSL_E6500: SET_TRAP(SPR_IVOR32, int_vec); SET_TRAP(SPR_IVOR33, int_vecast); /* FALLTHROUGH */ case FSL_E500mc: case FSL_E5500: SET_TRAP(SPR_IVOR7, int_fpu); } } static int booke_check_for_fdt(uint32_t arg1, vm_offset_t *dtbp) { void *ptr; if (arg1 % 8 != 0) return (-1); ptr = (void *)pmap_early_io_map(arg1, PAGE_SIZE); if (fdt_check_header(ptr) != 0) return (-1); *dtbp = (vm_offset_t)ptr; return (0); } uintptr_t booke_init(u_long arg1, u_long arg2) { uintptr_t ret; void *mdp; vm_offset_t dtbp, end; end = (uintptr_t)_end; dtbp = (vm_offset_t)NULL; /* Set up TLB initially */ bootinfo = NULL; bzero(__sbss_start, __sbss_end - __sbss_start); bzero(__bss_start, _end - __bss_start); tlb1_init(); /* * Handle the various ways we can get loaded and started: * - FreeBSD's loader passes the pointer to the metadata * in arg1, with arg2 undefined. arg1 has a value that's * relative to the kernel's link address (i.e. larger * than 0xc0000000). * - Juniper's loader passes the metadata pointer in arg2 * and sets arg1 to zero. This is to signal that the * loader maps the kernel and starts it at its link * address (unlike the FreeBSD loader). * - U-Boot passes the standard argc and argv parameters * in arg1 and arg2 (resp). arg1 is between 1 and some * relatively small number, such as 64K. arg2 is the * physical address of the argv vector. * - ePAPR loaders pass an FDT blob in r3 (arg1) and the magic hex * string 0x45504150 ('EPAP') in r6 (which has been lost by now). * r4 (arg2) is supposed to be set to zero, but is not always. */ if (arg1 == 0) /* Juniper loader */ mdp = (void *)arg2; else if (booke_check_for_fdt(arg1, &dtbp) == 0) { /* ePAPR */ end = roundup(end, 8); memmove((void *)end, (void *)dtbp, fdt_totalsize((void *)dtbp)); dtbp = end; end += fdt_totalsize((void *)dtbp); __endkernel = end; mdp = NULL; - } else if (arg1 > (uintptr_t)kernel_text) /* FreeBSD loader */ + } else if (arg1 > (uintptr_t)btext) /* FreeBSD loader */ mdp = (void *)arg1; else /* U-Boot */ mdp = NULL; /* Default to 32 byte cache line size. */ switch ((mfpvr()) >> 16) { case FSL_E500mc: case FSL_E5500: case FSL_E6500: cacheline_size = 64; break; } ret = powerpc_init(dtbp, 0, 0, mdp); /* Enable caches */ booke_enable_l1_cache(); booke_enable_l2_cache(); booke_enable_bpred(); return (ret); } #define RES_GRANULE 32 extern uint32_t tlb0_miss_locks[]; /* Initialise a struct pcpu. */ void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t sz) { pcpu->pc_tid_next = TID_MIN; #ifdef SMP uint32_t *ptr; int words_per_gran = RES_GRANULE / sizeof(uint32_t); ptr = &tlb0_miss_locks[cpuid * words_per_gran]; pcpu->pc_booke_tlb_lock = ptr; *ptr = TLB_UNLOCKED; *(ptr + 1) = 0; /* recurse counter */ #endif } /* Shutdown the CPU as much as possible. */ void cpu_halt(void) { mtmsr(mfmsr() & ~(PSL_CE | PSL_EE | PSL_ME | PSL_DE)); while (1) ; } int ptrace_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 |= PSL_DE; tf->cpu.booke.dbcr0 |= (DBCR0_IDM | DBCR0_IC); return (0); } int ptrace_clear_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 &= ~PSL_DE; tf->cpu.booke.dbcr0 &= ~(DBCR0_IDM | DBCR0_IC); return (0); } void kdb_cpu_clear_singlestep(void) { register_t r; r = mfspr(SPR_DBCR0); mtspr(SPR_DBCR0, r & ~DBCR0_IC); kdb_frame->srr1 &= ~PSL_DE; } void kdb_cpu_set_singlestep(void) { register_t r; r = mfspr(SPR_DBCR0); mtspr(SPR_DBCR0, r | DBCR0_IC | DBCR0_IDM); kdb_frame->srr1 |= PSL_DE; } Index: projects/clang380-import/sys/powerpc/booke/locore.S =================================================================== --- projects/clang380-import/sys/powerpc/booke/locore.S (revision 293686) +++ projects/clang380-import/sys/powerpc/booke/locore.S (revision 293687) @@ -1,884 +1,861 @@ /*- * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski * Copyright (C) 2006 Semihalf, Marian Balakowicz * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include "assym.s" #include "opt_hwpmc_hooks.h" #include #include #include #include #include #include #include #include #define TMPSTACKSZ 16384 .text .globl btext btext: /* * This symbol is here for the benefit of kvm_mkdb, and is supposed to * mark the start of kernel text. */ .globl kernel_text kernel_text: /* * Startup entry. Note, this must be the first thing in the text segment! */ .text .globl __start __start: /* * Assumptions on the boot loader: * - System memory starts from physical address 0 * - It's mapped by a single TLB1 entry * - TLB1 mapping is 1:1 pa to va * - Kernel is loaded at 64MB boundary * - All PID registers are set to the same value * - CPU is running in AS=0 * * Registers contents provided by the loader(8): * r1 : stack pointer * r3 : metadata pointer * * We rearrange the TLB1 layout as follows: * - Find TLB1 entry we started in * - Make sure it's protected, invalidate other entries * - Create temp entry in the second AS (make sure it's not TLB[1]) * - Switch to temp mapping * - Map 64MB of RAM in TLB1[1] * - Use AS=1, set EPN to KERNBASE and RPN to kernel load address * - Switch to to TLB1[1] mapping * - Invalidate temp mapping * * locore registers use: * r1 : stack pointer * r2 : trace pointer (AP only, for early diagnostics) * r3-r27 : scratch registers * r28 : temp TLB1 entry * r29 : initial TLB1 entry we started in * r30-r31 : arguments (metadata pointer) */ /* * Keep arguments in r30 & r31 for later use. */ mr %r30, %r3 mr %r31, %r4 /* * Initial cleanup */ li %r3, PSL_DE /* Keep debug exceptions for CodeWarrior. */ mtmsr %r3 isync mfpvr %r3 rlwinm %r3, %r3, 16, 16, 31 lis %r4, HID0_E500_DEFAULT_SET@h ori %r4, %r4, HID0_E500_DEFAULT_SET@l /* Check for e500mc and e5500 */ cmpli 0, 0, %r3, FSL_E500mc bne 2f lis %r4, HID0_E500MC_DEFAULT_SET@h ori %r4, %r4, HID0_E500MC_DEFAULT_SET@l b 3f 2: cmpli 0, 0, %r3, FSL_E5500 bne 3f lis %r4, HID0_E5500_DEFAULT_SET@h ori %r4, %r4, HID0_E5500_DEFAULT_SET@l 3: mtspr SPR_HID0, %r4 isync /* * E500mc and E5500 do not have HID1 register, so skip HID1 setup on * this core. */ cmpli 0, 0, %r3, FSL_E500mc beq 1f cmpli 0, 0, %r3, FSL_E5500 beq 1f lis %r3, HID1_E500_DEFAULT_SET@h ori %r3, %r3, HID1_E500_DEFAULT_SET@l mtspr SPR_HID1, %r3 isync 1: /* Invalidate all entries in TLB0 */ li %r3, 0 bl tlb_inval_all cmpwi %r30, 0 beq done_mapping /* * Locate the TLB1 entry that maps this code */ bl 1f 1: mflr %r3 bl tlb1_find_current /* the entry found is returned in r29 */ bl tlb1_inval_all_but_current /* * Create temporary mapping in AS=1 and switch to it */ addi %r3, %r29, 1 bl tlb1_temp_mapping_as1 mfmsr %r3 ori %r3, %r3, (PSL_IS | PSL_DS) bl 2f 2: mflr %r4 addi %r4, %r4, 20 mtspr SPR_SRR0, %r4 mtspr SPR_SRR1, %r3 rfi /* Switch context */ /* * Invalidate initial entry */ mr %r3, %r29 bl tlb1_inval_entry /* * Setup final mapping in TLB1[1] and switch to it */ /* Final kernel mapping, map in 64 MB of RAM */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ li %r4, 0 /* Entry 0 */ rlwimi %r3, %r4, 16, 10, 15 mtspr SPR_MAS0, %r3 isync li %r3, (TLB_SIZE_64M << MAS1_TSIZE_SHIFT)@l oris %r3, %r3, (MAS1_VALID | MAS1_IPROT)@h mtspr SPR_MAS1, %r3 /* note TS was not filled, so it's TS=0 */ isync lis %r3, KERNBASE@h ori %r3, %r3, KERNBASE@l /* EPN = KERNBASE */ #ifdef SMP ori %r3, %r3, MAS2_M@l /* WIMGE = 0b00100 */ #endif mtspr SPR_MAS2, %r3 isync /* Discover phys load address */ bl 3f 3: mflr %r4 /* Use current address */ rlwinm %r4, %r4, 0, 0, 5 /* 64MB alignment mask */ ori %r4, %r4, (MAS3_SX | MAS3_SW | MAS3_SR)@l mtspr SPR_MAS3, %r4 /* Set RPN and protection */ isync bl zero_mas7 bl zero_mas8 tlbwe isync msync /* Switch to the above TLB1[1] mapping */ bl 4f 4: mflr %r4 rlwinm %r4, %r4, 0, 8, 31 /* Current offset from kernel load address */ rlwinm %r3, %r3, 0, 0, 19 add %r4, %r4, %r3 /* Convert to kernel virtual address */ addi %r4, %r4, 36 li %r3, PSL_DE /* Note AS=0 */ mtspr SPR_SRR0, %r4 mtspr SPR_SRR1, %r3 rfi /* * Invalidate temp mapping */ mr %r3, %r28 bl tlb1_inval_entry done_mapping: /* * Setup a temporary stack */ bl 1f .long tmpstack-. 1: mflr %r1 lwz %r2,0(%r1) add %r1,%r1,%r2 addi %r1, %r1, (TMPSTACKSZ - 16) /* * Relocate kernel */ bl 1f .long _DYNAMIC-. .long _GLOBAL_OFFSET_TABLE_-. 1: mflr %r5 lwz %r3,0(%r5) /* _DYNAMIC in %r3 */ add %r3,%r3,%r5 lwz %r4,4(%r5) /* GOT pointer */ add %r4,%r4,%r5 lwz %r4,4(%r4) /* got[0] is _DYNAMIC link addr */ subf %r4,%r4,%r3 /* subtract to calculate relocbase */ bl elf_reloc_self /* * Initialise exception vector offsets */ bl ivor_setup /* * Set up arguments and jump to system initialization code */ mr %r3, %r30 mr %r4, %r31 /* Prepare core */ bl booke_init /* Switch to thread0.td_kstack now */ mr %r1, %r3 li %r3, 0 stw %r3, 0(%r1) /* Machine independet part, does not return */ bl mi_startup /* NOT REACHED */ 5: b 5b #ifdef SMP /************************************************************************/ /* AP Boot page */ /************************************************************************/ .text .globl __boot_page .align 12 __boot_page: bl 1f .globl bp_ntlb1s bp_ntlb1s: .long 0 .globl bp_tlb1 bp_tlb1: .space 4 * 3 * 64 .globl bp_tlb1_end bp_tlb1_end: /* * Initial configuration */ 1: mflr %r31 /* r31 hold the address of bp_ntlb1s */ /* Set HIDs */ mfpvr %r3 rlwinm %r3, %r3, 16, 16, 31 /* HID0 for E500 is default */ lis %r4, HID0_E500_DEFAULT_SET@h ori %r4, %r4, HID0_E500_DEFAULT_SET@l cmpli 0, 0, %r3, FSL_E500mc bne 2f lis %r4, HID0_E500MC_DEFAULT_SET@h ori %r4, %r4, HID0_E500MC_DEFAULT_SET@l b 3f 2: cmpli 0, 0, %r3, FSL_E5500 bne 3f lis %r4, HID0_E5500_DEFAULT_SET@h ori %r4, %r4, HID0_E5500_DEFAULT_SET@l 3: mtspr SPR_HID0, %r4 isync /* * E500mc and E5500 do not have HID1 register, so skip HID1 setup on * this core. */ cmpli 0, 0, %r3, FSL_E500mc beq 1f cmpli 0, 0, %r3, FSL_E5500 beq 1f lis %r3, HID1_E500_DEFAULT_SET@h ori %r3, %r3, HID1_E500_DEFAULT_SET@l mtspr SPR_HID1, %r3 isync 1: /* Enable branch prediction */ li %r3, BUCSR_BPEN mtspr SPR_BUCSR, %r3 isync /* Invalidate all entries in TLB0 */ li %r3, 0 bl tlb_inval_all /* * Find TLB1 entry which is translating us now */ bl 2f 2: mflr %r3 bl tlb1_find_current /* the entry number found is in r29 */ bl tlb1_inval_all_but_current /* * Create temporary translation in AS=1 and switch to it */ lwz %r3, 0(%r31) bl tlb1_temp_mapping_as1 mfmsr %r3 ori %r3, %r3, (PSL_IS | PSL_DS) bl 3f 3: mflr %r4 addi %r4, %r4, 20 mtspr SPR_SRR0, %r4 mtspr SPR_SRR1, %r3 rfi /* Switch context */ /* * Invalidate initial entry */ mr %r3, %r29 bl tlb1_inval_entry /* * Setup final mapping in TLB1[1] and switch to it */ lwz %r6, 0(%r31) addi %r5, %r31, 4 li %r4, 0 4: lis %r3, MAS0_TLBSEL1@h rlwimi %r3, %r4, 16, 12, 15 mtspr SPR_MAS0, %r3 isync lwz %r3, 0(%r5) mtspr SPR_MAS1, %r3 isync lwz %r3, 4(%r5) mtspr SPR_MAS2, %r3 isync lwz %r3, 8(%r5) mtspr SPR_MAS3, %r3 isync tlbwe isync msync addi %r5, %r5, 12 addi %r4, %r4, 1 cmpw %r4, %r6 blt 4b /* Switch to the final mapping */ bl 5f .long __boot_page-. 5: mflr %r5 lwz %r3,0(%r3) add %r5,%r5,%r3 /* __boot_page in r5 */ bl 6f 6: mflr %r3 rlwinm %r3, %r3, 0, 0xfff /* Offset from boot page start */ add %r3, %r3, %r5 /* Make this virtual address */ addi %r3, %r3, 32 li %r4, 0 /* Note AS=0 */ mtspr SPR_SRR0, %r3 mtspr SPR_SRR1, %r4 rfi /* * At this point we're running at virtual addresses KERNBASE and beyond so * it's allowed to directly access all locations the kernel was linked * against. */ /* * Invalidate temp mapping */ mr %r3, %r28 bl tlb1_inval_entry /* * Setup a temporary stack */ bl 1f .long tmpstack-. 1: mflr %r1 lwz %r2,0(%r1) add %r1,%r1,%r2 addi %r1, %r1, (TMPSTACKSZ - 16) /* * Initialise exception vector offsets */ bl ivor_setup /* * Assign our pcpu instance */ bl 1f .long ap_pcpu-. 1: mflr %r4 lwz %r3, 0(%r4) add %r3, %r3, %r4 lwz %r3, 0(%r3) mtsprg0 %r3 bl pmap_bootstrap_ap bl cpudep_ap_bootstrap /* Switch to the idle thread's kstack */ mr %r1, %r3 bl machdep_ap_bootstrap /* NOT REACHED */ 6: b 6b #endif /* SMP */ /* * Invalidate all entries in the given TLB. * * r3 TLBSEL */ tlb_inval_all: rlwinm %r3, %r3, 3, (1 << 3) /* TLBSEL */ ori %r3, %r3, (1 << 2) /* INVALL */ tlbivax 0, %r3 isync msync tlbsync msync blr /* * expects address to look up in r3, returns entry number in r29 * * FIXME: the hidden assumption is we are now running in AS=0, but we should * retrieve actual AS from MSR[IS|DS] and put it in MAS6[SAS] */ tlb1_find_current: mfspr %r17, SPR_PID0 slwi %r17, %r17, MAS6_SPID0_SHIFT mtspr SPR_MAS6, %r17 isync tlbsx 0, %r3 mfspr %r17, SPR_MAS0 rlwinm %r29, %r17, 16, 20, 31 /* MAS0[ESEL] -> r29 */ /* Make sure we have IPROT set on the entry */ mfspr %r17, SPR_MAS1 oris %r17, %r17, MAS1_IPROT@h mtspr SPR_MAS1, %r17 isync tlbwe isync msync blr /* * Invalidates a single entry in TLB1. * * r3 ESEL * r4-r5 scratched */ tlb1_inval_entry: lis %r4, MAS0_TLBSEL1@h /* Select TLB1 */ rlwimi %r4, %r3, 16, 10, 15 /* Select our entry */ mtspr SPR_MAS0, %r4 isync tlbre li %r5, 0 /* MAS1[V] = 0 */ mtspr SPR_MAS1, %r5 isync tlbwe isync msync blr /* * r3 entry of temp translation * r29 entry of current translation * r28 returns temp entry passed in r3 * r4-r5 scratched */ tlb1_temp_mapping_as1: mr %r28, %r3 /* Read our current translation */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ rlwimi %r3, %r29, 16, 10, 15 /* Select our current entry */ mtspr SPR_MAS0, %r3 isync tlbre /* Prepare and write temp entry */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ rlwimi %r3, %r28, 16, 10, 15 /* Select temp entry */ mtspr SPR_MAS0, %r3 isync mfspr %r5, SPR_MAS1 li %r4, 1 /* AS=1 */ rlwimi %r5, %r4, 12, 19, 19 li %r4, 0 /* Global mapping, TID=0 */ rlwimi %r5, %r4, 16, 8, 15 oris %r5, %r5, (MAS1_VALID | MAS1_IPROT)@h mtspr SPR_MAS1, %r5 isync mflr %r3 bl zero_mas7 bl zero_mas8 mtlr %r3 tlbwe isync msync blr /* * Loops over TLB1, invalidates all entries skipping the one which currently * maps this code. * * r29 current entry * r3-r5 scratched */ tlb1_inval_all_but_current: mr %r6, %r3 mfspr %r3, SPR_TLB1CFG /* Get number of entries */ andi. %r3, %r3, TLBCFG_NENTRY_MASK@l li %r4, 0 /* Start from Entry 0 */ 1: lis %r5, MAS0_TLBSEL1@h rlwimi %r5, %r4, 16, 10, 15 mtspr SPR_MAS0, %r5 isync tlbre mfspr %r5, SPR_MAS1 cmpw %r4, %r29 /* our current entry? */ beq 2f rlwinm %r5, %r5, 0, 2, 31 /* clear VALID and IPROT bits */ mtspr SPR_MAS1, %r5 isync tlbwe isync msync 2: addi %r4, %r4, 1 cmpw %r4, %r3 /* Check if this is the last entry */ bne 1b blr /* * MAS7 and MAS8 conditional zeroing. */ .globl zero_mas7 zero_mas7: mfpvr %r20 rlwinm %r20, %r20, 16, 16, 31 cmpli 0, 0, %r20, FSL_E500v1 beq 1f li %r20, 0 mtspr SPR_MAS7, %r20 isync 1: blr .globl zero_mas8 zero_mas8: mfpvr %r20 rlwinm %r20, %r20, 16, 16, 31 cmpli 0, 0, %r20, FSL_E500mc beq 1f cmpli 0, 0, %r20, FSL_E5500 beq 1f blr 1: li %r20, 0 mtspr SPR_MAS8, %r20 isync blr #ifdef SMP __boot_page_padding: /* * Boot page needs to be exactly 4K, with the last word of this page * acting as the reset vector, so we need to stuff the remainder. * Upon release from holdoff CPU fetches the last word of the boot * page. */ .space 4092 - (__boot_page_padding - __boot_page) b __boot_page #endif /* SMP */ /************************************************************************/ /* locore subroutines */ /************************************************************************/ /* * Cache disable/enable/inval sequences according * to section 2.16 of E500CORE RM. */ ENTRY(dcache_inval) /* Invalidate d-cache */ mfspr %r3, SPR_L1CSR0 ori %r3, %r3, (L1CSR0_DCFI | L1CSR0_DCLFR)@l msync isync mtspr SPR_L1CSR0, %r3 isync 1: mfspr %r3, SPR_L1CSR0 andi. %r3, %r3, L1CSR0_DCFI bne 1b blr ENTRY(dcache_disable) /* Disable d-cache */ mfspr %r3, SPR_L1CSR0 li %r4, L1CSR0_DCE@l not %r4, %r4 and %r3, %r3, %r4 msync isync mtspr SPR_L1CSR0, %r3 isync blr ENTRY(dcache_enable) /* Enable d-cache */ mfspr %r3, SPR_L1CSR0 oris %r3, %r3, (L1CSR0_DCPE | L1CSR0_DCE)@h ori %r3, %r3, (L1CSR0_DCPE | L1CSR0_DCE)@l msync isync mtspr SPR_L1CSR0, %r3 isync blr ENTRY(icache_inval) /* Invalidate i-cache */ mfspr %r3, SPR_L1CSR1 ori %r3, %r3, (L1CSR1_ICFI | L1CSR1_ICLFR)@l isync mtspr SPR_L1CSR1, %r3 isync 1: mfspr %r3, SPR_L1CSR1 andi. %r3, %r3, L1CSR1_ICFI bne 1b blr ENTRY(icache_disable) /* Disable i-cache */ mfspr %r3, SPR_L1CSR1 li %r4, L1CSR1_ICE@l not %r4, %r4 and %r3, %r3, %r4 isync mtspr SPR_L1CSR1, %r3 isync blr ENTRY(icache_enable) /* Enable i-cache */ mfspr %r3, SPR_L1CSR1 oris %r3, %r3, (L1CSR1_ICPE | L1CSR1_ICE)@h ori %r3, %r3, (L1CSR1_ICPE | L1CSR1_ICE)@l isync mtspr SPR_L1CSR1, %r3 isync blr /* * L2 cache disable/enable/inval sequences for E500mc. */ ENTRY(l2cache_inval) mfspr %r3, SPR_L2CSR0 oris %r3, %r3, (L2CSR0_L2FI | L2CSR0_L2LFC)@h ori %r3, %r3, (L2CSR0_L2FI | L2CSR0_L2LFC)@l isync mtspr SPR_L2CSR0, %r3 isync 1: mfspr %r3, SPR_L2CSR0 andis. %r3, %r3, L2CSR0_L2FI@h bne 1b blr ENTRY(l2cache_enable) mfspr %r3, SPR_L2CSR0 oris %r3, %r3, (L2CSR0_L2E | L2CSR0_L2PE)@h isync mtspr SPR_L2CSR0, %r3 isync blr /* * Branch predictor setup. */ ENTRY(bpred_enable) mfspr %r3, SPR_BUCSR ori %r3, %r3, BUCSR_BBFI isync mtspr SPR_BUCSR, %r3 isync ori %r3, %r3, BUCSR_BPEN isync mtspr SPR_BUCSR, %r3 isync blr ENTRY(dataloss_erratum_access) /* Lock two cache lines into I-Cache */ sync mfspr %r11, SPR_L1CSR1 rlwinm %r11, %r11, 0, ~L1CSR1_ICUL sync isync mtspr SPR_L1CSR1, %r11 isync mflr %r9 bl 1f .long 2f-. 1: mflr %r5 lwz %r8, 0(%r5) mtlr %r9 add %r8, %r8, %r5 icbtls 0, 0, %r8 addi %r9, %r8, 64 sync mfspr %r11, SPR_L1CSR1 3: andi. %r11, %r11, L1CSR1_ICUL bne 3b icbtls 0, 0, %r9 sync mfspr %r11, SPR_L1CSR1 3: andi. %r11, %r11, L1CSR1_ICUL bne 3b b 2f .align 6 /* Inside a locked cacheline, wait a while, write, then wait a while */ 2: sync mfspr %r5, TBR_TBL 4: addis %r11, %r5, 0x100000@h /* wait around one million timebase ticks */ mfspr %r5, TBR_TBL subf. %r5, %r5, %r11 bgt 4b stw %r4, 0(%r3) mfspr %r5, TBR_TBL 4: addis %r11, %r5, 0x100000@h /* wait around one million timebase ticks */ mfspr %r5, TBR_TBL subf. %r5, %r5, %r11 bgt 4b sync /* * Fill out the rest of this cache line and the next with nops, * to ensure that nothing outside the locked area will be * fetched due to a branch. */ .rept 19 nop .endr icblc 0, 0, %r8 icblc 0, 0, %r9 blr -/* - * int setfault() - * - * Similar to setjmp to setup for handling faults on accesses to user memory. - * Any routine using this may only call bcopy, either the form below, - * or the (currently used) C code optimized, so it doesn't use any non-volatile - * registers. - */ - .globl setfault -setfault: - mflr %r0 - mfsprg0 %r4 - lwz %r4, TD_PCB(%r2) - stw %r3, PCB_ONFAULT(%r4) - mfcr %r4 - stw %r0, 0(%r3) - stw %r1, 4(%r3) - stw %r2, 8(%r3) - stw %r4, 12(%r3) - stmw %r13, 16(%r3) /* store CR, CTR, XER, [r13 .. r31] */ - li %r3, 0 /* return FALSE */ - blr - /************************************************************************/ /* Data section */ /************************************************************************/ .data .align 3 GLOBAL(__startkernel) .long begin GLOBAL(__endkernel) .long end .align 4 tmpstack: .space TMPSTACKSZ tmpstackbound: .space 10240 /* XXX: this really should not be necessary */ /* * Compiled KERNBASE locations */ .globl kernbase .set kernbase, KERNBASE #include Index: projects/clang380-import/sys/powerpc/include/pcb.h =================================================================== --- projects/clang380-import/sys/powerpc/include/pcb.h (revision 293686) +++ projects/clang380-import/sys/powerpc/include/pcb.h (revision 293687) @@ -1,98 +1,98 @@ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: pcb.h,v 1.4 2000/06/04 11:57:17 tsubai Exp $ * $FreeBSD$ */ #ifndef _MACHINE_PCB_H_ #define _MACHINE_PCB_H_ -typedef register_t faultbuf[25]; +#include struct pcb { register_t pcb_context[20]; /* non-volatile r14-r31 */ register_t pcb_cr; /* Condition register */ register_t pcb_sp; /* stack pointer */ register_t pcb_toc; /* toc pointer */ register_t pcb_lr; /* link register */ struct pmap *pcb_pm; /* pmap of our vmspace */ - faultbuf *pcb_onfault; /* For use during + jmp_buf *pcb_onfault; /* For use during copyin/copyout */ int pcb_flags; #define PCB_FPU 1 /* Process uses FPU */ #define PCB_FPREGS 2 /* Process had FPU registers initialized */ #define PCB_VEC 4 /* Process had Altivec initialized */ #define PCB_VSX 8 /* Process had VSX initialized */ struct fpu { union { double fpr; uint32_t vsr[4]; } fpr[32]; double fpscr; /* FPSCR stored as double for easier access */ } pcb_fpu; /* Floating point processor */ unsigned int pcb_fpcpu; /* which CPU had our FPU stuff. */ struct vec { uint32_t vr[32][4]; uint32_t spare[2]; uint32_t vrsave; uint32_t vscr; /* aligned at vector element 3 */ } pcb_vec __aligned(16); /* Vector processor */ unsigned int pcb_veccpu; /* which CPU had our vector stuff. */ union { struct { vm_offset_t usr_segm; /* Base address */ register_t usr_vsid; /* USER_SR segment */ } aim; struct { register_t dbcr0; } booke; } pcb_cpu; }; #ifdef _KERNEL struct trapframe; #ifndef curpcb extern struct pcb *curpcb; #endif extern struct pmap *curpm; extern struct proc *fpuproc; void makectx(struct trapframe *, struct pcb *); void savectx(struct pcb *) __returns_twice; #endif #endif /* _MACHINE_PCB_H_ */ Index: projects/clang380-import/sys/powerpc/include/setjmp.h =================================================================== --- projects/clang380-import/sys/powerpc/include/setjmp.h (revision 293686) +++ projects/clang380-import/sys/powerpc/include/setjmp.h (revision 293687) @@ -1,24 +1,28 @@ /*- * $NetBSD: setjmp.h,v 1.3 1998/09/16 23:51:27 thorpej Exp $ * $FreeBSD$ */ #ifndef _MACHINE_SETJMP_H_ #define _MACHINE_SETJMP_H_ #include +#ifdef _KERNEL +#define _JBLEN 25 /* Kernel doesn't save FP and Altivec regs */ +#else #define _JBLEN 100 +#endif /* * jmp_buf and sigjmp_buf are encapsulated in different structs to force * compile-time diagnostics for mismatches. The structs are the same * internally to avoid some run-time errors for mismatches. */ #if __BSD_VISIBLE || __POSIX_VISIBLE || __XSI_VISIBLE typedef struct _sigjmp_buf { long _sjb[_JBLEN + 1]; } sigjmp_buf[1]; #endif typedef struct _jmp_buf { long _jb[_JBLEN + 1]; } jmp_buf[1]; #endif /* !_MACHINE_SETJMP_H_ */ Index: projects/clang380-import/sys/powerpc/ofw/rtas.c =================================================================== --- projects/clang380-import/sys/powerpc/ofw/rtas.c (revision 293686) +++ projects/clang380-import/sys/powerpc/ofw/rtas.c (revision 293687) @@ -1,272 +1,271 @@ /*- * Copyright (c) 2011 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_RTAS, "rtas", "Run Time Abstraction Service"); static vm_offset_t rtas_bounce_phys; static caddr_t rtas_bounce_virt; static off_t rtas_bounce_offset; static size_t rtas_bounce_size; static uintptr_t rtas_private_data; static struct mtx rtas_mtx; static phandle_t rtas; /* From ofwcall.S */ int rtascall(vm_offset_t callbuffer, uintptr_t rtas_privdat); extern uintptr_t rtas_entry; extern register_t rtasmsr; -int setfault(faultbuf); /* defined in locore.S */ - /* * After the VM is up, allocate RTAS memory and instantiate it */ static void rtas_setup(void *); SYSINIT(rtas_setup, SI_SUB_KMEM, SI_ORDER_ANY, rtas_setup, NULL); static void rtas_setup(void *junk) { ihandle_t rtasi; cell_t rtas_size = 0, rtas_ptr; char path[31]; int result; rtas = OF_finddevice("/rtas"); if (rtas == -1) { rtas = 0; return; } OF_package_to_path(rtas, path, sizeof(path)); mtx_init(&rtas_mtx, "RTAS", NULL, MTX_SPIN); /* RTAS must be called with everything turned off in MSR */ rtasmsr = mfmsr(); rtasmsr &= ~(PSL_IR | PSL_DR | PSL_EE | PSL_SE); #ifdef __powerpc64__ rtasmsr &= ~PSL_SF; #endif /* * Allocate rtas_size + one page of contiguous, wired physical memory * that can fit into a 32-bit address space and accessed from real mode. * This is used both to bounce arguments and for RTAS private data. * * It must be 4KB-aligned and not cross a 256 MB boundary. */ OF_getencprop(rtas, "rtas-size", &rtas_size, sizeof(rtas_size)); rtas_size = round_page(rtas_size); rtas_bounce_virt = contigmalloc(rtas_size + PAGE_SIZE, M_RTAS, 0, 0, ulmin(platform_real_maxaddr(), BUS_SPACE_MAXADDR_32BIT), 4096, 256*1024*1024); rtas_private_data = vtophys(rtas_bounce_virt); rtas_bounce_virt += rtas_size; /* Actual bounce area */ rtas_bounce_phys = vtophys(rtas_bounce_virt); rtas_bounce_size = PAGE_SIZE; /* * Instantiate RTAS. We always use the 32-bit version. */ if (OF_hasprop(rtas, "linux,rtas-entry") && OF_hasprop(rtas, "linux,rtas-base")) { OF_getencprop(rtas, "linux,rtas-base", &rtas_ptr, sizeof(rtas_ptr)); rtas_private_data = rtas_ptr; OF_getencprop(rtas, "linux,rtas-entry", &rtas_ptr, sizeof(rtas_ptr)); } else { rtasi = OF_open(path); if (rtasi == 0) { rtas = 0; printf("Error initializing RTAS: could not open " "node\n"); return; } result = OF_call_method("instantiate-rtas", rtasi, 1, 1, (cell_t)rtas_private_data, &rtas_ptr); OF_close(rtasi); if (result != 0) { rtas = 0; rtas_ptr = 0; printf("Error initializing RTAS (%d)\n", result); return; } } rtas_entry = (uintptr_t)(rtas_ptr); } static cell_t rtas_real_map(const void *buf, size_t len) { cell_t phys; mtx_assert(&rtas_mtx, MA_OWNED); /* * Make sure the bounce page offset satisfies any reasonable * alignment constraint. */ rtas_bounce_offset += sizeof(register_t) - (rtas_bounce_offset % sizeof(register_t)); if (rtas_bounce_offset + len > rtas_bounce_size) { panic("Oversize RTAS call!"); return 0; } if (buf != NULL) memcpy(rtas_bounce_virt + rtas_bounce_offset, buf, len); else return (0); phys = rtas_bounce_phys + rtas_bounce_offset; rtas_bounce_offset += len; return (phys); } static void rtas_real_unmap(cell_t physaddr, void *buf, size_t len) { mtx_assert(&rtas_mtx, MA_OWNED); if (physaddr == 0) return; memcpy(buf, rtas_bounce_virt + (physaddr - rtas_bounce_phys), len); } /* Check if we have RTAS */ int rtas_exists(void) { return (rtas != 0); } /* Call an RTAS method by token */ int rtas_call_method(cell_t token, int nargs, int nreturns, ...) { vm_offset_t argsptr; - faultbuf env, *oldfaultbuf; + jmp_buf env, *oldfaultbuf; va_list ap; struct { cell_t token; cell_t nargs; cell_t nreturns; cell_t args_n_results[12]; } args; int n, result; if (!rtas_exists() || nargs + nreturns > 12) return (-1); args.token = token; va_start(ap, nreturns); mtx_lock_spin(&rtas_mtx); rtas_bounce_offset = 0; args.nargs = nargs; args.nreturns = nreturns; for (n = 0; n < nargs; n++) args.args_n_results[n] = va_arg(ap, cell_t); argsptr = rtas_real_map(&args, sizeof(args)); /* Get rid of any stale machine checks that have been waiting. */ __asm __volatile ("sync; isync"); oldfaultbuf = curthread->td_pcb->pcb_onfault; - if (!setfault(env)) { + curthread->td_pcb->pcb_onfault = &env; + if (!setjmp(env)) { __asm __volatile ("sync"); result = rtascall(argsptr, rtas_private_data); __asm __volatile ("sync; isync"); } else { result = RTAS_HW_ERROR; } curthread->td_pcb->pcb_onfault = oldfaultbuf; __asm __volatile ("sync"); rtas_real_unmap(argsptr, &args, sizeof(args)); mtx_unlock_spin(&rtas_mtx); if (result < 0) return (result); for (n = nargs; n < nargs + nreturns; n++) *va_arg(ap, cell_t *) = args.args_n_results[n]; return (result); } /* Look up an RTAS token */ cell_t rtas_token_lookup(const char *method) { cell_t token; if (!rtas_exists()) return (-1); if (OF_getencprop(rtas, method, &token, sizeof(token)) == -1) return (-1); return (token); } Index: projects/clang380-import/sys/powerpc/powermac/grackle.c =================================================================== --- projects/clang380-import/sys/powerpc/powermac/grackle.c (revision 293686) +++ projects/clang380-import/sys/powerpc/powermac/grackle.c (revision 293687) @@ -1,325 +1,324 @@ /*- * Copyright 2003 by Peter Grehan. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" /* * Device interface. */ static int grackle_probe(device_t); static int grackle_attach(device_t); /* * pcib interface. */ static u_int32_t grackle_read_config(device_t, u_int, u_int, u_int, u_int, int); static void grackle_write_config(device_t, u_int, u_int, u_int, u_int, u_int32_t, int); /* * Local routines. */ static int grackle_enable_config(struct grackle_softc *, u_int, u_int, u_int, u_int); static void grackle_disable_config(struct grackle_softc *); static int badaddr(void *, size_t); -int setfault(faultbuf); /* defined in locore.S */ - /* * Driver methods. */ static device_method_t grackle_methods[] = { /* Device interface */ DEVMETHOD(device_probe, grackle_probe), DEVMETHOD(device_attach, grackle_attach), /* pcib interface */ DEVMETHOD(pcib_read_config, grackle_read_config), DEVMETHOD(pcib_write_config, grackle_write_config), DEVMETHOD_END }; static devclass_t grackle_devclass; DEFINE_CLASS_1(pcib, grackle_driver, grackle_methods, sizeof(struct grackle_softc), ofw_pci_driver); DRIVER_MODULE(grackle, ofwbus, grackle_driver, grackle_devclass, 0, 0); static int grackle_probe(device_t dev) { const char *type, *compatible; type = ofw_bus_get_type(dev); compatible = ofw_bus_get_compat(dev); if (type == NULL || compatible == NULL) return (ENXIO); if (strcmp(type, "pci") != 0 || strcmp(compatible, "grackle") != 0) return (ENXIO); device_set_desc(dev, "MPC106 (Grackle) Host-PCI bridge"); return (0); } static int grackle_attach(device_t dev) { struct grackle_softc *sc; sc = device_get_softc(dev); /* * The Grackle PCI config addr/data registers are actually in * PCI space, but since they are needed to actually probe the * PCI bus, use the fact that they are also available directly * on the processor bus and map them */ sc->sc_addr = (vm_offset_t)pmap_mapdev(GRACKLE_ADDR, PAGE_SIZE); sc->sc_data = (vm_offset_t)pmap_mapdev(GRACKLE_DATA, PAGE_SIZE); return (ofw_pci_attach(dev)); } static u_int32_t grackle_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, int width) { struct grackle_softc *sc; vm_offset_t caoff; u_int32_t retval = 0xffffffff; sc = device_get_softc(dev); caoff = sc->sc_data + (reg & 0x03); if (grackle_enable_config(sc, bus, slot, func, reg) != 0) { /* * Config probes to non-existent devices on the * secondary bus generates machine checks. Be sure * to catch these. */ if (bus > 0) { if (badaddr((void *)sc->sc_data, 4)) { return (retval); } } switch (width) { case 1: retval = (in8rb(caoff)); break; case 2: retval = (in16rb(caoff)); break; case 4: retval = (in32rb(caoff)); break; } } grackle_disable_config(sc); return (retval); } static void grackle_write_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, u_int32_t val, int width) { struct grackle_softc *sc; vm_offset_t caoff; sc = device_get_softc(dev); caoff = sc->sc_data + (reg & 0x03); if (grackle_enable_config(sc, bus, slot, func, reg)) { switch (width) { case 1: out8rb(caoff, val); (void)in8rb(caoff); break; case 2: out16rb(caoff, val); (void)in16rb(caoff); break; case 4: out32rb(caoff, val); (void)in32rb(caoff); break; } } grackle_disable_config(sc); } static int grackle_enable_config(struct grackle_softc *sc, u_int bus, u_int slot, u_int func, u_int reg) { u_int32_t cfgval; /* * Unlike UniNorth, the format of the config word is the same * for local (0) and remote busses. */ cfgval = (bus << 16) | (slot << 11) | (func << 8) | (reg & 0xFC) | GRACKLE_CFG_ENABLE; out32rb(sc->sc_addr, cfgval); (void) in32rb(sc->sc_addr); return (1); } static void grackle_disable_config(struct grackle_softc *sc) { /* * Clear the GRACKLE_CFG_ENABLE bit to prevent stray * accesses from causing config cycles */ out32rb(sc->sc_addr, 0); } static int badaddr(void *addr, size_t size) { struct thread *td; - faultbuf env, *oldfaultbuf; + jmp_buf env, *oldfaultbuf; int x; /* Get rid of any stale machine checks that have been waiting. */ __asm __volatile ("sync; isync"); td = curthread; oldfaultbuf = td->td_pcb->pcb_onfault; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = oldfaultbuf; __asm __volatile ("sync"); return 1; } __asm __volatile ("sync"); switch (size) { case 1: x = *(volatile int8_t *)addr; break; case 2: x = *(volatile int16_t *)addr; break; case 4: x = *(volatile int32_t *)addr; break; default: panic("badaddr: invalid size (%zd)", size); } /* Make sure we took the machine check, if we caused one. */ __asm __volatile ("sync; isync"); td->td_pcb->pcb_onfault = oldfaultbuf; __asm __volatile ("sync"); /* To be sure. */ return (0); } /* * Driver to swallow Grackle host bridges from the PCI bus side. */ static int grackle_hb_probe(device_t dev) { if (pci_get_devid(dev) == 0x00021057) { device_set_desc(dev, "Grackle Host to PCI bridge"); device_quiet(dev); return (0); } return (ENXIO); } static int grackle_hb_attach(device_t dev) { return (0); } static device_method_t grackle_hb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, grackle_hb_probe), DEVMETHOD(device_attach, grackle_hb_attach), { 0, 0 } }; static driver_t grackle_hb_driver = { "grackle_hb", grackle_hb_methods, 1, }; static devclass_t grackle_hb_devclass; DRIVER_MODULE(grackle_hb, pci, grackle_hb_driver, grackle_hb_devclass, 0, 0); Index: projects/clang380-import/sys/powerpc/powerpc/copyinout.c =================================================================== --- projects/clang380-import/sys/powerpc/powerpc/copyinout.c (revision 293686) +++ projects/clang380-import/sys/powerpc/powerpc/copyinout.c (revision 293687) @@ -1,611 +1,621 @@ /*- * Copyright (C) 2002 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 1993 Wolfgang Solfrank. * Copyright (C) 1993 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include -int setfault(faultbuf); /* defined in locore.S */ - #ifdef AIM /* * Makes sure that the right segment of userspace is mapped in. */ #ifdef __powerpc64__ static __inline void set_user_sr(pmap_t pm, volatile const void *addr) { struct slb *slb; register_t slbv; /* Try lockless look-up first */ slb = user_va_to_slb_entry(pm, (vm_offset_t)addr); if (slb == NULL) { /* If it isn't there, we need to pre-fault the VSID */ PMAP_LOCK(pm); slbv = va_to_vsid(pm, (vm_offset_t)addr) << SLBV_VSID_SHIFT; PMAP_UNLOCK(pm); } else { slbv = slb->slbv; } /* Mark segment no-execute */ slbv |= SLBV_N; /* If we have already set this VSID, we can just return */ if (curthread->td_pcb->pcb_cpu.aim.usr_vsid == slbv) return; __asm __volatile("isync"); curthread->td_pcb->pcb_cpu.aim.usr_segm = (uintptr_t)addr >> ADDR_SR_SHFT; curthread->td_pcb->pcb_cpu.aim.usr_vsid = slbv; __asm __volatile ("slbie %0; slbmte %1, %2; isync" :: "r"(USER_ADDR), "r"(slbv), "r"(USER_SLB_SLBE)); } #else static __inline void set_user_sr(pmap_t pm, volatile const void *addr) { register_t vsid; vsid = va_to_vsid(pm, (vm_offset_t)addr); /* Mark segment no-execute */ vsid |= SR_N; /* If we have already set this VSID, we can just return */ if (curthread->td_pcb->pcb_cpu.aim.usr_vsid == vsid) return; __asm __volatile("isync"); curthread->td_pcb->pcb_cpu.aim.usr_segm = (uintptr_t)addr >> ADDR_SR_SHFT; curthread->td_pcb->pcb_cpu.aim.usr_vsid = vsid; __asm __volatile("mtsr %0,%1; isync" :: "n"(USER_SR), "r"(vsid)); } #endif static __inline int map_user_ptr(pmap_t pm, volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen) { size_t l; *kaddr = (char *)USER_ADDR + ((uintptr_t)uaddr & ~SEGMENT_MASK); l = ((char *)USER_ADDR + SEGMENT_LENGTH) - (char *)(*kaddr); if (l > ulen) l = ulen; if (klen) *klen = l; else if (l != ulen) return (EFAULT); set_user_sr(pm, uaddr); return (0); } #else /* Book-E uses a combined kernel/user mapping */ static __inline int map_user_ptr(pmap_t pm, volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen) { if ((uintptr_t)uaddr + ulen > VM_MAXUSER_ADDRESS + PAGE_SIZE) return (EFAULT); *kaddr = (void *)(uintptr_t)uaddr; if (klen) *klen = ulen; return (0); } #endif int copyout(const void *kaddr, void *udaddr, size_t len) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; const char *kp; char *up, *p; size_t l; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (EFAULT); } kp = kaddr; up = udaddr; while (len > 0) { if (map_user_ptr(pm, udaddr, (void **)&p, len, &l)) { td->td_pcb->pcb_onfault = NULL; return (EFAULT); } bcopy(kp, p, l); up += l; kp += l; len -= l; } td->td_pcb->pcb_onfault = NULL; return (0); } int copyin(const void *udaddr, void *kaddr, size_t len) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; const char *up; char *kp, *p; size_t l; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (EFAULT); } kp = kaddr; up = udaddr; while (len > 0) { if (map_user_ptr(pm, udaddr, (void **)&p, len, &l)) { td->td_pcb->pcb_onfault = NULL; return (EFAULT); } bcopy(p, kp, l); up += l; kp += l; len -= l; } td->td_pcb->pcb_onfault = NULL; return (0); } int copyinstr(const void *udaddr, void *kaddr, size_t len, size_t *done) { const char *up; char *kp; size_t l; int rv, c; kp = kaddr; up = udaddr; rv = ENAMETOOLONG; for (l = 0; len-- > 0; l++) { if ((c = fubyte(up++)) < 0) { rv = EFAULT; break; } if (!(*kp++ = c)) { l++; rv = 0; break; } } if (done != NULL) { *done = l; } return (rv); } int subyte(volatile void *addr, int byte) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; char *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } *p = (char)byte; td->td_pcb->pcb_onfault = NULL; return (0); } #ifdef __powerpc64__ int suword32(volatile void *addr, int word) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; int *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } *p = word; td->td_pcb->pcb_onfault = NULL; return (0); } #endif int suword(volatile void *addr, long word) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; long *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } *p = word; td->td_pcb->pcb_onfault = NULL; return (0); } #ifdef __powerpc64__ int suword64(volatile void *addr, int64_t word) { return (suword(addr, (long)word)); } #else int suword32(volatile void *addr, int32_t word) { return (suword(addr, (long)word)); } #endif int fubyte(volatile const void *addr) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; u_char *p; int val; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } val = *p; td->td_pcb->pcb_onfault = NULL; return (val); } int fuword16(volatile const void *addr) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; uint16_t *p, val; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } val = *p; td->td_pcb->pcb_onfault = NULL; return (val); } int fueword32(volatile const void *addr, int32_t *val) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; int32_t *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } *val = *p; td->td_pcb->pcb_onfault = NULL; return (0); } #ifdef __powerpc64__ int fueword64(volatile const void *addr, int64_t *val) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; int64_t *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } *val = *p; td->td_pcb->pcb_onfault = NULL; return (0); } #endif int fueword(volatile const void *addr, long *val) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; long *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } *val = *p; td->td_pcb->pcb_onfault = NULL; return (0); } int casueword32(volatile uint32_t *addr, uint32_t old, uint32_t *oldvalp, uint32_t new) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; uint32_t *p, val; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (map_user_ptr(pm, (void *)(uintptr_t)addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } __asm __volatile ( "1:\tlwarx %0, 0, %2\n\t" /* load old value */ "cmplw %3, %0\n\t" /* compare */ "bne 2f\n\t" /* exit if not equal */ "stwcx. %4, 0, %2\n\t" /* attempt to store */ "bne- 1b\n\t" /* spin if failed */ "b 3f\n\t" /* we've succeeded */ "2:\n\t" "stwcx. %0, 0, %2\n\t" /* clear reservation (74xx) */ "3:\n\t" : "=&r" (val), "=m" (*p) : "r" (p), "r" (old), "r" (new), "m" (*p) : "cr0", "memory"); td->td_pcb->pcb_onfault = NULL; *oldvalp = val; return (0); } #ifndef __powerpc64__ int casueword(volatile u_long *addr, u_long old, u_long *oldvalp, u_long new) { return (casueword32((volatile uint32_t *)addr, old, (uint32_t *)oldvalp, new)); } #else int casueword(volatile u_long *addr, u_long old, u_long *oldvalp, u_long new) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; u_long *p, val; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (map_user_ptr(pm, (void *)(uintptr_t)addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } __asm __volatile ( "1:\tldarx %0, 0, %2\n\t" /* load old value */ "cmpld %3, %0\n\t" /* compare */ "bne 2f\n\t" /* exit if not equal */ "stdcx. %4, 0, %2\n\t" /* attempt to store */ "bne- 1b\n\t" /* spin if failed */ "b 3f\n\t" /* we've succeeded */ "2:\n\t" "stdcx. %0, 0, %2\n\t" /* clear reservation (74xx) */ "3:\n\t" : "=&r" (val), "=m" (*p) : "r" (p), "r" (old), "r" (new), "m" (*p) : "cr0", "memory"); td->td_pcb->pcb_onfault = NULL; *oldvalp = val; return (0); } #endif Index: projects/clang380-import/sys/powerpc/powerpc/elf32_machdep.c =================================================================== --- projects/clang380-import/sys/powerpc/powerpc/elf32_machdep.c (revision 293686) +++ projects/clang380-import/sys/powerpc/powerpc/elf32_machdep.c (revision 293687) @@ -1,320 +1,321 @@ /*- * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __powerpc64__ #include #include extern const char *freebsd32_syscallnames[]; #endif struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, #ifdef __powerpc64__ .sv_table = freebsd32_sysent, #else .sv_table = sysent, #endif .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode32, .sv_szsigcode = &szsigcode32, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_stackprot = VM_PROT_ALL, #ifdef __powerpc64__ .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = ppc32_setregs, .sv_syscallnames = freebsd32_syscallnames, #else .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_syscallnames = syscallnames, #endif .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_SHP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, #ifdef __powerpc64__ .interp_newpath = "/libexec/ld-elf32.so.1", #else .interp_newpath = NULL, #endif .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static Elf32_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_oinfo); void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { size_t len; struct pcb *pcb; len = 0; pcb = td->td_pcb; if (pcb->pcb_flags & PCB_VEC) { save_vec_nodrop(td); if (dst != NULL) { len += elf32_populate_note(NT_PPC_VMX, &pcb->pcb_vec, dst, sizeof(pcb->pcb_vec), NULL); } else len += elf32_populate_note(NT_PPC_VMX, NULL, NULL, sizeof(pcb->pcb_vec), NULL); } *off = len; } #ifndef __powerpc64__ /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where; Elf_Half *hwhere; Elf_Addr addr; Elf_Addr addend; Elf_Word rtype, symidx; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: panic("PPC only supports RELA relocations"); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) ((uintptr_t)relocbase + rela->r_offset); hwhere = (Elf_Half *) ((uintptr_t)relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("elf_reloc: unknown relocation mode %d\n", type); } switch (rtype) { case R_PPC_NONE: break; case R_PPC_ADDR32: /* word32 S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; *where = elf_relocaddr(lf, addr + addend); break; case R_PPC_ADDR16_LO: /* #lo(S) */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; /* * addend values are sometimes relative to sections * (i.e. .rodata) in rela, where in reality they * are relative to relocbase. Detect this condition. */ if (addr > relocbase && addr <= (relocbase + addend)) addr = relocbase; addr = elf_relocaddr(lf, addr + addend); *hwhere = addr & 0xffff; break; case R_PPC_ADDR16_HA: /* #ha(S) */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; /* * addend values are sometimes relative to sections * (i.e. .rodata) in rela, where in reality they * are relative to relocbase. Detect this condition. */ if (addr > relocbase && addr <= (relocbase + addend)) addr = relocbase; addr = elf_relocaddr(lf, addr + addend); *hwhere = ((addr >> 16) + ((addr & 0x8000) ? 1 : 0)) & 0xffff; break; case R_PPC_RELATIVE: /* word32 B + A */ *where = elf_relocaddr(lf, relocbase + addend); break; default: printf("kldload: unexpected relocation type %d\n", (int) rtype); return -1; } return(0); } void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase) { Elf_Rela *rela = 0, *relalim; Elf_Addr relasz = 0; Elf_Addr *where; /* * Extract the rela/relasz values from the dynamic section */ for (; dynp->d_tag != DT_NULL; dynp++) { switch (dynp->d_tag) { case DT_RELA: rela = (Elf_Rela *)(relocbase+dynp->d_un.d_ptr); break; case DT_RELASZ: relasz = dynp->d_un.d_val; break; } } /* * Relocate these values */ relalim = (Elf_Rela *)((caddr_t)rela + relasz); for (; rela < relalim; rela++) { if (ELF_R_TYPE(rela->r_info) != R_PPC_RELATIVE) continue; where = (Elf_Addr *)(relocbase + rela->r_offset); *where = (Elf_Addr)(relocbase + rela->r_addend); } } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf) { /* Only sync the cache for non-kernel modules */ if (lf->id != 1) __syncicache(lf->address, lf->size); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } #endif Index: projects/clang380-import/sys/powerpc/powerpc/elf64_machdep.c =================================================================== --- projects/clang380-import/sys/powerpc/powerpc/elf64_machdep.c (revision 293686) +++ projects/clang380-import/sys/powerpc/powerpc/elf64_machdep.c (revision 293687) @@ -1,374 +1,375 @@ /*- * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void exec_setregs_funcdesc(struct thread *td, struct image_params *imgp, u_long stack); struct sysentvec elf64_freebsd_sysvec_v1 = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode64, .sv_szsigcode = &szsigcode64, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs_funcdesc, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; INIT_SYSENTVEC(elf64_sysvec_v1, &elf64_freebsd_sysvec_v1); struct sysentvec elf64_freebsd_sysvec_v2 = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode64_elfv2, .sv_szsigcode = &szsigcode64_elfv2, .sv_name = "FreeBSD ELF64 V2", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, }; INIT_SYSENTVEC(elf64_sysvec_v2, &elf64_freebsd_sysvec_v2); static boolean_t ppc64_elfv1_header_match(struct image_params *params); static boolean_t ppc64_elfv2_header_match(struct image_params *params); static Elf64_Brandinfo freebsd_brand_info_elfv1 = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_v1, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported = &ppc64_elfv1_header_match }; SYSINIT(elf64v1, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info_elfv1); static Elf64_Brandinfo freebsd_brand_info_elfv2 = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_v2, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported = &ppc64_elfv2_header_match }; SYSINIT(elf64v2, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info_elfv2); static Elf64_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_v1, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported = &ppc64_elfv1_header_match }; SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_oinfo); void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase); static boolean_t ppc64_elfv1_header_match(struct image_params *params) { const Elf64_Ehdr *hdr = (const Elf64_Ehdr *)params->image_header; int abi = (hdr->e_flags & 3); return (abi == 0 || abi == 1); } static boolean_t ppc64_elfv2_header_match(struct image_params *params) { const Elf64_Ehdr *hdr = (const Elf64_Ehdr *)params->image_header; int abi = (hdr->e_flags & 3); return (abi == 2); } static void exec_setregs_funcdesc(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf; register_t entry_desc[3]; tf = trapframe(td); exec_setregs(td, imgp, stack); /* * For 64-bit ELFv1, we need to disentangle the function * descriptor * * 0. entry point * 1. TOC value (r2) * 2. Environment pointer (r11) */ (void)copyin((void *)imgp->entry_addr, entry_desc, sizeof(entry_desc)); tf->srr0 = entry_desc[0] + imgp->reloc_base; tf->fixreg[2] = entry_desc[1] + imgp->reloc_base; tf->fixreg[11] = entry_desc[2] + imgp->reloc_base; } void elf64_dump_thread(struct thread *td, void *dst, size_t *off) { size_t len; struct pcb *pcb; len = 0; pcb = td->td_pcb; if (pcb->pcb_flags & PCB_VEC) { save_vec_nodrop(td); if (dst != NULL) { len += elf64_populate_note(NT_PPC_VMX, &pcb->pcb_vec, dst, sizeof(pcb->pcb_vec), NULL); } else len += elf64_populate_note(NT_PPC_VMX, NULL, NULL, sizeof(pcb->pcb_vec), NULL); } *off = len; } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where; Elf_Addr addr; Elf_Addr addend; Elf_Word rtype, symidx; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: panic("PPC only supports RELA relocations"); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("elf_reloc: unknown relocation mode %d\n", type); } switch (rtype) { case R_PPC_NONE: break; case R_PPC64_ADDR64: /* doubleword64 S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; addr += addend; *where = addr; break; case R_PPC_RELATIVE: /* doubleword64 B + A */ *where = elf_relocaddr(lf, relocbase + addend); break; case R_PPC_JMP_SLOT: /* function descriptor copy */ lookup(lf, symidx, 1, &addr); #if !defined(_CALL_ELF) || _CALL_ELF == 1 memcpy(where, (Elf_Addr *)addr, 3*sizeof(Elf_Addr)); #else memcpy(where, (Elf_Addr *)addr, sizeof(Elf_Addr)); #endif __asm __volatile("dcbst 0,%0; sync" :: "r"(where) : "memory"); break; default: printf("kldload: unexpected relocation type %d\n", (int) rtype); return -1; } return(0); } void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase) { Elf_Rela *rela = 0, *relalim; Elf_Addr relasz = 0; Elf_Addr *where; /* * Extract the rela/relasz values from the dynamic section */ for (; dynp->d_tag != DT_NULL; dynp++) { switch (dynp->d_tag) { case DT_RELA: rela = (Elf_Rela *)(relocbase+dynp->d_un.d_ptr); break; case DT_RELASZ: relasz = dynp->d_un.d_val; break; } } /* * Relocate these values */ relalim = (Elf_Rela *)((caddr_t)rela + relasz); for (; rela < relalim; rela++) { if (ELF_R_TYPE(rela->r_info) != R_PPC_RELATIVE) continue; where = (Elf_Addr *)(relocbase + rela->r_offset); *where = (Elf_Addr)(relocbase + rela->r_addend); } } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf) { /* Only sync the cache for non-kernel modules */ if (lf->id != 1) __syncicache(lf->address, lf->size); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: projects/clang380-import/sys/powerpc/powerpc/mp_machdep.c =================================================================== --- projects/clang380-import/sys/powerpc/powerpc/mp_machdep.c (revision 293686) +++ projects/clang380-import/sys/powerpc/powerpc/mp_machdep.c (revision 293687) @@ -1,375 +1,374 @@ /*- * Copyright (c) 2008 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pic_if.h" extern struct pcpu __pcpu[MAXCPU]; volatile static int ap_awake; volatile static u_int ap_letgo; volatile static u_quad_t ap_timebase; static u_int ipi_msg_cnt[32]; static struct mtx ap_boot_mtx; struct pcb stoppcbs[MAXCPU]; -int longfault(faultbuf, int); void machdep_ap_bootstrap(void) { /* Set PIR */ PCPU_SET(pir, mfspr(SPR_PIR)); PCPU_SET(awake, 1); __asm __volatile("msync; isync"); while (ap_letgo == 0) ; /* Initialize DEC and TB, sync with the BSP values */ #ifdef __powerpc64__ /* Writing to the time base register is hypervisor-privileged */ if (mfmsr() & PSL_HV) mttb(ap_timebase); #else mttb(ap_timebase); #endif decr_ap_init(); /* Give platform code a chance to do anything necessary */ platform_smp_ap_init(); /* Serialize console output and AP count increment */ mtx_lock_spin(&ap_boot_mtx); ap_awake++; printf("SMP: AP CPU #%d launched\n", PCPU_GET(cpuid)); mtx_unlock_spin(&ap_boot_mtx); /* Start per-CPU event timers. */ cpu_initclocks_ap(); /* Announce ourselves awake, and enter the scheduler */ sched_throw(NULL); } void cpu_mp_setmaxid(void) { struct cpuref cpuref; int error; mp_ncpus = 0; error = platform_smp_first_cpu(&cpuref); while (!error) { mp_ncpus++; error = platform_smp_next_cpu(&cpuref); } /* Sanity. */ if (mp_ncpus == 0) mp_ncpus = 1; /* * Set the largest cpuid we're going to use. This is necessary * for VM initialization. */ mp_maxid = min(mp_ncpus, MAXCPU) - 1; } int cpu_mp_probe(void) { /* * We're not going to enable SMP if there's only 1 processor. */ return (mp_ncpus > 1); } void cpu_mp_start(void) { struct cpuref bsp, cpu; struct pcpu *pc; int error; error = platform_smp_get_bsp(&bsp); KASSERT(error == 0, ("Don't know BSP")); KASSERT(bsp.cr_cpuid == 0, ("%s: cpuid != 0", __func__)); error = platform_smp_first_cpu(&cpu); while (!error) { if (cpu.cr_cpuid >= MAXCPU) { printf("SMP: cpu%d: skipped -- ID out of range\n", cpu.cr_cpuid); goto next; } if (CPU_ISSET(cpu.cr_cpuid, &all_cpus)) { printf("SMP: cpu%d: skipped - duplicate ID\n", cpu.cr_cpuid); goto next; } if (cpu.cr_cpuid != bsp.cr_cpuid) { void *dpcpu; pc = &__pcpu[cpu.cr_cpuid]; dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); pcpu_init(pc, cpu.cr_cpuid, sizeof(*pc)); dpcpu_init(dpcpu, cpu.cr_cpuid); } else { pc = pcpup; pc->pc_cpuid = bsp.cr_cpuid; pc->pc_bsp = 1; } pc->pc_hwref = cpu.cr_hwref; CPU_SET(pc->pc_cpuid, &all_cpus); next: error = platform_smp_next_cpu(&cpu); } } void cpu_mp_announce(void) { struct pcpu *pc; int i; for (i = 0; i <= mp_maxid; i++) { pc = pcpu_find(i); if (pc == NULL) continue; printf("cpu%d: dev=%x", i, (int)pc->pc_hwref); if (pc->pc_bsp) printf(" (BSP)"); printf("\n"); } } static void cpu_mp_unleash(void *dummy) { struct pcpu *pc; int cpus, timeout; if (mp_ncpus <= 1) return; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); cpus = 0; smp_cpus = 0; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { cpus++; if (!pc->pc_bsp) { if (bootverbose) printf("Waking up CPU %d (dev=%x)\n", pc->pc_cpuid, (int)pc->pc_hwref); platform_smp_start_cpu(pc); timeout = 2000; /* wait 2sec for the AP */ while (!pc->pc_awake && --timeout > 0) DELAY(1000); } else { PCPU_SET(pir, mfspr(SPR_PIR)); pc->pc_awake = 1; } if (pc->pc_awake) { if (bootverbose) printf("Adding CPU %d, pir=%x, awake=%x\n", pc->pc_cpuid, pc->pc_pir, pc->pc_awake); smp_cpus++; } else CPU_SET(pc->pc_cpuid, &stopped_cpus); } ap_awake = 1; /* Provide our current DEC and TB values for APs */ ap_timebase = mftb() + 10; __asm __volatile("msync; isync"); /* Let APs continue */ atomic_store_rel_int(&ap_letgo, 1); #ifdef __powerpc64__ /* Writing to the time base register is hypervisor-privileged */ if (mfmsr() & PSL_HV) mttb(ap_timebase); #else mttb(ap_timebase); #endif while (ap_awake < smp_cpus) ; if (smp_cpus != cpus || cpus != mp_ncpus) { printf("SMP: %d CPUs found; %d CPUs usable; %d CPUs woken\n", mp_ncpus, cpus, smp_cpus); } /* Let the APs get into the scheduler */ DELAY(10000); /* XXX Atomic set operation? */ smp_started = 1; } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, cpu_mp_unleash, NULL); int powerpc_ipi_handler(void *arg) { u_int cpuid; uint32_t ipimask; int msg; CTR2(KTR_SMP, "%s: MSR 0x%08x", __func__, mfmsr()); ipimask = atomic_readandclear_32(&(pcpup->pc_ipimask)); if (ipimask == 0) return (FILTER_STRAY); while ((msg = ffs(ipimask) - 1) != -1) { ipimask &= ~(1u << msg); ipi_msg_cnt[msg]++; switch (msg) { case IPI_AST: CTR1(KTR_SMP, "%s: IPI_AST", __func__); break; case IPI_PREEMPT: CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__); sched_preempt(curthread); break; case IPI_RENDEZVOUS: CTR1(KTR_SMP, "%s: IPI_RENDEZVOUS", __func__); smp_rendezvous_action(); break; case IPI_STOP: /* * IPI_STOP_HARD is mapped to IPI_STOP so it is not * necessary to add such case in the switch. */ CTR1(KTR_SMP, "%s: IPI_STOP or IPI_STOP_HARD (stop)", __func__); cpuid = PCPU_GET(cpuid); savectx(&stoppcbs[cpuid]); savectx(PCPU_GET(curpcb)); CPU_SET_ATOMIC(cpuid, &stopped_cpus); while (!CPU_ISSET(cpuid, &started_cpus)) cpu_spinwait(); CPU_CLR_ATOMIC(cpuid, &stopped_cpus); CPU_CLR_ATOMIC(cpuid, &started_cpus); CTR1(KTR_SMP, "%s: IPI_STOP (restart)", __func__); break; case IPI_HARDCLOCK: CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__); hardclockintr(); break; } } return (FILTER_HANDLED); } static void ipi_send(struct pcpu *pc, int ipi) { CTR4(KTR_SMP, "%s: pc=%p, targetcpu=%d, IPI=%d", __func__, pc, pc->pc_cpuid, ipi); atomic_set_32(&pc->pc_ipimask, (1 << ipi)); powerpc_sync(); PIC_IPI(root_pic, pc->pc_cpuid); CTR1(KTR_SMP, "%s: sent", __func__); } /* Send an IPI to a set of cpus. */ void ipi_selected(cpuset_t cpus, int ipi) { struct pcpu *pc; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (CPU_ISSET(pc->pc_cpuid, &cpus)) ipi_send(pc, ipi); } } /* Send an IPI to a specific CPU. */ void ipi_cpu(int cpu, u_int ipi) { ipi_send(cpuid_to_pcpu[cpu], ipi); } /* Send an IPI to all CPUs EXCEPT myself. */ void ipi_all_but_self(int ipi) { struct pcpu *pc; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (pc != pcpup) ipi_send(pc, ipi); } } Index: projects/clang380-import/sys/powerpc/powerpc/setjmp.S =================================================================== --- projects/clang380-import/sys/powerpc/powerpc/setjmp.S (revision 293686) +++ projects/clang380-import/sys/powerpc/powerpc/setjmp.S (revision 293687) @@ -1,115 +1,114 @@ /* $FreeBSD$ */ /* from: NetBSD: setjmp.S,v 1.1 1998/01/27 15:13:12 sakamoto Exp $ */ /* from: OpenBSD: setjmp.S,v 1.2 1996/12/28 06:22:18 rahnds Exp */ /* kernel version of this file, does not have signal goop */ /* int setjmp(jmp_buf env) */ #include #ifdef __powerpc64__ #define LD_REG ld #define ST_REG std #define REGWIDTH 8 #else #define LD_REG lwz #define ST_REG stw #define REGWIDTH 4 #endif #define JMP_r1 1*REGWIDTH #define JMP_r2 2*REGWIDTH #define JMP_r14 3*REGWIDTH #define JMP_r15 4*REGWIDTH #define JMP_r16 5*REGWIDTH #define JMP_r17 6*REGWIDTH #define JMP_r18 7*REGWIDTH #define JMP_r19 8*REGWIDTH #define JMP_r20 9*REGWIDTH #define JMP_r21 10*REGWIDTH #define JMP_r22 11*REGWIDTH #define JMP_r23 12*REGWIDTH #define JMP_r24 13*REGWIDTH #define JMP_r25 14*REGWIDTH #define JMP_r26 15*REGWIDTH #define JMP_r27 16*REGWIDTH #define JMP_r28 17*REGWIDTH #define JMP_r29 18*REGWIDTH #define JMP_r30 19*REGWIDTH #define JMP_r31 20*REGWIDTH #define JMP_lr 21*REGWIDTH #define JMP_cr 22*REGWIDTH #define JMP_ctr 23*REGWIDTH #define JMP_xer 24*REGWIDTH -#define JMP_sig 25*REGWIDTH ASENTRY_NOPROF(setjmp) ST_REG 31, JMP_r31(3) /* r1, r2, r14-r30 */ ST_REG 1, JMP_r1 (3) ST_REG 2, JMP_r2 (3) ST_REG 14, JMP_r14(3) ST_REG 15, JMP_r15(3) ST_REG 16, JMP_r16(3) ST_REG 17, JMP_r17(3) ST_REG 18, JMP_r18(3) ST_REG 19, JMP_r19(3) ST_REG 20, JMP_r20(3) ST_REG 21, JMP_r21(3) ST_REG 22, JMP_r22(3) ST_REG 23, JMP_r23(3) ST_REG 24, JMP_r24(3) ST_REG 25, JMP_r25(3) ST_REG 26, JMP_r26(3) ST_REG 27, JMP_r27(3) ST_REG 28, JMP_r28(3) ST_REG 29, JMP_r29(3) ST_REG 30, JMP_r30(3) /* cr, lr, ctr, xer */ mfcr 0 ST_REG 0, JMP_cr(3) mflr 0 ST_REG 0, JMP_lr(3) mfctr 0 ST_REG 0, JMP_ctr(3) mfxer 0 ST_REG 0, JMP_xer(3) /* f14-f31, fpscr */ li 3, 0 blr .extern sigsetmask ASENTRY_NOPROF(longjmp) LD_REG 31, JMP_r31(3) /* r1, r2, r14-r30 */ LD_REG 1, JMP_r1 (3) LD_REG 2, JMP_r2 (3) LD_REG 14, JMP_r14(3) LD_REG 15, JMP_r15(3) LD_REG 16, JMP_r16(3) LD_REG 17, JMP_r17(3) LD_REG 18, JMP_r18(3) LD_REG 19, JMP_r19(3) LD_REG 20, JMP_r20(3) LD_REG 21, JMP_r21(3) LD_REG 22, JMP_r22(3) LD_REG 23, JMP_r23(3) LD_REG 24, JMP_r24(3) LD_REG 25, JMP_r25(3) LD_REG 26, JMP_r26(3) LD_REG 27, JMP_r27(3) LD_REG 28, JMP_r28(3) LD_REG 29, JMP_r29(3) LD_REG 30, JMP_r30(3) /* cr, lr, ctr, xer */ LD_REG 0, JMP_cr(3) mtcr 0 LD_REG 0, JMP_lr(3) mtlr 0 LD_REG 0, JMP_ctr(3) mtctr 0 LD_REG 0, JMP_xer(3) mtxer 0 /* f14-f31, fpscr */ mr 3, 4 blr Index: projects/clang380-import/sys/powerpc/powerpc/trap.c =================================================================== --- projects/clang380-import/sys/powerpc/powerpc/trap.c (revision 293686) +++ projects/clang380-import/sys/powerpc/powerpc/trap.c (revision 293687) @@ -1,825 +1,827 @@ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: trap.c,v 1.58 2002/03/04 04:07:35 dbj Exp $ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#define FAULTBUF_LR 0 +/* Below matches setjmp.S */ +#define FAULTBUF_LR 21 #define FAULTBUF_R1 1 #define FAULTBUF_R2 2 -#define FAULTBUF_CR 3 -#define FAULTBUF_R13 4 +#define FAULTBUF_CR 22 +#define FAULTBUF_R14 3 static void trap_fatal(struct trapframe *frame); static void printtrap(u_int vector, struct trapframe *frame, int isfatal, int user); static int trap_pfault(struct trapframe *frame, int user); static int fix_unaligned(struct thread *td, struct trapframe *frame); static int handle_onfault(struct trapframe *frame); static void syscall(struct trapframe *frame); #ifdef __powerpc64__ void handle_kernel_slb_spill(int, register_t, register_t); static int handle_user_slb_spill(pmap_t pm, vm_offset_t addr); extern int n_slbs; #endif struct powerpc_exception { u_int vector; char *name; }; #ifdef KDTRACE_HOOKS #include int (*dtrace_invop_jump_addr)(struct trapframe *); #endif static struct powerpc_exception powerpc_exceptions[] = { { EXC_CRIT, "critical input" }, { EXC_RST, "system reset" }, { EXC_MCHK, "machine check" }, { EXC_DSI, "data storage interrupt" }, { EXC_DSE, "data segment exception" }, { EXC_ISI, "instruction storage interrupt" }, { EXC_ISE, "instruction segment exception" }, { EXC_EXI, "external interrupt" }, { EXC_ALI, "alignment" }, { EXC_PGM, "program" }, { EXC_FPU, "floating-point unavailable" }, { EXC_APU, "auxiliary proc unavailable" }, { EXC_DECR, "decrementer" }, { EXC_FIT, "fixed-interval timer" }, { EXC_WDOG, "watchdog timer" }, { EXC_SC, "system call" }, { EXC_TRC, "trace" }, { EXC_FPA, "floating-point assist" }, { EXC_DEBUG, "debug" }, { EXC_PERF, "performance monitoring" }, { EXC_VEC, "altivec unavailable" }, { EXC_VSX, "vsx unavailable" }, { EXC_ITMISS, "instruction tlb miss" }, { EXC_DLMISS, "data load tlb miss" }, { EXC_DSMISS, "data store tlb miss" }, { EXC_BPT, "instruction breakpoint" }, { EXC_SMI, "system management" }, { EXC_VECAST_G4, "altivec assist" }, { EXC_THRM, "thermal management" }, { EXC_RUNMODETRC, "run mode/trace" }, { EXC_LAST, NULL } }; static const char * trapname(u_int vector) { struct powerpc_exception *pe; for (pe = powerpc_exceptions; pe->vector != EXC_LAST; pe++) { if (pe->vector == vector) return (pe->name); } return ("unknown"); } void trap(struct trapframe *frame) { struct thread *td; struct proc *p; #ifdef KDTRACE_HOOKS uint32_t inst; #endif int sig, type, user; u_int ucode; ksiginfo_t ksi; PCPU_INC(cnt.v_trap); td = curthread; p = td->td_proc; type = ucode = frame->exc; sig = 0; user = frame->srr1 & PSL_PR; CTR3(KTR_TRAP, "trap: %s type=%s (%s)", td->td_name, trapname(type), user ? "user" : "kernel"); #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. * * If the DTrace kernel module has registered a trap handler, * call it and if it returns non-zero, assume that it has * handled the trap and modified the trap frame so that this * function can return normally. */ if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type) != 0) return; #endif if (user) { td->td_pticks = 0; td->td_frame = frame; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); /* User Mode Traps */ switch (type) { case EXC_RUNMODETRC: case EXC_TRC: frame->srr1 &= ~PSL_SE; sig = SIGTRAP; ucode = TRAP_TRACE; break; #ifdef __powerpc64__ case EXC_ISE: case EXC_DSE: if (handle_user_slb_spill(&p->p_vmspace->vm_pmap, (type == EXC_ISE) ? frame->srr0 : frame->dar) != 0){ sig = SIGSEGV; ucode = SEGV_MAPERR; } break; #endif case EXC_DSI: case EXC_ISI: sig = trap_pfault(frame, 1); if (sig == SIGSEGV) ucode = SEGV_MAPERR; break; case EXC_SC: syscall(frame); break; case EXC_FPU: KASSERT((td->td_pcb->pcb_flags & PCB_FPU) != PCB_FPU, ("FPU already enabled for thread")); enable_fpu(td); break; case EXC_VEC: KASSERT((td->td_pcb->pcb_flags & PCB_VEC) != PCB_VEC, ("Altivec already enabled for thread")); enable_vec(td); break; case EXC_VSX: KASSERT((td->td_pcb->pcb_flags & PCB_VSX) != PCB_VSX, ("VSX already enabled for thread")); if (!(td->td_pcb->pcb_flags & PCB_VEC)) enable_vec(td); if (!(td->td_pcb->pcb_flags & PCB_FPU)) save_fpu(td); td->td_pcb->pcb_flags |= PCB_VSX; enable_fpu(td); break; case EXC_VECAST_E: case EXC_VECAST_G4: case EXC_VECAST_G5: /* * We get a VPU assist exception for IEEE mode * vector operations on denormalized floats. * Emulating this is a giant pain, so for now, * just switch off IEEE mode and treat them as * zero. */ save_vec(td); td->td_pcb->pcb_vec.vscr |= ALTIVEC_VSCR_NJ; enable_vec(td); break; case EXC_ALI: if (fix_unaligned(td, frame) != 0) { sig = SIGBUS; ucode = BUS_ADRALN; } else frame->srr0 += 4; break; case EXC_DEBUG: /* Single stepping */ mtspr(SPR_DBSR, mfspr(SPR_DBSR)); frame->srr1 &= ~PSL_DE; frame->cpu.booke.dbcr0 &= ~(DBCR0_IDM || DBCR0_IC); sig = SIGTRAP; ucode = TRAP_TRACE; break; case EXC_PGM: /* Identify the trap reason */ #ifdef AIM if (frame->srr1 & EXC_PGM_TRAP) { #else if (frame->cpu.booke.esr & ESR_PTR) { #endif #ifdef KDTRACE_HOOKS inst = fuword32((const void *)frame->srr0); if (inst == 0x0FFFDDDD && dtrace_pid_probe_ptr != NULL) { struct reg regs; fill_regs(td, ®s); (*dtrace_pid_probe_ptr)(®s); break; } #endif sig = SIGTRAP; ucode = TRAP_BRKPT; } else { sig = ppc_instr_emulate(frame, td->td_pcb); if (sig == SIGILL) { if (frame->srr1 & EXC_PGM_PRIV) ucode = ILL_PRVOPC; else if (frame->srr1 & EXC_PGM_ILLEGAL) ucode = ILL_ILLOPC; } else if (sig == SIGFPE) ucode = FPE_FLTINV; /* Punt for now, invalid operation. */ } break; case EXC_MCHK: /* * Note that this may not be recoverable for the user * process, depending on the type of machine check, * but it at least prevents the kernel from dying. */ sig = SIGBUS; ucode = BUS_OBJERR; break; default: trap_fatal(frame); } } else { /* Kernel Mode Traps */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { #ifdef KDTRACE_HOOKS case EXC_PGM: if (frame->srr1 & EXC_PGM_TRAP) { if (*(uint32_t *)frame->srr0 == EXC_DTRACE) { if (dtrace_invop_jump_addr != NULL) { dtrace_invop_jump_addr(frame); return; } } } break; #endif #ifdef __powerpc64__ case EXC_DSE: if ((frame->dar & SEGMENT_MASK) == USER_ADDR) { __asm __volatile ("slbmte %0, %1" :: "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); return; } break; #endif case EXC_DSI: if (trap_pfault(frame, 0) == 0) return; break; case EXC_MCHK: if (handle_onfault(frame)) return; break; default: break; } trap_fatal(frame); } if (sig != 0) { if (p->p_sysent->sv_transtrap != NULL) sig = (p->p_sysent->sv_transtrap)(sig, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = (int) ucode; /* XXX, not POSIX */ /* ksi.ksi_addr = ? */ ksi.ksi_trapno = type; trapsignal(td, &ksi); } userret(td, frame); } static void trap_fatal(struct trapframe *frame) { printtrap(frame->exc, frame, 1, (frame->srr1 & PSL_PR)); #ifdef KDB if ((debugger_on_panic || kdb_active) && kdb_trap(frame->exc, 0, frame)) return; #endif panic("%s trap", trapname(frame->exc)); } static void printtrap(u_int vector, struct trapframe *frame, int isfatal, int user) { uint16_t ver; #ifdef BOOKE vm_paddr_t pa; #endif printf("\n"); printf("%s %s trap:\n", isfatal ? "fatal" : "handled", user ? "user" : "kernel"); printf("\n"); printf(" exception = 0x%x (%s)\n", vector, trapname(vector)); switch (vector) { case EXC_DSE: case EXC_DSI: case EXC_DTMISS: printf(" virtual address = 0x%" PRIxPTR "\n", frame->dar); #ifdef AIM printf(" dsisr = 0x%lx\n", (u_long)frame->cpu.aim.dsisr); #endif break; case EXC_ISE: case EXC_ISI: case EXC_ITMISS: printf(" virtual address = 0x%" PRIxPTR "\n", frame->srr0); break; case EXC_MCHK: ver = mfpvr() >> 16; #if defined(AIM) if (MPC745X_P(ver)) printf(" msssr0 = 0x%lx\n", (u_long)mfspr(SPR_MSSSR0)); #elif defined(BOOKE) pa = mfspr(SPR_MCARU); pa = (pa << 32) | mfspr(SPR_MCAR); printf(" mcsr = 0x%lx\n", (u_long)mfspr(SPR_MCSR)); printf(" mcar = 0x%jx\n", (uintmax_t)pa); #endif break; } #ifdef BOOKE printf(" esr = 0x%" PRIxPTR "\n", frame->cpu.booke.esr); #endif printf(" srr0 = 0x%" PRIxPTR "\n", frame->srr0); printf(" srr1 = 0x%lx\n", (u_long)frame->srr1); printf(" lr = 0x%" PRIxPTR "\n", frame->lr); printf(" curthread = %p\n", curthread); if (curthread != NULL) printf(" pid = %d, comm = %s\n", curthread->td_proc->p_pid, curthread->td_name); printf("\n"); } /* * Handles a fatal fault when we have onfault state to recover. Returns * non-zero if there was onfault recovery state available. */ static int handle_onfault(struct trapframe *frame) { struct thread *td; - faultbuf *fb; + jmp_buf *fb; td = curthread; fb = td->td_pcb->pcb_onfault; if (fb != NULL) { - frame->srr0 = (*fb)[FAULTBUF_LR]; - frame->fixreg[1] = (*fb)[FAULTBUF_R1]; - frame->fixreg[2] = (*fb)[FAULTBUF_R2]; + frame->srr0 = (*fb)->_jb[FAULTBUF_LR]; + frame->fixreg[1] = (*fb)->_jb[FAULTBUF_R1]; + frame->fixreg[2] = (*fb)->_jb[FAULTBUF_R2]; frame->fixreg[3] = 1; - frame->cr = (*fb)[FAULTBUF_CR]; - bcopy(&(*fb)[FAULTBUF_R13], &frame->fixreg[13], - 19 * sizeof(register_t)); + frame->cr = (*fb)->_jb[FAULTBUF_CR]; + bcopy(&(*fb)->_jb[FAULTBUF_R14], &frame->fixreg[14], + 18 * sizeof(register_t)); + td->td_pcb->pcb_onfault = NULL; /* Returns twice, not thrice */ return (1); } return (0); } int cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) { struct proc *p; struct trapframe *frame; caddr_t params; size_t argsz; int error, n, i; p = td->td_proc; frame = td->td_frame; sa->code = frame->fixreg[0]; params = (caddr_t)(frame->fixreg + FIRSTARG); n = NARGREG; if (sa->code == SYS_syscall) { /* * code is first argument, * followed by actual args. */ sa->code = *(register_t *) params; params += sizeof(register_t); n -= 1; } else if (sa->code == SYS___syscall) { /* * Like syscall, but code is a quad, * so as to maintain quad alignment * for the rest of the args. */ if (SV_PROC_FLAG(p, SV_ILP32)) { params += sizeof(register_t); sa->code = *(register_t *) params; params += sizeof(register_t); n -= 2; } else { sa->code = *(register_t *) params; params += sizeof(register_t); n -= 1; } } if (p->p_sysent->sv_mask) sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; if (SV_PROC_FLAG(p, SV_ILP32)) { argsz = sizeof(uint32_t); for (i = 0; i < n; i++) sa->args[i] = ((u_register_t *)(params))[i] & 0xffffffff; } else { argsz = sizeof(uint64_t); for (i = 0; i < n; i++) sa->args[i] = ((u_register_t *)(params))[i]; } if (sa->narg > n) error = copyin(MOREARGS(frame->fixreg[1]), sa->args + n, (sa->narg - n) * argsz); else error = 0; #ifdef __powerpc64__ if (SV_PROC_FLAG(p, SV_ILP32) && sa->narg > n) { /* Expand the size of arguments copied from the stack */ for (i = sa->narg; i >= n; i--) sa->args[i] = ((uint32_t *)(&sa->args[n]))[i-n]; } #endif if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->fixreg[FIRSTARG + 1]; } return (error); } #include "../../kern/subr_syscall.c" void syscall(struct trapframe *frame) { struct thread *td; struct syscall_args sa; int error; td = curthread; td->td_frame = frame; #ifdef __powerpc64__ /* * Speculatively restore last user SLB segment, which we know is * invalid already, since we are likely to do copyin()/copyout(). */ __asm __volatile ("slbmte %0, %1; isync" :: "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); #endif error = syscallenter(td, &sa); syscallret(td, error, &sa); } #ifdef __powerpc64__ /* Handle kernel SLB faults -- runs in real mode, all seat belts off */ void handle_kernel_slb_spill(int type, register_t dar, register_t srr0) { struct slb *slbcache; uint64_t slbe, slbv; uint64_t esid, addr; int i; addr = (type == EXC_ISE) ? srr0 : dar; slbcache = PCPU_GET(slb); esid = (uintptr_t)addr >> ADDR_SR_SHFT; slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID; /* See if the hardware flushed this somehow (can happen in LPARs) */ for (i = 0; i < n_slbs; i++) if (slbcache[i].slbe == (slbe | (uint64_t)i)) return; /* Not in the map, needs to actually be added */ slbv = kernel_va_to_slbv(addr); if (slbcache[USER_SLB_SLOT].slbe == 0) { for (i = 0; i < n_slbs; i++) { if (i == USER_SLB_SLOT) continue; if (!(slbcache[i].slbe & SLBE_VALID)) goto fillkernslb; } if (i == n_slbs) slbcache[USER_SLB_SLOT].slbe = 1; } /* Sacrifice a random SLB entry that is not the user entry */ i = mftb() % n_slbs; if (i == USER_SLB_SLOT) i = (i+1) % n_slbs; fillkernslb: /* Write new entry */ slbcache[i].slbv = slbv; slbcache[i].slbe = slbe | (uint64_t)i; /* Trap handler will restore from cache on exit */ } static int handle_user_slb_spill(pmap_t pm, vm_offset_t addr) { struct slb *user_entry; uint64_t esid; int i; esid = (uintptr_t)addr >> ADDR_SR_SHFT; PMAP_LOCK(pm); user_entry = user_va_to_slb_entry(pm, addr); if (user_entry == NULL) { /* allocate_vsid auto-spills it */ (void)allocate_user_vsid(pm, esid, 0); } else { /* * Check that another CPU has not already mapped this. * XXX: Per-thread SLB caches would be better. */ for (i = 0; i < pm->pm_slb_len; i++) if (pm->pm_slb[i] == user_entry) break; if (i == pm->pm_slb_len) slb_insert_user(pm, user_entry); } PMAP_UNLOCK(pm); return (0); } #endif static int trap_pfault(struct trapframe *frame, int user) { vm_offset_t eva, va; struct thread *td; struct proc *p; vm_map_t map; vm_prot_t ftype; int rv; #ifdef AIM register_t user_sr; #endif td = curthread; p = td->td_proc; if (frame->exc == EXC_ISI) { eva = frame->srr0; ftype = VM_PROT_EXECUTE; if (frame->srr1 & SRR1_ISI_PFAULT) ftype |= VM_PROT_READ; } else { eva = frame->dar; #ifdef BOOKE if (frame->cpu.booke.esr & ESR_ST) #else if (frame->cpu.aim.dsisr & DSISR_STORE) #endif ftype = VM_PROT_WRITE; else ftype = VM_PROT_READ; } if (user) { KASSERT(p->p_vmspace != NULL, ("trap_pfault: vmspace NULL")); map = &p->p_vmspace->vm_map; } else { #ifdef BOOKE if (eva < VM_MAXUSER_ADDRESS) { #else if ((eva >> ADDR_SR_SHFT) == (USER_ADDR >> ADDR_SR_SHFT)) { #endif map = &p->p_vmspace->vm_map; #ifdef AIM user_sr = td->td_pcb->pcb_cpu.aim.usr_segm; eva &= ADDR_PIDX | ADDR_POFF; eva |= user_sr << ADDR_SR_SHFT; #endif } else { map = kernel_map; } } va = trunc_page(eva); /* Fault in the page. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); /* * XXXDTRACE: add dtrace_doubletrap_func here? */ if (rv == KERN_SUCCESS) return (0); if (!user && handle_onfault(frame)) return (0); return (SIGSEGV); } /* * For now, this only deals with the particular unaligned access case * that gcc tends to generate. Eventually it should handle all of the * possibilities that can happen on a 32-bit PowerPC in big-endian mode. */ static int fix_unaligned(struct thread *td, struct trapframe *frame) { struct thread *fputhread; int indicator, reg; double *fpr; indicator = EXC_ALI_OPCODE_INDICATOR(frame->cpu.aim.dsisr); switch (indicator) { case EXC_ALI_LFD: case EXC_ALI_STFD: reg = EXC_ALI_RST(frame->cpu.aim.dsisr); fpr = &td->td_pcb->pcb_fpu.fpr[reg].fpr; fputhread = PCPU_GET(fputhread); /* Juggle the FPU to ensure that we've initialized * the FPRs, and that their current state is in * the PCB. */ if (fputhread != td) { if (fputhread) save_fpu(fputhread); enable_fpu(td); } save_fpu(td); if (indicator == EXC_ALI_LFD) { if (copyin((void *)frame->dar, fpr, sizeof(double)) != 0) return (-1); enable_fpu(td); } else { if (copyout(fpr, (void *)frame->dar, sizeof(double)) != 0) return (-1); } return (0); break; } return (-1); } #ifdef KDB int db_trap_glue(struct trapframe *); /* Called from trap_subr.S */ int db_trap_glue(struct trapframe *frame) { if (!(frame->srr1 & PSL_PR) && (frame->exc == EXC_TRC || frame->exc == EXC_RUNMODETRC #ifdef AIM || (frame->exc == EXC_PGM && (frame->srr1 & EXC_PGM_TRAP)) #else || (frame->exc == EXC_DEBUG) #endif || frame->exc == EXC_BPT || frame->exc == EXC_DSI)) { int type = frame->exc; /* Ignore DTrace traps. */ if (*(uint32_t *)frame->srr0 == EXC_DTRACE) return (0); #ifdef AIM if (type == EXC_PGM && (frame->srr1 & EXC_PGM_TRAP)) { #else if (frame->cpu.booke.esr & ESR_PTR) { #endif type = T_BREAKPOINT; } return (kdb_trap(type, 0, frame)); } return (0); } #endif Index: projects/clang380-import/sys/sparc64/sparc64/elf_machdep.c =================================================================== --- projects/clang380-import/sys/sparc64/sparc64/elf_machdep.c (revision 293686) +++ projects/clang380-import/sys/sparc64/sparc64/elf_machdep.c (revision 293687) @@ -1,423 +1,424 @@ /*- * Copyright (c) 2001 Jake Burkholder. * Copyright (c) 2000 Eduardo Horvath. * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Paul Kranenburg. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * from: NetBSD: mdreloc.c,v 1.42 2008/04/28 20:23:04 martin Exp */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "linker_if.h" static struct sysentvec elf64_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = NULL, .sv_szsigcode = NULL, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; static Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_SPARCV9, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info); static Elf64_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_SPARCV9, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_oinfo); void elf64_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } /* * The following table holds for each relocation type: * - the width in bits of the memory location the relocation * applies to (not currently used) * - the number of bits the relocation value must be shifted to the * right (i.e. discard least significant bits) to fit into * the appropriate field in the instruction word. * - flags indicating whether * * the relocation involves a symbol * * the relocation is relative to the current position * * the relocation is for a GOT entry * * the relocation is relative to the load address * */ #define _RF_S 0x80000000 /* Resolve symbol */ #define _RF_A 0x40000000 /* Use addend */ #define _RF_P 0x20000000 /* Location relative */ #define _RF_G 0x10000000 /* GOT offset */ #define _RF_B 0x08000000 /* Load address relative */ #define _RF_U 0x04000000 /* Unaligned */ #define _RF_X 0x02000000 /* Bare symbols, needs proc */ #define _RF_D 0x01000000 /* Use dynamic TLS offset */ #define _RF_O 0x00800000 /* Use static TLS offset */ #define _RF_I 0x00400000 /* Use TLS object ID */ #define _RF_SZ(s) (((s) & 0xff) << 8) /* memory target size */ #define _RF_RS(s) ( (s) & 0xff) /* right shift */ static const int reloc_target_flags[] = { 0, /* NONE */ _RF_S|_RF_A| _RF_SZ(8) | _RF_RS(0), /* 8 */ _RF_S|_RF_A| _RF_SZ(16) | _RF_RS(0), /* 16 */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(0), /* 32 */ _RF_S|_RF_A|_RF_P| _RF_SZ(8) | _RF_RS(0), /* DISP_8 */ _RF_S|_RF_A|_RF_P| _RF_SZ(16) | _RF_RS(0), /* DISP_16 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(0), /* DISP_32 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(2), /* WDISP_30 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(2), /* WDISP_22 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(10), /* HI22 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* 22 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* 13 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* LO10 */ _RF_G| _RF_SZ(32) | _RF_RS(0), /* GOT10 */ _RF_G| _RF_SZ(32) | _RF_RS(0), /* GOT13 */ _RF_G| _RF_SZ(32) | _RF_RS(10), /* GOT22 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(0), /* PC10 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(10), /* PC22 */ _RF_A|_RF_P| _RF_SZ(32) | _RF_RS(2), /* WPLT30 */ _RF_SZ(32) | _RF_RS(0), /* COPY */ _RF_S|_RF_A| _RF_SZ(64) | _RF_RS(0), /* GLOB_DAT */ _RF_SZ(32) | _RF_RS(0), /* JMP_SLOT */ _RF_A| _RF_B| _RF_SZ(64) | _RF_RS(0), /* RELATIVE */ _RF_S|_RF_A| _RF_U| _RF_SZ(32) | _RF_RS(0), /* UA_32 */ _RF_A| _RF_SZ(32) | _RF_RS(0), /* PLT32 */ _RF_A| _RF_SZ(32) | _RF_RS(10), /* HIPLT22 */ _RF_A| _RF_SZ(32) | _RF_RS(0), /* LOPLT10 */ _RF_A|_RF_P| _RF_SZ(32) | _RF_RS(0), /* PCPLT32 */ _RF_A|_RF_P| _RF_SZ(32) | _RF_RS(10), /* PCPLT22 */ _RF_A|_RF_P| _RF_SZ(32) | _RF_RS(0), /* PCPLT10 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* 10 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* 11 */ _RF_S|_RF_A|_RF_X| _RF_SZ(64) | _RF_RS(0), /* 64 */ _RF_S|_RF_A|/*extra*/ _RF_SZ(32) | _RF_RS(0), /* OLO10 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(42), /* HH22 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(32), /* HM10 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(10), /* LM22 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(42), /* PC_HH22 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(32), /* PC_HM10 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(10), /* PC_LM22 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(2), /* WDISP16 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(2), /* WDISP19 */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(0), /* GLOB_JMP */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* 7 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* 5 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* 6 */ _RF_S|_RF_A|_RF_P| _RF_SZ(64) | _RF_RS(0), /* DISP64 */ _RF_A| _RF_SZ(64) | _RF_RS(0), /* PLT64 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(10), /* HIX22 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* LOX10 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(22), /* H44 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(12), /* M44 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* L44 */ _RF_S|_RF_A| _RF_SZ(64) | _RF_RS(0), /* REGISTER */ _RF_S|_RF_A| _RF_U| _RF_SZ(64) | _RF_RS(0), /* UA64 */ _RF_S|_RF_A| _RF_U| _RF_SZ(16) | _RF_RS(0), /* UA16 */ #if 0 /* TLS */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(10), /* GD_HI22 */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(0), /* GD_LO10 */ 0, /* GD_ADD */ _RF_A|_RF_P| _RF_SZ(32) | _RF_RS(2), /* GD_CALL */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(10), /* LDM_HI22 */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(0), /* LDM_LO10 */ 0, /* LDM_ADD */ _RF_A|_RF_P| _RF_SZ(32) | _RF_RS(2), /* LDM_CALL */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(10), /* LDO_HIX22 */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(0), /* LDO_LOX10 */ 0, /* LDO_ADD */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(10), /* IE_HI22 */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(0), /* IE_LO10 */ 0, /* IE_LD */ 0, /* IE_LDX */ 0, /* IE_ADD */ _RF_S|_RF_A| _RF_O| _RF_SZ(32) | _RF_RS(10), /* LE_HIX22 */ _RF_S|_RF_A| _RF_O| _RF_SZ(32) | _RF_RS(0), /* LE_LOX10 */ _RF_S| _RF_I| _RF_SZ(32) | _RF_RS(0), /* DTPMOD32 */ _RF_S| _RF_I| _RF_SZ(64) | _RF_RS(0), /* DTPMOD64 */ _RF_S|_RF_A| _RF_D| _RF_SZ(32) | _RF_RS(0), /* DTPOFF32 */ _RF_S|_RF_A| _RF_D| _RF_SZ(64) | _RF_RS(0), /* DTPOFF64 */ _RF_S|_RF_A| _RF_O| _RF_SZ(32) | _RF_RS(0), /* TPOFF32 */ _RF_S|_RF_A| _RF_O| _RF_SZ(64) | _RF_RS(0) /* TPOFF64 */ #endif }; #if 0 static const char *const reloc_names[] = { "NONE", "8", "16", "32", "DISP_8", "DISP_16", "DISP_32", "WDISP_30", "WDISP_22", "HI22", "22", "13", "LO10", "GOT10", "GOT13", "GOT22", "PC10", "PC22", "WPLT30", "COPY", "GLOB_DAT", "JMP_SLOT", "RELATIVE", "UA_32", "PLT32", "HIPLT22", "LOPLT10", "LOPLT10", "PCPLT22", "PCPLT32", "10", "11", "64", "OLO10", "HH22", "HM10", "LM22", "PC_HH22", "PC_HM10", "PC_LM22", "WDISP16", "WDISP19", "GLOB_JMP", "7", "5", "6", "DISP64", "PLT64", "HIX22", "LOX10", "H44", "M44", "L44", "REGISTER", "UA64", "UA16", "GD_HI22", "GD_LO10", "GD_ADD", "GD_CALL", "LDM_HI22", "LDMO10", "LDM_ADD", "LDM_CALL", "LDO_HIX22", "LDO_LOX10", "LDO_ADD", "IE_HI22", "IE_LO10", "IE_LD", "IE_LDX", "IE_ADD", "LE_HIX22", "LE_LOX10", "DTPMOD32", "DTPMOD64", "DTPOFF32", "DTPOFF64", "TPOFF32", "TPOFF64" }; #endif #define RELOC_RESOLVE_SYMBOL(t) ((reloc_target_flags[t] & _RF_S) != 0) #define RELOC_PC_RELATIVE(t) ((reloc_target_flags[t] & _RF_P) != 0) #define RELOC_BASE_RELATIVE(t) ((reloc_target_flags[t] & _RF_B) != 0) #define RELOC_UNALIGNED(t) ((reloc_target_flags[t] & _RF_U) != 0) #define RELOC_USE_ADDEND(t) ((reloc_target_flags[t] & _RF_A) != 0) #define RELOC_BARE_SYMBOL(t) ((reloc_target_flags[t] & _RF_X) != 0) #define RELOC_USE_TLS_DOFF(t) ((reloc_target_flags[t] & _RF_D) != 0) #define RELOC_USE_TLS_OFF(t) ((reloc_target_flags[t] & _RF_O) != 0) #define RELOC_USE_TLS_ID(t) ((reloc_target_flags[t] & _RF_I) != 0) #define RELOC_TARGET_SIZE(t) ((reloc_target_flags[t] >> 8) & 0xff) #define RELOC_VALUE_RIGHTSHIFT(t) (reloc_target_flags[t] & 0xff) static const long reloc_target_bitmask[] = { #define _BM(x) (~(-(1ULL << (x)))) 0, /* NONE */ _BM(8), _BM(16), _BM(32), /* 8, 16, 32 */ _BM(8), _BM(16), _BM(32), /* DISP8, DISP16, DISP32 */ _BM(30), _BM(22), /* WDISP30, WDISP22 */ _BM(22), _BM(22), /* HI22, 22 */ _BM(13), _BM(10), /* 13, LO10 */ _BM(10), _BM(13), _BM(22), /* GOT10, GOT13, GOT22 */ _BM(10), _BM(22), /* PC10, PC22 */ _BM(30), 0, /* WPLT30, COPY */ _BM(32), _BM(32), _BM(32), /* GLOB_DAT, JMP_SLOT, RELATIVE */ _BM(32), _BM(32), /* UA32, PLT32 */ _BM(22), _BM(10), /* HIPLT22, LOPLT10 */ _BM(32), _BM(22), _BM(10), /* PCPLT32, PCPLT22, PCPLT10 */ _BM(10), _BM(11), -1, /* 10, 11, 64 */ _BM(13), _BM(22), /* OLO10, HH22 */ _BM(10), _BM(22), /* HM10, LM22 */ _BM(22), _BM(10), _BM(22), /* PC_HH22, PC_HM10, PC_LM22 */ _BM(16), _BM(19), /* WDISP16, WDISP19 */ -1, /* GLOB_JMP */ _BM(7), _BM(5), _BM(6), /* 7, 5, 6 */ -1, -1, /* DISP64, PLT64 */ _BM(22), _BM(13), /* HIX22, LOX10 */ _BM(22), _BM(10), _BM(13), /* H44, M44, L44 */ -1, -1, _BM(16), /* REGISTER, UA64, UA16 */ #if 0 _BM(22), _BM(10), 0, _BM(30), /* GD_HI22, GD_LO10, GD_ADD, GD_CALL */ _BM(22), _BM(10), 0, /* LDM_HI22, LDMO10, LDM_ADD */ _BM(30), /* LDM_CALL */ _BM(22), _BM(10), 0, /* LDO_HIX22, LDO_LOX10, LDO_ADD */ _BM(22), _BM(10), 0, 0, /* IE_HI22, IE_LO10, IE_LD, IE_LDX */ 0, /* IE_ADD */ _BM(22), _BM(13), /* LE_HIX22, LE_LOX10 */ _BM(32), -1, /* DTPMOD32, DTPMOD64 */ _BM(32), -1, /* DTPOFF32, DTPOFF64 */ _BM(32), -1 /* TPOFF32, TPOFF64 */ #endif #undef _BM }; #define RELOC_VALUE_BITMASK(t) (reloc_target_bitmask[t]) int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup __unused) { const Elf_Rela *rela; Elf_Addr *where; if (type != ELF_RELOC_RELA) return (-1); rela = (const Elf_Rela *)data; if (ELF64_R_TYPE_ID(rela->r_info) != R_SPARC_RELATIVE) return (-1); where = (Elf_Addr *)(relocbase + rela->r_offset); *where = elf_relocaddr(lf, rela->r_addend + relocbase); return (0); } /* Process one elf relocation with addend. */ int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { const Elf_Rela *rela; Elf_Word *where32; Elf_Addr *where; Elf_Size rtype, symidx; Elf_Addr value; Elf_Addr mask; Elf_Addr addr; int error; if (type != ELF_RELOC_RELA) return (-1); rela = (const Elf_Rela *)data; where = (Elf_Addr *)(relocbase + rela->r_offset); where32 = (Elf_Word *)where; rtype = ELF64_R_TYPE_ID(rela->r_info); symidx = ELF_R_SYM(rela->r_info); if (rtype == R_SPARC_NONE || rtype == R_SPARC_RELATIVE) return (0); if (rtype == R_SPARC_JMP_SLOT || rtype == R_SPARC_COPY || rtype >= sizeof(reloc_target_bitmask) / sizeof(*reloc_target_bitmask)) { printf("kldload: unexpected relocation type %ld\n", rtype); return (-1); } if (RELOC_UNALIGNED(rtype)) { printf("kldload: unaligned relocation type %ld\n", rtype); return (-1); } value = rela->r_addend; if (RELOC_RESOLVE_SYMBOL(rtype)) { error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); value += addr; if (RELOC_BARE_SYMBOL(rtype)) value = elf_relocaddr(lf, value); } if (rtype == R_SPARC_OLO10) value = (value & 0x3ff) + ELF64_R_TYPE_DATA(rela->r_info); if (rtype == R_SPARC_HIX22) value ^= 0xffffffffffffffff; if (RELOC_PC_RELATIVE(rtype)) value -= (Elf_Addr)where; if (RELOC_BASE_RELATIVE(rtype)) value = elf_relocaddr(lf, value + relocbase); mask = RELOC_VALUE_BITMASK(rtype); value >>= RELOC_VALUE_RIGHTSHIFT(rtype); value &= mask; if (rtype == R_SPARC_LOX10) value |= 0x1c00; if (RELOC_TARGET_SIZE(rtype) > 32) { *where &= ~mask; *where |= value; } else { *where32 &= ~mask; *where32 |= value; } return (0); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: projects/clang380-import/sys/sys/mbuf.h =================================================================== --- projects/clang380-import/sys/sys/mbuf.h (revision 293686) +++ projects/clang380-import/sys/sys/mbuf.h (revision 293687) @@ -1,1311 +1,1313 @@ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mbuf.h 8.5 (Berkeley) 2/19/95 * $FreeBSD$ */ #ifndef _SYS_MBUF_H_ #define _SYS_MBUF_H_ /* XXX: These includes suck. Sorry! */ #include #ifdef _KERNEL #include #include #ifdef WITNESS #include #endif #endif /* * Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead. * An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in * sys/param.h), which has no additional overhead and is used instead of the * internal data area; this is done when at least MINCLSIZE of data must be * stored. Additionally, it is possible to allocate a separate buffer * externally and attach it to the mbuf in a way similar to that of mbuf * clusters. * * NB: These calculation do not take actual compiler-induced alignment and * padding inside the complete struct mbuf into account. Appropriate * attention is required when changing members of struct mbuf. * * MLEN is data length in a normal mbuf. * MHLEN is data length in an mbuf with pktheader. * MINCLSIZE is a smallest amount of data that should be put into cluster. * * Compile-time assertions in uipc_mbuf.c test these values to ensure that * they are sensible. */ struct mbuf; #define MHSIZE offsetof(struct mbuf, m_dat) #define MPKTHSIZE offsetof(struct mbuf, m_pktdat) #define MLEN ((int)(MSIZE - MHSIZE)) #define MHLEN ((int)(MSIZE - MPKTHSIZE)) #define MINCLSIZE (MHLEN + 1) #ifdef _KERNEL /*- * Macro for type conversion: convert mbuf pointer to data pointer of correct * type: * * mtod(m, t) -- Convert mbuf pointer to data pointer of correct type. * mtodo(m, o) -- Same as above but with offset 'o' into data. */ #define mtod(m, t) ((t)((m)->m_data)) #define mtodo(m, o) ((void *)(((m)->m_data) + (o))) /* * Argument structure passed to UMA routines during mbuf and packet * allocations. */ struct mb_args { int flags; /* Flags for mbuf being allocated */ short type; /* Type of mbuf being allocated */ }; #endif /* _KERNEL */ /* * Packet tag structure (see below for details). */ struct m_tag { SLIST_ENTRY(m_tag) m_tag_link; /* List of packet tags */ u_int16_t m_tag_id; /* Tag ID */ u_int16_t m_tag_len; /* Length of data */ u_int32_t m_tag_cookie; /* ABI/Module ID */ void (*m_tag_free)(struct m_tag *); }; /* * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set. * Size ILP32: 48 * LP64: 56 * Compile-time assertions in uipc_mbuf.c test these values to ensure that * they are correct. */ struct pkthdr { struct ifnet *rcvif; /* rcv interface */ SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ int32_t len; /* total packet length */ /* Layer crossing persistent information. */ uint32_t flowid; /* packet's 4-tuple system */ uint64_t csum_flags; /* checksum and offload features */ uint16_t fibnum; /* this packet should use this fib */ uint8_t cosqos; /* class/quality of service */ uint8_t rsstype; /* hash type */ uint8_t l2hlen; /* layer 2 header length */ uint8_t l3hlen; /* layer 3 header length */ uint8_t l4hlen; /* layer 4 header length */ uint8_t l5hlen; /* layer 5 header length */ union { uint8_t eight[8]; uint16_t sixteen[4]; uint32_t thirtytwo[2]; uint64_t sixtyfour[1]; uintptr_t unintptr[1]; void *ptr; } PH_per; /* Layer specific non-persistent local storage for reassembly, etc. */ union { uint8_t eight[8]; uint16_t sixteen[4]; uint32_t thirtytwo[2]; uint64_t sixtyfour[1]; uintptr_t unintptr[1]; void *ptr; } PH_loc; }; #define ether_vtag PH_per.sixteen[0] #define PH_vt PH_per #define vt_nrecs sixteen[0] #define tso_segsz PH_per.sixteen[1] #define csum_phsum PH_per.sixteen[2] #define csum_data PH_per.thirtytwo[1] /* * Description of external storage mapped into mbuf; valid only if M_EXT is * set. * Size ILP32: 28 * LP64: 48 * Compile-time assertions in uipc_mbuf.c test these values to ensure that * they are correct. */ struct m_ext { volatile u_int *ext_cnt; /* pointer to ref count info */ caddr_t ext_buf; /* start of buffer */ uint32_t ext_size; /* size of buffer, for ext_free */ uint32_t ext_type:8, /* type of external storage */ ext_flags:24; /* external storage mbuf flags */ void (*ext_free) /* free routine if not the usual */ (struct mbuf *, void *, void *); void *ext_arg1; /* optional argument pointer */ void *ext_arg2; /* optional argument pointer */ }; /* * The core of the mbuf object along with some shortcut defines for practical * purposes. */ struct mbuf { /* * Header present at the beginning of every mbuf. * Size ILP32: 24 * LP64: 32 * Compile-time assertions in uipc_mbuf.c test these values to ensure * that they are correct. */ union { /* next buffer in chain */ struct mbuf *m_next; SLIST_ENTRY(mbuf) m_slist; STAILQ_ENTRY(mbuf) m_stailq; }; union { /* next chain in queue/record */ struct mbuf *m_nextpkt; SLIST_ENTRY(mbuf) m_slistpkt; STAILQ_ENTRY(mbuf) m_stailqpkt; }; caddr_t m_data; /* location of data */ int32_t m_len; /* amount of data in this mbuf */ uint32_t m_type:8, /* type of data in this mbuf */ m_flags:24; /* flags; see below */ #if !defined(__LP64__) uint32_t m_pad; /* pad for 64bit alignment */ #endif /* * A set of optional headers (packet header, external storage header) * and internal data storage. Historically, these arrays were sized * to MHLEN (space left after a packet header) and MLEN (space left * after only a regular mbuf header); they are now variable size in * order to support future work on variable-size mbufs. */ union { struct { struct pkthdr m_pkthdr; /* M_PKTHDR set */ union { struct m_ext m_ext; /* M_EXT set */ char m_pktdat[0]; }; }; char m_dat[0]; /* !M_PKTHDR, !M_EXT */ }; }; /* * mbuf flags of global significance and layer crossing. * Those of only protocol/layer specific significance are to be mapped * to M_PROTO[1-12] and cleared at layer handoff boundaries. * NB: Limited to the lower 24 bits. */ #define M_EXT 0x00000001 /* has associated external storage */ #define M_PKTHDR 0x00000002 /* start of record */ #define M_EOR 0x00000004 /* end of record */ #define M_RDONLY 0x00000008 /* associated data is marked read-only */ #define M_BCAST 0x00000010 /* send/received as link-level broadcast */ #define M_MCAST 0x00000020 /* send/received as link-level multicast */ #define M_PROMISC 0x00000040 /* packet was not for us */ #define M_VLANTAG 0x00000080 /* ether_vtag is valid */ #define M_UNUSED_8 0x00000100 /* --available-- */ #define M_NOFREE 0x00000200 /* do not free mbuf, embedded in cluster */ #define M_PROTO1 0x00001000 /* protocol-specific */ #define M_PROTO2 0x00002000 /* protocol-specific */ #define M_PROTO3 0x00004000 /* protocol-specific */ #define M_PROTO4 0x00008000 /* protocol-specific */ #define M_PROTO5 0x00010000 /* protocol-specific */ #define M_PROTO6 0x00020000 /* protocol-specific */ #define M_PROTO7 0x00040000 /* protocol-specific */ #define M_PROTO8 0x00080000 /* protocol-specific */ #define M_PROTO9 0x00100000 /* protocol-specific */ #define M_PROTO10 0x00200000 /* protocol-specific */ #define M_PROTO11 0x00400000 /* protocol-specific */ #define M_PROTO12 0x00800000 /* protocol-specific */ /* * Flags to purge when crossing layers. */ #define M_PROTOFLAGS \ (M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8|\ M_PROTO9|M_PROTO10|M_PROTO11|M_PROTO12) /* * Flags preserved when copying m_pkthdr. */ #define M_COPYFLAGS \ (M_PKTHDR|M_EOR|M_RDONLY|M_BCAST|M_MCAST|M_PROMISC|M_VLANTAG| \ M_PROTOFLAGS) /* * Mbuf flag description for use with printf(9) %b identifier. */ #define M_FLAG_BITS \ "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_BCAST\6M_MCAST" \ "\7M_PROMISC\10M_VLANTAG" #define M_FLAG_PROTOBITS \ "\15M_PROTO1\16M_PROTO2\17M_PROTO3\20M_PROTO4\21M_PROTO5" \ "\22M_PROTO6\23M_PROTO7\24M_PROTO8\25M_PROTO9\26M_PROTO10" \ "\27M_PROTO11\30M_PROTO12" #define M_FLAG_PRINTF (M_FLAG_BITS M_FLAG_PROTOBITS) /* * Network interface cards are able to hash protocol fields (such as IPv4 * addresses and TCP port numbers) classify packets into flows. These flows * can then be used to maintain ordering while delivering packets to the OS * via parallel input queues, as well as to provide a stateless affinity * model. NIC drivers can pass up the hash via m->m_pkthdr.flowid, and set * m_flag fields to indicate how the hash should be interpreted by the * network stack. * * Most NICs support RSS, which provides ordering and explicit affinity, and * use the hash m_flag bits to indicate what header fields were covered by * the hash. M_HASHTYPE_OPAQUE can be set by non-RSS cards or configurations * that provide an opaque flow identifier, allowing for ordering and * distribution without explicit affinity. */ /* Microsoft RSS standard hash types */ #define M_HASHTYPE_NONE 0 #define M_HASHTYPE_RSS_IPV4 1 /* IPv4 2-tuple */ #define M_HASHTYPE_RSS_TCP_IPV4 2 /* TCPv4 4-tuple */ #define M_HASHTYPE_RSS_IPV6 3 /* IPv6 2-tuple */ #define M_HASHTYPE_RSS_TCP_IPV6 4 /* TCPv6 4-tuple */ #define M_HASHTYPE_RSS_IPV6_EX 5 /* IPv6 2-tuple + ext hdrs */ #define M_HASHTYPE_RSS_TCP_IPV6_EX 6 /* TCPv6 4-tiple + ext hdrs */ /* Non-standard RSS hash types */ #define M_HASHTYPE_RSS_UDP_IPV4 7 /* IPv4 UDP 4-tuple */ #define M_HASHTYPE_RSS_UDP_IPV4_EX 8 /* IPv4 UDP 4-tuple + ext hdrs */ #define M_HASHTYPE_RSS_UDP_IPV6 9 /* IPv6 UDP 4-tuple */ #define M_HASHTYPE_RSS_UDP_IPV6_EX 10 /* IPv6 UDP 4-tuple + ext hdrs */ #define M_HASHTYPE_OPAQUE 255 /* ordering, not affinity */ #define M_HASHTYPE_CLEAR(m) ((m)->m_pkthdr.rsstype = 0) #define M_HASHTYPE_GET(m) ((m)->m_pkthdr.rsstype) #define M_HASHTYPE_SET(m, v) ((m)->m_pkthdr.rsstype = (v)) #define M_HASHTYPE_TEST(m, v) (M_HASHTYPE_GET(m) == (v)) /* * COS/QOS class and quality of service tags. * It uses DSCP code points as base. */ #define QOS_DSCP_CS0 0x00 #define QOS_DSCP_DEF QOS_DSCP_CS0 #define QOS_DSCP_CS1 0x20 #define QOS_DSCP_AF11 0x28 #define QOS_DSCP_AF12 0x30 #define QOS_DSCP_AF13 0x38 #define QOS_DSCP_CS2 0x40 #define QOS_DSCP_AF21 0x48 #define QOS_DSCP_AF22 0x50 #define QOS_DSCP_AF23 0x58 #define QOS_DSCP_CS3 0x60 #define QOS_DSCP_AF31 0x68 #define QOS_DSCP_AF32 0x70 #define QOS_DSCP_AF33 0x78 #define QOS_DSCP_CS4 0x80 #define QOS_DSCP_AF41 0x88 #define QOS_DSCP_AF42 0x90 #define QOS_DSCP_AF43 0x98 #define QOS_DSCP_CS5 0xa0 #define QOS_DSCP_EF 0xb8 #define QOS_DSCP_CS6 0xc0 #define QOS_DSCP_CS7 0xe0 /* * External mbuf storage buffer types. */ #define EXT_CLUSTER 1 /* mbuf cluster */ -#define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */ +#define EXT_SFBUF 2 /* sendfile(2)'s sf_buf */ #define EXT_JUMBOP 3 /* jumbo cluster page sized */ #define EXT_JUMBO9 4 /* jumbo cluster 9216 bytes */ #define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */ #define EXT_PACKET 6 /* mbuf+cluster from packet zone */ #define EXT_MBUF 7 /* external mbuf reference (M_IOVEC) */ +#define EXT_SFBUF_NOCACHE 8 /* sendfile(2)'s sf_buf not to be cached */ #define EXT_VENDOR1 224 /* for vendor-internal use */ #define EXT_VENDOR2 225 /* for vendor-internal use */ #define EXT_VENDOR3 226 /* for vendor-internal use */ #define EXT_VENDOR4 227 /* for vendor-internal use */ #define EXT_EXP1 244 /* for experimental use */ #define EXT_EXP2 245 /* for experimental use */ #define EXT_EXP3 246 /* for experimental use */ #define EXT_EXP4 247 /* for experimental use */ #define EXT_NET_DRV 252 /* custom ext_buf provided by net driver(s) */ #define EXT_MOD_TYPE 253 /* custom module's ext_buf type */ #define EXT_DISPOSABLE 254 /* can throw this buffer away w/page flipping */ #define EXT_EXTREF 255 /* has externally maintained ext_cnt ptr */ /* * Flags for external mbuf buffer types. * NB: limited to the lower 24 bits. */ #define EXT_FLAG_EMBREF 0x000001 /* embedded ext_cnt, notyet */ #define EXT_FLAG_EXTREF 0x000002 /* external ext_cnt, notyet */ #define EXT_FLAG_NOFREE 0x000010 /* don't free mbuf to pool, notyet */ #define EXT_FLAG_VENDOR1 0x010000 /* for vendor-internal use */ #define EXT_FLAG_VENDOR2 0x020000 /* for vendor-internal use */ #define EXT_FLAG_VENDOR3 0x040000 /* for vendor-internal use */ #define EXT_FLAG_VENDOR4 0x080000 /* for vendor-internal use */ #define EXT_FLAG_EXP1 0x100000 /* for experimental use */ #define EXT_FLAG_EXP2 0x200000 /* for experimental use */ #define EXT_FLAG_EXP3 0x400000 /* for experimental use */ #define EXT_FLAG_EXP4 0x800000 /* for experimental use */ /* * EXT flag description for use with printf(9) %b identifier. */ #define EXT_FLAG_BITS \ "\20\1EXT_FLAG_EMBREF\2EXT_FLAG_EXTREF\5EXT_FLAG_NOFREE" \ "\21EXT_FLAG_VENDOR1\22EXT_FLAG_VENDOR2\23EXT_FLAG_VENDOR3" \ "\24EXT_FLAG_VENDOR4\25EXT_FLAG_EXP1\26EXT_FLAG_EXP2\27EXT_FLAG_EXP3" \ "\30EXT_FLAG_EXP4" /* * External reference/free functions. */ void sf_ext_ref(void *, void *); void sf_ext_free(void *, void *); +void sf_ext_free_nocache(void *, void *); /* * Flags indicating checksum, segmentation and other offload work to be * done, or already done, by hardware or lower layers. It is split into * separate inbound and outbound flags. * * Outbound flags that are set by upper protocol layers requesting lower * layers, or ideally the hardware, to perform these offloading tasks. * For outbound packets this field and its flags can be directly tested * against ifnet if_hwassist. */ #define CSUM_IP 0x00000001 /* IP header checksum offload */ #define CSUM_IP_UDP 0x00000002 /* UDP checksum offload */ #define CSUM_IP_TCP 0x00000004 /* TCP checksum offload */ #define CSUM_IP_SCTP 0x00000008 /* SCTP checksum offload */ #define CSUM_IP_TSO 0x00000010 /* TCP segmentation offload */ #define CSUM_IP_ISCSI 0x00000020 /* iSCSI checksum offload */ #define CSUM_IP6_UDP 0x00000200 /* UDP checksum offload */ #define CSUM_IP6_TCP 0x00000400 /* TCP checksum offload */ #define CSUM_IP6_SCTP 0x00000800 /* SCTP checksum offload */ #define CSUM_IP6_TSO 0x00001000 /* TCP segmentation offload */ #define CSUM_IP6_ISCSI 0x00002000 /* iSCSI checksum offload */ /* Inbound checksum support where the checksum was verified by hardware. */ #define CSUM_L3_CALC 0x01000000 /* calculated layer 3 csum */ #define CSUM_L3_VALID 0x02000000 /* checksum is correct */ #define CSUM_L4_CALC 0x04000000 /* calculated layer 4 csum */ #define CSUM_L4_VALID 0x08000000 /* checksum is correct */ #define CSUM_L5_CALC 0x10000000 /* calculated layer 5 csum */ #define CSUM_L5_VALID 0x20000000 /* checksum is correct */ #define CSUM_COALESED 0x40000000 /* contains merged segments */ /* * CSUM flag description for use with printf(9) %b identifier. */ #define CSUM_BITS \ "\20\1CSUM_IP\2CSUM_IP_UDP\3CSUM_IP_TCP\4CSUM_IP_SCTP\5CSUM_IP_TSO" \ "\6CSUM_IP_ISCSI" \ "\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP\15CSUM_IP6_TSO" \ "\16CSUM_IP6_ISCSI" \ "\31CSUM_L3_CALC\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID" \ "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESED" /* CSUM flags compatibility mappings. */ #define CSUM_IP_CHECKED CSUM_L3_CALC #define CSUM_IP_VALID CSUM_L3_VALID #define CSUM_DATA_VALID CSUM_L4_VALID #define CSUM_PSEUDO_HDR CSUM_L4_CALC #define CSUM_SCTP_VALID CSUM_L4_VALID #define CSUM_DELAY_DATA (CSUM_TCP|CSUM_UDP) #define CSUM_DELAY_IP CSUM_IP /* Only v4, no v6 IP hdr csum */ #define CSUM_DELAY_DATA_IPV6 (CSUM_TCP_IPV6|CSUM_UDP_IPV6) #define CSUM_DATA_VALID_IPV6 CSUM_DATA_VALID #define CSUM_TCP CSUM_IP_TCP #define CSUM_UDP CSUM_IP_UDP #define CSUM_SCTP CSUM_IP_SCTP #define CSUM_TSO (CSUM_IP_TSO|CSUM_IP6_TSO) #define CSUM_UDP_IPV6 CSUM_IP6_UDP #define CSUM_TCP_IPV6 CSUM_IP6_TCP #define CSUM_SCTP_IPV6 CSUM_IP6_SCTP /* * mbuf types describing the content of the mbuf (including external storage). */ #define MT_NOTMBUF 0 /* USED INTERNALLY ONLY! Object is not mbuf */ #define MT_DATA 1 /* dynamic (data) allocation */ #define MT_HEADER MT_DATA /* packet header, use M_PKTHDR instead */ #define MT_VENDOR1 4 /* for vendor-internal use */ #define MT_VENDOR2 5 /* for vendor-internal use */ #define MT_VENDOR3 6 /* for vendor-internal use */ #define MT_VENDOR4 7 /* for vendor-internal use */ #define MT_SONAME 8 /* socket name */ #define MT_EXP1 9 /* for experimental use */ #define MT_EXP2 10 /* for experimental use */ #define MT_EXP3 11 /* for experimental use */ #define MT_EXP4 12 /* for experimental use */ #define MT_CONTROL 14 /* extra-data protocol message */ #define MT_OOBDATA 15 /* expedited data */ #define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ #define MT_NOINIT 255 /* Not a type but a flag to allocate a non-initialized mbuf */ /* * String names of mbuf-related UMA(9) and malloc(9) types. Exposed to * !_KERNEL so that monitoring tools can look up the zones with * libmemstat(3). */ #define MBUF_MEM_NAME "mbuf" #define MBUF_CLUSTER_MEM_NAME "mbuf_cluster" #define MBUF_PACKET_MEM_NAME "mbuf_packet" #define MBUF_JUMBOP_MEM_NAME "mbuf_jumbo_page" #define MBUF_JUMBO9_MEM_NAME "mbuf_jumbo_9k" #define MBUF_JUMBO16_MEM_NAME "mbuf_jumbo_16k" #define MBUF_TAG_MEM_NAME "mbuf_tag" #define MBUF_EXTREFCNT_MEM_NAME "mbuf_ext_refcnt" #ifdef _KERNEL #ifdef WITNESS #define MBUF_CHECKSLEEP(how) do { \ if (how == M_WAITOK) \ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, \ "Sleeping in \"%s\"", __func__); \ } while (0) #else #define MBUF_CHECKSLEEP(how) #endif /* * Network buffer allocation API * * The rest of it is defined in kern/kern_mbuf.c */ extern uma_zone_t zone_mbuf; extern uma_zone_t zone_clust; extern uma_zone_t zone_pack; extern uma_zone_t zone_jumbop; extern uma_zone_t zone_jumbo9; extern uma_zone_t zone_jumbo16; extern uma_zone_t zone_ext_refcnt; void mb_dupcl(struct mbuf *, const struct mbuf *); void mb_free_ext(struct mbuf *); int m_pkthdr_init(struct mbuf *, int); static __inline int m_gettype(int size) { int type; switch (size) { case MSIZE: type = EXT_MBUF; break; case MCLBYTES: type = EXT_CLUSTER; break; #if MJUMPAGESIZE != MCLBYTES case MJUMPAGESIZE: type = EXT_JUMBOP; break; #endif case MJUM9BYTES: type = EXT_JUMBO9; break; case MJUM16BYTES: type = EXT_JUMBO16; break; default: panic("%s: invalid cluster size %d", __func__, size); } return (type); } /* * Associated an external reference counted buffer with an mbuf. */ static __inline void m_extaddref(struct mbuf *m, caddr_t buf, u_int size, u_int *ref_cnt, void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2) { KASSERT(ref_cnt != NULL, ("%s: ref_cnt not provided", __func__)); atomic_add_int(ref_cnt, 1); m->m_flags |= M_EXT; m->m_ext.ext_buf = buf; m->m_ext.ext_cnt = ref_cnt; m->m_data = m->m_ext.ext_buf; m->m_ext.ext_size = size; m->m_ext.ext_free = freef; m->m_ext.ext_arg1 = arg1; m->m_ext.ext_arg2 = arg2; m->m_ext.ext_type = EXT_EXTREF; m->m_ext.ext_flags = 0; } static __inline uma_zone_t m_getzone(int size) { uma_zone_t zone; switch (size) { case MCLBYTES: zone = zone_clust; break; #if MJUMPAGESIZE != MCLBYTES case MJUMPAGESIZE: zone = zone_jumbop; break; #endif case MJUM9BYTES: zone = zone_jumbo9; break; case MJUM16BYTES: zone = zone_jumbo16; break; default: panic("%s: invalid cluster size %d", __func__, size); } return (zone); } /* * Initialize an mbuf with linear storage. * * Inline because the consumer text overhead will be roughly the same to * initialize or call a function with this many parameters and M_PKTHDR * should go away with constant propagation for !MGETHDR. */ static __inline int m_init(struct mbuf *m, uma_zone_t zone __unused, int size __unused, int how, short type, int flags) { int error; m->m_next = NULL; m->m_nextpkt = NULL; m->m_data = m->m_dat; m->m_len = 0; m->m_flags = flags; m->m_type = type; if (flags & M_PKTHDR) { if ((error = m_pkthdr_init(m, how)) != 0) return (error); } return (0); } static __inline struct mbuf * m_get(int how, short type) { struct mb_args args; args.flags = 0; args.type = type; return (uma_zalloc_arg(zone_mbuf, &args, how)); } /* * XXX This should be deprecated, very little use. */ static __inline struct mbuf * m_getclr(int how, short type) { struct mbuf *m; struct mb_args args; args.flags = 0; args.type = type; m = uma_zalloc_arg(zone_mbuf, &args, how); if (m != NULL) bzero(m->m_data, MLEN); return (m); } static __inline struct mbuf * m_gethdr(int how, short type) { struct mb_args args; args.flags = M_PKTHDR; args.type = type; return (uma_zalloc_arg(zone_mbuf, &args, how)); } static __inline struct mbuf * m_getcl(int how, short type, int flags) { struct mb_args args; args.flags = flags; args.type = type; return (uma_zalloc_arg(zone_pack, &args, how)); } static __inline int m_clget(struct mbuf *m, int how) { KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT", __func__, m)); m->m_ext.ext_buf = (char *)NULL; uma_zalloc_arg(zone_clust, m, how); /* * On a cluster allocation failure, drain the packet zone and retry, * we might be able to loosen a few clusters up on the drain. */ if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) { zone_drain(zone_pack); uma_zalloc_arg(zone_clust, m, how); } return (m->m_flags & M_EXT); } /* * m_cljget() is different from m_clget() as it can allocate clusters without * attaching them to an mbuf. In that case the return value is the pointer * to the cluster of the requested size. If an mbuf was specified, it gets * the cluster attached to it and the return value can be safely ignored. * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. */ static __inline void * m_cljget(struct mbuf *m, int how, int size) { uma_zone_t zone; if (m != NULL) { KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT", __func__, m)); m->m_ext.ext_buf = NULL; } zone = m_getzone(size); return (uma_zalloc_arg(zone, m, how)); } static __inline void m_cljset(struct mbuf *m, void *cl, int type) { uma_zone_t zone; int size; switch (type) { case EXT_CLUSTER: size = MCLBYTES; zone = zone_clust; break; #if MJUMPAGESIZE != MCLBYTES case EXT_JUMBOP: size = MJUMPAGESIZE; zone = zone_jumbop; break; #endif case EXT_JUMBO9: size = MJUM9BYTES; zone = zone_jumbo9; break; case EXT_JUMBO16: size = MJUM16BYTES; zone = zone_jumbo16; break; default: panic("%s: unknown cluster type %d", __func__, type); break; } m->m_data = m->m_ext.ext_buf = cl; m->m_ext.ext_free = m->m_ext.ext_arg1 = m->m_ext.ext_arg2 = NULL; m->m_ext.ext_size = size; m->m_ext.ext_type = type; m->m_ext.ext_flags = 0; m->m_ext.ext_cnt = uma_find_refcnt(zone, cl); m->m_flags |= M_EXT; } static __inline void m_chtype(struct mbuf *m, short new_type) { m->m_type = new_type; } static __inline void m_clrprotoflags(struct mbuf *m) { while (m) { m->m_flags &= ~M_PROTOFLAGS; m = m->m_next; } } static __inline struct mbuf * m_last(struct mbuf *m) { while (m->m_next) m = m->m_next; return (m); } /* * mbuf, cluster, and external object allocation macros (for compatibility * purposes). */ #define M_MOVE_PKTHDR(to, from) m_move_pkthdr((to), (from)) #define MGET(m, how, type) ((m) = m_get((how), (type))) #define MGETHDR(m, how, type) ((m) = m_gethdr((how), (type))) #define MCLGET(m, how) m_clget((m), (how)) #define MEXTADD(m, buf, size, free, arg1, arg2, flags, type) \ (void )m_extadd((m), (caddr_t)(buf), (size), (free), (arg1), (arg2),\ (flags), (type), M_NOWAIT) #define m_getm(m, len, how, type) \ m_getm2((m), (len), (how), (type), M_PKTHDR) /* * Evaluate TRUE if it's safe to write to the mbuf m's data region (this can * be both the local data payload, or an external buffer area, depending on * whether M_EXT is set). */ #define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \ (!(((m)->m_flags & M_EXT)) || \ (*((m)->m_ext.ext_cnt) == 1)) ) \ /* Check if the supplied mbuf has a packet header, or else panic. */ #define M_ASSERTPKTHDR(m) \ KASSERT((m) != NULL && (m)->m_flags & M_PKTHDR, \ ("%s: no mbuf packet header!", __func__)) /* * Ensure that the supplied mbuf is a valid, non-free mbuf. * * XXX: Broken at the moment. Need some UMA magic to make it work again. */ #define M_ASSERTVALID(m) \ KASSERT((((struct mbuf *)m)->m_flags & 0) == 0, \ ("%s: attempted use of a free mbuf!", __func__)) /* * Return the address of the start of the buffer associated with an mbuf, * handling external storage, packet-header mbufs, and regular data mbufs. */ #define M_START(m) \ (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf : \ ((m)->m_flags & M_PKTHDR) ? &(m)->m_pktdat[0] : \ &(m)->m_dat[0]) /* * Return the size of the buffer associated with an mbuf, handling external * storage, packet-header mbufs, and regular data mbufs. */ #define M_SIZE(m) \ (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size : \ ((m)->m_flags & M_PKTHDR) ? MHLEN : \ MLEN) /* * Set the m_data pointer of a newly allocated mbuf to place an object of the * specified size at the end of the mbuf, longword aligned. * * NB: Historically, we had M_ALIGN(), MH_ALIGN(), and MEXT_ALIGN() as * separate macros, each asserting that it was called at the proper moment. * This required callers to themselves test the storage type and call the * right one. Rather than require callers to be aware of those layout * decisions, we centralize here. */ static __inline void m_align(struct mbuf *m, int len) { #ifdef INVARIANTS const char *msg = "%s: not a virgin mbuf"; #endif int adjust; KASSERT(m->m_data == M_START(m), (msg, __func__)); adjust = M_SIZE(m) - len; m->m_data += adjust &~ (sizeof(long)-1); } #define M_ALIGN(m, len) m_align(m, len) #define MH_ALIGN(m, len) m_align(m, len) #define MEXT_ALIGN(m, len) m_align(m, len) /* * Compute the amount of space available before the current start of data in * an mbuf. * * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. * * NB: In previous versions, M_LEADINGSPACE() would only check M_WRITABLE() * for mbufs with external storage. We now allow mbuf-embedded data to be * read-only as well. */ #define M_LEADINGSPACE(m) \ (M_WRITABLE(m) ? ((m)->m_data - M_START(m)) : 0) /* * Compute the amount of space available after the end of data in an mbuf. * * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. * * NB: In previous versions, M_TRAILINGSPACE() would only check M_WRITABLE() * for mbufs with external storage. We now allow mbuf-embedded data to be * read-only as well. */ #define M_TRAILINGSPACE(m) \ (M_WRITABLE(m) ? \ ((M_START(m) + M_SIZE(m)) - ((m)->m_data + (m)->m_len)) : 0) /* * Arrange to prepend space of size plen to mbuf m. If a new mbuf must be * allocated, how specifies whether to wait. If the allocation fails, the * original mbuf chain is freed and m is set to NULL. */ #define M_PREPEND(m, plen, how) do { \ struct mbuf **_mmp = &(m); \ struct mbuf *_mm = *_mmp; \ int _mplen = (plen); \ int __mhow = (how); \ \ MBUF_CHECKSLEEP(how); \ if (M_LEADINGSPACE(_mm) >= _mplen) { \ _mm->m_data -= _mplen; \ _mm->m_len += _mplen; \ } else \ _mm = m_prepend(_mm, _mplen, __mhow); \ if (_mm != NULL && _mm->m_flags & M_PKTHDR) \ _mm->m_pkthdr.len += _mplen; \ *_mmp = _mm; \ } while (0) /* * Change mbuf to new type. This is a relatively expensive operation and * should be avoided. */ #define MCHTYPE(m, t) m_chtype((m), (t)) /* Length to m_copy to copy all. */ #define M_COPYALL 1000000000 /* Compatibility with 4.3. */ #define m_copy(m, o, l) m_copym((m), (o), (l), M_NOWAIT) extern int max_datalen; /* MHLEN - max_hdr */ extern int max_hdr; /* Largest link + protocol header */ extern int max_linkhdr; /* Largest link-level header */ extern int max_protohdr; /* Largest protocol header */ extern int nmbclusters; /* Maximum number of clusters */ struct uio; void m_adj(struct mbuf *, int); int m_apply(struct mbuf *, int, int, int (*)(void *, void *, u_int), void *); int m_append(struct mbuf *, int, c_caddr_t); void m_cat(struct mbuf *, struct mbuf *); void m_catpkt(struct mbuf *, struct mbuf *); int m_extadd(struct mbuf *, caddr_t, u_int, void (*)(struct mbuf *, void *, void *), void *, void *, int, int, int); struct mbuf *m_collapse(struct mbuf *, int, int); void m_copyback(struct mbuf *, int, int, c_caddr_t); void m_copydata(const struct mbuf *, int, int, caddr_t); struct mbuf *m_copym(const struct mbuf *, int, int, int); struct mbuf *m_copypacket(struct mbuf *, int); void m_copy_pkthdr(struct mbuf *, struct mbuf *); struct mbuf *m_copyup(struct mbuf *, int, int); struct mbuf *m_defrag(struct mbuf *, int); void m_demote_pkthdr(struct mbuf *); void m_demote(struct mbuf *, int, int); struct mbuf *m_devget(char *, int, int, struct ifnet *, void (*)(char *, caddr_t, u_int)); struct mbuf *m_dup(const struct mbuf *, int); int m_dup_pkthdr(struct mbuf *, const struct mbuf *, int); u_int m_fixhdr(struct mbuf *); struct mbuf *m_fragment(struct mbuf *, int, int); void m_freem(struct mbuf *); struct mbuf *m_get2(int, int, short, int); struct mbuf *m_getjcl(int, short, int, int); struct mbuf *m_getm2(struct mbuf *, int, int, short, int); struct mbuf *m_getptr(struct mbuf *, int, int *); u_int m_length(struct mbuf *, struct mbuf **); int m_mbuftouio(struct uio *, struct mbuf *, int); void m_move_pkthdr(struct mbuf *, struct mbuf *); struct mbuf *m_prepend(struct mbuf *, int, int); void m_print(const struct mbuf *, int); struct mbuf *m_pulldown(struct mbuf *, int, int, int *); struct mbuf *m_pullup(struct mbuf *, int); int m_sanity(struct mbuf *, int); struct mbuf *m_split(struct mbuf *, int, int); struct mbuf *m_uiotombuf(struct uio *, int, int, int, int); struct mbuf *m_unshare(struct mbuf *, int); /*- * Network packets may have annotations attached by affixing a list of * "packet tags" to the pkthdr structure. Packet tags are dynamically * allocated semi-opaque data structures that have a fixed header * (struct m_tag) that specifies the size of the memory block and a * pair that identifies it. The cookie is a 32-bit unique * unsigned value used to identify a module or ABI. By convention this value * is chosen as the date+time that the module is created, expressed as the * number of seconds since the epoch (e.g., using date -u +'%s'). The type * value is an ABI/module-specific value that identifies a particular * annotation and is private to the module. For compatibility with systems * like OpenBSD that define packet tags w/o an ABI/module cookie, the value * PACKET_ABI_COMPAT is used to implement m_tag_get and m_tag_find * compatibility shim functions and several tag types are defined below. * Users that do not require compatibility should use a private cookie value * so that packet tag-related definitions can be maintained privately. * * Note that the packet tag returned by m_tag_alloc has the default memory * alignment implemented by malloc. To reference private data one can use a * construct like: * * struct m_tag *mtag = m_tag_alloc(...); * struct foo *p = (struct foo *)(mtag+1); * * if the alignment of struct m_tag is sufficient for referencing members of * struct foo. Otherwise it is necessary to embed struct m_tag within the * private data structure to insure proper alignment; e.g., * * struct foo { * struct m_tag tag; * ... * }; * struct foo *p = (struct foo *) m_tag_alloc(...); * struct m_tag *mtag = &p->tag; */ /* * Persistent tags stay with an mbuf until the mbuf is reclaimed. Otherwise * tags are expected to ``vanish'' when they pass through a network * interface. For most interfaces this happens normally as the tags are * reclaimed when the mbuf is free'd. However in some special cases * reclaiming must be done manually. An example is packets that pass through * the loopback interface. Also, one must be careful to do this when * ``turning around'' packets (e.g., icmp_reflect). * * To mark a tag persistent bit-or this flag in when defining the tag id. * The tag will then be treated as described above. */ #define MTAG_PERSISTENT 0x800 #define PACKET_TAG_NONE 0 /* Nadda */ /* Packet tags for use with PACKET_ABI_COMPAT. */ #define PACKET_TAG_IPSEC_IN_DONE 1 /* IPsec applied, in */ #define PACKET_TAG_IPSEC_OUT_DONE 2 /* IPsec applied, out */ #define PACKET_TAG_IPSEC_IN_CRYPTO_DONE 3 /* NIC IPsec crypto done */ #define PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED 4 /* NIC IPsec crypto req'ed */ #define PACKET_TAG_IPSEC_IN_COULD_DO_CRYPTO 5 /* NIC notifies IPsec */ #define PACKET_TAG_IPSEC_PENDING_TDB 6 /* Reminder to do IPsec */ #define PACKET_TAG_BRIDGE 7 /* Bridge processing done */ #define PACKET_TAG_GIF 8 /* GIF processing done */ #define PACKET_TAG_GRE 9 /* GRE processing done */ #define PACKET_TAG_IN_PACKET_CHECKSUM 10 /* NIC checksumming done */ #define PACKET_TAG_ENCAP 11 /* Encap. processing */ #define PACKET_TAG_IPSEC_SOCKET 12 /* IPSEC socket ref */ #define PACKET_TAG_IPSEC_HISTORY 13 /* IPSEC history */ #define PACKET_TAG_IPV6_INPUT 14 /* IPV6 input processing */ #define PACKET_TAG_DUMMYNET 15 /* dummynet info */ #define PACKET_TAG_DIVERT 17 /* divert info */ #define PACKET_TAG_IPFORWARD 18 /* ipforward info */ #define PACKET_TAG_MACLABEL (19 | MTAG_PERSISTENT) /* MAC label */ #define PACKET_TAG_PF (21 | MTAG_PERSISTENT) /* PF/ALTQ information */ #define PACKET_TAG_RTSOCKFAM 25 /* rtsock sa family */ #define PACKET_TAG_IPOPTIONS 27 /* Saved IP options */ #define PACKET_TAG_CARP 28 /* CARP info */ #define PACKET_TAG_IPSEC_NAT_T_PORTS 29 /* two uint16_t */ #define PACKET_TAG_ND_OUTGOING 30 /* ND outgoing */ /* Specific cookies and tags. */ /* Packet tag routines. */ struct m_tag *m_tag_alloc(u_int32_t, int, int, int); void m_tag_delete(struct mbuf *, struct m_tag *); void m_tag_delete_chain(struct mbuf *, struct m_tag *); void m_tag_free_default(struct m_tag *); struct m_tag *m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *); struct m_tag *m_tag_copy(struct m_tag *, int); int m_tag_copy_chain(struct mbuf *, const struct mbuf *, int); void m_tag_delete_nonpersistent(struct mbuf *); /* * Initialize the list of tags associated with an mbuf. */ static __inline void m_tag_init(struct mbuf *m) { SLIST_INIT(&m->m_pkthdr.tags); } /* * Set up the contents of a tag. Note that this does not fill in the free * method; the caller is expected to do that. * * XXX probably should be called m_tag_init, but that was already taken. */ static __inline void m_tag_setup(struct m_tag *t, u_int32_t cookie, int type, int len) { t->m_tag_id = type; t->m_tag_len = len; t->m_tag_cookie = cookie; } /* * Reclaim resources associated with a tag. */ static __inline void m_tag_free(struct m_tag *t) { (*t->m_tag_free)(t); } /* * Return the first tag associated with an mbuf. */ static __inline struct m_tag * m_tag_first(struct mbuf *m) { return (SLIST_FIRST(&m->m_pkthdr.tags)); } /* * Return the next tag in the list of tags associated with an mbuf. */ static __inline struct m_tag * m_tag_next(struct mbuf *m __unused, struct m_tag *t) { return (SLIST_NEXT(t, m_tag_link)); } /* * Prepend a tag to the list of tags associated with an mbuf. */ static __inline void m_tag_prepend(struct mbuf *m, struct m_tag *t) { SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link); } /* * Unlink a tag from the list of tags associated with an mbuf. */ static __inline void m_tag_unlink(struct mbuf *m, struct m_tag *t) { SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link); } /* These are for OpenBSD compatibility. */ #define MTAG_ABI_COMPAT 0 /* compatibility ABI */ static __inline struct m_tag * m_tag_get(int type, int length, int wait) { return (m_tag_alloc(MTAG_ABI_COMPAT, type, length, wait)); } static __inline struct m_tag * m_tag_find(struct mbuf *m, int type, struct m_tag *start) { return (SLIST_EMPTY(&m->m_pkthdr.tags) ? (struct m_tag *)NULL : m_tag_locate(m, MTAG_ABI_COMPAT, type, start)); } static __inline struct mbuf * m_free(struct mbuf *m) { struct mbuf *n = m->m_next; if ((m->m_flags & (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE)) m_tag_delete_chain(m, NULL); if (m->m_flags & M_EXT) mb_free_ext(m); else if ((m->m_flags & M_NOFREE) == 0) uma_zfree(zone_mbuf, m); return (n); } static __inline int rt_m_getfib(struct mbuf *m) { KASSERT(m->m_flags & M_PKTHDR , ("Attempt to get FIB from non header mbuf.")); return (m->m_pkthdr.fibnum); } #define M_GETFIB(_m) rt_m_getfib(_m) #define M_SETFIB(_m, _fib) do { \ KASSERT((_m)->m_flags & M_PKTHDR, ("Attempt to set FIB on non header mbuf.")); \ ((_m)->m_pkthdr.fibnum) = (_fib); \ } while (0) /* flags passed as first argument for "m_ether_tcpip_hash()" */ #define MBUF_HASHFLAG_L2 (1 << 2) #define MBUF_HASHFLAG_L3 (1 << 3) #define MBUF_HASHFLAG_L4 (1 << 4) /* mbuf hashing helper routines */ uint32_t m_ether_tcpip_hash_init(void); uint32_t m_ether_tcpip_hash(const uint32_t, const struct mbuf *, const uint32_t); #ifdef MBUF_PROFILING void m_profile(struct mbuf *m); #define M_PROFILE(m) m_profile(m) #else #define M_PROFILE(m) #endif struct mbufq { STAILQ_HEAD(, mbuf) mq_head; int mq_len; int mq_maxlen; }; static inline void mbufq_init(struct mbufq *mq, int maxlen) { STAILQ_INIT(&mq->mq_head); mq->mq_maxlen = maxlen; mq->mq_len = 0; } static inline struct mbuf * mbufq_flush(struct mbufq *mq) { struct mbuf *m; m = STAILQ_FIRST(&mq->mq_head); STAILQ_INIT(&mq->mq_head); mq->mq_len = 0; return (m); } static inline void mbufq_drain(struct mbufq *mq) { struct mbuf *m, *n; n = mbufq_flush(mq); while ((m = n) != NULL) { n = STAILQ_NEXT(m, m_stailqpkt); m_freem(m); } } static inline struct mbuf * mbufq_first(const struct mbufq *mq) { return (STAILQ_FIRST(&mq->mq_head)); } static inline struct mbuf * mbufq_last(const struct mbufq *mq) { return (STAILQ_LAST(&mq->mq_head, mbuf, m_stailqpkt)); } static inline int mbufq_full(const struct mbufq *mq) { return (mq->mq_len >= mq->mq_maxlen); } static inline int mbufq_len(const struct mbufq *mq) { return (mq->mq_len); } static inline int mbufq_enqueue(struct mbufq *mq, struct mbuf *m) { if (mbufq_full(mq)) return (ENOBUFS); STAILQ_INSERT_TAIL(&mq->mq_head, m, m_stailqpkt); mq->mq_len++; return (0); } static inline struct mbuf * mbufq_dequeue(struct mbufq *mq) { struct mbuf *m; m = STAILQ_FIRST(&mq->mq_head); if (m) { STAILQ_REMOVE_HEAD(&mq->mq_head, m_stailqpkt); m->m_nextpkt = NULL; mq->mq_len--; } return (m); } static inline void mbufq_prepend(struct mbufq *mq, struct mbuf *m) { STAILQ_INSERT_HEAD(&mq->mq_head, m, m_stailqpkt); mq->mq_len++; } #endif /* _KERNEL */ #endif /* !_SYS_MBUF_H_ */ Index: projects/clang380-import/sys/sys/sf_buf.h =================================================================== --- projects/clang380-import/sys/sys/sf_buf.h (revision 293686) +++ projects/clang380-import/sys/sys/sf_buf.h (revision 293687) @@ -1,196 +1,203 @@ /*- * Copyright (c) 2014 Gleb Smirnoff * Copyright (c) 2003-2004 Alan L. Cox * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_SF_BUF_H_ #define _SYS_SF_BUF_H_ struct sfstat { /* sendfile statistics */ + uint64_t sf_syscalls; /* times sendfile was called */ + uint64_t sf_noiocnt; /* times sendfile didn't require I/O */ uint64_t sf_iocnt; /* times sendfile had to do disk I/O */ + uint64_t sf_pages_read; /* pages read as part of a request */ + uint64_t sf_pages_valid; /* pages were valid for a request */ + uint64_t sf_rhpages_requested; /* readahead pages requested */ + uint64_t sf_rhpages_read; /* readahead pages read */ + uint64_t sf_busy; /* times aborted on a busy page */ uint64_t sf_allocfail; /* times sfbuf allocation failed */ uint64_t sf_allocwait; /* times sfbuf allocation had to wait */ }; #ifdef _KERNEL #include #include #include #include #include #include /* * Sf_bufs, or sendfile(2) buffers provide a vm_page that is mapped * into kernel address space. Note, that they aren't used only * by sendfile(2)! * * Sf_bufs could be implemented as a feature of vm_page_t, but that * would require growth of the structure. That's why they are implemented * as a separate hash indexed by vm_page address. Implementation lives in * kern/subr_sfbuf.c. Meanwhile, most 64-bit machines have a physical map, * so they don't require this hash at all, thus ignore subr_sfbuf.c. * * Different 32-bit architectures demand different requirements on sf_buf * hash and functions. They request features in machine/vmparam.h, which * enable parts of this file. They can also optionally provide helpers in * machine/sf_buf.h * * Defines are: * SFBUF This machine requires sf_buf hash. * subr_sfbuf.c should be compiled. * SFBUF_CPUSET This machine can perform SFB_CPUPRIVATE mappings, * that do no invalidate cache on the rest of CPUs. * SFBUF_NOMD This machine doesn't have machine/sf_buf.h * * SFBUF_OPTIONAL_DIRECT_MAP Value of this define is used as boolean * variable that tells whether machine is * capable of direct map or not at runtime. * SFBUF_MAP This machine provides its own sf_buf_map() and * sf_buf_unmap(). * SFBUF_PROCESS_PAGE This machine provides sf_buf_process_page() * function. */ #ifdef SFBUF #if defined(SMP) && defined(SFBUF_CPUSET) #include #endif #include struct sf_buf { LIST_ENTRY(sf_buf) list_entry; /* list of buffers */ TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */ vm_page_t m; /* currently mapped page */ vm_offset_t kva; /* va of mapping */ int ref_count; /* usage of this mapping */ #if defined(SMP) && defined(SFBUF_CPUSET) cpuset_t cpumask; /* where mapping is valid */ #endif }; #else /* ! SFBUF */ struct sf_buf; #endif /* SFBUF */ #ifndef SFBUF_NOMD #include #endif #ifdef SFBUF_OPTIONAL_DIRECT_MAP #include #endif #ifdef SFBUF struct sf_buf *sf_buf_alloc(struct vm_page *, int); void sf_buf_free(struct sf_buf *); void sf_buf_ref(struct sf_buf *); static inline vm_offset_t sf_buf_kva(struct sf_buf *sf) { #ifdef SFBUF_OPTIONAL_DIRECT_MAP if (SFBUF_OPTIONAL_DIRECT_MAP) return (SFBUF_PHYS_DMAP(VM_PAGE_TO_PHYS((vm_page_t)sf))); #endif return (sf->kva); } static inline vm_page_t sf_buf_page(struct sf_buf *sf) { #ifdef SFBUF_OPTIONAL_DIRECT_MAP if (SFBUF_OPTIONAL_DIRECT_MAP) return ((vm_page_t)sf); #endif return (sf->m); } #ifndef SFBUF_MAP #include static inline void sf_buf_map(struct sf_buf *sf, int flags) { pmap_qenter(sf->kva, &sf->m, 1); } static inline int sf_buf_unmap(struct sf_buf *sf) { return (0); } #endif /* SFBUF_MAP */ #if defined(SMP) && defined(SFBUF_CPUSET) void sf_buf_shootdown(struct sf_buf *, int); #endif #ifdef SFBUF_PROCESS_PAGE boolean_t sf_buf_process_page(vm_page_t, void (*)(struct sf_buf *)); #endif #else /* ! SFBUF */ static inline struct sf_buf * sf_buf_alloc(struct vm_page *m, int pri) { return ((struct sf_buf *)m); } static inline void sf_buf_free(struct sf_buf *sf) { } static inline void sf_buf_ref(struct sf_buf *sf) { } #endif /* SFBUF */ /* * Options to sf_buf_alloc() are specified through its flags argument. This * argument's value should be the result of a bitwise or'ing of one or more * of the following values. */ #define SFB_CATCH 1 /* Check signals if the allocation sleeps. */ #define SFB_CPUPRIVATE 2 /* Create a CPU private mapping. */ #define SFB_DEFAULT 0 #define SFB_NOWAIT 4 /* Return NULL if all bufs are used. */ extern counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; #define SFSTAT_ADD(name, val) \ counter_u64_add(sfstat[offsetof(struct sfstat, name) / sizeof(uint64_t)],\ (val)) #define SFSTAT_INC(name) SFSTAT_ADD(name, 1) #endif /* _KERNEL */ #endif /* !_SYS_SF_BUF_H_ */ Index: projects/clang380-import/sys/sys/sockbuf.h =================================================================== --- projects/clang380-import/sys/sys/sockbuf.h (revision 293686) +++ projects/clang380-import/sys/sys/sockbuf.h (revision 293687) @@ -1,252 +1,252 @@ /*- * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)socketvar.h 8.3 (Berkeley) 2/19/95 * * $FreeBSD$ */ #ifndef _SYS_SOCKBUF_H_ #define _SYS_SOCKBUF_H_ #include /* for struct selinfo */ #include #include #include #define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */ /* * Constants for sb_flags field of struct sockbuf. */ #define SB_WAIT 0x04 /* someone is waiting for data/space */ #define SB_SEL 0x08 /* someone is selecting */ #define SB_ASYNC 0x10 /* ASYNC I/O, need signals */ #define SB_UPCALL 0x20 /* someone wants an upcall */ #define SB_NOINTR 0x40 /* operations not interruptible */ #define SB_AIO 0x80 /* AIO operations queued */ #define SB_KNOTE 0x100 /* kernel note attached */ #define SB_NOCOALESCE 0x200 /* don't coalesce new data into existing mbufs */ #define SB_IN_TOE 0x400 /* socket buffer is in the middle of an operation */ #define SB_AUTOSIZE 0x800 /* automatically size socket buffer */ #define SB_STOP 0x1000 /* backpressure indicator */ #define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */ #define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */ #define SBS_RCVATMARK 0x0040 /* at mark on input */ struct mbuf; struct sockaddr; struct socket; struct thread; struct xsockbuf { u_int sb_cc; u_int sb_hiwat; u_int sb_mbcnt; u_int sb_mcnt; u_int sb_ccnt; u_int sb_mbmax; int sb_lowat; int sb_timeo; short sb_flags; }; /* * Variables for socket buffering. */ struct sockbuf { struct selinfo sb_sel; /* process selecting read/write */ struct mtx sb_mtx; /* sockbuf lock */ struct sx sb_sx; /* prevent I/O interlacing */ short sb_state; /* (c/d) socket state on sockbuf */ #define sb_startzero sb_mb struct mbuf *sb_mb; /* (c/d) the mbuf chain */ struct mbuf *sb_mbtail; /* (c/d) the last mbuf in the chain */ struct mbuf *sb_lastrecord; /* (c/d) first mbuf of last * record in socket buffer */ struct mbuf *sb_sndptr; /* (c/d) pointer into mbuf chain */ struct mbuf *sb_fnrdy; /* (c/d) pointer to first not ready buffer */ u_int sb_sndptroff; /* (c/d) byte offset of ptr into chain */ u_int sb_acc; /* (c/d) available chars in buffer */ u_int sb_ccc; /* (c/d) claimed chars in buffer */ u_int sb_hiwat; /* (c/d) max actual char count */ u_int sb_mbcnt; /* (c/d) chars of mbufs used */ u_int sb_mcnt; /* (c/d) number of mbufs in buffer */ u_int sb_ccnt; /* (c/d) number of clusters in buffer */ u_int sb_mbmax; /* (c/d) max chars of mbufs to use */ u_int sb_ctl; /* (c/d) non-data chars in buffer */ int sb_lowat; /* (c/d) low water mark */ sbintime_t sb_timeo; /* (c/d) timeout for read/write */ short sb_flags; /* (c/d) flags, see below */ int (*sb_upcall)(struct socket *, void *, int); /* (c/d) */ void *sb_upcallarg; /* (c/d) */ }; #ifdef _KERNEL /* * Per-socket buffer mutex used to protect most fields in the socket * buffer. */ #define SOCKBUF_MTX(_sb) (&(_sb)->sb_mtx) #define SOCKBUF_LOCK_INIT(_sb, _name) \ mtx_init(SOCKBUF_MTX(_sb), _name, NULL, MTX_DEF) #define SOCKBUF_LOCK_DESTROY(_sb) mtx_destroy(SOCKBUF_MTX(_sb)) #define SOCKBUF_LOCK(_sb) mtx_lock(SOCKBUF_MTX(_sb)) #define SOCKBUF_OWNED(_sb) mtx_owned(SOCKBUF_MTX(_sb)) #define SOCKBUF_UNLOCK(_sb) mtx_unlock(SOCKBUF_MTX(_sb)) #define SOCKBUF_LOCK_ASSERT(_sb) mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED) #define SOCKBUF_UNLOCK_ASSERT(_sb) mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED) /* * Socket buffer private mbuf(9) flags. */ #define M_NOTREADY M_PROTO1 /* m_data not populated yet */ #define M_BLOCKED M_PROTO2 /* M_NOTREADY in front of m */ #define M_NOTAVAIL (M_NOTREADY | M_BLOCKED) -void sbappend(struct sockbuf *sb, struct mbuf *m); -void sbappend_locked(struct sockbuf *sb, struct mbuf *m); +void sbappend(struct sockbuf *sb, struct mbuf *m, int flags); +void sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags); void sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags); void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags); int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); int sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control); int sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control); void sbappendrecord(struct sockbuf *sb, struct mbuf *m0); void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0); void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n); struct mbuf * sbcreatecontrol(caddr_t p, int size, int type, int level); void sbdestroy(struct sockbuf *sb, struct socket *so); void sbdrop(struct sockbuf *sb, int len); void sbdrop_locked(struct sockbuf *sb, int len); struct mbuf * sbcut_locked(struct sockbuf *sb, int len); void sbdroprecord(struct sockbuf *sb); void sbdroprecord_locked(struct sockbuf *sb); void sbflush(struct sockbuf *sb); void sbflush_locked(struct sockbuf *sb); void sbrelease(struct sockbuf *sb, struct socket *so); void sbrelease_internal(struct sockbuf *sb, struct socket *so); void sbrelease_locked(struct sockbuf *sb, struct socket *so); int sbreserve(struct sockbuf *sb, u_long cc, struct socket *so, struct thread *td); int sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so, struct thread *td); struct mbuf * sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff); struct mbuf * sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff); void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb); int sbwait(struct sockbuf *sb); int sblock(struct sockbuf *sb, int flags); void sbunlock(struct sockbuf *sb); void sballoc(struct sockbuf *, struct mbuf *); void sbfree(struct sockbuf *, struct mbuf *); int sbready(struct sockbuf *, struct mbuf *, int); /* * Return how much data is available to be taken out of socket * buffer right now. */ static inline u_int sbavail(struct sockbuf *sb) { #if 0 SOCKBUF_LOCK_ASSERT(sb); #endif return (sb->sb_acc); } /* * Return how much data sits there in the socket buffer * It might be that some data is not yet ready to be read. */ static inline u_int sbused(struct sockbuf *sb) { #if 0 SOCKBUF_LOCK_ASSERT(sb); #endif return (sb->sb_ccc); } /* * How much space is there in a socket buffer (so->so_snd or so->so_rcv)? * This is problematical if the fields are unsigned, as the space might * still be negative (ccc > hiwat or mbcnt > mbmax). */ static inline long sbspace(struct sockbuf *sb) { int bleft, mleft; /* size should match sockbuf fields */ #if 0 SOCKBUF_LOCK_ASSERT(sb); #endif if (sb->sb_flags & SB_STOP) return(0); bleft = sb->sb_hiwat - sb->sb_ccc; mleft = sb->sb_mbmax - sb->sb_mbcnt; return ((bleft < mleft) ? bleft : mleft); } #define SB_EMPTY_FIXUP(sb) do { \ if ((sb)->sb_mb == NULL) { \ (sb)->sb_mbtail = NULL; \ (sb)->sb_lastrecord = NULL; \ } \ } while (/*CONSTCOND*/0) #ifdef SOCKBUF_DEBUG void sblastrecordchk(struct sockbuf *, const char *, int); void sblastmbufchk(struct sockbuf *, const char *, int); void sbcheck(struct sockbuf *, const char *, int); #define SBLASTRECORDCHK(sb) sblastrecordchk((sb), __FILE__, __LINE__) #define SBLASTMBUFCHK(sb) sblastmbufchk((sb), __FILE__, __LINE__) #define SBCHECK(sb) sbcheck((sb), __FILE__, __LINE__) #else #define SBLASTRECORDCHK(sb) do {} while (0) #define SBLASTMBUFCHK(sb) do {} while (0) #define SBCHECK(sb) do {} while (0) #endif /* SOCKBUF_DEBUG */ #endif /* _KERNEL */ #endif /* _SYS_SOCKBUF_H_ */ Index: projects/clang380-import/sys/sys/socket.h =================================================================== --- projects/clang380-import/sys/sys/socket.h (revision 293686) +++ projects/clang380-import/sys/sys/socket.h (revision 293687) @@ -1,672 +1,675 @@ /*- * Copyright (c) 1982, 1985, 1986, 1988, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)socket.h 8.4 (Berkeley) 2/21/94 * $FreeBSD$ */ #ifndef _SYS_SOCKET_H_ #define _SYS_SOCKET_H_ #include #include #include #include /* * Definitions related to sockets: types, address families, options. */ /* * Data types. */ #if __BSD_VISIBLE #ifndef _GID_T_DECLARED typedef __gid_t gid_t; #define _GID_T_DECLARED #endif #ifndef _OFF_T_DECLARED typedef __off_t off_t; #define _OFF_T_DECLARED #endif #ifndef _PID_T_DECLARED typedef __pid_t pid_t; #define _PID_T_DECLARED #endif #endif #ifndef _SA_FAMILY_T_DECLARED typedef __sa_family_t sa_family_t; #define _SA_FAMILY_T_DECLARED #endif #ifndef _SOCKLEN_T_DECLARED typedef __socklen_t socklen_t; #define _SOCKLEN_T_DECLARED #endif #ifndef _SSIZE_T_DECLARED typedef __ssize_t ssize_t; #define _SSIZE_T_DECLARED #endif #if __BSD_VISIBLE #ifndef _UID_T_DECLARED typedef __uid_t uid_t; #define _UID_T_DECLARED #endif #endif #ifndef _UINT32_T_DECLARED typedef __uint32_t uint32_t; #define _UINT32_T_DECLARED #endif #ifndef _UINTPTR_T_DECLARED typedef __uintptr_t uintptr_t; #define _UINTPTR_T_DECLARED #endif /* * Types */ #define SOCK_STREAM 1 /* stream socket */ #define SOCK_DGRAM 2 /* datagram socket */ #define SOCK_RAW 3 /* raw-protocol interface */ #if __BSD_VISIBLE #define SOCK_RDM 4 /* reliably-delivered message */ #endif #define SOCK_SEQPACKET 5 /* sequenced packet stream */ #if __BSD_VISIBLE /* * Creation flags, OR'ed into socket() and socketpair() type argument. */ #define SOCK_CLOEXEC 0x10000000 #define SOCK_NONBLOCK 0x20000000 #endif /* * Option flags per-socket. */ #define SO_DEBUG 0x0001 /* turn on debugging info recording */ #define SO_ACCEPTCONN 0x0002 /* socket has had listen() */ #define SO_REUSEADDR 0x0004 /* allow local address reuse */ #define SO_KEEPALIVE 0x0008 /* keep connections alive */ #define SO_DONTROUTE 0x0010 /* just use interface addresses */ #define SO_BROADCAST 0x0020 /* permit sending of broadcast msgs */ #if __BSD_VISIBLE #define SO_USELOOPBACK 0x0040 /* bypass hardware when possible */ #endif #define SO_LINGER 0x0080 /* linger on close if data present */ #define SO_OOBINLINE 0x0100 /* leave received OOB data in line */ #if __BSD_VISIBLE #define SO_REUSEPORT 0x0200 /* allow local address & port reuse */ #define SO_TIMESTAMP 0x0400 /* timestamp received dgram traffic */ #define SO_NOSIGPIPE 0x0800 /* no SIGPIPE from EPIPE */ #define SO_ACCEPTFILTER 0x1000 /* there is an accept filter */ #define SO_BINTIME 0x2000 /* timestamp received dgram traffic */ #endif #define SO_NO_OFFLOAD 0x4000 /* socket cannot be offloaded */ #define SO_NO_DDP 0x8000 /* disable direct data placement */ /* * Additional options, not kept in so_options. */ #define SO_SNDBUF 0x1001 /* send buffer size */ #define SO_RCVBUF 0x1002 /* receive buffer size */ #define SO_SNDLOWAT 0x1003 /* send low-water mark */ #define SO_RCVLOWAT 0x1004 /* receive low-water mark */ #define SO_SNDTIMEO 0x1005 /* send timeout */ #define SO_RCVTIMEO 0x1006 /* receive timeout */ #define SO_ERROR 0x1007 /* get error status and clear */ #define SO_TYPE 0x1008 /* get socket type */ #if __BSD_VISIBLE #define SO_LABEL 0x1009 /* socket's MAC label */ #define SO_PEERLABEL 0x1010 /* socket's peer's MAC label */ #define SO_LISTENQLIMIT 0x1011 /* socket's backlog limit */ #define SO_LISTENQLEN 0x1012 /* socket's complete queue length */ #define SO_LISTENINCQLEN 0x1013 /* socket's incomplete queue length */ #define SO_SETFIB 0x1014 /* use this FIB to route */ #define SO_USER_COOKIE 0x1015 /* user cookie (dummynet etc.) */ #define SO_PROTOCOL 0x1016 /* get socket protocol (Linux name) */ #define SO_PROTOTYPE SO_PROTOCOL /* alias for SO_PROTOCOL (SunOS name) */ #endif /* * Space reserved for new socket options added by third-party vendors. * This range applies to all socket option levels. New socket options * in FreeBSD should always use an option value less than SO_VENDOR. */ #if __BSD_VISIBLE #define SO_VENDOR 0x80000000 #endif /* * Structure used for manipulating linger option. */ struct linger { int l_onoff; /* option on/off */ int l_linger; /* linger time */ }; #if __BSD_VISIBLE struct accept_filter_arg { char af_name[16]; char af_arg[256-16]; }; #endif /* * Level number for (get/set)sockopt() to apply to socket itself. */ #define SOL_SOCKET 0xffff /* options for socket level */ /* * Address families. */ #define AF_UNSPEC 0 /* unspecified */ #if __BSD_VISIBLE #define AF_LOCAL AF_UNIX /* local to host (pipes, portals) */ #endif #define AF_UNIX 1 /* standardized name for AF_LOCAL */ #define AF_INET 2 /* internetwork: UDP, TCP, etc. */ #if __BSD_VISIBLE #define AF_IMPLINK 3 /* arpanet imp addresses */ #define AF_PUP 4 /* pup protocols: e.g. BSP */ #define AF_CHAOS 5 /* mit CHAOS protocols */ #define AF_NETBIOS 6 /* SMB protocols */ #define AF_ISO 7 /* ISO protocols */ #define AF_OSI AF_ISO #define AF_ECMA 8 /* European computer manufacturers */ #define AF_DATAKIT 9 /* datakit protocols */ #define AF_CCITT 10 /* CCITT protocols, X.25 etc */ #define AF_SNA 11 /* IBM SNA */ #define AF_DECnet 12 /* DECnet */ #define AF_DLI 13 /* DEC Direct data link interface */ #define AF_LAT 14 /* LAT */ #define AF_HYLINK 15 /* NSC Hyperchannel */ #define AF_APPLETALK 16 /* Apple Talk */ #define AF_ROUTE 17 /* Internal Routing Protocol */ #define AF_LINK 18 /* Link layer interface */ #define pseudo_AF_XTP 19 /* eXpress Transfer Protocol (no AF) */ #define AF_COIP 20 /* connection-oriented IP, aka ST II */ #define AF_CNT 21 /* Computer Network Technology */ #define pseudo_AF_RTIP 22 /* Help Identify RTIP packets */ #define AF_IPX 23 /* Novell Internet Protocol */ #define AF_SIP 24 /* Simple Internet Protocol */ #define pseudo_AF_PIP 25 /* Help Identify PIP packets */ #define AF_ISDN 26 /* Integrated Services Digital Network*/ #define AF_E164 AF_ISDN /* CCITT E.164 recommendation */ #define pseudo_AF_KEY 27 /* Internal key-management function */ #endif #define AF_INET6 28 /* IPv6 */ #if __BSD_VISIBLE #define AF_NATM 29 /* native ATM access */ #define AF_ATM 30 /* ATM */ #define pseudo_AF_HDRCMPLT 31 /* Used by BPF to not rewrite headers * in interface output routine */ #define AF_NETGRAPH 32 /* Netgraph sockets */ #define AF_SLOW 33 /* 802.3ad slow protocol */ #define AF_SCLUSTER 34 /* Sitara cluster protocol */ #define AF_ARP 35 #define AF_BLUETOOTH 36 /* Bluetooth sockets */ #define AF_IEEE80211 37 /* IEEE 802.11 protocol */ #define AF_INET_SDP 40 /* OFED Socket Direct Protocol ipv4 */ #define AF_INET6_SDP 42 /* OFED Socket Direct Protocol ipv6 */ #define AF_MAX 42 /* * When allocating a new AF_ constant, please only allocate * even numbered constants for FreeBSD until 134 as odd numbered AF_ * constants 39-133 are now reserved for vendors. */ #define AF_VENDOR00 39 #define AF_VENDOR01 41 #define AF_VENDOR02 43 #define AF_VENDOR03 45 #define AF_VENDOR04 47 #define AF_VENDOR05 49 #define AF_VENDOR06 51 #define AF_VENDOR07 53 #define AF_VENDOR08 55 #define AF_VENDOR09 57 #define AF_VENDOR10 59 #define AF_VENDOR11 61 #define AF_VENDOR12 63 #define AF_VENDOR13 65 #define AF_VENDOR14 67 #define AF_VENDOR15 69 #define AF_VENDOR16 71 #define AF_VENDOR17 73 #define AF_VENDOR18 75 #define AF_VENDOR19 77 #define AF_VENDOR20 79 #define AF_VENDOR21 81 #define AF_VENDOR22 83 #define AF_VENDOR23 85 #define AF_VENDOR24 87 #define AF_VENDOR25 89 #define AF_VENDOR26 91 #define AF_VENDOR27 93 #define AF_VENDOR28 95 #define AF_VENDOR29 97 #define AF_VENDOR30 99 #define AF_VENDOR31 101 #define AF_VENDOR32 103 #define AF_VENDOR33 105 #define AF_VENDOR34 107 #define AF_VENDOR35 109 #define AF_VENDOR36 111 #define AF_VENDOR37 113 #define AF_VENDOR38 115 #define AF_VENDOR39 117 #define AF_VENDOR40 119 #define AF_VENDOR41 121 #define AF_VENDOR42 123 #define AF_VENDOR43 125 #define AF_VENDOR44 127 #define AF_VENDOR45 129 #define AF_VENDOR46 131 #define AF_VENDOR47 133 #endif /* * Structure used by kernel to store most * addresses. */ struct sockaddr { unsigned char sa_len; /* total length */ sa_family_t sa_family; /* address family */ char sa_data[14]; /* actually longer; address value */ }; #if __BSD_VISIBLE #define SOCK_MAXADDRLEN 255 /* longest possible addresses */ /* * Structure used by kernel to pass protocol * information in raw sockets. */ struct sockproto { unsigned short sp_family; /* address family */ unsigned short sp_protocol; /* protocol */ }; #endif #include #if __BSD_VISIBLE /* * Protocol families, same as address families for now. */ #define PF_UNSPEC AF_UNSPEC #define PF_LOCAL AF_LOCAL #define PF_UNIX PF_LOCAL /* backward compatibility */ #define PF_INET AF_INET #define PF_IMPLINK AF_IMPLINK #define PF_PUP AF_PUP #define PF_CHAOS AF_CHAOS #define PF_NETBIOS AF_NETBIOS #define PF_ISO AF_ISO #define PF_OSI AF_ISO #define PF_ECMA AF_ECMA #define PF_DATAKIT AF_DATAKIT #define PF_CCITT AF_CCITT #define PF_SNA AF_SNA #define PF_DECnet AF_DECnet #define PF_DLI AF_DLI #define PF_LAT AF_LAT #define PF_HYLINK AF_HYLINK #define PF_APPLETALK AF_APPLETALK #define PF_ROUTE AF_ROUTE #define PF_LINK AF_LINK #define PF_XTP pseudo_AF_XTP /* really just proto family, no AF */ #define PF_COIP AF_COIP #define PF_CNT AF_CNT #define PF_SIP AF_SIP #define PF_IPX AF_IPX #define PF_RTIP pseudo_AF_RTIP /* same format as AF_INET */ #define PF_PIP pseudo_AF_PIP #define PF_ISDN AF_ISDN #define PF_KEY pseudo_AF_KEY #define PF_INET6 AF_INET6 #define PF_NATM AF_NATM #define PF_ATM AF_ATM #define PF_NETGRAPH AF_NETGRAPH #define PF_SLOW AF_SLOW #define PF_SCLUSTER AF_SCLUSTER #define PF_ARP AF_ARP #define PF_BLUETOOTH AF_BLUETOOTH #define PF_IEEE80211 AF_IEEE80211 #define PF_INET_SDP AF_INET_SDP #define PF_INET6_SDP AF_INET6_SDP #define PF_MAX AF_MAX /* * Definitions for network related sysctl, CTL_NET. * * Second level is protocol family. * Third level is protocol number. * * Further levels are defined by the individual families. */ /* * PF_ROUTE - Routing table * * Three additional levels are defined: * Fourth: address family, 0 is wildcard * Fifth: type of info, defined below * Sixth: flag(s) to mask with for NET_RT_FLAGS */ #define NET_RT_DUMP 1 /* dump; may limit to a.f. */ #define NET_RT_FLAGS 2 /* by flags, e.g. RESOLVING */ #define NET_RT_IFLIST 3 /* survey interface list */ #define NET_RT_IFMALIST 4 /* return multicast address list */ #define NET_RT_IFLISTL 5 /* Survey interface list, using 'l'en * versions of msghdr structs. */ #endif /* __BSD_VISIBLE */ /* * Maximum queue length specifiable by listen. */ #define SOMAXCONN 128 /* * Message header for recvmsg and sendmsg calls. * Used value-result for recvmsg, value only for sendmsg. */ struct msghdr { void *msg_name; /* optional address */ socklen_t msg_namelen; /* size of address */ struct iovec *msg_iov; /* scatter/gather array */ int msg_iovlen; /* # elements in msg_iov */ void *msg_control; /* ancillary data, see below */ socklen_t msg_controllen; /* ancillary data buffer len */ int msg_flags; /* flags on received message */ }; #define MSG_OOB 0x1 /* process out-of-band data */ #define MSG_PEEK 0x2 /* peek at incoming message */ #define MSG_DONTROUTE 0x4 /* send without using routing tables */ #define MSG_EOR 0x8 /* data completes record */ #define MSG_TRUNC 0x10 /* data discarded before delivery */ #define MSG_CTRUNC 0x20 /* control data lost before delivery */ #define MSG_WAITALL 0x40 /* wait for full request or error */ #if __POSIX_VISIBLE >= 200809 #define MSG_NOSIGNAL 0x20000 /* do not generate SIGPIPE on EOF */ #endif #if __BSD_VISIBLE #define MSG_DONTWAIT 0x80 /* this message should be nonblocking */ #define MSG_EOF 0x100 /* data completes connection */ #define MSG_NOTIFICATION 0x2000 /* SCTP notification */ #define MSG_NBIO 0x4000 /* FIONBIO mode, used by fifofs */ #define MSG_COMPAT 0x8000 /* used in sendit() */ #define MSG_CMSG_CLOEXEC 0x40000 /* make received fds close-on-exec */ #endif #ifdef _KERNEL #define MSG_SOCALLBCK 0x10000 /* for use by socket callbacks - soreceive (TCP) */ #endif /* * Header for ancillary data objects in msg_control buffer. * Used for additional information with/about a datagram * not expressible by flags. The format is a sequence * of message elements headed by cmsghdr structures. */ struct cmsghdr { socklen_t cmsg_len; /* data byte count, including hdr */ int cmsg_level; /* originating protocol */ int cmsg_type; /* protocol-specific type */ /* followed by u_char cmsg_data[]; */ }; #if __BSD_VISIBLE /* * While we may have more groups than this, the cmsgcred struct must * be able to fit in an mbuf and we have historically supported a * maximum of 16 groups. */ #define CMGROUP_MAX 16 /* * Credentials structure, used to verify the identity of a peer * process that has sent us a message. This is allocated by the * peer process but filled in by the kernel. This prevents the * peer from lying about its identity. (Note that cmcred_groups[0] * is the effective GID.) */ struct cmsgcred { pid_t cmcred_pid; /* PID of sending process */ uid_t cmcred_uid; /* real UID of sending process */ uid_t cmcred_euid; /* effective UID of sending process */ gid_t cmcred_gid; /* real GID of sending process */ short cmcred_ngroups; /* number or groups */ gid_t cmcred_groups[CMGROUP_MAX]; /* groups */ }; /* * Socket credentials. */ struct sockcred { uid_t sc_uid; /* real user id */ uid_t sc_euid; /* effective user id */ gid_t sc_gid; /* real group id */ gid_t sc_egid; /* effective group id */ int sc_ngroups; /* number of supplemental groups */ gid_t sc_groups[1]; /* variable length */ }; /* * Compute size of a sockcred structure with groups. */ #define SOCKCREDSIZE(ngrps) \ (sizeof(struct sockcred) + (sizeof(gid_t) * ((ngrps) - 1))) #endif /* __BSD_VISIBLE */ /* given pointer to struct cmsghdr, return pointer to data */ #define CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ _ALIGN(sizeof(struct cmsghdr))) /* given pointer to struct cmsghdr, return pointer to next cmsghdr */ #define CMSG_NXTHDR(mhdr, cmsg) \ ((char *)(cmsg) == NULL ? CMSG_FIRSTHDR(mhdr) : \ ((char *)(cmsg) + _ALIGN(((struct cmsghdr *)(cmsg))->cmsg_len) + \ _ALIGN(sizeof(struct cmsghdr)) > \ (char *)(mhdr)->msg_control + (mhdr)->msg_controllen) ? \ (struct cmsghdr *)0 : \ (struct cmsghdr *)(void *)((char *)(cmsg) + \ _ALIGN(((struct cmsghdr *)(cmsg))->cmsg_len))) /* * RFC 2292 requires to check msg_controllen, in case that the kernel returns * an empty list for some reasons. */ #define CMSG_FIRSTHDR(mhdr) \ ((mhdr)->msg_controllen >= sizeof(struct cmsghdr) ? \ (struct cmsghdr *)(mhdr)->msg_control : \ (struct cmsghdr *)NULL) #if __BSD_VISIBLE /* RFC 2292 additions */ #define CMSG_SPACE(l) (_ALIGN(sizeof(struct cmsghdr)) + _ALIGN(l)) #define CMSG_LEN(l) (_ALIGN(sizeof(struct cmsghdr)) + (l)) #endif #ifdef _KERNEL #define CMSG_ALIGN(n) _ALIGN(n) #endif /* "Socket"-level control message types: */ #define SCM_RIGHTS 0x01 /* access rights (array of int) */ #if __BSD_VISIBLE #define SCM_TIMESTAMP 0x02 /* timestamp (struct timeval) */ #define SCM_CREDS 0x03 /* process creds (struct cmsgcred) */ #define SCM_BINTIME 0x04 /* timestamp (struct bintime) */ #endif #if __BSD_VISIBLE /* * 4.3 compat sockaddr, move to compat file later */ struct osockaddr { unsigned short sa_family; /* address family */ char sa_data[14]; /* up to 14 bytes of direct address */ }; /* * 4.3-compat message header (move to compat file later). */ struct omsghdr { char *msg_name; /* optional address */ int msg_namelen; /* size of address */ struct iovec *msg_iov; /* scatter/gather array */ int msg_iovlen; /* # elements in msg_iov */ char *msg_accrights; /* access rights sent/received */ int msg_accrightslen; }; #endif /* * howto arguments for shutdown(2), specified by Posix.1g. */ #define SHUT_RD 0 /* shut down the reading side */ #define SHUT_WR 1 /* shut down the writing side */ #define SHUT_RDWR 2 /* shut down both sides */ #if __BSD_VISIBLE /* for SCTP */ /* we cheat and use the SHUT_XX defines for these */ #define PRU_FLUSH_RD SHUT_RD #define PRU_FLUSH_WR SHUT_WR #define PRU_FLUSH_RDWR SHUT_RDWR #endif #if __BSD_VISIBLE /* * sendfile(2) header/trailer struct */ struct sf_hdtr { struct iovec *headers; /* pointer to an array of header struct iovec's */ int hdr_cnt; /* number of header iovec's */ struct iovec *trailers; /* pointer to an array of trailer struct iovec's */ int trl_cnt; /* number of trailer iovec's */ }; /* * Sendfile-specific flag(s) */ #define SF_NODISKIO 0x00000001 -#define SF_MNOWAIT 0x00000002 +#define SF_MNOWAIT 0x00000002 /* obsolete */ #define SF_SYNC 0x00000004 +#define SF_NOCACHE 0x00000010 +#define SF_FLAGS(rh, flags) (((rh) << 16) | (flags)) #ifdef _KERNEL #define SFK_COMPAT 0x00000001 +#define SF_READAHEAD(flags) ((flags) >> 16) #endif /* _KERNEL */ #endif /* __BSD_VISIBLE */ #ifndef _KERNEL #include __BEGIN_DECLS int accept(int, struct sockaddr * __restrict, socklen_t * __restrict); int bind(int, const struct sockaddr *, socklen_t); int connect(int, const struct sockaddr *, socklen_t); #if __BSD_VISIBLE int accept4(int, struct sockaddr * __restrict, socklen_t * __restrict, int); int bindat(int, int, const struct sockaddr *, socklen_t); int connectat(int, int, const struct sockaddr *, socklen_t); #endif int getpeername(int, struct sockaddr * __restrict, socklen_t * __restrict); int getsockname(int, struct sockaddr * __restrict, socklen_t * __restrict); int getsockopt(int, int, int, void * __restrict, socklen_t * __restrict); int listen(int, int); ssize_t recv(int, void *, size_t, int); ssize_t recvfrom(int, void *, size_t, int, struct sockaddr * __restrict, socklen_t * __restrict); ssize_t recvmsg(int, struct msghdr *, int); ssize_t send(int, const void *, size_t, int); ssize_t sendto(int, const void *, size_t, int, const struct sockaddr *, socklen_t); ssize_t sendmsg(int, const struct msghdr *, int); #if __BSD_VISIBLE int sendfile(int, int, off_t, size_t, struct sf_hdtr *, off_t *, int); int setfib(int); #endif int setsockopt(int, int, int, const void *, socklen_t); int shutdown(int, int); int sockatmark(int); int socket(int, int, int); int socketpair(int, int, int, int *); __END_DECLS #endif /* !_KERNEL */ #ifdef _KERNEL struct socket; struct tcpcb *so_sototcpcb(struct socket *so); struct inpcb *so_sotoinpcb(struct socket *so); struct sockbuf *so_sockbuf_snd(struct socket *); struct sockbuf *so_sockbuf_rcv(struct socket *); int so_state_get(const struct socket *); void so_state_set(struct socket *, int); int so_options_get(const struct socket *); void so_options_set(struct socket *, int); int so_error_get(const struct socket *); void so_error_set(struct socket *, int); int so_linger_get(const struct socket *); void so_linger_set(struct socket *, int); struct protosw *so_protosw_get(const struct socket *); void so_protosw_set(struct socket *, struct protosw *); void so_sorwakeup_locked(struct socket *so); void so_sowwakeup_locked(struct socket *so); void so_sorwakeup(struct socket *so); void so_sowwakeup(struct socket *so); void so_lock(struct socket *so); void so_unlock(struct socket *so); void so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg); #endif #endif /* !_SYS_SOCKET_H_ */ Index: projects/clang380-import/sys/sys/sysent.h =================================================================== --- projects/clang380-import/sys/sys/sysent.h (revision 293686) +++ projects/clang380-import/sys/sys/sysent.h (revision 293687) @@ -1,278 +1,279 @@ /*- * Copyright (c) 1982, 1988, 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_SYSENT_H_ #define _SYS_SYSENT_H_ #include struct rlimit; struct sysent; struct thread; struct ksiginfo; struct syscall_args; enum systrace_probe_t { SYSTRACE_ENTRY, SYSTRACE_RETURN, }; typedef int sy_call_t(struct thread *, void *); typedef void (*systrace_probe_func_t)(struct syscall_args *, enum systrace_probe_t, int); typedef void (*systrace_args_func_t)(int, void *, uint64_t *, int *); extern systrace_probe_func_t systrace_probe_func; struct sysent { /* system call table */ int sy_narg; /* number of arguments */ sy_call_t *sy_call; /* implementing function */ au_event_t sy_auevent; /* audit event associated with syscall */ systrace_args_func_t sy_systrace_args_func; /* optional argument conversion function. */ u_int32_t sy_entry; /* DTrace entry ID for systrace. */ u_int32_t sy_return; /* DTrace return ID for systrace. */ u_int32_t sy_flags; /* General flags for system calls. */ u_int32_t sy_thrcnt; }; /* * A system call is permitted in capability mode. */ #define SYF_CAPENABLED 0x00000001 #define SY_THR_FLAGMASK 0x7 #define SY_THR_STATIC 0x1 #define SY_THR_DRAINING 0x2 #define SY_THR_ABSENT 0x4 #define SY_THR_INCR 0x8 #ifdef KLD_MODULE #define SY_THR_STATIC_KLD 0 #else #define SY_THR_STATIC_KLD SY_THR_STATIC #endif struct image_params; struct __sigset; struct trapframe; struct vnode; struct sysentvec { int sv_size; /* number of entries */ struct sysent *sv_table; /* pointer to sysent */ u_int sv_mask; /* optional mask to index */ int sv_errsize; /* size of errno translation table */ int *sv_errtbl; /* errno translation table */ int (*sv_transtrap)(int, int); /* translate trap-to-signal mapping */ int (*sv_fixup)(register_t **, struct image_params *); /* stack fixup function */ void (*sv_sendsig)(void (*)(int), struct ksiginfo *, struct __sigset *); /* send signal */ char *sv_sigcode; /* start of sigtramp code */ int *sv_szsigcode; /* size of sigtramp code */ char *sv_name; /* name of binary type */ int (*sv_coredump)(struct thread *, struct vnode *, off_t, int); /* function to dump core, or NULL */ int (*sv_imgact_try)(struct image_params *); int sv_minsigstksz; /* minimum signal stack size */ int sv_pagesize; /* pagesize */ vm_offset_t sv_minuser; /* VM_MIN_ADDRESS */ vm_offset_t sv_maxuser; /* VM_MAXUSER_ADDRESS */ vm_offset_t sv_usrstack; /* USRSTACK */ vm_offset_t sv_psstrings; /* PS_STRINGS */ int sv_stackprot; /* vm protection for stack */ register_t *(*sv_copyout_strings)(struct image_params *); void (*sv_setregs)(struct thread *, struct image_params *, u_long); void (*sv_fixlimit)(struct rlimit *, int); u_long *sv_maxssiz; u_int sv_flags; void (*sv_set_syscall_retval)(struct thread *, int); int (*sv_fetch_syscall_args)(struct thread *, struct syscall_args *); const char **sv_syscallnames; vm_offset_t sv_timekeep_base; vm_offset_t sv_shared_page_base; vm_offset_t sv_shared_page_len; vm_offset_t sv_sigcode_base; void *sv_shared_page_obj; void (*sv_schedtail)(struct thread *); void (*sv_thread_detach)(struct thread *); + int (*sv_trap)(struct thread *); }; #define SV_ILP32 0x000100 /* 32-bit executable. */ #define SV_LP64 0x000200 /* 64-bit executable. */ #define SV_IA32 0x004000 /* Intel 32-bit executable. */ #define SV_AOUT 0x008000 /* a.out executable. */ #define SV_SHP 0x010000 /* Shared page. */ #define SV_CAPSICUM 0x020000 /* Force cap_enter() on startup. */ #define SV_TIMEKEEP 0x040000 #define SV_ABI_MASK 0xff #define SV_PROC_FLAG(p, x) ((p)->p_sysent->sv_flags & (x)) #define SV_PROC_ABI(p) ((p)->p_sysent->sv_flags & SV_ABI_MASK) #define SV_CURPROC_FLAG(x) SV_PROC_FLAG(curproc, x) #define SV_CURPROC_ABI() SV_PROC_ABI(curproc) /* same as ELFOSABI_XXX, to prevent header pollution */ #define SV_ABI_LINUX 3 #define SV_ABI_FREEBSD 9 #define SV_ABI_CLOUDABI 17 #define SV_ABI_UNDEF 255 #ifdef _KERNEL extern struct sysentvec aout_sysvec; extern struct sysentvec elf_freebsd_sysvec; extern struct sysentvec null_sysvec; extern struct sysent sysent[]; extern const char *syscallnames[]; #if defined(__amd64__) extern int i386_read_exec; #endif #define NO_SYSCALL (-1) struct module; struct syscall_module_data { int (*chainevh)(struct module *, int, void *); /* next handler */ void *chainarg; /* arg for next event handler */ int *offset; /* offset into sysent */ struct sysent *new_sysent; /* new sysent */ struct sysent old_sysent; /* old sysent */ int flags; /* flags for syscall_register */ }; #define MAKE_SYSENT(syscallname) \ static struct sysent syscallname##_sysent = { \ (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ (sy_call_t *)& sys_##syscallname, \ SYS_AUE_##syscallname \ } #define MAKE_SYSENT_COMPAT(syscallname) \ static struct sysent syscallname##_sysent = { \ (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ (sy_call_t *)& syscallname, \ SYS_AUE_##syscallname \ } #define SYSCALL_MODULE(name, offset, new_sysent, evh, arg) \ static struct syscall_module_data name##_syscall_mod = { \ evh, arg, offset, new_sysent, { 0, NULL, AUE_NULL } \ }; \ \ static moduledata_t name##_mod = { \ "sys/" #name, \ syscall_module_handler, \ &name##_syscall_mod \ }; \ DECLARE_MODULE(name, name##_mod, SI_SUB_SYSCALLS, SI_ORDER_MIDDLE) #define SYSCALL_MODULE_HELPER(syscallname) \ static int syscallname##_syscall = SYS_##syscallname; \ MAKE_SYSENT(syscallname); \ SYSCALL_MODULE(syscallname, \ & syscallname##_syscall, & syscallname##_sysent, \ NULL, NULL) #define SYSCALL_MODULE_PRESENT(syscallname) \ (sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmnosys && \ sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmressys) /* * Syscall registration helpers with resource allocation handling. */ struct syscall_helper_data { struct sysent new_sysent; struct sysent old_sysent; int syscall_no; int registered; }; #define SYSCALL_INIT_HELPER(syscallname) { \ .new_sysent = { \ .sy_narg = (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)& sys_ ## syscallname, \ .sy_auevent = SYS_AUE_##syscallname \ }, \ .syscall_no = SYS_##syscallname \ } #define SYSCALL_INIT_HELPER_COMPAT(syscallname) { \ .new_sysent = { \ .sy_narg = (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)& syscallname, \ .sy_auevent = SYS_AUE_##syscallname \ }, \ .syscall_no = SYS_##syscallname \ } #define SYSCALL_INIT_LAST { \ .syscall_no = NO_SYSCALL \ } int syscall_register(int *offset, struct sysent *new_sysent, struct sysent *old_sysent, int flags); int syscall_deregister(int *offset, struct sysent *old_sysent); int syscall_module_handler(struct module *mod, int what, void *arg); int syscall_helper_register(struct syscall_helper_data *sd, int flags); int syscall_helper_unregister(struct syscall_helper_data *sd); struct proc; const char *syscallname(struct proc *p, u_int code); /* Special purpose system call functions. */ struct nosys_args; int lkmnosys(struct thread *, struct nosys_args *); int lkmressys(struct thread *, struct nosys_args *); int syscall_thread_enter(struct thread *td, struct sysent *se); void syscall_thread_exit(struct thread *td, struct sysent *se); int shared_page_alloc(int size, int align); int shared_page_fill(int size, int align, const void *data); void shared_page_write(int base, int size, const void *data); void exec_sysvec_init(void *param); void exec_inittk(void); #define INIT_SYSENTVEC(name, sv) \ SYSINIT(name, SI_SUB_EXEC, SI_ORDER_ANY, \ (sysinit_cfunc_t)exec_sysvec_init, sv); #endif /* _KERNEL */ #endif /* !_SYS_SYSENT_H_ */ Index: projects/clang380-import/sys/vm/vm_domain.c =================================================================== --- projects/clang380-import/sys/vm/vm_domain.c (revision 293686) +++ projects/clang380-import/sys/vm/vm_domain.c (revision 293687) @@ -1,374 +1,384 @@ /*- * Copyright (c) 2015 Adrian Chadd . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include "opt_ddb.h" #include #include #include #include #include #include #if MAXMEMDOM > 1 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static __inline int -vm_domain_rr_selectdomain(void) +vm_domain_rr_selectdomain(int skip_domain) { #if MAXMEMDOM > 1 struct thread *td; td = curthread; td->td_dom_rr_idx++; td->td_dom_rr_idx %= vm_ndomains; + + /* + * If skip_domain is provided then skip over that + * domain. This is intended for round robin variants + * which first try a fixed domain. + */ + if ((skip_domain > -1) && (td->td_dom_rr_idx == skip_domain)) { + td->td_dom_rr_idx++; + td->td_dom_rr_idx %= vm_ndomains; + } return (td->td_dom_rr_idx); #else return (0); #endif } /* * This implements a very simple set of VM domain memory allocation * policies and iterators. */ /* * A VM domain policy represents a desired VM domain policy. * Iterators implement searching through VM domains in a specific * order. */ /* * When setting a policy, the caller must establish their own * exclusive write protection for the contents of the domain * policy. */ int vm_domain_policy_init(struct vm_domain_policy *vp) { bzero(vp, sizeof(*vp)); vp->p.policy = VM_POLICY_NONE; vp->p.domain = -1; return (0); } int vm_domain_policy_set(struct vm_domain_policy *vp, vm_domain_policy_type_t vt, int domain) { seq_write_begin(&vp->seq); vp->p.policy = vt; vp->p.domain = domain; seq_write_end(&vp->seq); return (0); } /* * Take a local copy of a policy. * * The destination policy isn't write-barriered; this is used * for doing local copies into something that isn't shared. */ void vm_domain_policy_localcopy(struct vm_domain_policy *dst, const struct vm_domain_policy *src) { seq_t seq; for (;;) { seq = seq_read(&src->seq); *dst = *src; if (seq_consistent(&src->seq, seq)) return; cpu_spinwait(); } } /* * Take a write-barrier copy of a policy. * * The destination policy is write -barriered; this is used * for doing copies into policies that may be read by other * threads. */ void vm_domain_policy_copy(struct vm_domain_policy *dst, const struct vm_domain_policy *src) { seq_t seq; struct vm_domain_policy d; for (;;) { seq = seq_read(&src->seq); d = *src; if (seq_consistent(&src->seq, seq)) { seq_write_begin(&dst->seq); dst->p.domain = d.p.domain; dst->p.policy = d.p.policy; seq_write_end(&dst->seq); return; } cpu_spinwait(); } } int vm_domain_policy_validate(const struct vm_domain_policy *vp) { switch (vp->p.policy) { case VM_POLICY_NONE: case VM_POLICY_ROUND_ROBIN: case VM_POLICY_FIRST_TOUCH: case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN: if (vp->p.domain == -1) return (0); return (-1); case VM_POLICY_FIXED_DOMAIN: case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN: if (vp->p.domain >= 0 && vp->p.domain < vm_ndomains) return (0); return (-1); default: return (-1); } return (-1); } int vm_domain_policy_cleanup(struct vm_domain_policy *vp) { /* For now, empty */ return (0); } int vm_domain_iterator_init(struct vm_domain_iterator *vi) { /* Nothing to do for now */ return (0); } /* * Manually setup an iterator with the given details. */ int vm_domain_iterator_set(struct vm_domain_iterator *vi, vm_domain_policy_type_t vt, int domain) { switch (vt) { case VM_POLICY_FIXED_DOMAIN: vi->policy = VM_POLICY_FIXED_DOMAIN; vi->domain = domain; vi->n = 1; break; case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN: vi->policy = VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN; vi->domain = domain; vi->n = vm_ndomains; break; case VM_POLICY_FIRST_TOUCH: vi->policy = VM_POLICY_FIRST_TOUCH; vi->domain = PCPU_GET(domain); vi->n = 1; break; case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN: vi->policy = VM_POLICY_FIRST_TOUCH_ROUND_ROBIN; vi->domain = PCPU_GET(domain); vi->n = vm_ndomains; break; case VM_POLICY_ROUND_ROBIN: default: vi->policy = VM_POLICY_ROUND_ROBIN; vi->domain = -1; vi->n = vm_ndomains; break; } return (0); } /* * Setup an iterator based on the given policy. */ static inline void _vm_domain_iterator_set_policy(struct vm_domain_iterator *vi, const struct vm_domain_policy *vt) { /* * Initialise the iterator. * * For first-touch, the initial domain is set * via the current thread CPU domain. * * For fixed-domain, it's assumed that the * caller has initialised the specific domain * it is after. */ switch (vt->p.policy) { case VM_POLICY_FIXED_DOMAIN: vi->policy = vt->p.policy; vi->domain = vt->p.domain; vi->n = 1; break; case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN: vi->policy = vt->p.policy; vi->domain = vt->p.domain; vi->n = vm_ndomains; break; case VM_POLICY_FIRST_TOUCH: vi->policy = vt->p.policy; vi->domain = PCPU_GET(domain); vi->n = 1; break; case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN: vi->policy = vt->p.policy; vi->domain = PCPU_GET(domain); vi->n = vm_ndomains; break; case VM_POLICY_ROUND_ROBIN: default: /* * Default to round-robin policy. */ vi->policy = VM_POLICY_ROUND_ROBIN; vi->domain = -1; vi->n = vm_ndomains; break; } } void vm_domain_iterator_set_policy(struct vm_domain_iterator *vi, const struct vm_domain_policy *vt) { seq_t seq; struct vm_domain_policy vt_lcl; for (;;) { seq = seq_read(&vt->seq); vt_lcl = *vt; if (seq_consistent(&vt->seq, seq)) { _vm_domain_iterator_set_policy(vi, &vt_lcl); return; } cpu_spinwait(); } } /* * Return the next VM domain to use. * * Returns 0 w/ domain set to the next domain to use, or * -1 to indicate no more domains are available. */ int vm_domain_iterator_run(struct vm_domain_iterator *vi, int *domain) { /* General catch-all */ if (vi->n <= 0) return (-1); switch (vi->policy) { case VM_POLICY_FIXED_DOMAIN: case VM_POLICY_FIRST_TOUCH: *domain = vi->domain; vi->n--; break; case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN: case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN: /* * XXX TODO: skip over the rr'ed domain * if it equals the one we started with. */ if (vi->n == vm_ndomains) *domain = vi->domain; else - *domain = vm_domain_rr_selectdomain(); + *domain = vm_domain_rr_selectdomain(vi->domain); vi->n--; break; case VM_POLICY_ROUND_ROBIN: default: - *domain = vm_domain_rr_selectdomain(); + *domain = vm_domain_rr_selectdomain(-1); vi->n--; break; } return (0); } /* * Returns 1 if the iteration is done, or 0 if it has not. * This can only be called after at least one loop through * the iterator. Ie, it's designed to be used as a tail * check of a loop, not the head check of a loop. */ int vm_domain_iterator_isdone(struct vm_domain_iterator *vi) { return (vi->n <= 0); } int vm_domain_iterator_cleanup(struct vm_domain_iterator *vi) { return (0); } Index: projects/clang380-import/sys =================================================================== --- projects/clang380-import/sys (revision 293686) +++ projects/clang380-import/sys (revision 293687) Property changes on: projects/clang380-import/sys ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys:r293430-293685 Index: projects/clang380-import/tools/build/options/WITHOUT_LLVM_LIBUNWIND =================================================================== --- projects/clang380-import/tools/build/options/WITHOUT_LLVM_LIBUNWIND (nonexistent) +++ projects/clang380-import/tools/build/options/WITHOUT_LLVM_LIBUNWIND (revision 293687) @@ -0,0 +1,2 @@ +.\" $FreeBSD$ +Set to use GCC's stack unwinder (instead of LLVM's libunwind). Property changes on: projects/clang380-import/tools/build/options/WITHOUT_LLVM_LIBUNWIND ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang380-import/tools/build/options/WITH_LLVM_LIBUNWIND =================================================================== --- projects/clang380-import/tools/build/options/WITH_LLVM_LIBUNWIND (nonexistent) +++ projects/clang380-import/tools/build/options/WITH_LLVM_LIBUNWIND (revision 293687) @@ -0,0 +1,2 @@ +.\" $FreeBSD$ +Set to use LLVM's libunwind stack unwinder (instead of GCC's unwinder). Property changes on: projects/clang380-import/tools/build/options/WITH_LLVM_LIBUNWIND ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang380-import/tools/regression/geom_concat/conf.sh =================================================================== --- projects/clang380-import/tools/regression/geom_concat/conf.sh (revision 293686) +++ projects/clang380-import/tools/regression/geom_concat/conf.sh (revision 293687) @@ -1,8 +1,15 @@ #!/bin/sh # $FreeBSD$ name="$(mktemp -u concat.XXXXXX)" class="concat" base=`basename $0` +gconcat_test_cleanup() +{ + [ -c /dev/$class/$name ] && gconcat destroy $name + geom_test_cleanup +} +trap gconcat_test_cleanup ABRT EXIT INT TERM + . `dirname $0`/../geom_subr.sh Index: projects/clang380-import/tools/regression/geom_concat/test-1.t =================================================================== --- projects/clang380-import/tools/regression/geom_concat/test-1.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_concat/test-1.t (revision 293687) @@ -1,30 +1,23 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo '1..1' -us=45 +us0=$(attach_md -t malloc -s 1M) || exit 1 +us1=$(attach_md -t malloc -s 2M) || exit 1 +us2=$(attach_md -t malloc -s 3M) || exit 1 -mdconfig -a -t malloc -s 1M -u $us || exit 1 -mdconfig -a -t malloc -s 2M -u `expr $us + 1` || exit 1 -mdconfig -a -t malloc -s 3M -u `expr $us + 2` || exit 1 - -gconcat create $name /dev/md${us} /dev/md`expr $us + 1` /dev/md`expr $us + 2` || exit 1 +gconcat create $name /dev/$us0 /dev/$us1 /dev/$us2 || exit 1 devwait # Size of created device should be 1MB + 2MB + 3MB. size=`diskinfo /dev/concat/${name} | awk '{print $3}'` if [ $size -eq 6291456 ]; then echo "ok - Size is 6291456" else echo "not ok - Size is 6291456" fi - -gconcat destroy $name -mdconfig -d -u $us -mdconfig -d -u `expr $us + 1` -mdconfig -d -u `expr $us + 2` Index: projects/clang380-import/tools/regression/geom_concat/test-2.t =================================================================== --- projects/clang380-import/tools/regression/geom_concat/test-2.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_concat/test-2.t (revision 293687) @@ -1,35 +1,30 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo '1..1' -us=45 tsize=6 -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 +us0=$(attach_md -t malloc -s 1M) || exit 1 +us1=$(attach_md -t malloc -s 2M) || exit 1 +us2=$(attach_md -t malloc -s 3M) || exit 1 + dd if=/dev/random of=${src} bs=1m count=$tsize >/dev/null 2>&1 -mdconfig -a -t malloc -s 1M -u $us || exit 1 -mdconfig -a -t malloc -s 2M -u `expr $us + 1` || exit 1 -mdconfig -a -t malloc -s 3M -u `expr $us + 2` || exit 1 - -gconcat create $name /dev/md${us} /dev/md`expr $us + 1` /dev/md`expr $us + 2` || exit 1 +gconcat create $name /dev/$us0 /dev/$us1 /dev/$us2 || exit 1 devwait dd if=${src} of=/dev/concat/${name} bs=1m count=$tsize >/dev/null 2>&1 dd if=/dev/concat/${name} of=${dst} bs=1m count=$tsize >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok - md5 checksum comparison" else echo "ok - md5 checksum comparison" fi -gconcat destroy $name -mdconfig -d -u $us -mdconfig -d -u `expr $us + 1` -mdconfig -d -u `expr $us + 2` rm -f ${src} ${dst} Index: projects/clang380-import/tools/regression/geom_eli/attach-d.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/attach-d.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/attach-d.t (revision 293687) @@ -1,38 +1,38 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..3" dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1 geli init -B none -P -K $keyfile md${no} geli attach -d -p -k $keyfile md${no} if [ -c /dev/md${no}.eli ]; then echo "ok 1" else echo "not ok 1" fi # Be sure it doesn't detach on read. dd if=/dev/md${no}.eli of=/dev/null 2>/dev/null sleep 1 if [ -c /dev/md${no}.eli ]; then echo "ok 2" else echo "not ok 2" fi true > /dev/md${no}.eli sleep 1 if [ ! -c /dev/md${no}.eli ]; then echo "ok 3" else echo "not ok 3" fi -mdconfig -d -u $no rm -f $keyfile Index: projects/clang380-import/tools/regression/geom_eli/conf.sh =================================================================== --- projects/clang380-import/tools/regression/geom_eli/conf.sh (nonexistent) +++ projects/clang380-import/tools/regression/geom_eli/conf.sh (revision 293687) @@ -0,0 +1,21 @@ +#!/bin/sh +# $FreeBSD$ + +class="eli" +base=`basename $0` + +# We need to use linear probing in order to detect the first available md(4) +# device instead of using mdconfig -a -t, because geli(8) attachs md(4) devices +no=0 +while [ -c /dev/md$no ]; do + : $(( no += 1 )) +done + +geli_test_cleanup() +{ + [ -c /dev/md${no}.eli ] && geli detach md${no}.eli + mdconfig -d -u $no +} +trap geli_test_cleanup ABRT EXIT INT TERM + +. `dirname $0`/../geom_subr.sh Property changes on: projects/clang380-import/tools/regression/geom_eli/conf.sh ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang380-import/tools/regression/geom_eli/configure-b-B.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/configure-b-B.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/configure-b-B.t (revision 293687) @@ -1,130 +1,129 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..17" geli init -B none -P -K /dev/null md${no} if [ $? -eq 0 ]; then echo "ok 1" else echo "not ok 1" fi geli dump md${no} | egrep 'flags: 0x0$' >/dev/null if [ $? -eq 0 ]; then echo "ok 2" else echo "not ok 2" fi geli init -B none -b -P -K /dev/null md${no} if [ $? -eq 0 ]; then echo "ok 3" else echo "not ok 3" fi geli dump md${no} | egrep 'flags: 0x2$' >/dev/null if [ $? -eq 0 ]; then echo "ok 4" else echo "not ok 4" fi geli configure -B md${no} if [ $? -eq 0 ]; then echo "ok 5" else echo "not ok 5" fi geli dump md${no} | egrep 'flags: 0x0$' >/dev/null if [ $? -eq 0 ]; then echo "ok 6" else echo "not ok 6" fi geli configure -b md${no} if [ $? -eq 0 ]; then echo "ok 7" else echo "not ok 7" fi geli dump md${no} | egrep 'flags: 0x2$' >/dev/null if [ $? -eq 0 ]; then echo "ok 8" else echo "not ok 8" fi geli attach -p -k /dev/null md${no} if [ $? -eq 0 ]; then echo "ok 9" else echo "not ok 9" fi geli list md${no}.eli | egrep '^Flags: .*BOOT' >/dev/null if [ $? -eq 0 ]; then echo "ok 10" else echo "not ok 10" fi geli configure -B md${no} if [ $? -eq 0 ]; then echo "ok 11" else echo "not ok 11" fi geli list md${no}.eli | egrep '^Flags: .*BOOT' >/dev/null if [ $? -ne 0 ]; then echo "ok 12" else echo "not ok 12" fi geli dump md${no} | egrep 'flags: 0x0$' >/dev/null if [ $? -eq 0 ]; then echo "ok 13" else echo "not ok 13" fi geli configure -b md${no} if [ $? -eq 0 ]; then echo "ok 14" else echo "not ok 14" fi geli list md${no}.eli | egrep '^Flags: .*BOOT' >/dev/null if [ $? -eq 0 ]; then echo "ok 15" else echo "not ok 15" fi geli dump md${no} | egrep 'flags: 0x2$' >/dev/null if [ $? -eq 0 ]; then echo "ok 16" else echo "not ok 16" fi geli detach md${no} if [ $? -eq 0 ]; then echo "ok 17" else echo "not ok 17" fi - -mdconfig -d -u $no Index: projects/clang380-import/tools/regression/geom_eli/delkey.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/delkey.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/delkey.t (revision 293687) @@ -1,140 +1,140 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile1=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile2=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile3=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile4=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile1=`mktemp $base.XXXXXX` || exit 1 +keyfile2=`mktemp $base.XXXXXX` || exit 1 +keyfile3=`mktemp $base.XXXXXX` || exit 1 +keyfile4=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..14" dd if=/dev/random of=${keyfile1} bs=512 count=16 >/dev/null 2>&1 dd if=/dev/random of=${keyfile2} bs=512 count=16 >/dev/null 2>&1 dd if=/dev/random of=${keyfile3} bs=512 count=16 >/dev/null 2>&1 dd if=/dev/random of=${keyfile4} bs=512 count=16 >/dev/null 2>&1 geli init -B none -P -K $keyfile1 md${no} geli attach -p -k $keyfile1 md${no} geli setkey -n 1 -P -K $keyfile2 md${no} # Remove key 0 for attached provider. geli delkey -n 0 md${no} if [ $? -eq 0 ]; then echo "ok 1" else echo "not ok 1" fi geli detach md${no} # We cannot use keyfile1 anymore. geli attach -p -k $keyfile1 md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 2" else echo "not ok 2" fi # Attach with key 1. geli attach -p -k $keyfile2 md${no} if [ $? -eq 0 ]; then echo "ok 3" else echo "not ok 3" fi # We cannot remove last key without -f option (for attached provider). geli delkey -n 1 md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 4" else echo "not ok 4" fi # Remove last key for attached provider. geli delkey -f -n 1 md${no} if [ $? -eq 0 ]; then echo "ok 5" else echo "not ok 5" fi # If there are no valid keys, but provider is attached, we can save situation. geli setkey -n 0 -P -K $keyfile3 md${no} if [ $? -eq 0 ]; then echo "ok 6" else echo "not ok 6" fi geli detach md${no} # We cannot use keyfile2 anymore. geli attach -p -k $keyfile2 md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 7" else echo "not ok 7" fi # Attach with key 0. geli attach -p -k $keyfile3 md${no} if [ $? -eq 0 ]; then echo "ok 8" else echo "not ok 8" fi # Setup key 1. geli setkey -n 1 -P -K $keyfile4 md${no} if [ $? -eq 0 ]; then echo "ok 9" else echo "not ok 9" fi geli detach md${no} # Remove key 1 for detached provider. geli delkey -n 1 md${no} if [ $? -eq 0 ]; then echo "ok 10" else echo "not ok 10" fi # We cannot use keyfile4 anymore. geli attach -p -k $keyfile4 md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 11" else echo "not ok 11" fi # We cannot remove last key without -f option (for detached provider). geli delkey -n 0 md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 12" else echo "not ok 12" fi # Remove last key for detached provider. geli delkey -f -n 0 md${no} if [ $? -eq 0 ]; then echo "ok 13" else echo "not ok 13" fi # We cannot use keyfile3 anymore. geli attach -p -k $keyfile3 md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 14" else echo "not ok 14" fi -mdconfig -d -u $no rm -f $keyfile1 $keyfile2 $keyfile3 $keyfile4 Index: projects/clang380-import/tools/regression/geom_eli/detach-l.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/detach-l.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/detach-l.t (revision 293687) @@ -1,44 +1,44 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..4" dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1 geli init -B none -P -K $keyfile md${no} geli attach -p -k $keyfile md${no} if [ -c /dev/md${no}.eli ]; then echo "ok 1" else echo "not ok 1" fi # Be sure it doesn't detach before 'detach -l'. dd if=/dev/md${no}.eli of=/dev/null 2>/dev/null sleep 1 if [ -c /dev/md${no}.eli ]; then echo "ok 2" else echo "not ok 2" fi geli detach -l md${no} if [ -c /dev/md${no}.eli ]; then echo "ok 3" else echo "not ok 3" fi dd if=/dev/md${no}.eli of=/dev/null 2>/dev/null sleep 1 if [ ! -c /dev/md${no}.eli ]; then echo "ok 4" else echo "not ok 4" fi -mdconfig -d -u $no rm -f $keyfile Index: projects/clang380-import/tools/regression/geom_eli/init-B.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/init-B.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/init-B.t (revision 293687) @@ -1,106 +1,104 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 -backupfile=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 +backupfile=`mktemp $base.XXXXXX` || exit 1 echo "1..13" dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1 mdconfig -a -t malloc -s $sectors -u $no || exit 1 # -B none rm -f /var/backups/md${no}.eli geli init -B none -P -K $keyfile md${no} 2>/dev/null if [ ! -f /var/backups/md${no}.eli ]; then echo "ok 1 - -B none" else echo "not ok 1 - -B none" fi # no -B rm -f /var/backups/md${no}.eli geli init -P -K $keyfile md${no} >/dev/null 2>&1 if [ -f /var/backups/md${no}.eli ]; then echo "ok 2 - no -B" else echo "not ok 2 - no -B" fi geli clear md${no} geli attach -p -k $keyfile md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 3 - no -B" else echo "not ok 3 - no -B" fi if [ ! -c /dev/md${no}.eli ]; then echo "ok 4 - no -B" else echo "not ok 4 - no -B" fi geli restore /var/backups/md${no}.eli md${no} if [ $? -eq 0 ]; then echo "ok 5 - no -B" else echo "not ok 5 - no -B" fi geli attach -p -k $keyfile md${no} 2>/dev/null if [ $? -eq 0 ]; then echo "ok 6 - no -B" else echo "not ok 6 - no -B" fi if [ -c /dev/md${no}.eli ]; then echo "ok 7 - no -B" else echo "not ok 7 - no -B" fi geli detach md${no} rm -f /var/backups/md${no}.eli # -B file rm -f $backupfile geli init -B $backupfile -P -K $keyfile md${no} >/dev/null 2>&1 if [ -f $backupfile ]; then echo "ok 8 - -B file" else echo "not ok 8 - -B file" fi geli clear md${no} geli attach -p -k $keyfile md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 9 - -B file" else echo "not ok 9 - -B file" fi if [ ! -c /dev/md${no}.eli ]; then echo "ok 10 - -B file" else echo "not ok 10 - -B file" fi geli restore $backupfile md${no} if [ $? -eq 0 ]; then echo "ok 11 - -B file" else echo "not ok 11 - -B file" fi geli attach -p -k $keyfile md${no} 2>/dev/null if [ $? -eq 0 ]; then echo "ok 12 - -B file" else echo "not ok 12 - -B file" fi if [ -c /dev/md${no}.eli ]; then echo "ok 13 - -B file" else echo "not ok 13 - -B file" fi -geli detach md${no} -rm -f $backupfile -mdconfig -d -u $no -rm -f $keyfile +rm -f $backupfile $keyfile Index: projects/clang380-import/tools/regression/geom_eli/init-J.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/init-J.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/init-J.t (revision 293687) @@ -1,126 +1,126 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile0=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile1=`mktemp /tmp/$base.XXXXXX` || exit 1 -passfile0=`mktemp /tmp/$base.XXXXXX` || exit 1 -passfile1=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile0=`mktemp $base.XXXXXX` || exit 1 +keyfile1=`mktemp $base.XXXXXX` || exit 1 +passfile0=`mktemp $base.XXXXXX` || exit 1 +passfile1=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..150" dd if=/dev/random of=${keyfile0} bs=512 count=16 >/dev/null 2>&1 dd if=/dev/random of=${keyfile1} bs=512 count=16 >/dev/null 2>&1 dd if=/dev/random bs=512 count=16 2>/dev/null | sha1 > ${passfile0} dd if=/dev/random bs=512 count=16 2>/dev/null | sha1 > ${passfile1} i=1 for iter in -1 0 64; do geli init -i ${iter} -B none -J ${passfile0} -P md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli init -i ${iter} -B none -J ${passfile0} -P -K ${keyfile0} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli init -i ${iter} -B none -J ${passfile0} -K ${keyfile0} md${no} 2>/dev/null || echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -k ${keyfile0} -p md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -j ${passfile0} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -j ${keyfile0} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -k ${passfile0} -p md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -j ${keyfile0} -k ${passfile0} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -j ${keyfile0} -k ${keyfile0} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -j ${passfile0} -k ${passfile0} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -j ${passfile0} -k ${keyfile0} md${no} 2>/dev/null || echo -n "not " echo "ok ${i}"; i=$((i+1)) geli detach md${no} || echo -n "not " echo "ok ${i}"; i=$((i+1)) cat ${keyfile0} | geli attach -j ${passfile0} -k - md${no} 2>/dev/null || echo -n "not " echo "ok ${i}"; i=$((i+1)) geli detach md${no} || echo -n "not " echo "ok ${i}"; i=$((i+1)) cat ${passfile0} | geli attach -j - -k ${keyfile0} md${no} 2>/dev/null || echo -n "not " echo "ok ${i}"; i=$((i+1)) geli detach md${no} || echo -n "not " echo "ok ${i}"; i=$((i+1)) geli init -i ${iter} -B none -J ${passfile0} -J ${passfile1} -P md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli init -i ${iter} -B none -J ${passfile0} -J ${passfile1} -P -K ${keyfile0} -K ${keyfile1} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli init -i ${iter} -B none -J ${passfile0} -J ${passfile1} -K ${keyfile0} -K ${keyfile1} md${no} 2>/dev/null || echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -k ${keyfile0} -p md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -k ${keyfile1} -p md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -j ${passfile0} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -j ${passfile1} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -k ${keyfile0} -k ${keyfile1} -p md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -j ${passfile0} -j ${passfile1} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -k ${keyfile0} -j ${passfile0} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -k ${keyfile0} -j ${passfile1} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -k ${keyfile1} -j ${passfile0} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -k ${keyfile1} -j ${passfile1} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -k ${keyfile0} -j ${passfile0} -j ${passfile1} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -k ${keyfile1} -j ${passfile0} -j ${passfile1} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -k ${keyfile0} -k ${keyfile1} -j ${passfile0} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -k ${keyfile0} -k ${keyfile1} -j ${passfile1} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -k ${keyfile1} -k ${keyfile0} -j ${passfile0} -j ${passfile1} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -k ${keyfile0} -k ${keyfile1} -j ${passfile1} -j ${passfile0} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -k ${keyfile1} -k ${keyfile0} -j ${passfile1} -j ${passfile0} md${no} 2>/dev/null && echo -n "not " echo "ok ${i}"; i=$((i+1)) geli attach -j ${passfile0} -j ${passfile1} -k ${keyfile0} -k ${keyfile1} md${no} 2>/dev/null || echo -n "not " echo "ok ${i}"; i=$((i+1)) geli detach md${no} || echo -n "not " echo "ok ${i}"; i=$((i+1)) cat ${passfile0} | geli attach -j - -j ${passfile1} -k ${keyfile0} -k ${keyfile1} md${no} 2>/dev/null || echo -n "not " echo "ok ${i}"; i=$((i+1)) geli detach md${no} || echo -n "not " echo "ok ${i}"; i=$((i+1)) cat ${passfile1} | geli attach -j ${passfile0} -j - -k ${keyfile0} -k ${keyfile1} md${no} 2>/dev/null || echo -n "not " echo "ok ${i}"; i=$((i+1)) geli detach md${no} || echo -n "not " echo "ok ${i}"; i=$((i+1)) cat ${keyfile0} | geli attach -j ${passfile0} -j ${passfile1} -k - -k ${keyfile1} md${no} 2>/dev/null || echo -n "not " echo "ok ${i}"; i=$((i+1)) geli detach md${no} || echo -n "not " echo "ok ${i}"; i=$((i+1)) cat ${keyfile1} | geli attach -j ${passfile0} -j ${passfile1} -k ${keyfile0} -k - md${no} 2>/dev/null || echo -n "not " echo "ok ${i}"; i=$((i+1)) geli detach md${no} || echo -n "not " echo "ok ${i}"; i=$((i+1)) cat ${keyfile0} ${keyfile1} | geli attach -j ${passfile0} -j ${passfile1} -k - md${no} 2>/dev/null || echo -n "not " echo "ok ${i}"; i=$((i+1)) geli detach md${no} || echo -n "not " echo "ok ${i}"; i=$((i+1)) cat ${passfile0} ${passfile1} | awk '{printf "%s", $0}' | geli attach -j - -k ${keyfile0} -k ${keyfile1} md${no} 2>/dev/null || echo -n "not " echo "ok ${i}"; i=$((i+1)) geli detach md${no} || echo -n "not " echo "ok ${i}"; i=$((i+1)) done -mdconfig -d -u $no rm -f ${keyfile0} ${keyfile1} ${passfile0} ${passfile1} Index: projects/clang380-import/tools/regression/geom_eli/init-a.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/init-a.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/init-a.t (revision 293687) @@ -1,59 +1,60 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 echo "1..1380" i=1 for cipher in aes:0 aes:128 aes:256 \ aes-xts:0 aes-xts:128 aes-xts:256 \ aes-cbc:0 aes-cbc:128 aes-cbc:192 aes-cbc:256 \ 3des:0 3des:192 \ 3des-cbc:0 3des-cbc:192 \ blowfish:0 blowfish:128 blowfish:160 blowfish:192 blowfish:224 \ blowfish:256 blowfish:288 blowfish:320 blowfish:352 blowfish:384 \ blowfish:416 blowfish:448 \ blowfish-cbc:0 blowfish-cbc:128 blowfish-cbc:160 blowfish-cbc:192 blowfish-cbc:224 \ blowfish-cbc:256 blowfish-cbc:288 blowfish-cbc:320 blowfish-cbc:352 blowfish-cbc:384 \ blowfish-cbc:416 blowfish-cbc:448 \ camellia:0 camellia:128 camellia:192 camellia:256 \ camellia-cbc:0 camellia-cbc:128 camellia-cbc:192 camellia-cbc:256; do ealgo=${cipher%%:*} keylen=${cipher##*:} for aalgo in hmac/md5 hmac/sha1 hmac/ripemd160 hmac/sha256 hmac/sha384 hmac/sha512; do for secsize in 512 1024 2048 4096 8192; do - rnd=`mktemp /tmp/$base.XXXXXX` || exit 1 + rnd=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $secsize \* $sectors + 512`b -u $no || exit 1 dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1 geli init -B none -a $aalgo -e $ealgo -l $keylen -P -K $keyfile -s $secsize md${no} 2>/dev/null geli attach -p -k $keyfile md${no} secs=`diskinfo /dev/md${no}.eli | awk '{print $4}'` dd if=/dev/random of=${rnd} bs=${secsize} count=${secs} >/dev/null 2>&1 dd if=${rnd} of=/dev/md${no}.eli bs=${secsize} count=${secs} 2>/dev/null md_rnd=`dd if=${rnd} bs=${secsize} count=${secs} 2>/dev/null | md5` md_ddev=`dd if=/dev/md${no}.eli bs=${secsize} count=${secs} 2>/dev/null | md5` if [ ${md_rnd} = ${md_ddev} ]; then echo "ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" else echo "not ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" fi i=$((i+1)) geli detach md${no} rm -f $rnd mdconfig -d -u $no done done done rm -f $keyfile Index: projects/clang380-import/tools/regression/geom_eli/init-i-P.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/init-i-P.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/init-i-P.t (revision 293687) @@ -1,22 +1,22 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..1" dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1 geli init -B none -i 64 -P -K ${keyfile} md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 1" else echo "not ok 1" fi -mdconfig -d -u $no rm -f $keyfile Index: projects/clang380-import/tools/regression/geom_eli/init.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/init.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/init.t (revision 293687) @@ -1,64 +1,65 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 echo "1..460" i=1 for cipher in aes:0 aes:128 aes:256 \ aes-xts:0 aes-xts:128 aes-xts:256 \ aes-cbc:0 aes-cbc:128 aes-cbc:192 aes-cbc:256 \ 3des:0 3des:192 \ 3des-cbc:0 3des-cbc:192 \ blowfish:0 blowfish:128 blowfish:160 blowfish:192 blowfish:224 \ blowfish:256 blowfish:288 blowfish:320 blowfish:352 blowfish:384 \ blowfish:416 blowfish:448 \ blowfish-cbc:0 blowfish-cbc:128 blowfish-cbc:160 blowfish-cbc:192 blowfish-cbc:224 \ blowfish-cbc:256 blowfish-cbc:288 blowfish-cbc:320 blowfish-cbc:352 blowfish-cbc:384 \ blowfish-cbc:416 blowfish-cbc:448 \ camellia:0 camellia:128 camellia:192 camellia:256 \ camellia-cbc:0 camellia-cbc:128 camellia-cbc:192 camellia-cbc:256; do ealgo=${cipher%%:*} keylen=${cipher##*:} for secsize in 512 1024 2048 4096 8192; do - rnd=`mktemp /tmp/$base.XXXXXX` || exit 1 + rnd=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $secsize \* $sectors + 512`b -u $no || exit 1 dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1 geli init -B none -e $ealgo -l $keylen -P -K $keyfile -s $secsize md${no} 2>/dev/null geli attach -p -k $keyfile md${no} secs=`diskinfo /dev/md${no}.eli | awk '{print $4}'` dd if=/dev/random of=${rnd} bs=${secsize} count=${secs} >/dev/null 2>&1 dd if=${rnd} of=/dev/md${no}.eli bs=${secsize} count=${secs} 2>/dev/null md_rnd=`dd if=${rnd} bs=${secsize} count=${secs} 2>/dev/null | md5` md_ddev=`dd if=/dev/md${no}.eli bs=${secsize} count=${secs} 2>/dev/null | md5` md_edev=`dd if=/dev/md${no} bs=${secsize} count=${secs} 2>/dev/null | md5` if [ ${md_rnd} = ${md_ddev} ]; then echo "ok $i - ealgo=${ealgo} keylen=${keylen} sec=${secsize}" else echo "not ok $i - ealgo=${ealgo} keylen=${keylen} sec=${secsize}" fi i=$((i+1)) if [ ${md_rnd} != ${md_edev} ]; then echo "ok $i - ealgo=${ealgo} keylen=${keylen} sec=${secsize}" else echo "not ok $i - ealgo=${ealgo} keylen=${keylen} sec=${secsize}" fi i=$((i+1)) geli detach md${no} rm -f $rnd mdconfig -d -u $no done done rm -f $keyfile Index: projects/clang380-import/tools/regression/geom_eli/integrity-copy.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/integrity-copy.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/integrity-copy.t (revision 293687) @@ -1,98 +1,99 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 -sector=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 +sector=`mktemp $base.XXXXXX` || exit 1 echo "1..5520" i=1 for cipher in aes:0 aes:128 aes:256 \ aes-xts:0 aes-xts:128 aes-xts:256 \ aes-cbc:0 aes-cbc:128 aes-cbc:192 aes-cbc:256 \ 3des:0 3des:192 \ 3des-cbc:0 3des-cbc:192 \ blowfish:0 blowfish:128 blowfish:160 blowfish:192 blowfish:224 \ blowfish:256 blowfish:288 blowfish:320 blowfish:352 blowfish:384 \ blowfish:416 blowfish:448 \ blowfish-cbc:0 blowfish-cbc:128 blowfish-cbc:160 blowfish-cbc:192 blowfish-cbc:224 \ blowfish-cbc:256 blowfish-cbc:288 blowfish-cbc:320 blowfish-cbc:352 blowfish-cbc:384 \ blowfish-cbc:416 blowfish-cbc:448 \ camellia:0 camellia:128 camellia:192 camellia:256 \ camellia-cbc:0 camellia-cbc:128 camellia-cbc:192 camellia-cbc:256; do ealgo=${cipher%%:*} keylen=${cipher##*:} for aalgo in hmac/md5 hmac/sha1 hmac/ripemd160 hmac/sha256 hmac/sha384 hmac/sha512; do for secsize in 512 1024 2048 4096 8192; do #mdconfig -a -t malloc -s `expr $secsize \* 2 + 512`b -u $no || exit 1 mdconfig -a -t malloc -s $sectors -u $no || exit 1 dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1 geli init -B none -a $aalgo -e $ealgo -l $keylen -P -K $keyfile -s $secsize md${no} 2>/dev/null geli attach -p -k $keyfile md${no} dd if=/dev/random of=/dev/md${no}.eli bs=${secsize} count=1 >/dev/null 2>&1 dd if=/dev/md${no}.eli bs=${secsize} count=1 >/dev/null 2>&1 if [ $? -eq 0 ]; then echo "ok $i - small 1 aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" else echo "not ok $i - small 1 aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" fi i=$((i+1)) geli detach md${no} # Copy first small sector to the second small sector. # This should be detected as corruption. dd if=/dev/md${no} of=${sector} bs=512 count=1 >/dev/null 2>&1 dd if=${sector} of=/dev/md${no} bs=512 count=1 seek=1 >/dev/null 2>&1 geli attach -p -k $keyfile md${no} dd if=/dev/md${no}.eli of=/dev/null bs=${secsize} count=1 >/dev/null 2>&1 if [ $? -ne 0 ]; then echo "ok $i - small 2 aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" else echo "not ok $i - small 2 aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" fi i=$((i+1)) ms=`diskinfo /dev/md${no} | awk '{print $3 - 512}'` ns=`diskinfo /dev/md${no}.eli | awk '{print $4}'` usecsize=`echo "($ms / $ns) - (($ms / $ns) % 512)" | bc` dd if=/dev/random of=/dev/md${no}.eli bs=${secsize} count=2 >/dev/null 2>&1 dd if=/dev/md${no}.eli bs=${secsize} count=2 >/dev/null 2>&1 if [ $? -eq 0 ]; then echo "ok $i - big 1 aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" else echo "not ok $i - big 1 aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" fi i=$((i+1)) geli detach md${no} # Copy first big sector to the second big sector. # This should be detected as corruption. dd if=/dev/md${no} of=${sector} bs=${usecsize} count=1 >/dev/null 2>&1 dd if=${sector} of=/dev/md${no} bs=${usecsize} count=1 seek=1 >/dev/null 2>&1 geli attach -p -k $keyfile md${no} dd if=/dev/md${no}.eli of=/dev/null bs=${secsize} count=2 >/dev/null 2>&1 if [ $? -ne 0 ]; then echo "ok $i - big 2 aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" else echo "not ok $i - big 2 aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" fi i=$((i+1)) geli detach md${no} mdconfig -d -u $no done done done rm -f $keyfile $sector Index: projects/clang380-import/tools/regression/geom_eli/integrity-data.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/integrity-data.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/integrity-data.t (revision 293687) @@ -1,68 +1,69 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 -sector=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 +sector=`mktemp $base.XXXXXX` || exit 1 echo "1..2760" i=1 for cipher in aes:0 aes:128 aes:256 \ aes-xts:0 aes-xts:128 aes-xts:256 \ aes-cbc:0 aes-cbc:128 aes-cbc:192 aes-cbc:256 \ 3des:0 3des:192 \ 3des-cbc:0 3des-cbc:192 \ blowfish:0 blowfish:128 blowfish:160 blowfish:192 blowfish:224 \ blowfish:256 blowfish:288 blowfish:320 blowfish:352 blowfish:384 \ blowfish:416 blowfish:448 \ blowfish-cbc:0 blowfish-cbc:128 blowfish-cbc:160 blowfish-cbc:192 blowfish-cbc:224 \ blowfish-cbc:256 blowfish-cbc:288 blowfish-cbc:320 blowfish-cbc:352 blowfish-cbc:384 \ blowfish-cbc:416 blowfish-cbc:448 \ camellia:0 camellia:128 camellia:192 camellia:256 \ camellia-cbc:0 camellia-cbc:128 camellia-cbc:192 camellia-cbc:256; do ealgo=${cipher%%:*} keylen=${cipher##*:} for aalgo in hmac/md5 hmac/sha1 hmac/ripemd160 hmac/sha256 hmac/sha384 hmac/sha512; do for secsize in 512 1024 2048 4096 8192; do mdconfig -a -t malloc -s `expr $secsize \* 2 + 512`b -u $no || exit 1 dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1 geli init -B none -a $aalgo -e $ealgo -l $keylen -P -K $keyfile -s $secsize md${no} 2>/dev/null geli attach -p -k $keyfile md${no} dd if=/dev/random of=/dev/md${no}.eli bs=${secsize} count=1 >/dev/null 2>&1 dd if=/dev/md${no}.eli bs=${secsize} count=1 >/dev/null 2>&1 if [ $? -eq 0 ]; then echo "ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" else echo "not ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" fi i=$((i+1)) geli detach md${no} # Corrupt 8 bytes of data. dd if=/dev/md${no} of=${sector} bs=512 count=1 >/dev/null 2>&1 dd if=/dev/random of=${sector} bs=1 count=8 seek=64 conv=notrunc >/dev/null 2>&1 dd if=${sector} of=/dev/md${no} bs=512 count=1 >/dev/null 2>&1 geli attach -p -k $keyfile md${no} dd if=/dev/md${no}.eli of=/dev/null bs=${secsize} count=1 >/dev/null 2>&1 if [ $? -ne 0 ]; then echo "ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" else echo "not ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" fi i=$((i+1)) geli detach md${no} mdconfig -d -u $no done done done rm -f $keyfile $sector Index: projects/clang380-import/tools/regression/geom_eli/integrity-hmac.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/integrity-hmac.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/integrity-hmac.t (revision 293687) @@ -1,68 +1,69 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 -sector=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 +sector=`mktemp $base.XXXXXX` || exit 1 echo "1..2760" i=1 for cipher in aes:0 aes:128 aes:256 \ aes-xts:0 aes-xts:128 aes-xts:256 \ aes-cbc:0 aes-cbc:128 aes-cbc:192 aes-cbc:256 \ 3des:0 3des:192 \ 3des-cbc:0 3des-cbc:192 \ blowfish:0 blowfish:128 blowfish:160 blowfish:192 blowfish:224 \ blowfish:256 blowfish:288 blowfish:320 blowfish:352 blowfish:384 \ blowfish:416 blowfish:448 \ blowfish-cbc:0 blowfish-cbc:128 blowfish-cbc:160 blowfish-cbc:192 blowfish-cbc:224 \ blowfish-cbc:256 blowfish-cbc:288 blowfish-cbc:320 blowfish-cbc:352 blowfish-cbc:384 \ blowfish-cbc:416 blowfish-cbc:448 \ camellia:0 camellia:128 camellia:192 camellia:256 \ camellia-cbc:0 camellia-cbc:128 camellia-cbc:192 camellia-cbc:256; do ealgo=${cipher%%:*} keylen=${cipher##*:} for aalgo in hmac/md5 hmac/sha1 hmac/ripemd160 hmac/sha256 hmac/sha384 hmac/sha512; do for secsize in 512 1024 2048 4096 8192; do mdconfig -a -t malloc -s `expr $secsize \* 2 + 512`b -u $no || exit 1 dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1 geli init -B none -a $aalgo -e $ealgo -l $keylen -P -K $keyfile -s $secsize md${no} 2>/dev/null geli attach -p -k $keyfile md${no} dd if=/dev/random of=/dev/md${no}.eli bs=${secsize} count=1 >/dev/null 2>&1 dd if=/dev/md${no}.eli bs=${secsize} count=1 >/dev/null 2>&1 if [ $? -eq 0 ]; then echo "ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" else echo "not ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" fi i=$((i+1)) geli detach md${no} # Corrupt 8 bytes of HMAC. dd if=/dev/md${no} of=${sector} bs=512 count=1 >/dev/null 2>&1 dd if=/dev/random of=${sector} bs=1 count=16 conv=notrunc >/dev/null 2>&1 dd if=${sector} of=/dev/md${no} bs=512 count=1 >/dev/null 2>&1 geli attach -p -k $keyfile md${no} dd if=/dev/md${no}.eli bs=${secsize} count=1 >/dev/null 2>&1 if [ $? -ne 0 ]; then echo "ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" else echo "not ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" fi i=$((i+1)) geli detach md${no} mdconfig -d -u $no done done done rm -f $keyfile $sector Index: projects/clang380-import/tools/regression/geom_eli/kill.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/kill.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/kill.t (revision 293687) @@ -1,97 +1,97 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile1=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile2=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile1=`mktemp $base.XXXXXX` || exit 1 +keyfile2=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..9" dd if=/dev/random of=${keyfile1} bs=512 count=16 >/dev/null 2>&1 dd if=/dev/random of=${keyfile2} bs=512 count=16 >/dev/null 2>&1 geli init -B none -P -K $keyfile1 md${no} geli attach -p -k $keyfile1 md${no} geli setkey -n 1 -P -K $keyfile2 md${no} # Kill attached provider. geli kill md${no} if [ $? -eq 0 ]; then echo "ok 1" else echo "not ok 1" fi sleep 1 # Provider should be automatically detached. if [ ! -c /dev/md{$no}.eli ]; then echo "ok 2" else echo "not ok 2" fi # We cannot use keyfile1 anymore. geli attach -p -k $keyfile1 md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 3" else echo "not ok 3" fi # We cannot use keyfile2 anymore. geli attach -p -k $keyfile2 md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 4" else echo "not ok 4" fi geli init -B none -P -K $keyfile1 md${no} geli setkey -n 1 -p -k $keyfile1 -P -K $keyfile2 md${no} # Should be possible to attach with keyfile1. geli attach -p -k $keyfile1 md${no} if [ $? -eq 0 ]; then echo "ok 5" else echo "not ok 5" fi geli detach md${no} # Should be possible to attach with keyfile2. geli attach -p -k $keyfile2 md${no} if [ $? -eq 0 ]; then echo "ok 6" else echo "not ok 6" fi geli detach md${no} # Kill detached provider. geli kill md${no} if [ $? -eq 0 ]; then echo "ok 7" else echo "not ok 7" fi # We cannot use keyfile1 anymore. geli attach -p -k $keyfile1 md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 8" else echo "not ok 8" fi # We cannot use keyfile2 anymore. geli attach -p -k $keyfile2 md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 9" else echo "not ok 9" fi -mdconfig -d -u $no rm -f $keyfile1 $keyfile2 Index: projects/clang380-import/tools/regression/geom_eli/nokey.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/nokey.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/nokey.t (revision 293687) @@ -1,65 +1,65 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..8" geli init -B none -P md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 1" else echo "not ok 1" fi dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1 geli init -B none -P -K ${keyfile} md${no} 2>/dev/null if [ $? -eq 0 ]; then echo "ok 2" else echo "not ok 2" fi geli attach -p md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 3" else echo "not ok 3" fi geli attach -p -k ${keyfile} md${no} 2>/dev/null if [ $? -eq 0 ]; then echo "ok 4" else echo "not ok 4" fi geli setkey -n 0 -P md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 5" else echo "not ok 5" fi geli detach md${no} 2>/dev/null if [ $? -eq 0 ]; then echo "ok 6" else echo "not ok 6" fi geli setkey -n 0 -p -P -K ${keyfile} md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 7" else echo "not ok 7" fi geli setkey -n 0 -p -k ${keyfile} -P md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 8" else echo "not ok 8" fi -mdconfig -d -u $no rm -f $keyfile Index: projects/clang380-import/tools/regression/geom_eli/onetime-a.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/onetime-a.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/onetime-a.t (revision 293687) @@ -1,53 +1,54 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 echo "1..1380" i=1 for cipher in aes:0 aes:128 aes:256 \ aes-xts:0 aes-xts:128 aes-xts:256 \ aes-cbc:0 aes-cbc:128 aes-cbc:192 aes-cbc:256 \ 3des:0 3des:192 \ 3des-cbc:0 3des-cbc:192 \ blowfish:0 blowfish:128 blowfish:160 blowfish:192 blowfish:224 \ blowfish:256 blowfish:288 blowfish:320 blowfish:352 blowfish:384 \ blowfish:416 blowfish:448 \ blowfish-cbc:0 blowfish-cbc:128 blowfish-cbc:160 blowfish-cbc:192 blowfish-cbc:224 \ blowfish-cbc:256 blowfish-cbc:288 blowfish-cbc:320 blowfish-cbc:352 blowfish-cbc:384 \ blowfish-cbc:416 blowfish-cbc:448 \ camellia:0 camellia:128 camellia:192 camellia:256 \ camellia-cbc:0 camellia-cbc:128 camellia-cbc:192 camellia-cbc:256; do ealgo=${cipher%%:*} keylen=${cipher##*:} for aalgo in hmac/md5 hmac/sha1 hmac/ripemd160 hmac/sha256 hmac/sha384 hmac/sha512; do for secsize in 512 1024 2048 4096 8192; do - rnd=`mktemp /tmp/$base.XXXXXX` || exit 1 + rnd=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $secsize \* $sectors + 512`b -u $no || exit 1 geli onetime -a $aalgo -e $ealgo -l $keylen -s $secsize md${no} 2>/dev/null secs=`diskinfo /dev/md${no}.eli | awk '{print $4}'` dd if=/dev/random of=${rnd} bs=${secsize} count=${secs} >/dev/null 2>&1 dd if=${rnd} of=/dev/md${no}.eli bs=${secsize} count=${secs} 2>/dev/null md_rnd=`dd if=${rnd} bs=${secsize} count=${secs} 2>/dev/null | md5` md_ddev=`dd if=/dev/md${no}.eli bs=${secsize} count=${secs} 2>/dev/null | md5` if [ ${md_rnd} = ${md_ddev} ]; then echo "ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" else echo "not ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}" fi i=$((i+1)) geli detach md${no} rm -f $rnd mdconfig -d -u $no done done done Index: projects/clang380-import/tools/regression/geom_eli/onetime-d.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/onetime-d.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/onetime-d.t (revision 293687) @@ -1,33 +1,34 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 mdconfig -a -t malloc -s $sectors -u $no || exit 1 echo "1..3" geli onetime -d md${no} if [ -c /dev/md${no}.eli ]; then echo "ok 1" else echo "not ok 1" fi # Be sure it doesn't detach on read. dd if=/dev/md${no}.eli of=/dev/null 2>/dev/null sleep 1 if [ -c /dev/md${no}.eli ]; then echo "ok 2" else echo "not ok 2" fi true > /dev/md${no}.eli sleep 1 if [ ! -c /dev/md${no}.eli ]; then echo "ok 3" else echo "not ok 3" fi mdconfig -d -u $no Index: projects/clang380-import/tools/regression/geom_eli/onetime.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/onetime.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/onetime.t (revision 293687) @@ -1,58 +1,59 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 echo "1..460" i=1 for cipher in aes:0 aes:128 aes:256 \ aes-xts:0 aes-xts:128 aes-xts:256 \ aes-cbc:0 aes-cbc:128 aes-cbc:192 aes-cbc:256 \ 3des:0 3des:192 \ 3des-cbc:0 3des-cbc:192 \ blowfish:0 blowfish:128 blowfish:160 blowfish:192 blowfish:224 \ blowfish:256 blowfish:288 blowfish:320 blowfish:352 blowfish:384 \ blowfish:416 blowfish:448 \ blowfish-cbc:0 blowfish-cbc:128 blowfish-cbc:160 blowfish-cbc:192 blowfish-cbc:224 \ blowfish-cbc:256 blowfish-cbc:288 blowfish-cbc:320 blowfish-cbc:352 blowfish-cbc:384 \ blowfish-cbc:416 blowfish-cbc:448 \ camellia:0 camellia:128 camellia:192 camellia:256 \ camellia-cbc:0 camellia-cbc:128 camellia-cbc:192 camellia-cbc:256; do ealgo=${cipher%%:*} keylen=${cipher##*:} for secsize in 512 1024 2048 4096 8192; do - rnd=`mktemp /tmp/$base.XXXXXX` || exit 1 + rnd=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $secsize \* $sectors`b -u $no || exit 1 geli onetime -e $ealgo -l $keylen -s $secsize md${no} 2>/dev/null secs=`diskinfo /dev/md${no}.eli | awk '{print $4}'` dd if=/dev/random of=${rnd} bs=${secsize} count=${secs} >/dev/null 2>&1 dd if=${rnd} of=/dev/md${no}.eli bs=${secsize} count=${secs} 2>/dev/null md_rnd=`dd if=${rnd} bs=${secsize} count=${secs} 2>/dev/null | md5` md_ddev=`dd if=/dev/md${no}.eli bs=${secsize} count=${secs} 2>/dev/null | md5` md_edev=`dd if=/dev/md${no} bs=${secsize} count=${secs} 2>/dev/null | md5` if [ ${md_rnd} = ${md_ddev} ]; then echo "ok $i - ealgo=${ealgo} keylen=${keylen} sec=${secsize}" else echo "not ok $i - ealgo=${ealgo} keylen=${keylen} sec=${secsize}" fi i=$((i+1)) if [ ${md_rnd} != ${md_edev} ]; then echo "ok $i - ealgo=${ealgo} keylen=${keylen} sec=${secsize}" else echo "not ok $i - ealgo=${ealgo} keylen=${keylen} sec=${secsize}" fi i=$((i+1)) geli detach md${no} rm -f $rnd mdconfig -d -u $no done done Index: projects/clang380-import/tools/regression/geom_eli/readonly.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/readonly.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/readonly.t (revision 293687) @@ -1,93 +1,94 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..11" dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1 geli init -B none -P -K $keyfile md${no} if [ $? -eq 0 ]; then echo "ok 1" else echo "not ok 1" fi geli attach -r -p -k $keyfile md${no} if [ $? -eq 0 ]; then echo "ok 2" else echo "not ok 2" fi sh -c "true >/dev/md${no}.eli" 2>/dev/null if [ $? -ne 0 ]; then echo "ok 3" else echo "not ok 3" fi geli kill md${no} if [ $? -eq 0 ]; then echo "ok 4" else echo "not ok 4" fi # kill should detach provider... if [ ! -c /dev/md{$no}.eli ]; then echo "ok 5" else echo "not ok 5" fi # ...but not destroy the metadata. geli attach -r -p -k $keyfile md${no} if [ $? -eq 0 ]; then echo "ok 6" else echo "not ok 6" fi geli setkey -n 1 -P -K /dev/null md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 7" else echo "not ok 7" fi geli delkey -n 0 md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 8" else echo "not ok 8" fi geli delkey -f -n 0 md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 9" else echo "not ok 9" fi geli list md${no}.eli | egrep '^Flags: .*READ-ONLY' >/dev/null if [ $? -eq 0 ]; then echo "ok 10" else echo "not ok 10" fi geli detach md${no} if [ $? -eq 0 ]; then echo "ok 11" else echo "not ok 11" fi mdconfig -d -u $no rm -f $keyfile Index: projects/clang380-import/tools/regression/geom_eli/resize.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/resize.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/resize.t (revision 293687) @@ -1,150 +1,148 @@ -#! /bin/sh -# +#!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + echo 1..27 BLK=512 BLKS_PER_MB=2048 md=$(mdconfig -s40m) || exit 1 unit=${md#md} i=1 setsize() { partszMB=$1 unitszMB=$2 { echo a: $(($partszMB * $BLKS_PER_MB)) 0 4.2BSD 1024 8192 echo c: $(($unitszMB * $BLKS_PER_MB)) 0 unused 0 0 } | disklabel -R $md /dev/stdin } # Initialise -kldload geom_eli >/dev/null 2>&1 - setsize 10 40 || echo -n "not " echo ok $i - "Sized ${md}a to 10m" i=$((i + 1)) echo secret >tmp.key geli init -Bnone -PKtmp.key ${md}a || echo -n "not " echo ok $i - "Initialised geli on ${md}a" i=$((i + 1)) geli attach -pk tmp.key ${md}a || echo -n "not " echo ok $i - "Attached ${md}a as ${md}a.eli" i=$((i + 1)) newfs -U ${md}a.eli >/dev/null || echo -n "not " echo ok $i - "Initialised the filesystem on ${md}a.eli" i=$((i + 1)) out=$(fsck -tufs -y ${md}a.eli) echo "$out" | fgrep -q MODIFIED && echo -n "not " echo ok $i - "fsck says ${md}a.eli is clean," $(echo $(echo "$out" | wc -l)) \ "lines of output" i=$((i + 1)) # Doing a backup, resize & restore must be forced (with -f) as geli # verifies that the provider size in the metadata matches the consumer. geli backup ${md}a tmp.meta || echo -n "not " echo ok $i - "Backed up ${md}a metadata" i=$((i + 1)) geli detach ${md}a.eli || echo -n "not " echo ok $i - "Detached ${md}a.eli" i=$((i + 1)) setsize 20 40 || echo -n "not " echo ok $i - "Sized ${md}a to 20m" i=$((i + 1)) geli attach -pktmp.key ${md}a && echo -n "not " echo ok $i - "Attaching ${md}a fails after resizing the consumer" i=$((i + 1)) geli restore tmp.meta ${md}a && echo -n "not " echo ok $i - "Restoring metadata on ${md}a.eli fails without -f" i=$((i + 1)) geli restore -f tmp.meta ${md}a || echo -n "not " echo ok $i - "Restoring metadata on ${md}a.eli can be forced" i=$((i + 1)) geli attach -pktmp.key ${md}a || echo -n "not " echo ok $i - "Attaching ${md}a is now possible" i=$((i + 1)) growfs -y ${md}a.eli >/dev/null || echo -n "not " echo ok $i - "Extended the filesystem on ${md}a.eli" i=$((i + 1)) out=$(fsck -tufs -y ${md}a.eli) echo "$out" | fgrep -q MODIFIED && echo -n "not " echo ok $i - "fsck says ${md}a.eli is clean," $(echo $(echo "$out" | wc -l)) \ "lines of output" i=$((i + 1)) # Now do the resize properly geli detach ${md}a.eli || echo -n "not " echo ok $i - "Detached ${md}a.eli" i=$((i + 1)) setsize 30 40 || echo -n "not " echo ok $i - "Sized ${md}a to 30m" i=$((i + 1)) geli resize -s20m ${md}a || echo -n "not " echo ok $i - "Resizing works ok" i=$((i + 1)) geli resize -s20m ${md}a && echo -n "not " echo ok $i - "Resizing doesn't work a 2nd time (no old metadata)" i=$((i + 1)) geli attach -pktmp.key ${md}a || echo -n "not " echo ok $i - "Attaching ${md}a works ok" i=$((i + 1)) growfs -y ${md}a.eli >/dev/null || echo -n "not " echo ok $i - "Extended the filesystem on ${md}a.eli" i=$((i + 1)) out=$(fsck -tufs -y ${md}a.eli) echo "$out" | fgrep -q MODIFIED && echo -n "not " echo ok $i - "fsck says ${md}a.eli is clean," $(echo $(echo "$out" | wc -l)) \ "lines of output" i=$((i + 1)) geli detach ${md}a.eli gpart destroy -F $md >/dev/null # Verify that the man page example works, changing ada0 to $md, # 1g to 20m, 2g to 30m and keyfile to tmp.key, and adding -B none # to geli init. gpart create -s GPT $md || echo -n "not " echo ok $i - "Installed a GPT on ${md}" i=$((i + 1)) gpart add -s 20m -t freebsd-ufs -i 1 $md || echo -n "not " echo ok $i - "Added a 20m partition in slot 1" i=$((i + 1)) geli init -B none -K tmp.key -P ${md}p1 || echo -n "not " echo ok $i - "Initialised geli on ${md}p1" i=$((i + 1)) gpart resize -s 30m -i 1 $md || echo -n "not " echo ok $i - "Resized partition ${md}p1 to 30m" i=$((i + 1)) geli resize -s 20m ${md}p1 || echo -n "not " echo ok $i - "Resized geli on ${md}p1 to 30m" i=$((i + 1)) geli attach -k tmp.key -p ${md}p1 || echo -n "not " echo ok $i - "Attached ${md}p1.eli" i=$((i + 1)) geli detach ${md}p1.eli -mdconfig -du$unit rm tmp.* Index: projects/clang380-import/tools/regression/geom_eli/setkey.t =================================================================== --- projects/clang380-import/tools/regression/geom_eli/setkey.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_eli/setkey.t (revision 293687) @@ -1,156 +1,156 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -rnd=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile1=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile2=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile3=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile4=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile5=`mktemp /tmp/$base.XXXXXX` || exit 1 +rnd=`mktemp $base.XXXXXX` || exit 1 +keyfile1=`mktemp $base.XXXXXX` || exit 1 +keyfile2=`mktemp $base.XXXXXX` || exit 1 +keyfile3=`mktemp $base.XXXXXX` || exit 1 +keyfile4=`mktemp $base.XXXXXX` || exit 1 +keyfile5=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..16" dd if=/dev/random of=${rnd} bs=512 count=${sectors} >/dev/null 2>&1 hash1=`dd if=${rnd} bs=512 count=${sectors} 2>/dev/null | md5` dd if=/dev/random of=${keyfile1} bs=512 count=16 >/dev/null 2>&1 dd if=/dev/random of=${keyfile2} bs=512 count=16 >/dev/null 2>&1 dd if=/dev/random of=${keyfile3} bs=512 count=16 >/dev/null 2>&1 dd if=/dev/random of=${keyfile4} bs=512 count=16 >/dev/null 2>&1 dd if=/dev/random of=${keyfile5} bs=512 count=16 >/dev/null 2>&1 geli init -B none -P -K $keyfile1 md${no} geli attach -p -k $keyfile1 md${no} dd if=${rnd} of=/dev/md${no}.eli bs=512 count=${sectors} 2>/dev/null rm -f $rnd hash2=`dd if=/dev/md${no}.eli bs=512 count=${sectors} 2>/dev/null | md5` # Change current key (0) for attached provider. geli setkey -P -K $keyfile2 md${no} if [ $? -eq 0 ]; then echo "ok 1" else echo "not ok 1" fi geli detach md${no} # We cannot use keyfile1 anymore. geli attach -p -k $keyfile1 md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 2" else echo "not ok 2" fi # Attach with new key. geli attach -p -k $keyfile2 md${no} if [ $? -eq 0 ]; then echo "ok 3" else echo "not ok 3" fi hash3=`dd if=/dev/md${no}.eli bs=512 count=${sectors} 2>/dev/null | md5` # Change key 1 for attached provider. geli setkey -n 1 -P -K $keyfile3 md${no} if [ $? -eq 0 ]; then echo "ok 4" else echo "not ok 4" fi geli detach md${no} # Attach with key 1. geli attach -p -k $keyfile3 md${no} if [ $? -eq 0 ]; then echo "ok 5" else echo "not ok 5" fi hash4=`dd if=/dev/md${no}.eli bs=512 count=${sectors} 2>/dev/null | md5` geli detach md${no} # Change current (1) key for detached provider. geli setkey -p -k $keyfile3 -P -K $keyfile4 md${no} if [ $? -eq 0 ]; then echo "ok 6" else echo "not ok 6" fi # We cannot use keyfile3 anymore. geli attach -p -k $keyfile3 md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 7" else echo "not ok 7" fi # Attach with key 1. geli attach -p -k $keyfile4 md${no} if [ $? -eq 0 ]; then echo "ok 8" else echo "not ok 8" fi hash5=`dd if=/dev/md${no}.eli bs=512 count=${sectors} 2>/dev/null | md5` geli detach md${no} # Change key 0 for detached provider. geli setkey -n 0 -p -k $keyfile4 -P -K $keyfile5 md${no} if [ $? -eq 0 ]; then echo "ok 9" else echo "not ok 9" fi # We cannot use keyfile2 anymore. geli attach -p -k $keyfile2 md${no} 2>/dev/null if [ $? -ne 0 ]; then echo "ok 10" else echo "not ok 10" fi # Attach with key 0. geli attach -p -k $keyfile5 md${no} if [ $? -eq 0 ]; then echo "ok 11" else echo "not ok 11" fi hash6=`dd if=/dev/md${no}.eli bs=512 count=${sectors} 2>/dev/null | md5` geli detach md${no} if [ ${hash1} = ${hash2} ]; then echo "ok 12" else echo "not ok 12" fi if [ ${hash1} = ${hash3} ]; then echo "ok 13" else echo "not ok 13" fi if [ ${hash1} = ${hash4} ]; then echo "ok 14" else echo "not ok 14" fi if [ ${hash1} = ${hash5} ]; then echo "ok 15" else echo "not ok 15" fi if [ ${hash1} = ${hash6} ]; then echo "ok 16" else echo "not ok 16" fi -mdconfig -d -u $no rm -f $keyfile1 $keyfile2 $keyfile3 $keyfile4 $keyfile5 Index: projects/clang380-import/tools/regression/geom_gate/runtests.sh =================================================================== --- projects/clang380-import/tools/regression/geom_gate/runtests.sh (revision 293686) +++ projects/clang380-import/tools/regression/geom_gate/runtests.sh (nonexistent) @@ -1,8 +0,0 @@ -#!/bin/sh -# $FreeBSD$ - -dir=`dirname $0` - -for ts in `dirname $0`/test-*.sh; do - sh $ts -done Property changes on: projects/clang380-import/tools/regression/geom_gate/runtests.sh ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang380-import/tools/regression/geom_gate/test-1.sh =================================================================== --- projects/clang380-import/tools/regression/geom_gate/test-1.sh (revision 293686) +++ projects/clang380-import/tools/regression/geom_gate/test-1.sh (nonexistent) @@ -1,36 +0,0 @@ -#!/bin/sh -# $FreeBSD$ - -base=`basename $0` -us=45 -work="/dev/md${us}" -src="/dev/md`expr $us + 1`" -conf=`mktemp /tmp/$base.XXXXXX` || exit 1 - -mdconfig -a -t malloc -s 1M -u $us || exit 1 -mdconfig -a -t malloc -s 1M -u `expr $us + 1` || exit 1 -dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1 -dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1 -sum=`cat $src | md5 -q` - -echo "127.0.0.1 RW $work" > $conf -ggated $conf -ggatec create -u $us 127.0.0.1 $work - -dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1 - -if [ `cat $work | md5 -q` != $sum ]; then - echo "FAIL" -else - if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then - echo "FAIL" - else - echo "PASS" - fi -fi - -ggatec destroy -u $us -mdconfig -d -u $us -mdconfig -d -u `expr $us + 1` -pkill ggated $conf -rm -f $conf Property changes on: projects/clang380-import/tools/regression/geom_gate/test-1.sh ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang380-import/tools/regression/geom_gate/test-2.sh =================================================================== --- projects/clang380-import/tools/regression/geom_gate/test-2.sh (revision 293686) +++ projects/clang380-import/tools/regression/geom_gate/test-2.sh (nonexistent) @@ -1,28 +0,0 @@ -#!/bin/sh -# $FreeBSD$ - -base=`basename $0` -us=45 -work=`mktemp /tmp/$base.XXXXXX` || exit 1 -src=`mktemp /tmp/$base.XXXXXX` || exit 1 - -dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1 -dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1 -sum=`md5 -q $src` - -ggatel create -u $us $work - -dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1 - -if [ `md5 -q $work` != $sum ]; then - echo "FAIL" -else - if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then - echo "FAIL" - else - echo "PASS" - fi -fi - -ggatel destroy -u $us -rm -f $work $src Property changes on: projects/clang380-import/tools/regression/geom_gate/test-2.sh ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang380-import/tools/regression/geom_gate/test-3.sh =================================================================== --- projects/clang380-import/tools/regression/geom_gate/test-3.sh (revision 293686) +++ projects/clang380-import/tools/regression/geom_gate/test-3.sh (nonexistent) @@ -1,31 +0,0 @@ -#!/bin/sh -# $FreeBSD$ - -base=`basename $0` -us=45 -work="/dev/md${us}" -src="/dev/md`expr $us + 1`" - -mdconfig -a -t malloc -s 1M -u $us || exit 1 -mdconfig -a -t malloc -s 1M -u `expr $us + 1` || exit 1 -dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1 -dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1 -sum=`cat $src | md5 -q` - -ggatel create -u $us $work - -dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1 - -if [ `cat $work | md5 -q` != $sum ]; then - echo "FAIL" -else - if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then - echo "FAIL" - else - echo "PASS" - fi -fi - -ggatel destroy -u $us -mdconfig -d -u $us -mdconfig -d -u `expr $us + 1` Property changes on: projects/clang380-import/tools/regression/geom_gate/test-3.sh ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang380-import/tools/regression/geom_gate/Makefile =================================================================== --- projects/clang380-import/tools/regression/geom_gate/Makefile (revision 293686) +++ projects/clang380-import/tools/regression/geom_gate/Makefile (nonexistent) @@ -1,8 +0,0 @@ -# -# $FreeBSD$ -# -# Regression tests for geom_gate. -# - -test: - @sh runtests.sh Property changes on: projects/clang380-import/tools/regression/geom_gate/Makefile ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang380-import/tools/regression/geom_gate/test-1.t =================================================================== --- projects/clang380-import/tools/regression/geom_gate/test-1.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_gate/test-1.t (revision 293687) @@ -1,40 +1,62 @@ #!/bin/sh # $FreeBSD$ +. `dirname $0`/conf.sh + +echo '1..2' + base=`basename $0` -us=45 -work="/dev/md${us}" -src="/dev/md`expr $us + 1`" -conf=`mktemp /tmp/$base.XXXXXX` || exit 1 +us=0 +while [ -c /dev/ggate${us} ]; do + : $(( us += 1 )) +done +conf=`mktemp $base.XXXXXX` || exit 1 +pidfile=/var/run/ggated.pid +port=33080 -mdconfig -a -t malloc -s 1M -u $us || exit 1 -mdconfig -a -t malloc -s 1M -u `expr $us + 1` || exit 1 -dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1 -dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1 -sum=`cat $src | md5 -q` +work=$(attach_md -t malloc -s 1M) +src=$(attach_md -t malloc -s 1M) -echo "127.0.0.1 RW $work" > $conf -ggated $conf -ggatec create -u $us 127.0.0.1 $work +test_cleanup() +{ + ggatec destroy -f -u $us + pkill -F $pidfile + geom_test_cleanup +} +trap test_cleanup ABRT EXIT INT TERM -dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1 +dd if=/dev/random of=/dev/$work bs=1m count=1 conv=sync +dd if=/dev/random of=/dev/$src bs=1m count=1 conv=sync +src_checksum=$(md5 -q /dev/$src) -echo '1..2' +echo "127.0.0.1 RW /dev/$work" > $conf -if [ `cat $work | md5 -q` != $sum ]; then - echo 'not ok 1 - md5 checksum' +if ! ggated -p $port $conf; then + echo 'ggated failed to start' + echo 'Bail out!' + exit 1 +fi +sleep 1 +if ! ggatec create -p $port -u $us 127.0.0.1 /dev/$work; then + echo 'ggatec create failed' + echo 'Bail out!' + exit 1 +fi + +dd if=/dev/${src} of=/dev/ggate${us} bs=1m count=1 +sleep 1 + +work_checksum=$(md5 -q /dev/$work) +if [ "$work_checksum" != "$src_checksum" ]; then + echo "not ok 1 - md5 checksums didn't match ($work_checksum != $src_checksum)" + echo "not ok 2 # SKIP" else echo 'ok 1 - md5 checksum' - if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then - echo 'not ok 2 - md5 checksum' + ggate_checksum=$(md5 -q /dev/ggate${us}) + if [ "$ggate_checksum" != "$src_checksum" ]; then + echo "not ok 2 - md5 checksums didn't match ($ggate_checksum != $src_checksum)" else echo 'ok 2 - md5 checksum' fi fi - -ggatec destroy -u $us -mdconfig -d -u $us -mdconfig -d -u `expr $us + 1` -pkill ggated $conf -rm -f $conf Index: projects/clang380-import/tools/regression/geom_gate/test-2.t =================================================================== --- projects/clang380-import/tools/regression/geom_gate/test-2.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_gate/test-2.t (revision 293687) @@ -1,31 +1,48 @@ #!/bin/sh # $FreeBSD$ +. `dirname $0`/conf.sh + base=`basename $0` -us=45 -work=`mktemp /tmp/$base.XXXXXX` || exit 1 -src=`mktemp /tmp/$base.XXXXXX` || exit 1 +us=46 +work=`mktemp -u $base.XXXXXX` || exit 1 +src=`mktemp -u $base.XXXXXX` || exit 1 -dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1 -dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1 -sum=`md5 -q $src` +test_cleanup() +{ + ggatel destroy -f -u $us + rm -f $work $src -ggatel create -u $us $work + geom_test_cleanup +} +trap test_cleanup ABRT EXIT INT TERM -dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1 +dd if=/dev/random of=$work bs=1m count=1 conv=sync +dd if=/dev/random of=$src bs=1m count=1 conv=sync +if ! ggatel create -u $us $work; then + echo 'ggatel create failed' + echo 'Bail out!' + exit 1 +fi + +dd if=${src} of=/dev/ggate${us} bs=1m count=1 +sleep 1 + echo '1..2' -if [ `md5 -q $work` != $sum ]; then - echo 'not ok 1 - md5 checksum' +src_checksum=$(md5 -q $src) +work_checksum=$(md5 -q $work) +if [ "$work_checksum" != "$src_checksum" ]; then + echo "not ok 1 - md5 checksums didn't match ($work_checksum != $src_checksum) # TODO: bug 204616" + echo 'not ok 2 # SKIP' else echo 'ok 1 - md5 checksum' - if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then - echo 'not ok 2 - md5 checksum' + + ggate_checksum=$(md5 -q /dev/ggate${us}) + if [ "$ggate_checksum" != "$src_checksum" ]; then + echo "not ok 2 - md5 checksums didn't match ($ggate_checksum != $src_checksum)" else echo 'ok 2 - md5 checksum' fi fi - -ggatel destroy -u $us -rm -f $work $src Index: projects/clang380-import/tools/regression/geom_gate/test-3.t =================================================================== --- projects/clang380-import/tools/regression/geom_gate/test-3.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_gate/test-3.t (revision 293687) @@ -1,34 +1,48 @@ #!/bin/sh # $FreeBSD$ +. `dirname $0`/conf.sh + base=`basename $0` -us=45 -work="/dev/md${us}" -src="/dev/md`expr $us + 1`" +us=47 -mdconfig -a -t malloc -s 1M -u $us || exit 1 -mdconfig -a -t malloc -s 1M -u `expr $us + 1` || exit 1 -dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1 -dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1 -sum=`cat $src | md5 -q` +test_cleanup() +{ + ggatel destroy -f -u $us -ggatel create -u $us $work + geom_test_cleanup +} +trap test_cleanup ABRT EXIT INT TERM -dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1 +work=$(attach_md -t malloc -s 1M) +src=$(attach_md -t malloc -s 1M) +dd if=/dev/random of=/dev/$work bs=1m count=1 conv=sync +dd if=/dev/random of=/dev/$src bs=1m count=1 conv=sync +src_checksum=$(md5 -q /dev/$src) + +if ! ggatel create -u $us /dev/$work; then + echo 'ggatel create failed' + echo 'Bail out!' + exit 1 +fi + +dd if=/dev/${src} of=/dev/ggate${us} bs=1m count=1 conv=sync +sleep 1 + echo '1..2' -if [ `cat $work | md5 -q` != $sum ]; then - echo 'not ok 1 - md5 checksum' +work_checksum=$(md5 -q /dev/$work) +if [ "$work_checksum" != "$src_checksum" ]; then + echo "not ok 1 - md5 checksums didn't match ($work_checksum != $src_checksum)" + echo 'not ok 2 # SKIP' else echo 'ok 1 - md5 checksum' - if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then - echo 'not ok 2 - md5 checksum' + + ggate_checksum=$(md5 -q /dev/ggate${us}) + if [ "$ggate_checksum" != "$src_checksum" ]; then + echo "not ok 2 - md5 checksums didn't match ($ggate_checksum != $src_checksum)" else echo 'ok 2 - md5 checksum' fi fi - -ggatel destroy -u $us -mdconfig -d -u $us -mdconfig -d -u `expr $us + 1` Index: projects/clang380-import/tools/regression/geom_nop/conf.sh =================================================================== --- projects/clang380-import/tools/regression/geom_nop/conf.sh (revision 293686) +++ projects/clang380-import/tools/regression/geom_nop/conf.sh (revision 293687) @@ -1,7 +1,14 @@ #!/bin/sh # $FreeBSD$ class="nop" base=`basename $0` +gnop_test_cleanup() +{ + [ -c /dev/${us}.nop ] && gnop destroy ${us}.nop + geom_test_cleanup +} +trap gnop_test_cleanup ABRT EXIT INT TERM + . `dirname $0`/../geom_subr.sh Index: projects/clang380-import/tools/regression/geom_nop/test-1.t =================================================================== --- projects/clang380-import/tools/regression/geom_nop/test-1.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_nop/test-1.t (revision 293687) @@ -1,25 +1,20 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..1" -us=45 +us=$(attach_md -t malloc -s 1M) || exit 1 -mdconfig -a -t malloc -s 1M -u $us || exit 1 +gnop create /dev/${us} || exit 1 -gnop create /dev/md${us} || exit 1 - # Size of created device should be 1MB. -size=`diskinfo /dev/md${us}.nop | awk '{print $3}'` +size=`diskinfo /dev/${us}.nop | awk '{print $3}'` if [ $size -eq 1048576 ]; then echo "ok 1" else echo "not ok 1" fi - -gnop destroy md${us}.nop -mdconfig -d -u $us Index: projects/clang380-import/tools/regression/geom_nop/test-2.t =================================================================== --- projects/clang380-import/tools/regression/geom_nop/test-2.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_nop/test-2.t (revision 293687) @@ -1,29 +1,26 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh -us=45 -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 echo "1..1" dd if=/dev/random of=${src} bs=1m count=1 >/dev/null 2>&1 -mdconfig -a -t malloc -s 1M -u $us || exit 1 +us=$(attach_md -t malloc -s 1M) || exit 1 -gnop create /dev/md${us} || exit 1 +gnop create /dev/${us} || exit 1 -dd if=${src} of=/dev/md${us}.nop bs=1m count=1 >/dev/null 2>&1 -dd if=/dev/md${us}.nop of=${dst} bs=1m count=1 >/dev/null 2>&1 +dd if=${src} of=/dev/${us}.nop bs=1m count=1 >/dev/null 2>&1 +dd if=/dev/${us}.nop of=${dst} bs=1m count=1 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -gnop destroy md${us}.nop -mdconfig -d -u $us rm -f ${src} ${dst} Index: projects/clang380-import/tools/regression/geom_raid3/conf.sh =================================================================== --- projects/clang380-import/tools/regression/geom_raid3/conf.sh (revision 293686) +++ projects/clang380-import/tools/regression/geom_raid3/conf.sh (revision 293687) @@ -1,8 +1,15 @@ #!/bin/sh # $FreeBSD$ name="$(mktemp -u graid3.XXXXXX)" class="raid3" base=`basename $0` +graid3_test_cleanup() +{ + [ -c /dev/$class/$name ] && graid3 stop $name + geom_test_cleanup +} +trap graid3_test_cleanup ABRT EXIT INT TERM + . `dirname $0`/../geom_subr.sh Index: projects/clang380-import/tools/regression/geom_raid3/test-1.t =================================================================== --- projects/clang380-import/tools/regression/geom_raid3/test-1.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_raid3/test-1.t (revision 293687) @@ -1,37 +1,28 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..2" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` +us0=$(attach_md -t malloc -s 1M) || exit 1 +us1=$(attach_md -t malloc -s 2M) || exit 1 +us2=$(attach_md -t malloc -s 3M) || exit 1 -mdconfig -a -t malloc -s 1M -u $us0 || exit 1 -mdconfig -a -t malloc -s 2M -u $us1 || exit 1 -mdconfig -a -t malloc -s 3M -u $us2 || exit 1 - -graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} 2>/dev/null || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} 2>/dev/null || exit 1 devwait # Size of created device should be 2MB - 1024B. mediasize=`diskinfo /dev/raid3/${name} | awk '{print $3}'` if [ $mediasize -eq 2096128 ]; then echo "ok 1" else echo "not ok 1" fi sectorsize=`diskinfo /dev/raid3/${name} | awk '{print $2}'` if [ $sectorsize -eq 1024 ]; then echo "ok 2" else echo "not ok 2" fi - -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 Index: projects/clang380-import/tools/regression/geom_raid3/test-10.t =================================================================== --- projects/clang380-import/tools/regression/geom_raid3/test-10.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_raid3/test-10.t (revision 293687) @@ -1,39 +1,32 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 + dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label -r $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label -r $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} Index: projects/clang380-import/tools/regression/geom_raid3/test-11.t =================================================================== --- projects/clang380-import/tools/regression/geom_raid3/test-11.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_raid3/test-11.t (revision 293687) @@ -1,39 +1,32 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 + dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label -w $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label -w $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} Index: projects/clang380-import/tools/regression/geom_raid3/test-12.t =================================================================== --- projects/clang380-import/tools/regression/geom_raid3/test-12.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_raid3/test-12.t (revision 293687) @@ -1,46 +1,38 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` nblocks1=9 nblocks2=`expr $nblocks1 - 1` nblocks3=`expr $nblocks2 / 2` -mdconfig -a -t malloc -s $nblocks1 -u $us0 || exit 1 -mdconfig -a -t malloc -s $nblocks1 -u $us1 || exit 1 -mdconfig -a -t malloc -s $nblocks1 -u $us2 || exit 1 +us0=$(attach_md -t malloc -s $nblocks1) || exit 1 +us1=$(attach_md -t malloc -s $nblocks1) || exit 1 +us2=$(attach_md -t malloc -s $nblocks1) || exit 1 -dd if=/dev/random of=/dev/md${us0} count=$nblocks1 >/dev/null 2>&1 -dd if=/dev/random of=/dev/md${us1} count=$nblocks1 >/dev/null 2>&1 -dd if=/dev/random of=/dev/md${us2} count=$nblocks1 >/dev/null 2>&1 +dd if=/dev/random of=/dev/${us0} count=$nblocks1 >/dev/null 2>&1 +dd if=/dev/random of=/dev/${us1} count=$nblocks1 >/dev/null 2>&1 +dd if=/dev/random of=/dev/${us2} count=$nblocks1 >/dev/null 2>&1 -graid3 label -w $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label -w $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait # Wait for synchronization. sleep 2 graid3 stop $name # Break one component. -dd if=/dev/random of=/dev/md${us1} count=$nblocks2 >/dev/null 2>&1 +dd if=/dev/random of=/dev/${us1} count=$nblocks2 >/dev/null 2>&1 # Provoke retaste of the rest components. -true > /dev/md${us0} -true > /dev/md${us2} +true > /dev/${us0} +true > /dev/${us2} sleep 1 dd if=/dev/raid3/${name} of=/dev/null bs=1k count=$nblocks3 >/dev/null 2>&1 ec=$? if [ $ec -eq 0 ]; then echo "not ok 1" else echo "ok 1" fi - -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 Index: projects/clang380-import/tools/regression/geom_raid3/test-2.t =================================================================== --- projects/clang380-import/tools/regression/geom_raid3/test-2.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_raid3/test-2.t (revision 293687) @@ -1,39 +1,32 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 + dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} Index: projects/clang380-import/tools/regression/geom_raid3/test-3.t =================================================================== --- projects/clang380-import/tools/regression/geom_raid3/test-3.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_raid3/test-3.t (revision 293687) @@ -1,43 +1,36 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 + dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 # # Reading without one DATA component (so with parity). # graid3 remove -n 1 $name dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} Index: projects/clang380-import/tools/regression/geom_raid3/test-4.t =================================================================== --- projects/clang380-import/tools/regression/geom_raid3/test-4.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_raid3/test-4.t (revision 293687) @@ -1,43 +1,36 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 + dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait # # Writing without one DATA component. # graid3 remove -n 1 $name dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} Index: projects/clang380-import/tools/regression/geom_raid3/test-5.t =================================================================== --- projects/clang380-import/tools/regression/geom_raid3/test-5.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_raid3/test-5.t (revision 293687) @@ -1,43 +1,36 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 + dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait # # Writing without PARITY component. # graid3 remove -n 2 $name dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} Index: projects/clang380-import/tools/regression/geom_raid3/test-6.t =================================================================== --- projects/clang380-import/tools/regression/geom_raid3/test-6.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_raid3/test-6.t (revision 293687) @@ -1,47 +1,40 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 + dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 # # Rebuild of DATA component. # graid3 remove -n 1 $name -dd if=/dev/zero of=/dev/md${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 +dd if=/dev/zero of=/dev/${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 graid3 insert -n 1 $name md${us1} sleep 1 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} Index: projects/clang380-import/tools/regression/geom_raid3/test-7.t =================================================================== --- projects/clang380-import/tools/regression/geom_raid3/test-7.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_raid3/test-7.t (revision 293687) @@ -1,50 +1,43 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 + dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 # # Rebuild of PARITY component. # graid3 remove -n 2 $name -dd if=/dev/zero of=/dev/md${us2} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 +dd if=/dev/zero of=/dev/${us2} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 graid3 insert -n 2 $name md${us2} sleep 1 # Remove DATA component, so PARITY component can be used while reading. graid3 remove -n 1 $name -dd if=/dev/zero of=/dev/md${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 +dd if=/dev/zero of=/dev/${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} Index: projects/clang380-import/tools/regression/geom_raid3/test-8.t =================================================================== --- projects/clang380-import/tools/regression/geom_raid3/test-8.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_raid3/test-8.t (revision 293687) @@ -1,46 +1,39 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 + dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait # # Writing without DATA component and rebuild of DATA component. # graid3 remove -n 1 $name -dd if=/dev/zero of=/dev/md${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 +dd if=/dev/zero of=/dev/${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 graid3 insert -n 1 $name md${us1} sleep 1 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} Index: projects/clang380-import/tools/regression/geom_raid3/test-9.t =================================================================== --- projects/clang380-import/tools/regression/geom_raid3/test-9.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_raid3/test-9.t (revision 293687) @@ -1,49 +1,42 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 + dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait # # Writing without PARITY component and rebuild of PARITY component. # graid3 remove -n 2 $name -dd if=/dev/zero of=/dev/md${us2} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 +dd if=/dev/zero of=/dev/${us2} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 graid3 insert -n 2 $name md${us2} sleep 1 # Remove DATA component, so PARITY component can be used while reading. graid3 remove -n 1 $name -dd if=/dev/zero of=/dev/md${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 +dd if=/dev/zero of=/dev/${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} Index: projects/clang380-import/tools/regression/geom_shsec/conf.sh =================================================================== --- projects/clang380-import/tools/regression/geom_shsec/conf.sh (revision 293686) +++ projects/clang380-import/tools/regression/geom_shsec/conf.sh (revision 293687) @@ -1,8 +1,15 @@ #!/bin/sh # $FreeBSD$ name="$(mktemp -u shsec.XXXXXX)" class="shsec" base=`basename $0` +shsec_test_cleanup() +{ + [ -c /dev/$class/$name ] && gshsec stop $name + geom_test_cleanup +} +trap shsec_test_cleanup ABRT EXIT INT TERM + . `dirname $0`/../geom_subr.sh Index: projects/clang380-import/tools/regression/geom_shsec/test-1.t =================================================================== --- projects/clang380-import/tools/regression/geom_shsec/test-1.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_shsec/test-1.t (revision 293687) @@ -1,37 +1,28 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..2" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` +us0=$(attach_md -t malloc -s 1M) || exit 1 +us1=$(attach_md -t malloc -s 2M) || exit 1 +us2=$(attach_md -t malloc -s 3M) || exit 1 -mdconfig -a -t malloc -s 1M -u $us0 || exit 1 -mdconfig -a -t malloc -s 2M -u $us1 || exit 1 -mdconfig -a -t malloc -s 3M -u $us2 || exit 1 - -gshsec label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} 2>/dev/null || exit 1 +gshsec label $name /dev/${us0} /dev/${us1} /dev/${us2} 2>/dev/null || exit 1 devwait # Size of created device should be 1MB - 512B. mediasize=`diskinfo /dev/shsec/${name} | awk '{print $3}'` if [ $mediasize -eq 1048064 ]; then echo "ok 1" else echo "not ok 1" fi sectorsize=`diskinfo /dev/shsec/${name} | awk '{print $2}'` if [ $sectorsize -eq 512 ]; then echo "ok 2" else echo "not ok 2" fi - -gshsec stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 Index: projects/clang380-import/tools/regression/geom_shsec/test-2.t =================================================================== --- projects/clang380-import/tools/regression/geom_shsec/test-2.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_shsec/test-2.t (revision 293687) @@ -1,59 +1,52 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..4" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` nblocks1=1024 nblocks2=`expr $nblocks1 + 1` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 dd if=/dev/random of=${src} count=$nblocks1 >/dev/null 2>&1 -mdconfig -a -t malloc -s $nblocks2 -u $us0 || exit 1 -mdconfig -a -t malloc -s $nblocks2 -u $us1 || exit 1 -mdconfig -a -t malloc -s $nblocks2 -u $us2 || exit 1 +us0=$(attach_md -t malloc -s $nblocks2) || exit 1 +us1=$(attach_md -t malloc -s $nblocks2) || exit 1 +us2=$(attach_md -t malloc -s $nblocks2) || exit 1 -gshsec label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +gshsec label $name /dev/$us0 /dev/$us1 /dev/$us2 || exit 1 devwait dd if=${src} of=/dev/shsec/${name} count=$nblocks1 >/dev/null 2>&1 dd if=/dev/shsec/${name} of=${dst} count=$nblocks1 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -dd if=/dev/md${us0} of=${dst} count=$nblocks1 >/dev/null 2>&1 +dd if=/dev/${us0} of=${dst} count=$nblocks1 >/dev/null 2>&1 if [ `md5 -q ${src}` = `md5 -q ${dst}` ]; then echo "not ok 2" else echo "ok 2" fi -dd if=/dev/md${us1} of=${dst} count=$nblocks1 >/dev/null 2>&1 +dd if=/dev/${us1} of=${dst} count=$nblocks1 >/dev/null 2>&1 if [ `md5 -q ${src}` = `md5 -q ${dst}` ]; then echo "not ok 3" else echo "ok 3" fi -dd if=/dev/md${us2} of=${dst} count=$nblocks1 >/dev/null 2>&1 +dd if=/dev/${us2} of=${dst} count=$nblocks1 >/dev/null 2>&1 if [ `md5 -q ${src}` = `md5 -q ${dst}` ]; then echo "not ok 4" else echo "ok 4" fi -gshsec stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} Index: projects/clang380-import/tools/regression/geom_stripe/conf.sh =================================================================== --- projects/clang380-import/tools/regression/geom_stripe/conf.sh (revision 293686) +++ projects/clang380-import/tools/regression/geom_stripe/conf.sh (revision 293687) @@ -1,8 +1,15 @@ #!/bin/sh # $FreeBSD$ name="$(mktemp -u stripe.XXXXXX)" class="stripe" base=`basename $0` +gstripe_test_cleanup() +{ + [ -c /dev/$class/$name ] && gstripe destroy $name + geom_test_cleanup +} +trap gstripe_test_cleanup ABRT EXIT INT TERM + . `dirname $0`/../geom_subr.sh Index: projects/clang380-import/tools/regression/geom_stripe/test-1.t =================================================================== --- projects/clang380-import/tools/regression/geom_stripe/test-1.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_stripe/test-1.t (revision 293687) @@ -1,30 +1,23 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..1" -us=45 +us0=$(attach_md -t malloc -s 1M) || exit 1 +us1=$(attach_md -t malloc -s 2M) || exit 1 +us2=$(attach_md -t malloc -s 3M) || exit 1 -mdconfig -a -t malloc -s 1M -u $us || exit 1 -mdconfig -a -t malloc -s 2M -u `expr $us + 1` || exit 1 -mdconfig -a -t malloc -s 3M -u `expr $us + 2` || exit 1 - -gstripe create -s 16384 $name /dev/md${us} /dev/md`expr $us + 1` /dev/md`expr $us + 2` || exit 1 +gstripe create -s 16384 $name /dev/$us0 /dev/$us1 /dev/$us2 || exit 1 devwait # Size of created device should be 1MB * 3. size=`diskinfo /dev/stripe/${name} | awk '{print $3}'` if [ $size -eq 3145728 ]; then echo "ok 1" else echo "not ok 1" fi - -gstripe destroy $name -mdconfig -d -u $us -mdconfig -d -u `expr $us + 1` -mdconfig -d -u `expr $us + 2` Index: projects/clang380-import/tools/regression/geom_stripe/test-2.t =================================================================== --- projects/clang380-import/tools/regression/geom_stripe/test-2.t (revision 293686) +++ projects/clang380-import/tools/regression/geom_stripe/test-2.t (revision 293687) @@ -1,35 +1,30 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..1" -us=45 tsize=3 -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 dd if=/dev/random of=${src} bs=1m count=$tsize >/dev/null 2>&1 -mdconfig -a -t malloc -s 1M -u $us || exit 1 -mdconfig -a -t malloc -s 2M -u `expr $us + 1` || exit 1 -mdconfig -a -t malloc -s 3M -u `expr $us + 2` || exit 1 +us0=$(attach_md -t malloc -s 1M) || exit 1 +us1=$(attach_md -t malloc -s 2M) || exit 1 +us2=$(attach_md -t malloc -s 3M) || exit 1 -gstripe create -s 8192 $name /dev/md${us} /dev/md`expr $us + 1` /dev/md`expr $us + 2` || exit 1 +gstripe create -s 8192 $name /dev/$us0 /dev/$us1 /dev/$us2 || exit 1 devwait dd if=${src} of=/dev/stripe/${name} bs=1m count=$tsize >/dev/null 2>&1 dd if=/dev/stripe/${name} of=${dst} bs=1m count=$tsize >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -gstripe destroy $name -mdconfig -d -u $us -mdconfig -d -u `expr $us + 1` -mdconfig -d -u `expr $us + 2` rm -f ${src} ${dst} Index: projects/clang380-import/tools/regression/geom_subr.sh =================================================================== --- projects/clang380-import/tools/regression/geom_subr.sh (revision 293686) +++ projects/clang380-import/tools/regression/geom_subr.sh (revision 293687) @@ -1,49 +1,60 @@ #!/bin/sh # $FreeBSD$ -if [ $(id -u) -ne 0 ]; then - echo 'Tests must be run as root' - echo 'Bail out!' - exit 1 -fi -kldstat -q -m g_${class} || geom ${class} load || exit 1 - devwait() { while :; do if [ -c /dev/${class}/${name} ]; then return fi sleep 0.2 done } -# Need to keep track of the test md devices to avoid the scenario where a test -# failing will cause the other tests to bomb out, or a test failing will leave -# a large number of md(4) devices lingering around -: ${TMPDIR=/tmp} -export TMPDIR -TEST_MDS_FILE=$(mktemp ${TMPDIR}/test_mds.XXXXXX) || exit 1 - attach_md() { local test_md test_md=$(mdconfig -a "$@") || exit echo $test_md >> $TEST_MDS_FILE || exit echo $test_md } geom_test_cleanup() { local test_md - if [ -f $TEST_MDS_FILE ]; then + if [ -f "$TEST_MDS_FILE" ]; then while read test_md; do # The "#" tells the TAP parser this is a comment echo "# Removing test memory disk: $test_md" mdconfig -d -u $test_md done < $TEST_MDS_FILE fi - rm -f $TEST_MDS_FILE + rm -f "$TEST_MDS_FILE" } + +if [ $(id -u) -ne 0 ]; then + echo 'Tests must be run as root' + echo 'Bail out!' + exit 1 +fi +# If the geom class isn't already loaded, try loading it. +if ! kldstat -q -m g_${class}; then + if ! geom ${class} load; then + echo "Could not load module for geom class=${class}" + echo 'Bail out!' + exit 1 + fi +fi + +# Need to keep track of the test md devices to avoid the scenario where a test +# failing will cause the other tests to bomb out, or a test failing will leave +# a large number of md(4) devices lingering around +: ${TMPDIR=/tmp} +export TMPDIR +if ! TEST_MDS_FILE=$(mktemp ${TMPDIR}/test_mds.XXXXXX); then + echo 'Failed to create temporary file for tracking the test md(4) devices' + echo 'Bail out!' + exit 1 +fi Index: projects/clang380-import/tools/regression/geom_uzip/runtests.sh =================================================================== --- projects/clang380-import/tools/regression/geom_uzip/runtests.sh (revision 293686) +++ projects/clang380-import/tools/regression/geom_uzip/runtests.sh (nonexistent) @@ -1,10 +0,0 @@ -#!/bin/sh -# -# $FreeBSD$ -# - -dir=`dirname $0` - -for ts in `dirname $0`/test-*.sh; do - sh $ts -done Property changes on: projects/clang380-import/tools/regression/geom_uzip/runtests.sh ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang380-import/tools/regression/geom_uzip/test-1.sh =================================================================== --- projects/clang380-import/tools/regression/geom_uzip/test-1.sh (revision 293686) +++ projects/clang380-import/tools/regression/geom_uzip/test-1.sh (nonexistent) @@ -1,36 +0,0 @@ -#!/bin/sh -# -# $FreeBSD$ -# - -mntpoint="/mnt/test-1" - -# -# prepare -kldload geom_uzip -uudecode test-1.img.uzip.uue -num=`mdconfig -an -f test-1.img.uzip` || exit 1 -sleep 1 - -# -# mount -mkdir -p "${mntpoint}" -mount -o ro /dev/md${num}.uzip "${mntpoint}" || exit 1 - -# -# compare -#cat "${mntpoint}/etalon.txt" -diff -u etalon/etalon.txt "${mntpoint}/etalon.txt" -if [ $? -eq 0 ]; then - echo "PASS" -else - echo "FAIL" -fi - -# -# cleanup -umount "${mntpoint}" -rmdir "${mntpoint}" -mdconfig -d -u ${num} -sleep 1 -kldunload geom_uzip Property changes on: projects/clang380-import/tools/regression/geom_uzip/test-1.sh ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang380-import/tools/regression/geom_uzip/test-2.sh =================================================================== --- projects/clang380-import/tools/regression/geom_uzip/test-2.sh (revision 293686) +++ projects/clang380-import/tools/regression/geom_uzip/test-2.sh (nonexistent) @@ -1,15 +0,0 @@ -#!/bin/sh -# -# $FreeBSD$ -# - -# -# prepare -kldload geom_uzip -uudecode test-1.img.uzip.uue -num=`mdconfig -an -f test-1.img.uzip` || exit 1 -sleep 1 - -# -# destroy -kldunload geom_uzip Property changes on: projects/clang380-import/tools/regression/geom_uzip/test-2.sh ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang380-import/tools/regression/geom_uzip/Makefile =================================================================== --- projects/clang380-import/tools/regression/geom_uzip/Makefile (revision 293686) +++ projects/clang380-import/tools/regression/geom_uzip/Makefile (revision 293687) @@ -1,23 +1,23 @@ # # $FreeBSD$ # # Regression test for geom_ugz. # IMAGE= test-1.img ZIMAGE= ${IMAGE}.uzip UZIMAGE= ${ZIMAGE}.uue test: - @sh runtests.sh + prove -rv ./test-1.t image: makefs -s 1048576 ${IMAGE} etalon printf "#\n# $$" >${UZIMAGE} printf "FreeBSD$$\n#\n\n" >> ${UZIMAGE} mkuzip -o ${ZIMAGE} ${IMAGE} uuencode ${ZIMAGE} ${ZIMAGE} >>${UZIMAGE} rm ${ZIMAGE} clean: rm -f ${IMAGE} ${ZIMAGE} Index: projects/clang380-import/tools/regression/geom_uzip/conf.sh =================================================================== --- projects/clang380-import/tools/regression/geom_uzip/conf.sh (nonexistent) +++ projects/clang380-import/tools/regression/geom_uzip/conf.sh (revision 293687) @@ -0,0 +1,20 @@ +#!/bin/sh +# $FreeBSD$ + +class="uzip" +base=`basename $0` + +uzip_test_cleanup() +{ + if [ -n "$mntpoint" ]; then + umount $mntpoint + rmdir $mntpoint + fi + geom_test_cleanup +} +trap uzip_test_cleanup ABRT EXIT INT TERM + +. `dirname $0`/../geom_subr.sh + +# NOTE: make sure $TMPDIR has been set by geom_subr.sh if unset [by kyua, etc] +mntpoint=$(mktemp -d tmp.XXXXXX) || exit Property changes on: projects/clang380-import/tools/regression/geom_uzip/conf.sh ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang380-import/tools/regression/geom_uzip/test-1.t =================================================================== --- projects/clang380-import/tools/regression/geom_uzip/test-1.t (nonexistent) +++ projects/clang380-import/tools/regression/geom_uzip/test-1.t (revision 293687) @@ -0,0 +1,22 @@ +#!/bin/sh +# $FreeBSD$ + +testsdir=$(dirname $0) +. $testsdir/conf.sh + +echo "1..1" + +UUE=$testsdir/test-1.img.uzip.uue +uudecode $UUE +us0=$(attach_md -f $(basename $UUE .uue)) || exit 1 +sleep 1 + +mount -o ro /dev/${us0}.uzip "${mntpoint}" || exit 1 + +#cat "${mntpoint}/etalon.txt" +diff -I '\$FreeBSD.*\$' -u $testsdir/etalon/etalon.txt "${mntpoint}/etalon.txt" +if [ $? -eq 0 ]; then + echo "ok 1" +else + echo "not ok 1" +fi Property changes on: projects/clang380-import/tools/regression/geom_uzip/test-1.t ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: projects/clang380-import/usr.bin/netstat/mbuf.c =================================================================== --- projects/clang380-import/usr.bin/netstat/mbuf.c (revision 293686) +++ projects/clang380-import/usr.bin/netstat/mbuf.c (revision 293687) @@ -1,339 +1,359 @@ /*- * Copyright (c) 1983, 1988, 1993 * The Regents of the University of California. * Copyright (c) 2005 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)mbuf.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "netstat.h" /* * Print mbuf statistics. */ void mbpr(void *kvmd, u_long mbaddr) { struct memory_type_list *mtlp; struct memory_type *mtp; uintmax_t mbuf_count, mbuf_bytes, mbuf_free, mbuf_failures, mbuf_size; uintmax_t mbuf_sleeps; uintmax_t cluster_count, cluster_limit, cluster_free; uintmax_t cluster_failures, cluster_size, cluster_sleeps; uintmax_t packet_count, packet_bytes, packet_free, packet_failures; uintmax_t packet_sleeps; uintmax_t tag_bytes; uintmax_t jumbop_count, jumbop_limit, jumbop_free; uintmax_t jumbop_failures, jumbop_sleeps, jumbop_size; uintmax_t jumbo9_count, jumbo9_limit, jumbo9_free; uintmax_t jumbo9_failures, jumbo9_sleeps, jumbo9_size; uintmax_t jumbo16_count, jumbo16_limit, jumbo16_free; uintmax_t jumbo16_failures, jumbo16_sleeps, jumbo16_size; uintmax_t bytes_inuse, bytes_incache, bytes_total; int nsfbufs, nsfbufspeak, nsfbufsused; struct sfstat sfstat; size_t mlen; int error; mtlp = memstat_mtl_alloc(); if (mtlp == NULL) { xo_warn("memstat_mtl_alloc"); return; } /* * Use memstat_*_all() because some mbuf-related memory is in uma(9), * and some malloc(9). */ if (live) { if (memstat_sysctl_all(mtlp, 0) < 0) { xo_warnx("memstat_sysctl_all: %s", memstat_strerror(memstat_mtl_geterror(mtlp))); goto out; } } else { if (memstat_kvm_all(mtlp, kvmd) < 0) { error = memstat_mtl_geterror(mtlp); if (error == MEMSTAT_ERROR_KVM) xo_warnx("memstat_kvm_all: %s", kvm_geterr(kvmd)); else xo_warnx("memstat_kvm_all: %s", memstat_strerror(error)); goto out; } } mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_MEM_NAME); if (mtp == NULL) { xo_warnx("memstat_mtl_find: zone %s not found", MBUF_MEM_NAME); goto out; } mbuf_count = memstat_get_count(mtp); mbuf_bytes = memstat_get_bytes(mtp); mbuf_free = memstat_get_free(mtp); mbuf_failures = memstat_get_failures(mtp); mbuf_sleeps = memstat_get_sleeps(mtp); mbuf_size = memstat_get_size(mtp); mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_PACKET_MEM_NAME); if (mtp == NULL) { xo_warnx("memstat_mtl_find: zone %s not found", MBUF_PACKET_MEM_NAME); goto out; } packet_count = memstat_get_count(mtp); packet_bytes = memstat_get_bytes(mtp); packet_free = memstat_get_free(mtp); packet_sleeps = memstat_get_sleeps(mtp); packet_failures = memstat_get_failures(mtp); mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_CLUSTER_MEM_NAME); if (mtp == NULL) { xo_warnx("memstat_mtl_find: zone %s not found", MBUF_CLUSTER_MEM_NAME); goto out; } cluster_count = memstat_get_count(mtp); cluster_limit = memstat_get_countlimit(mtp); cluster_free = memstat_get_free(mtp); cluster_failures = memstat_get_failures(mtp); cluster_sleeps = memstat_get_sleeps(mtp); cluster_size = memstat_get_size(mtp); mtp = memstat_mtl_find(mtlp, ALLOCATOR_MALLOC, MBUF_TAG_MEM_NAME); if (mtp == NULL) { xo_warnx("memstat_mtl_find: malloc type %s not found", MBUF_TAG_MEM_NAME); goto out; } tag_bytes = memstat_get_bytes(mtp); mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_JUMBOP_MEM_NAME); if (mtp == NULL) { xo_warnx("memstat_mtl_find: zone %s not found", MBUF_JUMBOP_MEM_NAME); goto out; } jumbop_count = memstat_get_count(mtp); jumbop_limit = memstat_get_countlimit(mtp); jumbop_free = memstat_get_free(mtp); jumbop_failures = memstat_get_failures(mtp); jumbop_sleeps = memstat_get_sleeps(mtp); jumbop_size = memstat_get_size(mtp); mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_JUMBO9_MEM_NAME); if (mtp == NULL) { xo_warnx("memstat_mtl_find: zone %s not found", MBUF_JUMBO9_MEM_NAME); goto out; } jumbo9_count = memstat_get_count(mtp); jumbo9_limit = memstat_get_countlimit(mtp); jumbo9_free = memstat_get_free(mtp); jumbo9_failures = memstat_get_failures(mtp); jumbo9_sleeps = memstat_get_sleeps(mtp); jumbo9_size = memstat_get_size(mtp); mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_JUMBO16_MEM_NAME); if (mtp == NULL) { xo_warnx("memstat_mtl_find: zone %s not found", MBUF_JUMBO16_MEM_NAME); goto out; } jumbo16_count = memstat_get_count(mtp); jumbo16_limit = memstat_get_countlimit(mtp); jumbo16_free = memstat_get_free(mtp); jumbo16_failures = memstat_get_failures(mtp); jumbo16_sleeps = memstat_get_sleeps(mtp); jumbo16_size = memstat_get_size(mtp); xo_open_container("mbuf-statistics"); xo_emit("{:mbuf-current/%ju}/{:mbuf-cache/%ju}/{:mbuf-total/%ju} " "{N:mbufs in use (current\\/cache\\/total)}\n", mbuf_count + packet_count, mbuf_free + packet_free, mbuf_count + packet_count + mbuf_free + packet_free); xo_emit("{:cluster-current/%ju}/{:cluster-cache/%ju}/" "{:cluster-total/%ju}/{:cluster-max/%ju} " "{N:mbuf clusters in use (current\\/cache\\/total\\/max)}\n", cluster_count - packet_free, cluster_free + packet_free, cluster_count + cluster_free, cluster_limit); xo_emit("{:packet-count/%ju}/{:packet-free/%ju} " "{N:mbuf+clusters out of packet secondary zone in use " "(current\\/cache)}\n", packet_count, packet_free); xo_emit("{:jumbo-count/%ju}/{:jumbo-cache/%ju}/{:jumbo-total/%ju}/" "{:jumbo-max/%ju} {:jumbo-page-size/%ju}{U:k} {N:(page size)} " "{N:jumbo clusters in use (current\\/cache\\/total\\/max)}\n", jumbop_count, jumbop_free, jumbop_count + jumbop_free, jumbop_limit, jumbop_size / 1024); xo_emit("{:jumbo9-count/%ju}/{:jumbo9-cache/%ju}/" "{:jumbo9-total/%ju}/{:jumbo9-max/%ju} " "{N:9k jumbo clusters in use (current\\/cache\\/total\\/max)}\n", jumbo9_count, jumbo9_free, jumbo9_count + jumbo9_free, jumbo9_limit); xo_emit("{:jumbo16-count/%ju}/{:jumbo16-cache/%ju}/" "{:jumbo16-total/%ju}/{:jumbo16-limit/%ju} " "{N:16k jumbo clusters in use (current\\/cache\\/total\\/max)}\n", jumbo16_count, jumbo16_free, jumbo16_count + jumbo16_free, jumbo16_limit); #if 0 xo_emit("{:tag-count/%ju} {N:mbuf tags in use}\n", tag_count); #endif /*- * Calculate in-use bytes as: * - straight mbuf memory * - mbuf memory in packets * - the clusters attached to packets * - and the rest of the non-packet-attached clusters. * - m_tag memory * This avoids counting the clusters attached to packets in the cache. * This currently excludes sf_buf space. */ bytes_inuse = mbuf_bytes + /* straight mbuf memory */ packet_bytes + /* mbufs in packets */ (packet_count * cluster_size) + /* clusters in packets */ /* other clusters */ ((cluster_count - packet_count - packet_free) * cluster_size) + tag_bytes + (jumbop_count * jumbop_size) + /* jumbo clusters */ (jumbo9_count * jumbo9_size) + (jumbo16_count * jumbo16_size); /* * Calculate in-cache bytes as: * - cached straught mbufs * - cached packet mbufs * - cached packet clusters * - cached straight clusters * This currently excludes sf_buf space. */ bytes_incache = (mbuf_free * mbuf_size) + /* straight free mbufs */ (packet_free * mbuf_size) + /* mbufs in free packets */ (packet_free * cluster_size) + /* clusters in free packets */ (cluster_free * cluster_size) + /* free clusters */ (jumbop_free * jumbop_size) + /* jumbo clusters */ (jumbo9_free * jumbo9_size) + (jumbo16_free * jumbo16_size); /* * Total is bytes in use + bytes in cache. This doesn't take into * account various other misc data structures, overhead, etc, but * gives the user something useful despite that. */ bytes_total = bytes_inuse + bytes_incache; xo_emit("{:bytes-in-use/%ju}{U:K}/{:bytes-in-cache/%ju}{U:K}/" "{:bytes-total/%ju}{U:K} " "{N:bytes allocated to network (current\\/cache\\/total)}\n", bytes_inuse / 1024, bytes_incache / 1024, bytes_total / 1024); xo_emit("{:mbuf-failures/%ju}/{:cluster-failures/%ju}/" "{:packet-failures/%ju} {N:requests for mbufs denied " "(mbufs\\/clusters\\/mbuf+clusters)}\n", mbuf_failures, cluster_failures, packet_failures); xo_emit("{:mbuf-sleeps/%ju}/{:cluster-sleeps/%ju}/{:packet-sleeps/%ju} " "{N:requests for mbufs delayed " "(mbufs\\/clusters\\/mbuf+clusters)}\n", mbuf_sleeps, cluster_sleeps, packet_sleeps); xo_emit("{:jumbop-sleeps/%ju}/{:jumbo9-sleeps/%ju}/" "{:jumbo16-sleeps/%ju} {N:/requests for jumbo clusters delayed " "(%juk\\/9k\\/16k)}\n", jumbop_sleeps, jumbo9_sleeps, jumbo16_sleeps, jumbop_size / 1024); xo_emit("{:jumbop-failures/%ju}/{:jumbo9-failures/%ju}/" "{:jumbo16-failures/%ju} {N:/requests for jumbo clusters denied " "(%juk\\/9k\\/16k)}\n", jumbop_failures, jumbo9_failures, jumbo16_failures, jumbop_size / 1024); mlen = sizeof(nsfbufs); if (live && sysctlbyname("kern.ipc.nsfbufs", &nsfbufs, &mlen, NULL, 0) == 0 && sysctlbyname("kern.ipc.nsfbufsused", &nsfbufsused, &mlen, NULL, 0) == 0 && sysctlbyname("kern.ipc.nsfbufspeak", &nsfbufspeak, &mlen, NULL, 0) == 0) xo_emit("{:nsfbufs-current/%d}/{:nsfbufs-peak/%d}/" "{:nsfbufs/%d} " "{N:sfbufs in use (current\\/peak\\/max)}\n", nsfbufsused, nsfbufspeak, nsfbufs); if (fetch_stats("kern.ipc.sfstat", mbaddr, &sfstat, sizeof(sfstat), kread_counters) != 0) goto out; + xo_emit("{:sendfile-syscalls/%ju} {N:sendfile syscalls}\n", + (uintmax_t)sfstat.sf_syscalls); + xo_emit("{:sendfile-no-io/%ju} " + "{N:sendfile syscalls completed without I\\/O request}\n", + (uintmax_t)sfstat.sf_noiocnt); + xo_emit("{:sendfile-io-count/%ju} " + "{N:requests for I\\/O initiated by sendfile}\n", + (uintmax_t)sfstat.sf_iocnt); + xo_emit("{:sendfile-pages-sent/%ju} " + "{N:pages read by sendfile as part of a request}\n", + (uintmax_t)sfstat.sf_pages_read); + xo_emit("{:sendfile-pages-valid/%ju} " + "{N:pages were valid at time of a sendfile request}\n", + (uintmax_t)sfstat.sf_pages_valid); + xo_emit("{:sendfile-requested-readahead/%ju} " + "{N:pages were requested for read ahead by applications}\n", + (uintmax_t)sfstat.sf_rhpages_requested); + xo_emit("{:sendfile-readahead/%ju} " + "{N:pages were read ahead by sendfile}\n", + (uintmax_t)sfstat.sf_rhpages_read); + xo_emit("{:sendfile-busy-encounters/%ju} " + "{N:times sendfile encountered an already busy page}\n", + (uintmax_t)sfstat.sf_busy); xo_emit("{:sfbufs-alloc-failed/%ju} {N:requests for sfbufs denied}\n", (uintmax_t)sfstat.sf_allocfail); xo_emit("{:sfbufs-alloc-wait/%ju} {N:requests for sfbufs delayed}\n", (uintmax_t)sfstat.sf_allocwait); - xo_emit("{:sfbufs-io-count/%ju} " - "{N:requests for I\\/O initiated by sendfile}\n", - (uintmax_t)sfstat.sf_iocnt); out: xo_close_container("mbuf-statistics"); memstat_mtl_free(mtlp); } Index: projects/clang380-import/usr.sbin/bhyve/pci_virtio_net.c =================================================================== --- projects/clang380-import/usr.sbin/bhyve/pci_virtio_net.c (revision 293686) +++ projects/clang380-import/usr.sbin/bhyve/pci_virtio_net.c (revision 293687) @@ -1,730 +1,972 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include +#ifndef NETMAP_WITH_LIBS +#define NETMAP_WITH_LIBS +#endif +#include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "pci_emul.h" #include "mevent.h" #include "virtio.h" #define VTNET_RINGSZ 1024 #define VTNET_MAXSEGS 256 /* * Host capabilities. Note that we only offer a few of these. */ #define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */ #define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */ #define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */ #define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */ #define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */ #define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* guest can rcv TSOv6 */ #define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* guest can rcv TSO with ECN */ #define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* guest can rcv UFO */ #define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can rcv TSOv4 */ #define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* host can rcv TSOv6 */ #define VIRTIO_NET_F_HOST_ECN (1 << 13) /* host can rcv TSO with ECN */ #define VIRTIO_NET_F_HOST_UFO (1 << 14) /* host can rcv UFO */ #define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */ #define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */ #define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* control channel available */ #define VIRTIO_NET_F_CTRL_RX (1 << 18) /* control channel RX mode support */ #define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */ #define VIRTIO_NET_F_GUEST_ANNOUNCE \ (1 << 21) /* guest can send gratuitous pkts */ #define VTNET_S_HOSTCAPS \ ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \ VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC) /* * PCI config-space "registers" */ struct virtio_net_config { uint8_t mac[6]; uint16_t status; } __packed; /* * Queue definitions. */ #define VTNET_RXQ 0 #define VTNET_TXQ 1 #define VTNET_CTLQ 2 /* NB: not yet supported */ #define VTNET_MAXQ 3 /* * Fixed network header size */ struct virtio_net_rxhdr { uint8_t vrh_flags; uint8_t vrh_gso_type; uint16_t vrh_hdr_len; uint16_t vrh_gso_size; uint16_t vrh_csum_start; uint16_t vrh_csum_offset; uint16_t vrh_bufs; } __packed; /* * Debug printf */ static int pci_vtnet_debug; #define DPRINTF(params) if (pci_vtnet_debug) printf params #define WPRINTF(params) printf params /* * Per-device softc */ struct pci_vtnet_softc { struct virtio_softc vsc_vs; struct vqueue_info vsc_queues[VTNET_MAXQ - 1]; pthread_mutex_t vsc_mtx; struct mevent *vsc_mevp; int vsc_tapfd; + struct nm_desc *vsc_nmd; + int vsc_rx_ready; volatile int resetting; /* set and checked outside lock */ uint64_t vsc_features; /* negotiated features */ struct virtio_net_config vsc_config; pthread_mutex_t rx_mtx; int rx_in_progress; int rx_vhdrlen; int rx_merge; /* merged rx bufs in use */ pthread_t tx_tid; pthread_mutex_t tx_mtx; pthread_cond_t tx_cond; int tx_in_progress; + + void (*pci_vtnet_rx)(struct pci_vtnet_softc *sc); + void (*pci_vtnet_tx)(struct pci_vtnet_softc *sc, struct iovec *iov, + int iovcnt, int len); }; static void pci_vtnet_reset(void *); /* static void pci_vtnet_notify(void *, struct vqueue_info *); */ static int pci_vtnet_cfgread(void *, int, int, uint32_t *); static int pci_vtnet_cfgwrite(void *, int, int, uint32_t); static void pci_vtnet_neg_features(void *, uint64_t); static struct virtio_consts vtnet_vi_consts = { "vtnet", /* our name */ VTNET_MAXQ - 1, /* we currently support 2 virtqueues */ sizeof(struct virtio_net_config), /* config reg size */ pci_vtnet_reset, /* reset */ NULL, /* device-wide qnotify -- not used */ pci_vtnet_cfgread, /* read PCI config */ pci_vtnet_cfgwrite, /* write PCI config */ pci_vtnet_neg_features, /* apply negotiated features */ VTNET_S_HOSTCAPS, /* our capabilities */ }; /* * If the transmit thread is active then stall until it is done. */ static void pci_vtnet_txwait(struct pci_vtnet_softc *sc) { pthread_mutex_lock(&sc->tx_mtx); while (sc->tx_in_progress) { pthread_mutex_unlock(&sc->tx_mtx); usleep(10000); pthread_mutex_lock(&sc->tx_mtx); } pthread_mutex_unlock(&sc->tx_mtx); } /* * If the receive thread is active then stall until it is done. */ static void pci_vtnet_rxwait(struct pci_vtnet_softc *sc) { pthread_mutex_lock(&sc->rx_mtx); while (sc->rx_in_progress) { pthread_mutex_unlock(&sc->rx_mtx); usleep(10000); pthread_mutex_lock(&sc->rx_mtx); } pthread_mutex_unlock(&sc->rx_mtx); } static void pci_vtnet_reset(void *vsc) { struct pci_vtnet_softc *sc = vsc; DPRINTF(("vtnet: device reset requested !\n")); sc->resetting = 1; /* * Wait for the transmit and receive threads to finish their * processing. */ pci_vtnet_txwait(sc); pci_vtnet_rxwait(sc); sc->vsc_rx_ready = 0; sc->rx_merge = 1; sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr); /* now reset rings, MSI-X vectors, and negotiated capabilities */ vi_reset_dev(&sc->vsc_vs); sc->resetting = 0; } /* * Called to send a buffer chain out to the tap device */ static void pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, int len) { static char pad[60]; /* all zero bytes */ if (sc->vsc_tapfd == -1) return; /* * If the length is < 60, pad out to that and add the * extra zero'd segment to the iov. It is guaranteed that * there is always an extra iov available by the caller. */ if (len < 60) { iov[iovcnt].iov_base = pad; iov[iovcnt].iov_len = 60 - len; iovcnt++; } (void) writev(sc->vsc_tapfd, iov, iovcnt); } /* * Called when there is read activity on the tap file descriptor. * Each buffer posted by the guest is assumed to be able to contain * an entire ethernet frame + rx header. * MP note: the dummybuf is only used for discarding frames, so there * is no need for it to be per-vtnet or locked. */ static uint8_t dummybuf[2048]; static __inline struct iovec * rx_iov_trim(struct iovec *iov, int *niov, int tlen) { struct iovec *riov; /* XXX short-cut: assume first segment is >= tlen */ assert(iov[0].iov_len >= tlen); iov[0].iov_len -= tlen; if (iov[0].iov_len == 0) { assert(*niov > 1); *niov -= 1; riov = &iov[1]; } else { iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen); riov = &iov[0]; } return (riov); } static void pci_vtnet_tap_rx(struct pci_vtnet_softc *sc) { struct iovec iov[VTNET_MAXSEGS], *riov; struct vqueue_info *vq; void *vrx; int len, n; uint16_t idx; /* * Should never be called without a valid tap fd */ assert(sc->vsc_tapfd != -1); /* * But, will be called when the rx ring hasn't yet * been set up or the guest is resetting the device. */ if (!sc->vsc_rx_ready || sc->resetting) { /* * Drop the packet and try later. */ (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); return; } /* * Check for available rx buffers */ vq = &sc->vsc_queues[VTNET_RXQ]; if (!vq_has_descs(vq)) { /* * Drop the packet and try later. Interrupt on * empty, if that's negotiated. */ (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); vq_endchains(vq, 1); return; } do { /* * Get descriptor chain. */ n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); assert(n >= 1 && n <= VTNET_MAXSEGS); /* * Get a pointer to the rx header, and use the * data immediately following it for the packet buffer. */ vrx = iov[0].iov_base; riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen); len = readv(sc->vsc_tapfd, riov, n); if (len < 0 && errno == EWOULDBLOCK) { /* * No more packets, but still some avail ring * entries. Interrupt if needed/appropriate. */ vq_retchain(vq); vq_endchains(vq, 0); return; } /* * The only valid field in the rx packet header is the * number of buffers if merged rx bufs were negotiated. */ memset(vrx, 0, sc->rx_vhdrlen); if (sc->rx_merge) { struct virtio_net_rxhdr *vrxh; vrxh = vrx; vrxh->vrh_bufs = 1; } /* * Release this chain and handle more chains. */ vq_relchain(vq, idx, len + sc->rx_vhdrlen); } while (vq_has_descs(vq)); /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */ vq_endchains(vq, 1); } +static int +pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt) +{ + int r, i; + int len = 0; + + for (r = nmd->cur_tx_ring; ; ) { + struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r); + uint32_t cur, idx; + char *buf; + + if (nm_ring_empty(ring)) { + r++; + if (r > nmd->last_tx_ring) + r = nmd->first_tx_ring; + if (r == nmd->cur_rx_ring) + break; + continue; + } + cur = ring->cur; + idx = ring->slot[cur].buf_idx; + buf = NETMAP_BUF(ring, idx); + + for (i = 0; i < iovcnt; i++) { + memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len); + len += iov[i].iov_len; + } + ring->slot[cur].len = len; + ring->head = ring->cur = nm_ring_next(ring, cur); + nmd->cur_tx_ring = r; + ioctl(nmd->fd, NIOCTXSYNC, NULL); + break; + } + + return (len); +} + +static inline int +pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt) +{ + int len = 0; + int i = 0; + int r; + + for (r = nmd->cur_rx_ring; ; ) { + struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r); + uint32_t cur, idx; + char *buf; + size_t left; + + if (nm_ring_empty(ring)) { + r++; + if (r > nmd->last_rx_ring) + r = nmd->first_rx_ring; + if (r == nmd->cur_rx_ring) + break; + continue; + } + cur = ring->cur; + idx = ring->slot[cur].buf_idx; + buf = NETMAP_BUF(ring, idx); + left = ring->slot[cur].len; + + for (i = 0; i < iovcnt && left > 0; i++) { + if (iov[i].iov_len > left) + iov[i].iov_len = left; + memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len); + len += iov[i].iov_len; + left -= iov[i].iov_len; + } + ring->head = ring->cur = nm_ring_next(ring, cur); + nmd->cur_rx_ring = r; + ioctl(nmd->fd, NIOCRXSYNC, NULL); + break; + } + for (; i < iovcnt; i++) + iov[i].iov_len = 0; + + return (len); +} + +/* + * Called to send a buffer chain out to the vale port + */ static void -pci_vtnet_tap_callback(int fd, enum ev_type type, void *param) +pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, + int len) { + static char pad[60]; /* all zero bytes */ + + if (sc->vsc_nmd == NULL) + return; + + /* + * If the length is < 60, pad out to that and add the + * extra zero'd segment to the iov. It is guaranteed that + * there is always an extra iov available by the caller. + */ + if (len < 60) { + iov[iovcnt].iov_base = pad; + iov[iovcnt].iov_len = 60 - len; + iovcnt++; + } + (void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt); +} + +static void +pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc) +{ + struct iovec iov[VTNET_MAXSEGS], *riov; + struct vqueue_info *vq; + void *vrx; + int len, n; + uint16_t idx; + + /* + * Should never be called without a valid netmap descriptor + */ + assert(sc->vsc_nmd != NULL); + + /* + * But, will be called when the rx ring hasn't yet + * been set up or the guest is resetting the device. + */ + if (!sc->vsc_rx_ready || sc->resetting) { + /* + * Drop the packet and try later. + */ + (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf); + return; + } + + /* + * Check for available rx buffers + */ + vq = &sc->vsc_queues[VTNET_RXQ]; + if (!vq_has_descs(vq)) { + /* + * Drop the packet and try later. Interrupt on + * empty, if that's negotiated. + */ + (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf); + vq_endchains(vq, 1); + return; + } + + do { + /* + * Get descriptor chain. + */ + n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); + assert(n >= 1 && n <= VTNET_MAXSEGS); + + /* + * Get a pointer to the rx header, and use the + * data immediately following it for the packet buffer. + */ + vrx = iov[0].iov_base; + riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen); + + len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n); + + if (len == 0) { + /* + * No more packets, but still some avail ring + * entries. Interrupt if needed/appropriate. + */ + vq_endchains(vq, 0); + return; + } + + /* + * The only valid field in the rx packet header is the + * number of buffers if merged rx bufs were negotiated. + */ + memset(vrx, 0, sc->rx_vhdrlen); + + if (sc->rx_merge) { + struct virtio_net_rxhdr *vrxh; + + vrxh = vrx; + vrxh->vrh_bufs = 1; + } + + /* + * Release this chain and handle more chains. + */ + vq_relchain(vq, idx, len + sc->rx_vhdrlen); + } while (vq_has_descs(vq)); + + /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */ + vq_endchains(vq, 1); +} + +static void +pci_vtnet_rx_callback(int fd, enum ev_type type, void *param) +{ struct pci_vtnet_softc *sc = param; pthread_mutex_lock(&sc->rx_mtx); sc->rx_in_progress = 1; - pci_vtnet_tap_rx(sc); + sc->pci_vtnet_rx(sc); sc->rx_in_progress = 0; pthread_mutex_unlock(&sc->rx_mtx); } static void pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) { struct pci_vtnet_softc *sc = vsc; /* * A qnotify means that the rx process can now begin */ if (sc->vsc_rx_ready == 0) { sc->vsc_rx_ready = 1; vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY; } } static void pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq) { struct iovec iov[VTNET_MAXSEGS + 1]; int i, n; int plen, tlen; uint16_t idx; /* * Obtain chain of descriptors. The first one is * really the header descriptor, so we need to sum * up two lengths: packet length and transfer length. */ n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); assert(n >= 1 && n <= VTNET_MAXSEGS); plen = 0; tlen = iov[0].iov_len; for (i = 1; i < n; i++) { plen += iov[i].iov_len; tlen += iov[i].iov_len; } DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n)); - pci_vtnet_tap_tx(sc, &iov[1], n - 1, plen); + sc->pci_vtnet_tx(sc, &iov[1], n - 1, plen); /* chain is processed, release it and set tlen */ vq_relchain(vq, idx, tlen); } static void pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq) { struct pci_vtnet_softc *sc = vsc; /* * Any ring entries to process? */ if (!vq_has_descs(vq)) return; /* Signal the tx thread for processing */ pthread_mutex_lock(&sc->tx_mtx); vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY; if (sc->tx_in_progress == 0) pthread_cond_signal(&sc->tx_cond); pthread_mutex_unlock(&sc->tx_mtx); } /* * Thread which will handle processing of TX desc */ static void * pci_vtnet_tx_thread(void *param) { struct pci_vtnet_softc *sc = param; struct vqueue_info *vq; int error; vq = &sc->vsc_queues[VTNET_TXQ]; /* * Let us wait till the tx queue pointers get initialised & * first tx signaled */ pthread_mutex_lock(&sc->tx_mtx); error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); assert(error == 0); for (;;) { /* note - tx mutex is locked here */ while (sc->resetting || !vq_has_descs(vq)) { vq->vq_used->vu_flags &= ~VRING_USED_F_NO_NOTIFY; mb(); if (!sc->resetting && vq_has_descs(vq)) break; sc->tx_in_progress = 0; error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); assert(error == 0); } vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY; sc->tx_in_progress = 1; pthread_mutex_unlock(&sc->tx_mtx); do { /* * Run through entries, placing them into * iovecs and sending when an end-of-packet * is found */ pci_vtnet_proctx(sc, vq); } while (vq_has_descs(vq)); /* * Generate an interrupt if needed. */ vq_endchains(vq, 1); pthread_mutex_lock(&sc->tx_mtx); } } #ifdef notyet static void pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq) { DPRINTF(("vtnet: control qnotify!\n\r")); } #endif static int pci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr) { struct ether_addr *ea; char *tmpstr; char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 }; tmpstr = strsep(&mac_str,"="); if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) { ea = ether_aton(mac_str); if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) || memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) { fprintf(stderr, "Invalid MAC %s\n", mac_str); return (EINVAL); } else memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN); } return (0); } +static void +pci_vtnet_tap_setup(struct pci_vtnet_softc *sc, char *devname) +{ + char tbuf[80]; + strcpy(tbuf, "/dev/"); + strlcat(tbuf, devname, sizeof(tbuf)); + + sc->pci_vtnet_rx = pci_vtnet_tap_rx; + sc->pci_vtnet_tx = pci_vtnet_tap_tx; + + sc->vsc_tapfd = open(tbuf, O_RDWR); + if (sc->vsc_tapfd == -1) { + WPRINTF(("open of tap device %s failed\n", tbuf)); + return; + } + + /* + * Set non-blocking and register for read + * notifications with the event loop + */ + int opt = 1; + if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) { + WPRINTF(("tap device O_NONBLOCK failed\n")); + close(sc->vsc_tapfd); + sc->vsc_tapfd = -1; + } + + sc->vsc_mevp = mevent_add(sc->vsc_tapfd, + EVF_READ, + pci_vtnet_rx_callback, + sc); + if (sc->vsc_mevp == NULL) { + WPRINTF(("Could not register event\n")); + close(sc->vsc_tapfd); + sc->vsc_tapfd = -1; + } +} + +static void +pci_vtnet_netmap_setup(struct pci_vtnet_softc *sc, char *ifname) +{ + sc->pci_vtnet_rx = pci_vtnet_netmap_rx; + sc->pci_vtnet_tx = pci_vtnet_netmap_tx; + + sc->vsc_nmd = nm_open(ifname, NULL, 0, 0); + if (sc->vsc_nmd == NULL) { + WPRINTF(("open of netmap device %s failed\n", ifname)); + return; + } + + sc->vsc_mevp = mevent_add(sc->vsc_nmd->fd, + EVF_READ, + pci_vtnet_rx_callback, + sc); + if (sc->vsc_mevp == NULL) { + WPRINTF(("Could not register event\n")); + nm_close(sc->vsc_nmd); + sc->vsc_nmd = NULL; + } +} + static int pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { MD5_CTX mdctx; unsigned char digest[16]; char nstr[80]; char tname[MAXCOMLEN + 1]; struct pci_vtnet_softc *sc; char *devname; char *vtopts; int mac_provided; sc = calloc(1, sizeof(struct pci_vtnet_softc)); pthread_mutex_init(&sc->vsc_mtx, NULL); vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues); sc->vsc_vs.vs_mtx = &sc->vsc_mtx; sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ; sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq; sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ; sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq; #ifdef notyet sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ; sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq; #endif /* * Attempt to open the tap device and read the MAC address * if specified */ mac_provided = 0; sc->vsc_tapfd = -1; + sc->vsc_nmd = NULL; if (opts != NULL) { - char tbuf[80]; int err; devname = vtopts = strdup(opts); (void) strsep(&vtopts, ","); if (vtopts != NULL) { err = pci_vtnet_parsemac(vtopts, sc->vsc_config.mac); if (err != 0) { free(devname); return (err); } mac_provided = 1; } - strcpy(tbuf, "/dev/"); - strlcat(tbuf, devname, sizeof(tbuf)); + if (strncmp(devname, "vale", 4) == 0) + pci_vtnet_netmap_setup(sc, devname); + if (strncmp(devname, "tap", 3) == 0 || + strncmp(devname, "vmnet", 5) == 0) + pci_vtnet_tap_setup(sc, devname); free(devname); - - sc->vsc_tapfd = open(tbuf, O_RDWR); - if (sc->vsc_tapfd == -1) { - WPRINTF(("open of tap device %s failed\n", tbuf)); - } else { - /* - * Set non-blocking and register for read - * notifications with the event loop - */ - int opt = 1; - if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) { - WPRINTF(("tap device O_NONBLOCK failed\n")); - close(sc->vsc_tapfd); - sc->vsc_tapfd = -1; - } - - sc->vsc_mevp = mevent_add(sc->vsc_tapfd, - EVF_READ, - pci_vtnet_tap_callback, - sc); - if (sc->vsc_mevp == NULL) { - WPRINTF(("Could not register event\n")); - close(sc->vsc_tapfd); - sc->vsc_tapfd = -1; - } - } } /* * The default MAC address is the standard NetApp OUI of 00-a0-98, * followed by an MD5 of the PCI slot/func number and dev name */ if (!mac_provided) { snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot, pi->pi_func, vmname); MD5Init(&mdctx); MD5Update(&mdctx, nstr, strlen(nstr)); MD5Final(digest, &mdctx); sc->vsc_config.mac[0] = 0x00; sc->vsc_config.mac[1] = 0xa0; sc->vsc_config.mac[2] = 0x98; sc->vsc_config.mac[3] = digest[0]; sc->vsc_config.mac[4] = digest[1]; sc->vsc_config.mac[5] = digest[2]; } /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); /* Link is up if we managed to open tap device. */ sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0); /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */ if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) return (1); /* use BAR 0 to map config regs in IO space */ vi_set_io_bar(&sc->vsc_vs, 0); sc->resetting = 0; sc->rx_merge = 1; sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr); sc->rx_in_progress = 0; pthread_mutex_init(&sc->rx_mtx, NULL); /* * Initialize tx semaphore & spawn TX processing thread. * As of now, only one thread for TX desc processing is * spawned. */ sc->tx_in_progress = 0; pthread_mutex_init(&sc->tx_mtx, NULL); pthread_cond_init(&sc->tx_cond, NULL); pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc); snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot, pi->pi_func); pthread_set_name_np(sc->tx_tid, tname); return (0); } static int pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value) { struct pci_vtnet_softc *sc = vsc; void *ptr; if (offset < 6) { assert(offset + size <= 6); /* * The driver is allowed to change the MAC address */ ptr = &sc->vsc_config.mac[offset]; memcpy(ptr, &value, size); } else { /* silently ignore other writes */ DPRINTF(("vtnet: write to readonly reg %d\n\r", offset)); } return (0); } static int pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval) { struct pci_vtnet_softc *sc = vsc; void *ptr; ptr = (uint8_t *)&sc->vsc_config + offset; memcpy(retval, ptr, size); return (0); } static void pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features) { struct pci_vtnet_softc *sc = vsc; sc->vsc_features = negotiated_features; if (!(sc->vsc_features & VIRTIO_NET_F_MRG_RXBUF)) { sc->rx_merge = 0; /* non-merge rx header is 2 bytes shorter */ sc->rx_vhdrlen -= 2; } } struct pci_devemu pci_de_vnet = { .pe_emu = "virtio-net", .pe_init = pci_vtnet_init, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read }; PCI_EMUL_SET(pci_de_vnet); Index: projects/clang380-import/usr.sbin/bhyve =================================================================== --- projects/clang380-import/usr.sbin/bhyve (revision 293686) +++ projects/clang380-import/usr.sbin/bhyve (revision 293687) Property changes on: projects/clang380-import/usr.sbin/bhyve ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/usr.sbin/bhyve:r293016-293685 Index: projects/clang380-import/usr.sbin/bsdconfig/share/dialog.subr =================================================================== --- projects/clang380-import/usr.sbin/bsdconfig/share/dialog.subr (revision 293686) +++ projects/clang380-import/usr.sbin/bsdconfig/share/dialog.subr (revision 293687) @@ -1,2341 +1,2340 @@ if [ ! "$_DIALOG_SUBR" ]; then _DIALOG_SUBR=1 # # Copyright (c) 2006-2015 Devin Teske # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # ############################################################ INCLUDES BSDCFG_SHARE="/usr/share/bsdconfig" . $BSDCFG_SHARE/common.subr || exit 1 f_dprintf "%s: loading includes..." dialog.subr f_include $BSDCFG_SHARE/strings.subr f_include $BSDCFG_SHARE/variable.subr BSDCFG_LIBE="/usr/libexec/bsdconfig" f_include_lang $BSDCFG_LIBE/include/messages.subr ############################################################ CONFIGURATION # # Default file descriptor to link to stdout for dialog(1) passthru allowing # execution of dialog from within a sub-shell (so-long as its standard output # is explicitly redirected to this file descriptor). # : ${DIALOG_TERMINAL_PASSTHRU_FD:=${TERMINAL_STDOUT_PASSTHRU:-3}} ############################################################ GLOBALS # # Default name of dialog(1) utility # NOTE: This is changed to "Xdialog" by the optional `-X' argument # DIALOG="dialog" # # Default dialog(1) title and backtitle text # DIALOG_TITLE="$pgm" DIALOG_BACKTITLE="bsdconfig" # # Settings used while interacting with dialog(1) # DIALOG_MENU_TAGS="123456789ABCDEFGHIJKLMNOPQRSTUVWYZabcdefghijklmnopqrstuvwxyz" # # Declare that we are fully-compliant with Xdialog(1) by unset'ing all # compatibility settings. # unset XDIALOG_HIGH_DIALOG_COMPAT unset XDIALOG_FORCE_AUTOSIZE unset XDIALOG_INFOBOX_TIMEOUT # # Exit codes for [X]dialog(1) # DIALOG_OK=${SUCCESS:-0} DIALOG_CANCEL=${FAILURE:-1} DIALOG_HELP=2 DIALOG_ITEM_HELP=2 DIALOG_EXTRA=3 DIALOG_ITEM_HELP=4 export DIALOG_ERROR=254 # sh(1) can't handle the default of `-1' DIALOG_ESC=255 # # Default behavior is to call f_dialog_init() automatically when loaded. # : ${DIALOG_SELF_INITIALIZE=1} # # Default terminal size (used if/when running without a controlling terminal) # : ${DEFAULT_TERMINAL_SIZE:=24 80} # # Minimum width(s) for various dialog(1) implementations (sensible global # default(s) for all widgets of a given variant) # : ${DIALOG_MIN_WIDTH:=24} : ${XDIALOG_MIN_WIDTH:=35} # # When manually sizing Xdialog(1) widgets such as calendar and timebox, you'll # need to know the size of the embedded GUI objects because the height passed # to Xdialog(1) for these widgets has to be tall enough to accomodate them. # # These values are helpful when manually sizing with dialog(1) too, but in a # different way. dialog(1) does not make you accomodate the custom items in the # height (but does for width) -- a height of 3 will display three lines and a # full calendar, for example (whereas Xdialog will truncate the calendar if # given a height of 3). For dialog(1), use these values for making sure that # the height does not exceed max_height (obtained by f_dialog_max_size()). # DIALOG_CALENDAR_HEIGHT=15 DIALOG_TIMEBOX_HEIGHT=6 ############################################################ GENERIC FUNCTIONS # f_dialog_data_sanitize $var_to_edit ... # # When using dialog(1) or Xdialog(1) sometimes unintended warnings or errors # are generated from underlying libraries. For example, if $LANG is set to an # invalid or unknown locale, the warnings from the Xdialog(1) libraries will # clutter the output. This function helps by providing a centralied function # that removes spurious warnings from the dialog(1) (or Xdialog(1)) response. # # Simply pass the name of one or more variables that need to be sanitized. # After execution, the variables will hold their newly-sanitized data. # f_dialog_data_sanitize() { if [ "$#" -eq 0 ]; then f_dprintf "%s: called with zero arguments" \ f_dialog_response_sanitize return $FAILURE fi local __var_to_edit for __var_to_edit in $*; do # Skip warnings and trim leading/trailing whitespace setvar $__var_to_edit "$( f_getvar $__var_to_edit | awk ' BEGIN { data = 0 } { if ( ! data ) { if ( $0 ~ /^$/ ) next if ( $0 ~ /^Gdk-WARNING \*\*:/ ) next data = 1 } print } ' )" done } # f_dialog_line_sanitize $var_to_edit ... # # When using dialog(1) or Xdialog(1) sometimes unintended warnings or errors # are generated from underlying libraries. For example, if $LANG is set to an # invalid or unknown locale, the warnings from the Xdialog(1) libraries will # clutter the output. This function helps by providing a centralied function # that removes spurious warnings from the dialog(1) (or Xdialog(1)) response. # # Simply pass the name of one or more variables that need to be sanitized. # After execution, the variables will hold their newly-sanitized data. # # This function, unlike f_dialog_data_sanitize(), also removes leading/trailing # whitespace from each line. # f_dialog_line_sanitize() { if [ "$#" -eq 0 ]; then f_dprintf "%s: called with zero arguments" \ f_dialog_response_sanitize return $FAILURE fi local __var_to_edit for __var_to_edit in $*; do # Skip warnings and trim leading/trailing whitespace setvar $__var_to_edit "$( f_getvar $__var_to_edit | awk ' BEGIN { data = 0 } { if ( ! data ) { if ( $0 ~ /^$/ ) next if ( $0 ~ /^Gdk-WARNING \*\*:/ ) next data = 1 } sub(/^[[:space:]]*/, "") sub(/[[:space:]]*$/, "") print } ' )" done } ############################################################ TITLE FUNCTIONS # f_dialog_title [$new_title] # # Set the title of future dialog(1) ($DIALOG_TITLE) or backtitle of Xdialog(1) # ($DIALOG_BACKTITLE) invocations. If no arguments are given or the first # argument is NULL, the current title is returned. # # Each time this function is called, a backup of the current values is made # allowing a one-time (single-level) restoration of the previous title using # the f_dialog_title_restore() function (below). # f_dialog_title() { local new_title="$1" if [ "${1+set}" ]; then if [ "$USE_XDIALOG" ]; then _DIALOG_BACKTITLE="$DIALOG_BACKTITLE" DIALOG_BACKTITLE="$new_title" else _DIALOG_TITLE="$DIALOG_TITLE" DIALOG_TITLE="$new_title" fi else if [ "$USE_XDIALOG" ]; then echo "$DIALOG_BACKTITLE" else echo "$DIALOG_TITLE" fi fi } # f_dialog_title_restore # # Restore the previous title set by the last call to f_dialog_title(). # Restoration is non-recursive and only works to restore the most-recent title. # f_dialog_title_restore() { if [ "$USE_XDIALOG" ]; then DIALOG_BACKTITLE="$_DIALOG_BACKTITLE" else DIALOG_TITLE="$_DIALOG_TITLE" fi } # f_dialog_backtitle [$new_backtitle] # # Set the backtitle of future dialog(1) ($DIALOG_BACKTITLE) or title of # Xdialog(1) ($DIALOG_TITLE) invocations. If no arguments are given or the # first argument is NULL, the current backtitle is returned. # f_dialog_backtitle() { local new_backtitle="$1" if [ "${1+set}" ]; then if [ "$USE_XDIALOG" ]; then _DIALOG_TITLE="$DIALOG_TITLE" DIALOG_TITLE="$new_backtitle" else _DIALOG_BACKTITLE="$DIALOG_BACKTITLE" DIALOG_BACKTITLE="$new_backtitle" fi else if [ "$USE_XDIALOG" ]; then echo "$DIALOG_TITLE" else echo "$DIALOG_BACKTITLE" fi fi } # f_dialog_backtitle_restore # # Restore the previous backtitle set by the last call to f_dialog_backtitle(). # Restoration is non-recursive and only works to restore the most-recent # backtitle. # f_dialog_backtitle_restore() { if [ "$USE_XDIALOG" ]; then DIALOG_TITLE="$_DIALOG_TITLE" else DIALOG_BACKTITLE="$_DIALOG_BACKTITLE" fi } ############################################################ SIZE FUNCTIONS # f_dialog_max_size $var_height $var_width # # Get the maximum height and width for a dialog widget and store the values in # $var_height and $var_width (respectively). # f_dialog_max_size() { local funcname=f_dialog_max_size local __var_height="$1" __var_width="$2" __max_size [ "$__var_height" -o "$__var_width" ] || return $FAILURE if [ "$USE_XDIALOG" ]; then __max_size="$XDIALOG_MAXSIZE" # see CONFIGURATION else if __max_size=$( $DIALOG --print-maxsize \ 2>&1 >&$DIALOG_TERMINAL_PASSTHRU_FD ) then f_dprintf "$funcname: %s --print-maxsize = [%s]" \ "$DIALOG" "$__max_size" # usually "MaxSize: 24, 80" __max_size="${__max_size#*: }" f_replaceall "$__max_size" "," "" __max_size else f_eval_catch -dk __max_size $funcname stty \ 'stty size' || __max_size= # usually "24 80" fi : ${__max_size:=$DEFAULT_TERMINAL_SIZE} fi if [ "$__var_height" ]; then local __height="${__max_size%%[$IFS]*}" # # If we're not using Xdialog(1), we should assume that $DIALOG # will render --backtitle behind the widget. In such a case, we # should prevent a widget from obscuring the backtitle (unless # $NO_BACKTITLE is set and non-NULL, allowing a trap-door). # if [ ! "$USE_XDIALOG" ] && [ ! "$NO_BACKTITLE" ]; then # # If use_shadow (in ~/.dialogrc) is OFF, we need to # subtract 4, otherwise 5. However, don't check this # every time, rely on an initialization variable set # by f_dialog_init(). # local __adjust=5 [ "$NO_SHADOW" ] && __adjust=4 # Don't adjust height if already too small (allowing # obscured backtitle for small values of __height). [ ${__height:-0} -gt 11 ] && __height=$(( $__height - $__adjust )) fi setvar "$__var_height" "$__height" fi [ "$__var_width" ] && setvar "$__var_width" "${__max_size##*[$IFS]}" } # f_dialog_size_constrain $var_height $var_width [$min_height [$min_width]] # # Modify $var_height to be no-less-than $min_height (if given; zero otherwise) # and no-greater-than terminal height (or screen height if $USE_XDIALOG is # set). # # Also modify $var_width to be no-less-than $XDIALOG_MIN_WIDTH (or # $XDIALOG_MIN_WIDTH if $_USE_XDIALOG is set) and no-greater-than terminal # or screen width. The use of $[X]DIALOG_MIN_WIDTH can be overridden by # passing $min_width. # # Return status is success unless one of the passed arguments is invalid # or all of the $var_* arguments are either NULL or missing. # f_dialog_size_constrain() { local __var_height="$1" __var_width="$2" local __min_height="$3" __min_width="$4" local __retval=$SUCCESS # Return failure unless at least one var_* argument is passed [ "$__var_height" -o "$__var_width" ] || return $FAILURE # # Print debug warnings if any given (non-NULL) argument are invalid # NOTE: Don't change the name of $__{var,min,}{height,width} # local __height __width local __arg __cp __fname=f_dialog_size_constrain for __arg in height width; do debug= f_getvar __var_$__arg __cp [ "$__cp" ] || continue if ! debug= f_getvar "$__cp" __$__arg; then f_dprintf "%s: var_%s variable \`%s' not set" \ $__fname $__arg "$__cp" __retval=$FAILURE elif ! eval f_isinteger \$__$__arg; then f_dprintf "%s: var_%s variable value not a number" \ $__fname $__arg __retval=$FAILURE fi done for __arg in height width; do debug= f_getvar __min_$__arg __cp [ "$__cp" ] || continue f_isinteger "$__cp" && continue f_dprintf "%s: min_%s value not a number" $__fname $__arg __retval=$FAILURE setvar __min_$__arg "" done # Obtain maximum height and width values # NOTE: Function name appended to prevent __var_{height,width} values # from becoming local (and thus preventing setvar from working). local __max_height_size_constain __max_width_size_constrain f_dialog_max_size \ __max_height_size_constrain __max_width_size_constrain # Adjust height if desired if [ "$__var_height" ]; then if [ $__height -lt ${__min_height:-0} ]; then setvar "$__var_height" $__min_height elif [ $__height -gt $__max_height_size_constrain ]; then setvar "$__var_height" $__max_height_size_constrain fi fi # Adjust width if desired if [ "$__var_width" ]; then if [ "$USE_XDIALOG" ]; then : ${__min_width:=${XDIALOG_MIN_WIDTH:-35}} else : ${__min_width:=${DIALOG_MIN_WIDTH:-24}} fi if [ $__width -lt $__min_width ]; then setvar "$__var_width" $__min_width elif [ $__width -gt $__max_width_size_constrain ]; then setvar "$__var_width" $__max_width_size_constrain fi fi if [ "$debug" ]; then # Print final constrained values to debugging [ "$__var_height" ] && f_quietly f_getvar "$__var_height" [ "$__var_width" ] && f_quietly f_getvar "$__var_width" fi return $__retval # success if no debug warnings were printed } # f_dialog_menu_constrain $var_height $var_width $var_rows "$prompt" \ # [$min_height [$min_width [$min_rows]]] # # Modify $var_height to be no-less-than $min_height (if given; zero otherwise) # and no-greater-than terminal height (or screen height if $USE_XDIALOG is # set). # # Also modify $var_width to be no-less-than $XDIALOG_MIN_WIDTH (or # $XDIALOG_MIN_WIDTH if $_USE_XDIALOG is set) and no-greater-than terminal # or screen width. The use of $[X]DIALOG_MIN_WIDTH can be overridden by # passing $min_width. # # Last, modify $var_rows to be no-less-than $min_rows (if specified; zero # otherwise) and no-greater-than (max_height - 8) where max_height is the # terminal height (or screen height if $USE_XDIALOG is set). If $prompt is NULL # or missing, dialog(1) allows $var_rows to be (max_height - 7), maximizing the # number of visible rows. # # Return status is success unless one of the passed arguments is invalid # or all of the $var_* arguments are either NULL or missing. # f_dialog_menu_constrain() { local __var_height="$1" __var_width="$2" __var_rows="$3" __prompt="$4" local __min_height="$5" __min_width="$6" __min_rows="$7" # Return failure unless at least one var_* argument is passed [ "$__var_height" -o "$__var_width" -o "$__var_rows" ] || return $FAILURE # # Print debug warnings if any given (non-NULL) argument are invalid # NOTE: Don't change the name of $__{var,min,}{height,width,rows} # local __height_menu_constrain __width_menu_constrain local __rows_menu_constrain local __arg __cp __fname=f_dialog_menu_constrain for __arg in height width rows; do debug= f_getvar __var_$__arg __cp [ "$__cp" ] || continue if ! debug= f_getvar "$__cp" __${__arg}_menu_constrain; then f_dprintf "%s: var_%s variable \`%s' not set" \ $__fname $__arg "$__cp" __retval=$FAILURE elif ! eval f_isinteger \$__${__arg}_menu_constrain; then f_dprintf "%s: var_%s variable value not a number" \ $__fname $__arg __retval=$FAILURE fi done for __arg in height width rows; do debug= f_getvar __min_$__arg __cp [ "$__cp" ] || continue f_isinteger "$__cp" && continue f_dprintf "%s: min_%s value not a number" $__fname $__arg __retval=$FAILURE setvar __min_$__arg "" done # Obtain maximum height and width values # NOTE: Function name appended to prevent __var_{height,width} values # from becoming local (and thus preventing setvar from working). local __max_height_menu_constrain __max_width_menu_constrain f_dialog_max_size \ __max_height_menu_constrain __max_width_menu_constrain # Adjust height if desired if [ "$__var_height" ]; then if [ $__height_menu_constrain -lt ${__min_height:-0} ]; then setvar "$__var_height" $__min_height elif [ $__height_menu_constrain -gt \ $__max_height_menu_constrain ] then setvar "$__var_height" $__max_height_menu_constrain fi fi # Adjust width if desired if [ "$__var_width" ]; then if [ "$USE_XDIALOG" ]; then : ${__min_width:=${XDIALOG_MIN_WIDTH:-35}} else : ${__min_width:=${DIALOG_MIN_WIDTH:-24}} fi if [ $__width_menu_constrain -lt $__min_width ]; then setvar "$__var_width" $__min_width elif [ $__width_menu_constrain -gt \ $__max_width_menu_constrain ] then setvar "$__var_width" $__max_width_menu_constrain fi fi # Adjust rows if desired if [ "$__var_rows" ]; then if [ "$USE_XDIALOG" ]; then : ${__min_rows:=1} else : ${__min_rows:=0} fi local __max_rows_menu_constrain=$(( $__max_height_menu_constrain - 7 )) # If prompt_len is zero (no prompt), bump the max-rows by 1 # Default assumption is (if no argument) that there's no prompt [ ${__prompt_len:-0} -gt 0 ] || __max_rows_menu_constrain=$(( $__max_rows_menu_constrain + 1 )) if [ $__rows_menu_constrain -lt $__min_rows ]; then setvar "$__var_rows" $__min_rows elif [ $__rows_menu_constrain -gt $__max_rows_menu_constrain ] then setvar "$__var_rows" $__max_rows_menu_constrain fi fi if [ "$debug" ]; then # Print final constrained values to debugging [ "$__var_height" ] && f_quietly f_getvar "$__var_height" [ "$__var_width" ] && f_quietly f_getvar "$__var_width" [ "$__var_rows" ] && f_quietly f_getvar "$__var_rows" fi return $__retval # success if no debug warnings were printed } # f_dialog_infobox_size [-n] $var_height $var_width \ # $title $backtitle $prompt [$hline] # # Not all versions of dialog(1) perform auto-sizing of the width and height of # `--infobox' boxes sensibly. # # This function helps solve this issue by taking two sets of sequential # arguments. The first set of arguments are the variable names to use when # storing the calculated height and width. The second set of arguments are the # title, backtitle, prompt, and [optionally] hline. The optimal height and # width for the described widget (not exceeding the actual terminal height or # width) is stored in $var_height and $var_width (respectively). # # If the first argument is `-n', the calculated sizes ($var_height and # $var_width) are not constrained to minimum/maximum values. # # Newline character sequences (``\n'') in $prompt are expanded as-is done by # dialog(1). # f_dialog_infobox_size() { local __constrain=1 [ "$1" = "-n" ] && __constrain= && shift 1 # -n local __var_height="$1" __var_width="$2" local __title="$3" __btitle="$4" __prompt="$5" __hline="$6" # Return unless at least one size aspect has been requested [ "$__var_height" -o "$__var_width" ] || return $FAILURE # Default height/width of zero for auto-sizing local __height=0 __width=0 __n # Adjust height if desired if [ "$__var_height" ]; then # # Set height based on number of rows in prompt # __n=$( echo -n "$__prompt" | f_number_of_lines ) __n=$(( $__n + 2 )) [ $__n -gt $__height ] && __height=$__n # # For Xdialog(1) bump height if backtitle is enabled (displayed # in the X11 window with a separator line between the backtitle # and msg text). # if [ "$USE_XDIALOG" -a "$__btitle" ]; then __n=$( echo "$__btitle" | f_number_of_lines ) __height=$(( $__height + $__n + 2 )) fi setvar "$__var_height" $__height fi # Adjust width if desired if [ "$__var_width" ]; then # # Bump width for long titles # __n=$(( ${#__title} + 4 )) [ $__n -gt $__width ] && __width=$__n # # If using Xdialog(1), bump width for long backtitles (which # appear within the window). # if [ "$USE_XDIALOG" ]; then __n=$(( ${#__btitle} + 4 )) [ $__n -gt $__width ] && __width=$__n fi # # Bump width for long prompts # __n=$( echo "$__prompt" | f_longest_line_length ) __n=$(( $__n + 4 )) # add width for border [ $__n -gt $__width ] && __width=$__n # # Bump width for long hlines. Xdialog(1) supports `--hline' but # it's currently not used (so don't do anything here if using # Xdialog(1)). # if [ ! "$USE_XDIALOG" ]; then __n=$(( ${#__hline} + 10 )) [ $__n -gt $__width ] && __width=$__n fi # Bump width by 16.6% if using Xdialog(1) [ "$USE_XDIALOG" ] && __width=$(( $__width + $__width / 6 )) setvar "$__var_width" $__width fi # Constrain values to sensible minimums/maximums unless `-n' was passed # Return success if no-constrain, else return status from constrain [ ! "$__constrain" ] || f_dialog_size_constrain "$__var_height" "$__var_width" } # f_dialog_buttonbox_size [-n] $var_height $var_width \ # $title $backtitle $prompt [$hline] # # Not all versions of dialog(1) perform auto-sizing of the width and height of # `--msgbox' and `--yesno' boxes sensibly. # # This function helps solve this issue by taking two sets of sequential # arguments. The first set of arguments are the variable names to use when # storing the calculated height and width. The second set of arguments are the # title, backtitle, prompt, and [optionally] hline. The optimal height and # width for the described widget (not exceeding the actual terminal height or # width) is stored in $var_height and $var_width (respectively). # # If the first argument is `-n', the calculated sizes ($var_height and # $var_width) are not constrained to minimum/maximum values. # # Newline character sequences (``\n'') in $prompt are expanded as-is done by # dialog(1). # f_dialog_buttonbox_size() { local __constrain=1 [ "$1" = "-n" ] && __constrain= && shift 1 # -n local __var_height="$1" __var_width="$2" local __title="$3" __btitle="$4" __prompt="$5" __hline="$6" # Return unless at least one size aspect has been requested [ "$__var_height" -o "$__var_width" ] || return $FAILURE # Calculate height/width of infobox (adjusted/constrained below) # NOTE: Function name appended to prevent __var_{height,width} values # from becoming local (and thus preventing setvar from working). local __height_bbox_size __width_bbox_size f_dialog_infobox_size -n \ "${__var_height:+__height_bbox_size}" \ "${__var_width:+__width_bbox_size}" \ "$__title" "$__btitle" "$__prompt" "$__hline" # Adjust height if desired if [ "$__var_height" ]; then # Add height to accomodate the buttons __height_bbox_size=$(( $__height_bbox_size + 2 )) # Adjust for clipping with Xdialog(1) on Linux/GTK2 [ "$USE_XDIALOG" ] && __height_bbox_size=$(( $__height_bbox_size + 3 )) setvar "$__var_height" $__height_bbox_size fi # No adjustemnts to width, just pass-thru the infobox width if [ "$__var_width" ]; then setvar "$__var_width" $__width_bbox_size fi # Constrain values to sensible minimums/maximums unless `-n' was passed # Return success if no-constrain, else return status from constrain [ ! "$__constrain" ] || f_dialog_size_constrain "$__var_height" "$__var_width" } # f_dialog_inputbox_size [-n] $var_height $var_width \ # $title $backtitle $prompt $init [$hline] # # Not all versions of dialog(1) perform auto-sizing of the width and height of # `--inputbox' boxes sensibly. # # This function helps solve this issue by taking two sets of sequential # arguments. The first set of arguments are the variable names to use when # storing the calculated height and width. The second set of arguments are the # title, backtitle, prompt, and [optionally] hline. The optimal height and # width for the described widget (not exceeding the actual terminal height or # width) is stored in $var_height and $var_width (respectively). # # If the first argument is `-n', the calculated sizes ($var_height and # $var_width) are not constrained to minimum/maximum values. # # Newline character sequences (``\n'') in $prompt are expanded as-is done by # dialog(1). # f_dialog_inputbox_size() { local __constrain=1 [ "$1" = "-n" ] && __constrain= && shift 1 # -n local __var_height="$1" __var_width="$2" local __title="$3" __btitle="$4" __prompt="$5" __init="$6" __hline="$7" # Return unless at least one size aspect has been requested [ "$__var_height" -o "$__var_width" ] || return $FAILURE # Calculate height/width of buttonbox (adjusted/constrained below) # NOTE: Function name appended to prevent __var_{height,width} values # from becoming local (and thus preventing setvar from working). local __height_ibox_size __width_ibox_size f_dialog_buttonbox_size -n \ "${__var_height:+__height_ibox_size}" \ "${__var_width:+__width_ibox_size}" \ "$__title" "$__btitle" "$__prompt" "$__hline" # Adjust height if desired if [ "$__var_height" ]; then # Add height for input box (not needed for Xdialog(1)) [ ! "$USE_XDIALOG" ] && __height_ibox_size=$(( $__height_ibox_size + 3 )) setvar "$__var_height" $__height_ibox_size fi # Adjust width if desired if [ "$__var_width" ]; then # Bump width for initial text (something neither dialog(1) nor # Xdialog(1) do, but worth it!; add 16.6% if using Xdialog(1)) local __n=$(( ${#__init} + 7 )) [ "$USE_XDIALOG" ] && __n=$(( $__n + $__n / 6 )) [ $__n -gt $__width_ibox_size ] && __width_ibox_size=$__n setvar "$__var_width" $__width_ibox_size fi # Constrain values to sensible minimums/maximums unless `-n' was passed # Return success if no-constrain, else return status from constrain [ ! "$__constrain" ] || f_dialog_size_constrain "$__var_height" "$__var_width" } # f_xdialog_2inputsbox_size [-n] $var_height $var_width \ # $title $backtitle $prompt \ # $label1 $init1 $label2 $init2 # # Xdialog(1) does not perform auto-sizing of the width and height of # `--2inputsbox' boxes sensibly. # # This function helps solve this issue by taking two sets of sequential # arguments. The first set of arguments are the variable names to use when # storing the calculated height and width. The second set of arguments are the # title, backtitle, prompt, label for the first field, initial text for said # field, label for the second field, and initial text for said field. The # optimal height and width for the described widget (not exceeding the actual # terminal height or width) is stored in $var_height and $var_width # (respectively). # # If the first argument is `-n', the calculated sizes ($var_height and # $var_width) are not constrained to minimum/maximum values. # # Newline character sequences (``\n'') in $prompt are expanded as-is done by # Xdialog(1). # f_xdialog_2inputsbox_size() { local __constrain=1 [ "$1" = "-n" ] && __constrain= && shift 1 # -n local __var_height="$1" __var_width="$2" local __title="$3" __btitle="$4" __prompt="$5" local __label1="$6" __init1="$7" __label2="$8" __init2="$9" # Return unless at least one size aspect has been requested [ "$__var_height" -o "$__var_width" ] || return $FAILURE # Calculate height/width of inputbox (adjusted/constrained below) # NOTE: Function name appended to prevent __var_{height,width} values # from becoming local (and thus preventing setvar from working). local __height_2ibox_size __width_2ibox_size f_dialog_inputbox_size -n \ "${__var_height:+__height_2ibox_size}" \ "${__var_width:+__width_2ibox_size}" \ "$__title" "$__btitle" "$__prompt" "$__hline" "$__init1" # Adjust height if desired if [ "$__var_height" ]; then # Add height for 1st label, 2nd label, and 2nd input box __height_2ibox_size=$(( $__height_2ibox_size + 2 + 2 + 2 )) setvar "$__var_height" $__height_2ibox_size fi # Adjust width if desired if [ "$__var_width" ]; then local __n # Bump width for first label text (+16.6% since Xdialog(1)) __n=$(( ${#__label1} + 7 )) __n=$(( $__n + $__n / 6 )) [ $__n -gt $__width_2ibox_size ] && __width_2ibox_size=$__n # Bump width for second label text (+16.6% since Xdialog(1)) __n=$(( ${#__label2} + 7 )) __n=$(( $__n + $__n / 6 )) [ $__n -gt $__width_2ibox_size ] && __width_2ibox_size=$__n # Bump width for 2nd initial text (something neither dialog(1) # nor Xdialog(1) do, but worth it!; +16.6% since Xdialog(1)) __n=$(( ${#__init2} + 7 )) __n=$(( $__n + $__n / 6 )) [ $__n -gt $__width_2ibox_size ] && __width_2ibox_size=$__n setvar "$__var_width" $__width_2ibox_size fi # Constrain values to sensible minimums/maximums unless `-n' was passed # Return success if no-constrain, else return status from constrain [ ! "$__constrain" ] || f_dialog_size_constrain "$__var_height" "$__var_width" } # f_dialog_menu_size [-n] $var_height $var_width $var_rows \ # $title $backtitle $prompt $hline \ # $tag1 $item1 $tag2 $item2 ... # # Not all versions of dialog(1) perform auto-sizing of the width and height of # `--menu' boxes sensibly. # # This function helps solve this issue by taking three sets of sequential # arguments. The first set of arguments are the variable names to use when # storing the calculated height, width, and rows. The second set of arguments # are the title, backtitle, prompt, and hline. The [optional] third set of # arguments are the menu list itself (comprised of tag/item couplets). The # optimal height, width, and rows for the described widget (not exceeding the # actual terminal height or width) is stored in $var_height, $var_width, and # $var_rows (respectively). # # If the first argument is `-n', the calculated sizes ($var_height, $var_width, # and $var_rows) are not constrained to minimum/maximum values. # f_dialog_menu_size() { local __constrain=1 [ "$1" = "-n" ] && __constrain= && shift 1 # -n local __var_height="$1" __var_width="$2" __var_rows="$3" local __title="$4" __btitle="$5" __prompt="$6" __hline="$7" shift 7 # var_height/var_width/var_rows/title/btitle/prompt/hline # Return unless at least one size aspect has been requested [ "$__var_height" -o "$__var_width" -o "$__var_rows" ] || return $FAILURE # Calculate height/width of infobox (adjusted/constrained below) # NOTE: Function name appended to prevent __var_{height,width} values # from becoming local (and thus preventing setvar from working). local __height_menu_size __width_menu_size f_dialog_infobox_size -n \ "${__var_height:+__height_menu_size}" \ "${__var_width:+__width_menu_size}" \ "$__title" "$__btitle" "$__prompt" "$__hline" # # Always process the menu-item arguments to get the longest tag-length, # longest item-length (both used to bump the width), and the number of # rows (used to bump the height). # local __longest_tag=0 __longest_item=0 __rows=0 while [ $# -ge 2 ]; do local __tag="$1" __item="$2" shift 2 # tag/item [ ${#__tag} -gt $__longest_tag ] && __longest_tag=${#__tag} [ ${#__item} -gt $__longest_item ] && __longest_item=${#__item} __rows=$(( $__rows + 1 )) done # Adjust rows early (for up-comning height calculation) if [ "$__var_height" -o "$__var_rows" ]; then # Add a row for visual aid if using Xdialog(1) [ "$USE_XDIALOG" ] && __rows=$(( $__rows + 1 )) fi # Adjust height if desired if [ "$__var_height" ]; then # Add rows to height if [ "$USE_XDIALOG" ]; then __height_menu_size=$(( $__height_menu_size + $__rows + 7 )) else __height_menu_size=$(( $__height_menu_size + $__rows + 4 )) fi setvar "$__var_height" $__height_menu_size fi # Adjust width if desired if [ "$__var_width" ]; then # The sum total between the longest tag-length and the # longest item-length should be used to bump menu width local __n=$(( $__longest_tag + $__longest_item + 10 )) [ "$USE_XDIALOG" ] && __n=$(( $__n + $__n / 6 )) # plus 16.6% [ $__n -gt $__width_menu_size ] && __width_menu_size=$__n setvar "$__var_width" $__width_menu_size fi # Store adjusted rows if desired [ "$__var_rows" ] && setvar "$__var_rows" $__rows # Constrain height, width, and rows to sensible minimum/maximum values # Return success if no-constrain, else return status from constrain [ ! "$__constrain" ] || f_dialog_menu_constrain \ "$__var_height" "$__var_width" "$__var_rows" "$__prompt" } # f_dialog_menu_with_help_size [-n] $var_height $var_width $var_rows \ # $title $backtitle $prompt $hline \ # $tag1 $item1 $help1 $tag2 $item2 $help2 ... # # Not all versions of dialog(1) perform auto-sizing of the width and height of # `--menu' boxes sensibly. # # This function helps solve this issue by taking three sets of sequential # arguments. The first set of arguments are the variable names to use when # storing the calculated height, width, and rows. The second set of arguments # are the title, backtitle, prompt, and hline. The [optional] third set of # arguments are the menu list itself (comprised of tag/item/help triplets). The # optimal height, width, and rows for the described widget (not exceeding the # actual terminal height or width) is stored in $var_height, $var_width, and # $var_rows (respectively). # # If the first argument is `-n', the calculated sizes ($var_height, $var_width, # and $var_rows) are not constrained to minimum/maximum values. # f_dialog_menu_with_help_size() { local __constrain=1 [ "$1" = "-n" ] && __constrain= && shift 1 # -n local __var_height="$1" __var_width="$2" __var_rows="$3" local __title="$4" __btitle="$5" __prompt="$6" __hline="$7" shift 7 # var_height/var_width/var_rows/title/btitle/prompt/hline # Return unless at least one size aspect has been requested [ "$__var_height" -o "$__var_width" -o "$__var_rows" ] || return $FAILURE # Calculate height/width of infobox (adjusted/constrained below) # NOTE: Function name appended to prevent __var_{height,width} values # from becoming local (and thus preventing setvar from working). local __height_menu_with_help_size __width_menu_with_help_size f_dialog_infobox_size -n \ "${__var_height:+__height_menu_with_help_size}" \ "${__var_width:+__width_menu_with_help_size}" \ "$__title" "$__btitle" "$__prompt" "$__hline" # # Always process the menu-item arguments to get the longest tag-length, # longest item-length, longest help-length (help-length only considered # if using Xdialog(1), as it places the help string in the widget) -- # all used to bump the width -- and the number of rows (used to bump # the height). # local __longest_tag=0 __longest_item=0 __longest_help=0 __rows=0 while [ $# -ge 3 ]; do local __tag="$1" __item="$2" __help="$3" shift 3 # tag/item/help [ ${#__tag} -gt $__longest_tag ] && __longest_tag=${#__tag} [ ${#__item} -gt $__longest_item ] && __longest_item=${#__item} [ ${#__help} -gt $__longest_help ] && __longest_help=${#__help} __rows=$(( $__rows + 1 )) done # Adjust rows early (for up-coming height calculation) if [ "$__var_height" -o "$__var_rows" ]; then # Add a row for visual aid if using Xdialog(1) [ "$USE_XDIALOG" ] && __rows=$(( $__rows + 1 )) fi # Adjust height if desired if [ "$__var_height" ]; then # Add rows to height if [ "$USE_XDIALOG" ]; then __height_menu_with_help_size=$(( $__height_menu_with_help_size + $__rows + 8 )) else __height_menu_with_help_size=$(( $__height_menu_with_help_size + $__rows + 4 )) fi setvar "$__var_height" $__height_menu_with_help_size fi # Adjust width if desired if [ "$__var_width" ]; then # The sum total between the longest tag-length and the # longest item-length should be used to bump menu width local __n=$(( $__longest_tag + $__longest_item + 10 )) [ "$USE_XDIALOG" ] && __n=$(( $__n + $__n / 6 )) # plus 16.6% [ $__n -gt $__width_menu_with_help_size ] && __width_menu_with_help_size=$__n # Update width for help text if using Xdialog(1) if [ "$USE_XDIALOG" ]; then __n=$(( $__longest_help + 10 )) __n=$(( $__n + $__n / 6 )) # plus 16.6% [ $__n -gt $__width_menu_with_help_size ] && __width_menu_with_help_size=$__n fi setvar "$__var_width" $__width_menu_with_help_size fi # Store adjusted rows if desired [ "$__var_rows" ] && setvar "$__var_rows" $__rows # Constrain height, width, and rows to sensible minimum/maximum values # Return success if no-constrain, else return status from constrain [ ! "$__constrain" ] || f_dialog_menu_constrain \ "$__var_height" "$__var_width" "$__var_rows" "$__prompt" } # f_dialog_radiolist_size [-n] $var_height $var_width $var_rows \ # $title $backtitle $prompt $hline \ # $tag1 $item1 $status1 $tag2 $item2 $status2 ... # # Not all versions of dialog(1) perform auto-sizing of the width and height of # `--radiolist' boxes sensibly. # # This function helps solve this issue by taking three sets of sequential # arguments. The first set of arguments are the variable names to use when # storing the calculated height, width, and rows. The second set of arguments # are the title, backtitle, prompt, and hline. The [optional] third set of # arguments are the radio list itself (comprised of tag/item/status triplets). # The optimal height, width, and rows for the described widget (not exceeding # the actual terminal height or width) is stored in $var_height, $var_width, # and $var_rows (respectively). # # If the first argument is `-n', the calculated sizes ($var_height, $var_width, # and $var_rows) are not constrained to minimum/maximum values. # f_dialog_radiolist_size() { local __constrain=1 [ "$1" = "-n" ] && __constrain= && shift 1 # -n local __var_height="$1" __var_width="$2" __var_rows="$3" local __title="$4" __btitle="$5" __prompt="$6" __hline="$7" shift 7 # var_height/var_width/var_rows/title/btitle/prompt/hline # Return unless at least one size aspect has been requested [ "$__var_height" -o "$__var_width" -o "$__var_rows" ] || return $FAILURE # Calculate height/width of infobox (adjusted/constrained below) # NOTE: Function name appended to prevent __var_{height,width} values # from becoming local (and thus preventing setvar from working). local __height_rlist_size __width_rlist_size f_dialog_infobox_size -n \ "${__var_height:+__height_rlist_size}" \ "${__var_width:+__width_rlist_size}" \ "$__title" "$__btitle" "$__prompt" "$__hline" # # Always process the menu-item arguments to get the longest tag-length, # longest item-length (both used to bump the width), and the number of # rows (used to bump the height). # local __longest_tag=0 __longest_item=0 __rows_rlist_size=0 while [ $# -ge 3 ]; do local __tag="$1" __item="$2" shift 3 # tag/item/status [ ${#__tag} -gt $__longest_tag ] && __longest_tag=${#__tag} [ ${#__item} -gt $__longest_item ] && __longest_item=${#__item} __rows_rlist_size=$(( $__rows_rlist_size + 1 )) done # Adjust rows early (for up-coming height calculation) if [ "$__var_height" -o "$__var_rows" ]; then # Add a row for visual aid if using Xdialog(1) [ "$USE_XDIALOG" ] && __rows_rlist_size=$(( $__rows_rlist_size + 1 )) fi # Adjust height if desired if [ "$__var_height" ]; then # Add rows to height if [ "$USE_XDIALOG" ]; then __height_rlist_size=$(( $__height_rlist_size + $__rows_rlist_size + 7 )) else __height_rlist_size=$(( $__height_rlist_size + $__rows_rlist_size + 4 )) fi setvar "$__var_height" $__height_rlist_size fi # Adjust width if desired if [ "$__var_width" ]; then # Sum total between longest tag-length, longest item-length, # and radio-button width should be used to bump menu width local __n=$(( $__longest_tag + $__longest_item + 13 )) [ "$USE_XDIALOG" ] && __n=$(( $__n + $__n / 6 )) # plus 16.6% [ $__n -gt $__width_rlist_size ] && __width_rlist_size=$__n setvar "$__var_width" $__width_rlist_size fi # Store adjusted rows if desired [ "$__var_rows" ] && setvar "$__var_rows" $__rows_rlist_size # Constrain height, width, and rows to sensible minimum/maximum values # Return success if no-constrain, else return status from constrain [ ! "$__constrain" ] || f_dialog_menu_constrain \ "$__var_height" "$__var_width" "$__var_rows" "$__prompt" } # f_dialog_checklist_size [-n] $var_height $var_width $var_rows \ # $title $backtitle $prompt $hline \ # $tag1 $item1 $status1 $tag2 $item2 $status2 ... # # Not all versions of dialog(1) perform auto-sizing of the width and height of # `--checklist' boxes sensibly. # # This function helps solve this issue by taking three sets of sequential # arguments. The first set of arguments are the variable names to use when # storing the calculated height, width, and rows. The second set of arguments # are the title, backtitle, prompt, and hline. The [optional] third set of # arguments are the check list itself (comprised of tag/item/status triplets). # The optimal height, width, and rows for the described widget (not exceeding # the actual terminal height or width) is stored in $var_height, $var_width, # and $var_rows (respectively). # # If the first argument is `-n', the calculated sizes ($var_height, $var_width, # and $var_rows) are not constrained to minimum/maximum values. # f_dialog_checklist_size() { f_dialog_radiolist_size "$@" } # f_dialog_radiolist_with_help_size [-n] $var_height $var_width $var_rows \ # $title $backtitle $prompt $hline \ # $tag1 $item1 $status1 $help1 \ # $tag2 $item2 $status2 $help2 ... # # Not all versions of dialog(1) perform auto-sizing of the width and height of # `--radiolist' boxes sensibly. # # This function helps solve this issue by taking three sets of sequential # arguments. The first set of arguments are the variable names to use when # storing the calculated height, width, and rows. The second set of arguments # are the title, backtitle, prompt, and hline. The [optional] third set of # arguments are the radio list itself (comprised of tag/item/status/help # quadruplets). The optimal height, width, and rows for the described widget # (not exceeding the actual terminal height or width) is stored in $var_height, # $var_width, and $var_rows (respectively). # # If the first argument is `-n', the calculated sizes ($var_height, $var_width, # and $var_rows) are not constrained to minimum/maximum values. # f_dialog_radiolist_with_help_size() { local __constrain=1 [ "$1" = "-n" ] && __constrain= && shift 1 # -n local __var_height="$1" __var_width="$2" __var_rows="$3" local __title="$4" __btitle="$5" __prompt="$6" __hline="$7" shift 7 # var_height/var_width/var_rows/title/btitle/prompt/hline # Return unless at least one size aspect has been requested [ "$__var_height" -o "$__var_width" -o "$__var_rows" ] || return $FAILURE # Calculate height/width of infobox (adjusted/constrained below) # NOTE: Function name appended to prevent __var_{height,width} values # from becoming local (and thus preventing setvar from working). local __height_rlist_with_help_size __width_rlist_with_help_size f_dialog_infobox_size -n \ "${__var_height:+__height_rlist_with_help_size}" \ "${__var_width:+__width_rlist_with_help_size}" \ "$__title" "$__btitle" "$__prompt" "$__hline" # # Always process the menu-item arguments to get the longest tag-length, # longest item-length, longest help-length (help-length only considered # if using Xdialog(1), as it places the help string in the widget) -- # all used to bump the width -- and the number of rows (used to bump # the height). # local __longest_tag=0 __longest_item=0 __longest_help=0 local __rows_rlist_with_help_size=0 while [ $# -ge 4 ]; do local __tag="$1" __item="$2" __status="$3" __help="$4" shift 4 # tag/item/status/help [ ${#__tag} -gt $__longest_tag ] && __longest_tag=${#__tag} [ ${#__item} -gt $__longest_item ] && __longest_item=${#__item} [ ${#__help} -gt $__longest_help ] && __longest_help=${#__help} __rows_rlist_with_help_size=$(( $__rows_rlist_with_help_size + 1 )) done # Adjust rows early (for up-coming height calculation) if [ "$__var_height" -o "$__var_rows" ]; then # Add a row for visual aid if using Xdialog(1) [ "$USE_XDIALOG" ] && __rows_rlist_with_help_size=$(( $__rows_rlist_with_help_size + 1 )) fi # Adjust height if desired if [ "$__var_height" ]; then # Add rows to height if [ "$USE_XDIALOG" ]; then __height_rlist_with_help_size=$(( $__height_rlist_with_help_size + $__rows_rlist_with_help_size + 7 )) else __height_rlist_with_help_size=$(( $__height_rlist_with_help_size + $__rows_rlist_with_help_size + 4 )) fi setvar "$__var_height" $__height fi # Adjust width if desired if [ "$__var_width" ]; then # Sum total between longest tag-length, longest item-length, # and radio-button width should be used to bump menu width local __n=$(( $__longest_tag + $__longest_item + 13 )) [ "$USE_XDIALOG" ] && __n=$(( $__n + $__n / 6 )) # plus 16.6% [ $__n -gt $__width_rlist_with_help_size ] && __width_rlist_with_help_size=$__n # Update width for help text if using Xdialog(1) if [ "$USE_XDIALOG" ]; then __n=$(( $__longest_help + 10 )) __n=$(( $__n + $__n / 6 )) # plus 16.6% [ $__n -gt $__width_rlist_with_help_size ] && __width_rlist_with_help_size=$__n fi setvar "$__var_width" $__width_rlist_with_help_size fi # Store adjusted rows if desired [ "$__var_rows" ] && setvar "$__var_rows" $__rows_rlist_with_help_size # Constrain height, width, and rows to sensible minimum/maximum values # Return success if no-constrain, else return status from constrain [ ! "$__constrain" ] || f_dialog_menu_constrain \ "$__var_height" "$__var_width" "$__var_rows" "$__prompt" } # f_dialog_checklist_with_help_size [-n] $var_height $var_width $var_rows \ # $title $backtitle $prompt $hline \ # $tag1 $item1 $status1 $help1 \ # $tag2 $item2 $status2 $help2 ... # # Not all versions of dialog(1) perform auto-sizing of the width and height of # `--checklist' boxes sensibly. # # This function helps solve this issue by taking three sets of sequential # arguments. The first set of arguments are the variable names to use when # storing the calculated height, width, and rows. The second set of arguments # are the title, backtitle, prompt, and hline. The [optional] third set of # arguments are the check list itself (comprised of tag/item/status/help # quadruplets). The optimal height, width, and rows for the described widget # (not exceeding the actual terminal height or width) is stored in $var_height, # $var_width, and $var_rows (respectively). # # If the first argument is `-n', the calculated sizes ($var_height, $var_width, # and $var_rows) are not constrained to minimum/maximum values. # f_dialog_checklist_with_help_size() { f_dialog_radiolist_with_help_size "$@" } # f_dialog_calendar_size [-n] $var_height $var_width \ # $title $backtitle $prompt [$hline] # # Not all versions of dialog(1) perform auto-sizing of the width and height of # `--calendar' boxes sensibly. # # This function helps solve this issue by taking two sets of sequential # arguments. The first set of arguments are the variable names to use when # storing the calculated height and width. The second set of arguments are the # title, backtitle, prompt, and [optionally] hline. The optimal height and # width for the described widget (not exceeding the actual terminal height or # width) is stored in $var_height and $var_width (respectively). # # If the first argument is `-n', the calculated sizes ($var_height and # $var_width) are not constrained to minimum/maximum values. # # Newline character sequences (``\n'') in $prompt are expanded as-is done by # dialog(1). # f_dialog_calendar_size() { local __constrain=1 [ "$1" = "-n" ] && __constrain= && shift 1 # -n local __var_height="$1" __var_width="$2" local __title="$3" __btitle="$4" __prompt="$5" __hline="$6" # Return unless at least one size aspect has been requested [ "$__var_height" -o "$__var_width" ] || return $FAILURE # # Obtain/Adjust minimum and maximum thresholds # NOTE: Function name appended to prevent __var_{height,width} values # from becoming local (and thus preventing setvar from working). # local __max_height_cal_size __max_width_cal_size f_dialog_max_size __max_height_cal_size __max_width_cal_size __max_width_cal_size=$(( $__max_width_cal_size - 2 )) # the calendar box will refuse to display if too wide local __min_width if [ "$USE_XDIALOG" ]; then __min_width=55 else __min_width=40 __max_height_cal_size=$(( $__max_height_cal_size - $DIALOG_CALENDAR_HEIGHT )) # When using dialog(1), we can't predict whether the user has # disabled shadow's in their `$HOME/.dialogrc' file, so we'll # subtract one for the potential shadow around the widget __max_height_cal_size=$(( $__max_height_cal_size - 1 )) fi # Calculate height if desired if [ "$__var_height" ]; then local __height __height=$( echo "$__prompt" | f_number_of_lines ) if [ "$USE_XDIALOG" ]; then # Add height to accomodate for embedded calendar widget __height=$(( $__height + $DIALOG_CALENDAR_HEIGHT - 1 )) # Also, bump height if backtitle is enabled if [ "$__btitle" ]; then local __n __n=$( echo "$__btitle" | f_number_of_lines ) __height=$(( $__height + $__n + 2 )) fi else [ "$__prompt" ] && __height=$(( $__height + 1 )) fi # Enforce maximum height, unless `-n' was passed [ "$__constrain" -a $__height -gt $__max_height_cal_size ] && __height=$__max_height_cal_size setvar "$__var_height" $__height fi # Calculate width if desired if [ "$__var_width" ]; then # NOTE: Function name appended to prevent __var_{height,width} # values from becoming local (and thus preventing setvar # from working). local __width_cal_size f_dialog_infobox_size -n "" __width_cal_size \ "$__title" "$__btitle" "$__prompt" "$__hline" # Enforce minimum/maximum width, unless `-n' was passed if [ "$__constrain" ]; then if [ $__width_cal_size -lt $__min_width ]; then __width_cal_size=$__min_width elif [ $__width_cal_size -gt $__max_width_cal_size ] then __width_cal_size=$__max_width_size fi fi setvar "$__var_width" $__width_cal_size fi return $SUCCESS } # f_dialog_timebox_size [-n] $var_height $var_width \ # $title $backtitle $prompt [$hline] # # Not all versions of dialog(1) perform auto-sizing of the width and height of # `--timebox' boxes sensibly. # # This function helps solve this issue by taking two sets of sequential # arguments. The first set of arguments are the variable names to use when # storing the calculated height and width. The second set of arguments are the # title, backtitle, prompt, and [optionally] hline. The optional height and # width for the described widget (not exceeding the actual terminal height or # width) is stored in $var_height and $var_width (respectively). # # If the first argument is `-n', the calculated sizes ($var_height and # $var_width) are not constrained to minimum/maximum values. # # Newline character sequences (``\n'') in $prompt are expanded as-is done by # dialog(1). # f_dialog_timebox_size() { local __constrain=1 [ "$1" = "-n" ] && __constrain= && shift 1 # -n local __var_height="$1" __var_width="$2" local __title="$3" __btitle="$4" __prompt="$5" __hline="$6" # Return unless at least one size aspect has been requested [ "$__var_height" -o "$__var_width" ] || return $FAILURE # # Obtain/Adjust minimum and maximum thresholds # NOTE: Function name appended to prevent __var_{height,width} values # from becoming local (and thus preventing setvar from working). # local __max_height_tbox_size __max_width_tbox_size f_dialog_max_size __max_height_tbox_size __max_width_tbox_size __max_width_tbox_size=$(( $__max_width_tbox_size - 2 )) # the timebox widget refuses to display if too wide local __min_width if [ "$USE_XDIALOG" ]; then __min_width=40 else __min_width=20 __max_height_tbox_size=$(( \ $__max_height_tbox_size - $DIALOG_TIMEBOX_HEIGHT )) # When using dialog(1), we can't predict whether the user has # disabled shadow's in their `$HOME/.dialogrc' file, so we'll # subtract one for the potential shadow around the widget __max_height_tbox_size=$(( $__max_height_tbox_size - 1 )) fi # Calculate height if desired if [ "$__var_height" -a "$USE_XDIALOG" ]; then # When using Xdialog(1), the height seems to have # no effect. All values provide the same results. setvar "$__var_height" 0 # autosize elif [ "$__var_height" ]; then local __height __height=$( echo "$__prompt" | f_number_of_lines ) __height=$(( $__height ${__prompt:++1} + 1 )) # Enforce maximum height, unless `-n' was passed [ "$__constrain" -a $__height -gt $__max_height_tbox_size ] && __height=$__max_height_tbox_size setvar "$__var_height" $__height fi # Calculate width if desired if [ "$__var_width" ]; then # NOTE: Function name appended to prevent __var_{height,width} # values from becoming local (and thus preventing setvar # from working). local __width_tbox_size f_dialog_infobox_size -n "" __width_tbox_size \ "$__title" "$__btitle" "$__prompt" "$__hline" # Enforce the minimum width for displaying the timebox if [ "$__constrain" ]; then if [ $__width_tbox_size -lt $__min_width ]; then __width_tbox_size=$__min_width elif [ $__width_tbox_size -ge $__max_width_tbox_size ] then __width_tbox_size=$__max_width_tbox_size fi fi setvar "$__var_width" $__width_tbox_size fi return $SUCCESS } ############################################################ CLEAR FUNCTIONS # f_dialog_clear # # Clears any/all previous dialog(1) displays. # f_dialog_clear() { $DIALOG --clear } ############################################################ INFO FUNCTIONS # f_dialog_info $info_text ... # # Throw up a dialog(1) infobox. The infobox remains until another dialog is # displayed or `dialog --clear' (or f_dialog_clear) is called. # f_dialog_info() { local info_text="$*" height width f_dialog_infobox_size height width \ "$DIALOG_TITLE" "$DIALOG_BACKTITLE" "$info_text" $DIALOG \ --title "$DIALOG_TITLE" \ --backtitle "$DIALOG_BACKTITLE" \ ${USE_XDIALOG:+--ignore-eof} \ ${USE_XDIALOG:+--no-buttons} \ --infobox "$info_text" $height $width } # f_xdialog_info $info_text ... # # Throw up an Xdialog(1) infobox and do not dismiss it until stdin produces # EOF. This implies that you must execute this either as an rvalue to a pipe, # lvalue to indirection or in a sub-shell that provides data on stdin. # # To open an Xdialog(1) infobox that does not disappear until expeclitly dis- # missed, use the following: # # f_xdialog_info "$info_text" < /dev/tty & # pid=$! # # Perform some lengthy actions # kill $pid # # NB: Check $USE_XDIALOG if you need to support both dialog(1) and Xdialog(1). # f_xdialog_info() { local info_text="$*" height width f_dialog_infobox_size height width \ "$DIALOG_TITLE" "$DIALOG_BACKTITLE" "$info_text" exec $DIALOG \ --title "$DIALOG_TITLE" \ --backtitle "$DIALOG_BACKTITLE" \ --no-close --no-buttons \ --infobox "$info_text" $height $width \ -1 # timeout of -1 means abort when EOF on stdin } ############################################################ PAUSE FUNCTIONS # f_dialog_pause $msg_text $duration [$hline] # # Display a message in a widget with a progress bar that runs backward for # $duration seconds. # f_dialog_pause() { local pause_text="$1" duration="$2" hline="$3" height width f_isinteger "$duration" || return $FAILURE f_dialog_buttonbox_size height width \ "$DIALOG_TITLE" "$DIALOG_BACKTITLE" "$pause_text" "$hline" if [ "$USE_XDIALOG" ]; then $DIALOG \ --title "$DIALOG_TITLE" \ --backtitle "$DIALOG_BACKTITLE" \ --ok-label "$msg_skip" \ --cancel-label "$msg_cancel" \ ${noCancel:+--no-cancel} \ --timeout "$duration" \ --yesno "$pause_text" \ $height $width else [ $duration -gt 0 ] && duration=$(( $duration - 1 )) - [ $duration -gt 1 ] && duration=$(( $duration - 1 )) height=$(( $height + 3 )) # Add height for progress bar $DIALOG \ --title "$DIALOG_TITLE" \ --backtitle "$DIALOG_BACKTITLE" \ --hline "$hline" \ --ok-label "$msg_skip" \ --cancel-label "$msg_cancel" \ ${noCancel:+--no-cancel} \ --pause "$pause_text" \ $height $width "$duration" fi } # f_dialog_pause_no_cancel $msg_text $duration [$hline] # # Display a message in a widget with a progress bar that runs backward for # $duration seconds. No cancel button is provided. Always returns success. # f_dialog_pause_no_cancel() { noCancel=1 f_dialog_pause "$@" return $SUCCESS } ############################################################ MSGBOX FUNCTIONS # f_dialog_msgbox $msg_text [$hline] # # Throw up a dialog(1) msgbox. The msgbox remains until the user presses ENTER # or ESC, acknowledging the modal dialog. # # If the user presses ENTER, the exit status is zero (success), otherwise if # the user presses ESC the exit status is 255. # f_dialog_msgbox() { local msg_text="$1" hline="$2" height width f_dialog_buttonbox_size height width \ "$DIALOG_TITLE" "$DIALOG_BACKTITLE" "$msg_text" "$hline" $DIALOG \ --title "$DIALOG_TITLE" \ --backtitle "$DIALOG_BACKTITLE" \ --hline "$hline" \ --ok-label "$msg_ok" \ --msgbox "$msg_text" $height $width } ############################################################ TEXTBOX FUNCTIONS # f_dialog_textbox $file # # Display the contents of $file (or an error if $file does not exist, etc.) in # a dialog(1) textbox (which has a scrollable region for the text). The textbox # remains until the user presses ENTER or ESC, acknowledging the modal dialog. # # If the user presses ENTER, the exit status is zero (success), otherwise if # the user presses ESC the exit status is 255. # f_dialog_textbox() { local file="$1" local contents height width retval contents=$( cat "$file" 2>&1 ) retval=$? f_dialog_buttonbox_size height width \ "$DIALOG_TITLE" "$DIALOG_BACKTITLE" "$contents" if [ $retval -eq $SUCCESS ]; then $DIALOG \ --title "$DIALOG_TITLE" \ --backtitle "$DIALOG_BACKTITLE" \ --exit-label "$msg_ok" \ --no-cancel \ --textbox "$file" $height $width else $DIALOG \ --title "$DIALOG_TITLE" \ --backtitle "$DIALOG_BACKTITLE" \ --ok-label "$msg_ok" \ --msgbox "$contents" $height $width fi } ############################################################ YESNO FUNCTIONS # f_dialog_yesno $msg_text [$hline] # # Display a dialog(1) Yes/No prompt to allow the user to make some decision. # The yesno prompt remains until the user presses ENTER or ESC, acknowledging # the modal dialog. # # If the user chooses YES the exit status is zero, or chooses NO the exit # status is one, or presses ESC the exit status is 255. # f_dialog_yesno() { local msg_text="$1" height width local hline="${2-$hline_arrows_tab_enter}" f_interactive || return 0 # If non-interactive, return YES all the time f_dialog_buttonbox_size height width \ "$DIALOG_TITLE" "$DIALOG_BACKTITLE" "$msg_text" "$hline" if [ "$USE_XDIALOG" ]; then $DIALOG \ --title "$DIALOG_TITLE" \ --backtitle "$DIALOG_BACKTITLE" \ --hline "$hline" \ --ok-label "$msg_yes" \ --cancel-label "$msg_no" \ --yesno "$msg_text" $height $width else $DIALOG \ --title "$DIALOG_TITLE" \ --backtitle "$DIALOG_BACKTITLE" \ --hline "$hline" \ --yes-label "$msg_yes" \ --no-label "$msg_no" \ --yesno "$msg_text" $height $width fi } # f_dialog_noyes $msg_text [$hline] # # Display a dialog(1) No/Yes prompt to allow the user to make some decision. # The noyes prompt remains until the user presses ENTER or ESC, acknowledging # the modal dialog. # # If the user chooses YES the exit status is zero, or chooses NO the exit # status is one, or presses ESC the exit status is 255. # # NOTE: This is just like the f_dialog_yesno function except "No" is default. # f_dialog_noyes() { local msg_text="$1" height width local hline="${2-$hline_arrows_tab_enter}" f_interactive || return 1 # If non-interactive, return NO all the time f_dialog_buttonbox_size height width \ "$DIALOG_TITLE" "$DIALOG_BACKTITLE" "$msg_text" "$hline" if [ "$USE_XDIALOG" ]; then $DIALOG \ --title "$DIALOG_TITLE" \ --backtitle "$DIALOG_BACKTITLE" \ --hline "$hline" \ --default-no \ --ok-label "$msg_yes" \ --cancel-label "$msg_no" \ --yesno "$msg_text" $height $width else $DIALOG \ --title "$DIALOG_TITLE" \ --backtitle "$DIALOG_BACKTITLE" \ --hline "$hline" \ --defaultno \ --yes-label "$msg_yes" \ --no-label "$msg_no" \ --yesno "$msg_text" $height $width fi } ############################################################ INPUT FUNCTIONS # f_dialog_inputstr_store [-s] $text # # Store some text from a dialog(1) inputbox to be retrieved later by # f_dialog_inputstr_fetch(). If the first argument is `-s', the text is # sanitized before being stored. # f_dialog_inputstr_store() { local sanitize= [ "$1" = "-s" ] && sanitize=1 && shift 1 # -s local text="$1" # Sanitize the line before storing it if desired [ "$sanitize" ] && f_dialog_line_sanitize text setvar DIALOG_INPUTBOX_$$ "$text" } # f_dialog_inputstr_fetch [$var_to_set] # # Obtain the inputstr entered by the user from the most recently displayed # dialog(1) inputbox (previously stored with f_dialog_inputstr_store() above). # If $var_to_set is NULL or missing, output is printed to stdout (which is less # recommended due to performance degradation; in a loop for example). # f_dialog_inputstr_fetch() { local __var_to_set="$1" __cp debug= f_getvar DIALOG_INPUTBOX_$$ "${__var_to_set:-__cp}" # get data setvar DIALOG_INPUTBOX_$$ "" # scrub memory in case data was sensitive # Return the line on standard-out if desired [ "$__var_to_set" ] || echo "$__cp" return $SUCCESS } # f_dialog_input $var_to_set $prompt [$init [$hline]] # # Prompt the user with a dialog(1) inputbox to enter some value. The inputbox # remains until the the user presses ENTER or ESC, or otherwise ends the # editing session (by selecting `Cancel' for example). # # If the user presses ENTER, the exit status is zero (success), otherwise if # the user presses ESC the exit status is 255, or if the user chose Cancel, the # exit status is instead 1. # # NOTE: The hline should correspond to the type of data you want from the user. # NOTE: Should not be used to edit multiline values. # f_dialog_input() { local __var_to_set="$1" __prompt="$2" __init="$3" __hline="$4" # NOTE: Function name appended to prevent __var_{height,width} values # from becoming local (and thus preventing setvar from working). local __height_input __width_input f_dialog_inputbox_size __height_input __width_input \ "$DIALOG_TITLE" "$DIALOG_BACKTITLE" \ "$__prompt" "$__init" "$__hline" local __opterm="--" [ "$USE_XDIALOG" ] && __opterm= local __dialog_input __dialog_input=$( $DIALOG \ --title "$DIALOG_TITLE" \ --backtitle "$DIALOG_BACKTITLE" \ --hline "$__hline" \ --ok-label "$msg_ok" \ --cancel-label "$msg_cancel" \ --inputbox "$__prompt" \ $__height_input $__width_input \ $__opterm "$__init" \ 2>&1 >&$DIALOG_TERMINAL_PASSTHRU_FD ) local __retval=$? # Remove warnings and leading/trailing whitespace from user input f_dialog_line_sanitize __dialog_input setvar "$__var_to_set" "$__dialog_input" return $__retval } ############################################################ MENU FUNCTIONS # f_dialog_menutag_store [-s] $text # # Store some text from a dialog(1) menu to be retrieved later by # f_dialog_menutag_fetch(). If the first argument is `-s', the text is # sanitized before being stored. # f_dialog_menutag_store() { local sanitize= [ "$1" = "-s" ] && sanitize=1 && shift 1 # -s local text="$1" # Sanitize the menutag before storing it if desired [ "$sanitize" ] && f_dialog_data_sanitize text setvar DIALOG_MENU_$$ "$text" } # f_dialog_menutag_fetch [$var_to_set] # # Obtain the menutag chosen by the user from the most recently displayed # dialog(1) menu (previously stored with f_dialog_menutag_store() above). If # $var_to_set is NULL or missing, output is printed to stdout (which is less # recommended due to performance degradation; in a loop for example). # f_dialog_menutag_fetch() { local __var_to_set="$1" __cp debug= f_getvar DIALOG_MENU_$$ "${__var_to_set:-__cp}" # get the data setvar DIALOG_MENU_$$ "" # scrub memory in case data was sensitive # Return the data on standard-out if desired [ "$__var_to_set" ] || echo "$__cp" return $SUCCESS } # f_dialog_menuitem_store [-s] $text # # Store the item from a dialog(1) menu (see f_dialog_menutag2item()) to be # retrieved later by f_dialog_menuitem_fetch(). If the first argument is `-s', # the text is sanitized before being stored. # f_dialog_menuitem_store() { local sanitize= [ "$1" = "-s" ] && sanitize=1 && shift 1 # -s local text="$1" # Sanitize the menuitem before storing it if desired [ "$sanitize" ] && f_dialog_data_sanitize text setvar DIALOG_MENUITEM_$$ "$text" } # f_dialog_menuitem_fetch [$var_to_set] # # Obtain the menuitem chosen by the user from the most recently displayed # dialog(1) menu (previously stored with f_dialog_menuitem_store() above). If # $var_to_set is NULL or missing, output is printed to stdout (which is less # recommended due to performance degradation; in a loop for example). # f_dialog_menuitem_fetch() { local __var_to_set="$1" __cp debug= f_getvar DIALOG_MENUITEM_$$ "${__var_to_set:-__cp}" # get data setvar DIALOG_MENUITEM_$$ "" # scrub memory in case data was sensitive # Return the data on standard-out if desired [ "$__var_to_set" ] || echo "$__cp" return $SUCCESS } # f_dialog_default_store [-s] $text # # Store some text to be used later as the --default-item argument to dialog(1) # (or Xdialog(1)) for --menu, --checklist, and --radiolist widgets. Retrieve # the text later with f_dialog_menutag_fetch(). If the first argument is `-s', # the text is sanitized before being stored. # f_dialog_default_store() { local sanitize= [ "$1" = "-s" ] && sanitize=1 && shift 1 # -s local text="$1" # Sanitize the defaulitem before storing it if desired [ "$sanitize" ] && f_dialog_data_sanitize text setvar DEFAULTITEM_$$ "$text" } # f_dialog_default_fetch [$var_to_set] # # Obtain text to be used with the --default-item argument of dialog(1) (or # Xdialog(1)) (previously stored with f_dialog_default_store() above). If # $var_to_set is NULL or missing, output is printed to stdout (which is less # recommended due to performance degradation; in a loop for example). # f_dialog_default_fetch() { local __var_to_set="$1" __cp debug= f_getvar DEFAULTITEM_$$ "${__var_to_set:-__cp}" # get the data setvar DEFAULTITEM_$$ "" # scrub memory in case data was sensitive # Return the data on standard-out if desired [ "$__var_to_set" ] || echo "$__cp" return $SUCCESS } # f_dialog_menutag2item $tag_chosen $tag1 $item1 $tag2 $item2 ... # # To use the `--menu' option of dialog(1) you must pass an ordered list of # tag/item pairs on the command-line. When the user selects a menu option the # tag for that item is printed to stderr. # # This function allows you to dereference the tag chosen by the user back into # the item associated with said tag. # # Pass the tag chosen by the user as the first argument, followed by the # ordered list of tag/item pairs (HINT: use the same tag/item list as was # passed to dialog(1) for consistency). # # If the tag cannot be found, NULL is returned. # f_dialog_menutag2item() { local tag="$1" tagn item shift 1 # tag while [ $# -gt 0 ]; do tagn="$1" item="$2" shift 2 # tagn/item if [ "$tag" = "$tagn" ]; then echo "$item" return $SUCCESS fi done return $FAILURE } # f_dialog_menutag2item_with_help $tag_chosen $tag1 $item1 $help1 \ # $tag2 $item2 $help2 ... # # To use the `--menu' option of dialog(1) with the `--item-help' option, you # must pass an ordered list of tag/item/help triplets on the command-line. When # the user selects a menu option the tag for that item is printed to stderr. # # This function allows you to dereference the tag chosen by the user back into # the item associated with said tag (help is discarded/ignored). # # Pass the tag chosen by the user as the first argument, followed by the # ordered list of tag/item/help triplets (HINT: use the same tag/item/help list # as was passed to dialog(1) for consistency). # # If the tag cannot be found, NULL is returned. # f_dialog_menutag2item_with_help() { local tag="$1" tagn item shift 1 # tag while [ $# -gt 0 ]; do tagn="$1" item="$2" shift 3 # tagn/item/help if [ "$tag" = "$tagn" ]; then echo "$item" return $SUCCESS fi done return $FAILURE } # f_dialog_menutag2index $tag_chosen $tag1 $item1 $tag2 $item2 ... # # To use the `--menu' option of dialog(1) you must pass an ordered list of # tag/item pairs on the command-line. When the user selects a menu option the # tag for that item is printed to stderr. # # This function allows you to dereference the tag chosen by the user back into # the index associated with said tag. The index is the one-based tag/item pair # array position within the ordered list of tag/item pairs passed to dialog(1). # # Pass the tag chosen by the user as the first argument, followed by the # ordered list of tag/item pairs (HINT: use the same tag/item list as was # passed to dialog(1) for consistency). # # If the tag cannot be found, NULL is returned. # f_dialog_menutag2index() { local tag="$1" tagn n=1 shift 1 # tag while [ $# -gt 0 ]; do tagn="$1" shift 2 # tagn/item if [ "$tag" = "$tagn" ]; then echo $n return $SUCCESS fi n=$(( $n + 1 )) done return $FAILURE } # f_dialog_menutag2index_with_help $tag_chosen $tag1 $item1 $help1 \ # $tag2 $item2 $help2 ... # # To use the `--menu' option of dialog(1) with the `--item-help' option, you # must pass an ordered list of tag/item/help triplets on the command-line. When # the user selects a menu option the tag for that item is printed to stderr. # # This function allows you to dereference the tag chosen by the user back into # the index associated with said tag. The index is the one-based tag/item/help # triplet array position within the ordered list of tag/item/help triplets # passed to dialog(1). # # Pass the tag chosen by the user as the first argument, followed by the # ordered list of tag/item/help triplets (HINT: use the same tag/item/help list # as was passed to dialog(1) for consistency). # # If the tag cannot be found, NULL is returned. # f_dialog_menutag2index_with_help() { local tag="$1" tagn n=1 shift 1 # tag while [ $# -gt 0 ]; do tagn="$1" shift 3 # tagn/item/help if [ "$tag" = "$tagn" ]; then echo $n return $SUCCESS fi n=$(( $n + 1 )) done return $FAILURE } # f_dialog_menutag2help $tag_chosen $tag1 $item1 $help1 $tag2 $item2 $help2 ... # # To use the `--menu' option of dialog(1) with the `--item-help' option, you # must pass an ordered list of tag/item/help triplets on the command-line. When # the user selects a menu option the tag for that item is printed to stderr. # # This function allows you to dereference the tag chosen by the user back into # the help associated with said tag (item is discarded/ignored). # # Pass the tag chosen by the user as the first argument, followed by the # ordered list of tag/item/help triplets (HINT: use the same tag/item/help list # as was passed to dialog(1) for consistency). # # If the tag cannot be found, NULL is returned. # f_dialog_menutag2help() { local tag="$1" tagn help shift 1 # tag while [ $# -gt 0 ]; do tagn="$1" help="$3" shift 3 # tagn/item/help if [ "$tag" = "$tagn" ]; then echo "$help" return $SUCCESS fi done return $FAILURE } ############################################################ INIT FUNCTIONS # f_dialog_init # # Initialize (or re-initialize) the dialog module after setting/changing any # of the following environment variables: # # USE_XDIALOG Either NULL or Non-NULL. If given a value will indicate # that Xdialog(1) should be used instead of dialog(1). # # SECURE Either NULL or Non-NULL. If given a value will indicate # that (while running as root) sudo(8) authentication is # required to proceed. # # Also reads ~/.dialogrc for the following information: # # NO_SHADOW Either NULL or Non-NULL. If use_shadow is OFF (case- # insensitive) in ~/.dialogrc this is set to "1" (otherwise # unset). # f_dialog_init() { local funcname=f_dialog_init DIALOG_SELF_INITIALIZE= USE_DIALOG=1 # # Clone terminal stdout so we can redirect to it from within sub-shells # eval exec $DIALOG_TERMINAL_PASSTHRU_FD\>\&1 # # Add `-S' and `-X' to the list of standard arguments supported by all # case "$GETOPTS_STDARGS" in *SX*) : good ;; # already present *) GETOPTS_STDARGS="${GETOPTS_STDARGS}SX" esac # # Process stored command-line arguments # # NB: Using backticks instead of $(...) for portability since Linux # bash(1) balks at the right parentheses encountered in the case- # statement (incorrectly interpreting it as the close of $(...)). # f_dprintf "f_dialog_init: ARGV=[%s] GETOPTS_STDARGS=[%s]" \ "$ARGV" "$GETOPTS_STDARGS" SECURE=`set -- $ARGV OPTIND=1 while getopts \ "$GETOPTS_STDARGS$GETOPTS_EXTRA$GETOPTS_ALLFLAGS" \ flag > /dev/null; do case "$flag" in S) echo 1 ;; esac done ` # END-BACKTICK USE_XDIALOG=`set -- $ARGV OPTIND=1 while getopts \ "$GETOPTS_STDARGS$GETOPTS_EXTRA$GETOPTS_ALLFLAGS" \ flag > /dev/null; do case "$flag" in S|X) echo 1 ;; esac done ` # END-BACKTICK f_dprintf "f_dialog_init: SECURE=[%s] USE_XDIALOG=[%s]" \ "$SECURE" "$USE_XDIALOG" # # Process `-X' command-line option # [ "$USE_XDIALOG" ] && DIALOG=Xdialog USE_DIALOG= # # Sanity check, or die gracefully # if ! f_have $DIALOG; then unset USE_XDIALOG local failed_dialog="$DIALOG" DIALOG=dialog f_die 1 "$msg_no_such_file_or_directory" "$pgm" "$failed_dialog" fi # # Read ~/.dialogrc (unless using Xdialog(1)) for properties # if [ -f ~/.dialogrc -a ! "$USE_XDIALOG" ]; then eval "$( awk -v param=use_shadow -v expect=OFF \ -v set="NO_SHADOW=1" ' !/^[[:space:]]*(#|$)/ && \ tolower($1) ~ "^"param"(=|$)" && \ /[^#]*=/ { sub(/^[^=]*=[[:space:]]*/, "") if ( toupper($1) == expect ) print set";" }' ~/.dialogrc )" fi # # If we're already running as root but we got there by way of sudo(8) # and we have X11, we should merge the xauth(1) credentials from our # original user. # if [ "$USE_XDIALOG" ] && [ "$( id -u )" = "0" ] && [ "$SUDO_USER" -a "$DISPLAY" ] then if ! f_have xauth; then # Die gracefully, as we [likely] can't use Xdialog(1) unset USE_XDIALOG DIALOG=dialog f_die 1 "$msg_no_such_file_or_directory" "$pgm" "xauth" fi HOSTNAME=$( hostname ) local displaynum="${DISPLAY#*:}" eval xauth -if \~$SUDO_USER/.Xauthority extract - \ \"\$HOSTNAME/unix:\$displaynum\" \ \"\$HOSTNAME:\$displaynum\" | sudo sh -c 'xauth -ivf \ ~root/.Xauthority merge - > /dev/null 2>&1' fi # # Probe Xdialog(1) for maximum height/width constraints, or die # gracefully # if [ "$USE_XDIALOG" ]; then local maxsize if ! f_eval_catch -dk maxsize $funcname "$DIALOG" \ 'LANG= LC_ALL= %s --print-maxsize' "$DIALOG" then # Xdialog(1) failed, fall back to dialog(1) unset USE_XDIALOG # Display the error message produced by Xdialog(1) local height width f_dialog_buttonbox_size height width \ "$DIALOG_TITLE" "$DIALOG_BACKTITLE" "$maxsize" dialog \ --title "$DIALOG_TITLE" \ --backtitle "$DIALOG_BACKTITLE" \ --ok-label "$msg_ok" \ --msgbox "$maxsize" $height $width exit $FAILURE fi XDIALOG_MAXSIZE=$( set -- ${maxsize##*:} height=${1%,} width=$2 echo $height $width ) fi # # If using Xdialog(1), swap DIALOG_TITLE with DIALOG_BACKTITLE. # The reason for this is because many dialog(1) applications use # --backtitle for the program name (which is better suited as # --title with Xdialog(1)). # if [ "$USE_XDIALOG" ]; then local _DIALOG_TITLE="$DIALOG_TITLE" DIALOG_TITLE="$DIALOG_BACKTITLE" DIALOG_BACKTITLE="$_DIALOG_TITLE" fi f_dprintf "f_dialog_init: dialog(1) API initialized." } ############################################################ MAIN # # Self-initialize unless requested otherwise # f_dprintf "%s: DIALOG_SELF_INITIALIZE=[%s]" \ dialog.subr "$DIALOG_SELF_INITIALIZE" case "$DIALOG_SELF_INITIALIZE" in ""|0|[Nn][Oo]|[Oo][Ff][Ff]|[Ff][Aa][Ll][Ss][Ee]) : do nothing ;; *) f_dialog_init esac f_dprintf "%s: Successfully loaded." dialog.subr fi # ! $_DIALOG_SUBR Index: projects/clang380-import/usr.sbin/ntp/scripts/mkver =================================================================== --- projects/clang380-import/usr.sbin/ntp/scripts/mkver (revision 293686) +++ projects/clang380-import/usr.sbin/ntp/scripts/mkver (revision 293687) @@ -1,44 +1,44 @@ #!/bin/sh # # $FreeBSD$ # PROG=${1-UNKNOWN} ConfStr="$PROG" -ConfStr="$ConfStr 4.2.8p4" +ConfStr="$ConfStr 4.2.8p5" case "$CSET" in '') ;; *) ConfStr="$ConfStr@$CSET" ;; esac case "" in '') case "1" in '') ;; *) ConfStr="${ConfStr}-a" ;; esac ;; *) ConfStr="${ConfStr}-r" ;; esac if [ ! -f .version ]; then echo 0 > .version fi RUN="`cat .version`" RUN="`expr $RUN + 1`" echo $RUN > .version ConfStr="$ConfStr (${RUN})" echo "Version <${ConfStr}>"; rm -f version.c cat > version.c << -EoF- /* * version file for $PROG */ #include const char * Version = "${ConfStr}"; -EoF- Index: projects/clang380-import =================================================================== --- projects/clang380-import (revision 293686) +++ projects/clang380-import (revision 293687) Property changes on: projects/clang380-import ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r293430-293685