Index: user/alc/PQ_LAUNDRY/Makefile.inc1
===================================================================
--- user/alc/PQ_LAUNDRY/Makefile.inc1	(revision 303641)
+++ user/alc/PQ_LAUNDRY/Makefile.inc1	(revision 303642)
@@ -1,2580 +1,2583 @@
#
# $FreeBSD$
#
# Make command line options:
#	-DNO_CLEANDIR run ${MAKE} clean, instead of ${MAKE} cleandir
#	-DNO_CLEAN do not clean at all
#	-DDB_FROM_SRC use the user/group databases in src/etc instead of
#	    the system database when installing.
#	-DNO_SHARE do not go into share subdir
#	-DKERNFAST define NO_KERNEL{CONFIG,CLEAN,OBJ}
#	-DNO_KERNELCONFIG do not run config in ${MAKE} buildkernel
#	-DNO_KERNELCLEAN do not run ${MAKE} clean in ${MAKE} buildkernel
#	-DNO_KERNELOBJ do not run ${MAKE} obj in ${MAKE} buildkernel
#	-DNO_PORTSUPDATE do not update ports in ${MAKE} update
#	-DNO_ROOT install without using root privilege
#	-DNO_DOCUPDATE do not update doc in ${MAKE} update
#	-DWITHOUT_CTF do not run the DTrace CTF conversion tools on built objects
#	LOCAL_DIRS="list of dirs" to add additional dirs to the SUBDIR list
#	LOCAL_ITOOLS="list of tools" to add additional tools to the ITOOLS list
#	LOCAL_LIB_DIRS="list of dirs" to add additional dirs to libraries target
#	LOCAL_MTREE="list of mtree files" to process to allow local directories
#	    to be created before files are installed
#	LOCAL_TOOL_DIRS="list of dirs" to add additional dirs to the build-tools
#	    list
#	LOCAL_XTOOL_DIRS="list of dirs" to add additional dirs to the
#	    cross-tools target
#	METALOG="path to metadata log" to write permission and ownership
#	    when NO_ROOT is set. (default: ${DESTDIR}/METALOG)
#	TARGET="machine" to crossbuild world for a different machine type
#	TARGET_ARCH= may be required when a TARGET supports multiple endians
#	BUILDENV_SHELL= shell to launch for the buildenv target (def:${SHELL})
#	WORLD_FLAGS= additional flags to pass to make(1) during buildworld
#	KERNEL_FLAGS= additional flags to pass to make(1) during buildkernel
#	SUBDIR_OVERRIDE="list of dirs" to build rather than everything.
#	    All libraries and includes, and some build tools will still build.
#
# The intended user-driven targets are:
# buildworld  - rebuild *everything*, including glue to help do upgrades
# installworld- install everything built by "buildworld"
# checkworld  - run test suite on installed world
# doxygen     - build API documentation of the kernel
# update      - convenient way to update your source tree (eg: svn/svnup)
#
# Standard targets (not defined here) are documented in the makefiles in
# /usr/share/mk. These include:
#		obj depend all install clean cleandepend cleanobj

.if !defined(TARGET) || !defined(TARGET_ARCH)
.error "Both TARGET and TARGET_ARCH must be defined."
.endif

SRCDIR?=	${.CURDIR}
LOCALBASE?=	/usr/local

# Cross toolchain changes must be in effect before bsd.compiler.mk
# so that gets the right CC, and pass CROSS_TOOLCHAIN to submakes.
.if defined(CROSS_TOOLCHAIN)
.include "${LOCALBASE}/share/toolchains/${CROSS_TOOLCHAIN}.mk"
CROSSENV+=CROSS_TOOLCHAIN="${CROSS_TOOLCHAIN}"
.endif
.if defined(CROSS_TOOLCHAIN_PREFIX)
CROSS_COMPILER_PREFIX?=${CROSS_TOOLCHAIN_PREFIX}
.endif

XCOMPILERS=	CC CXX CPP
.for COMPILER in ${XCOMPILERS}
.if defined(CROSS_COMPILER_PREFIX)
X${COMPILER}?=	${CROSS_COMPILER_PREFIX}${${COMPILER}}
.else
X${COMPILER}?=	${${COMPILER}}
.endif
.endfor
# If a full path to an external cross compiler is given, don't build
# a cross compiler.
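# For example (an illustrative invocation; the compiler paths are hypothetical
# and not mandated by this file), a build started as
#	make buildworld XCC=/usr/local/bin/clang XCXX=/usr/local/bin/clang++
# supplies absolute compiler paths, so the check below turns off
# MK_CLANG_BOOTSTRAP/MK_GCC_BOOTSTRAP instead of building a cross compiler.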
.if ${XCC:N${CCACHE_BIN}:M/*}
MK_CLANG_BOOTSTRAP=	no
MK_GCC_BOOTSTRAP=	no
.endif

# Pull in COMPILER_TYPE and COMPILER_FREEBSD_VERSION early.
.include <bsd.compiler.mk>
.include "share/mk/src.opts.mk"

# Check if there is a local compiler that can satisfy as an external compiler.
# Which compiler is expected to be used?
.if ${MK_CLANG_BOOTSTRAP} == "yes"
WANT_COMPILER_TYPE=	clang
.elif ${MK_GCC_BOOTSTRAP} == "yes"
WANT_COMPILER_TYPE=	gcc
.else
WANT_COMPILER_TYPE=
.endif

.if !defined(WANT_COMPILER_FREEBSD_VERSION)
.if ${WANT_COMPILER_TYPE} == "clang"
WANT_COMPILER_FREEBSD_VERSION_FILE=	lib/clang/freebsd_cc_version.h
WANT_COMPILER_FREEBSD_VERSION!= \
	awk '$$2 == "FREEBSD_CC_VERSION" {printf("%d\n", $$3)}' \
	${SRCDIR}/${WANT_COMPILER_FREEBSD_VERSION_FILE} || echo unknown
WANT_COMPILER_VERSION_FILE=	lib/clang/include/clang/Basic/Version.inc
WANT_COMPILER_VERSION!= \
	awk '$$2 == "CLANG_VERSION" {split($$3, a, "."); print a[1] * 10000 + a[2] * 100 + a[3]}' \
	${SRCDIR}/${WANT_COMPILER_VERSION_FILE} || echo unknown
.elif ${WANT_COMPILER_TYPE} == "gcc"
WANT_COMPILER_FREEBSD_VERSION_FILE=	gnu/usr.bin/cc/cc_tools/freebsd-native.h
WANT_COMPILER_FREEBSD_VERSION!= \
	awk '$$2 == "FBSD_CC_VER" {printf("%d\n", $$3)}' \
	${SRCDIR}/${WANT_COMPILER_FREEBSD_VERSION_FILE} || echo unknown
WANT_COMPILER_VERSION_FILE=	contrib/gcc/BASE-VER
WANT_COMPILER_VERSION!= \
	awk -F. '{print $$1 * 10000 + $$2 * 100 + $$3}' \
	${SRCDIR}/${WANT_COMPILER_VERSION_FILE} || echo unknown
.endif
.export WANT_COMPILER_FREEBSD_VERSION WANT_COMPILER_VERSION
.endif	# !defined(WANT_COMPILER_FREEBSD_VERSION)

# It needs to be the same revision as we would build for the bootstrap.
# If the expected vs CC is different then we can't skip.
# GCC cannot be used for cross-arch yet. For clang we pass -target later if
# TARGET_ARCH!=MACHINE_ARCH.
.if ${MK_SYSTEM_COMPILER} == "yes" && \
    (${MK_CLANG_BOOTSTRAP} == "yes" || ${MK_GCC_BOOTSTRAP} == "yes") && \
    !make(showconfig) && !make(native-xtools) && !make(xdev*) && \
    ${WANT_COMPILER_TYPE} == ${COMPILER_TYPE} && \
    (${COMPILER_TYPE} == "clang" || ${TARGET_ARCH} == ${MACHINE_ARCH}) && \
    ${COMPILER_VERSION} == ${WANT_COMPILER_VERSION} && \
    ${COMPILER_FREEBSD_VERSION} == ${WANT_COMPILER_FREEBSD_VERSION}
# Everything matches, disable the bootstrap compiler.
MK_CLANG_BOOTSTRAP=	no
MK_GCC_BOOTSTRAP=	no
USING_SYSTEM_COMPILER=	yes
.endif	# ${WANT_COMPILER_TYPE} == ${COMPILER_TYPE}
USING_SYSTEM_COMPILER?=	no

TEST_SYSTEM_COMPILER_VARS= \
	USING_SYSTEM_COMPILER MK_SYSTEM_COMPILER \
	MK_CROSS_COMPILER MK_CLANG_BOOTSTRAP MK_GCC_BOOTSTRAP \
	WANT_COMPILER_TYPE WANT_COMPILER_VERSION WANT_COMPILER_VERSION_FILE \
	WANT_COMPILER_FREEBSD_VERSION WANT_COMPILER_FREEBSD_VERSION_FILE \
	CC COMPILER_TYPE COMPILER_VERSION COMPILER_FREEBSD_VERSION
test-system-compiler: .PHONY
.for v in ${TEST_SYSTEM_COMPILER_VARS}
	${_+_}@printf "%-35s= %s\n" "${v}" "${${v}}"
.endfor
.if ${USING_SYSTEM_COMPILER} == "yes" && \
    (make(buildworld) || make(buildkernel) || make(kernel-toolchain) || \
    make(toolchain) || make(_cross-tools))
.info SYSTEM_COMPILER: Determined that CC=${CC} matches the source tree. Not bootstrapping a cross-compiler.
.endif

# For installworld need to ensure that the looked-up compiler metadata is
# passed along rather than trying to run cc from the restricted
# STRICTTMPPATH.
.if ${MK_CLANG_BOOTSTRAP} == "no" && ${MK_GCC_BOOTSTRAP} == "no" .if !defined(X_COMPILER_TYPE) CROSSENV+= COMPILER_VERSION=${COMPILER_VERSION} \ COMPILER_TYPE=${COMPILER_TYPE} \ COMPILER_FREEBSD_VERSION=${COMPILER_FREEBSD_VERSION} .else CROSSENV+= COMPILER_VERSION=${X_COMPILER_VERSION} \ COMPILER_TYPE=${X_COMPILER_TYPE} \ COMPILER_FREEBSD_VERSION=${X_COMPILER_FREEBSD_VERSION} .endif .endif # Handle external binutils. .if defined(CROSS_TOOLCHAIN_PREFIX) CROSS_BINUTILS_PREFIX?=${CROSS_TOOLCHAIN_PREFIX} .endif # If we do not have a bootstrap binutils (because the in-tree one does not # support the target architecture), provide a default cross-binutils prefix. # This allows aarch64 builds, for example, to automatically use the # aarch64-binutils port or package. .if !make(showconfig) .if !empty(BROKEN_OPTIONS:MBINUTILS_BOOTSTRAP) && \ !defined(CROSS_BINUTILS_PREFIX) CROSS_BINUTILS_PREFIX=/usr/local/${TARGET_ARCH}-freebsd/bin/ .if !exists(${CROSS_BINUTILS_PREFIX}) .error In-tree binutils does not support the ${TARGET_ARCH} architecture. Install the ${TARGET_ARCH}-binutils port or package or set CROSS_BINUTILS_PREFIX. .endif .endif .endif XBINUTILS= AS AR LD NM OBJCOPY RANLIB SIZE STRINGS .for BINUTIL in ${XBINUTILS} .if defined(CROSS_BINUTILS_PREFIX) && \ exists(${CROSS_BINUTILS_PREFIX}${${BINUTIL}}) X${BINUTIL}?= ${CROSS_BINUTILS_PREFIX}${${BINUTIL}} .else X${BINUTIL}?= ${${BINUTIL}} .endif .endfor # We must do lib/ and libexec/ before bin/ in case of a mid-install error to # keep the users system reasonably usable. For static->dynamic root upgrades, # we don't want to install a dynamic binary without rtld and the needed # libraries. More commonly, for dynamic root, we don't want to install a # binary that requires a newer library version that hasn't been installed yet. # This ordering is not a guarantee though. The only guarantee of a working # system here would require fine-grained ordering of all components based # on their dependencies. .if !empty(SUBDIR_OVERRIDE) SUBDIR= ${SUBDIR_OVERRIDE} .else SUBDIR= lib libexec .if !defined(NO_ROOT) && (make(installworld) || make(install)) # Ensure libraries are installed before progressing. SUBDIR+=.WAIT .endif SUBDIR+=bin .if ${MK_CDDL} != "no" SUBDIR+=cddl .endif SUBDIR+=gnu include .if ${MK_KERBEROS} != "no" SUBDIR+=kerberos5 .endif .if ${MK_RESCUE} != "no" SUBDIR+=rescue .endif SUBDIR+=sbin .if ${MK_CRYPT} != "no" SUBDIR+=secure .endif .if !defined(NO_SHARE) SUBDIR+=share .endif SUBDIR+=sys usr.bin usr.sbin .if ${MK_TESTS} != "no" SUBDIR+= tests .endif .if ${MK_OFED} != "no" SUBDIR+=contrib/ofed .endif # Local directories are last, since it is nice to at least get the base # system rebuilt before you do them. .for _DIR in ${LOCAL_DIRS} .if exists(${.CURDIR}/${_DIR}/Makefile) SUBDIR+= ${_DIR} .endif .endfor # Add LOCAL_LIB_DIRS, but only if they will not be picked up as a SUBDIR # of a LOCAL_DIRS directory. This allows LOCAL_DIRS=foo and # LOCAL_LIB_DIRS=foo/lib to behave as expected. .for _DIR in ${LOCAL_DIRS:M*/} ${LOCAL_DIRS:N*/:S|$|/|} _REDUNDENT_LIB_DIRS+= ${LOCAL_LIB_DIRS:M${_DIR}*} .endfor .for _DIR in ${LOCAL_LIB_DIRS} .if empty(_REDUNDENT_LIB_DIRS:M${_DIR}) && exists(${.CURDIR}/${_DIR}/Makefile) SUBDIR+= ${_DIR} .else .warning ${_DIR} not added to SUBDIR list. See UPDATING 20141121. .endif .endfor # We must do etc/ last as it hooks into building the man whatis file # by calling 'makedb' in share/man. This is only relevant for # install/distribute so they build the whatis file after every manpage is # installed. 
.if make(installworld) || make(install) SUBDIR+=.WAIT .endif SUBDIR+=etc .endif # !empty(SUBDIR_OVERRIDE) .if defined(NOCLEAN) .warning NOCLEAN option is deprecated. Use NO_CLEAN instead. NO_CLEAN= ${NOCLEAN} .endif .if defined(NO_CLEANDIR) CLEANDIR= clean cleandepend .else CLEANDIR= cleandir .endif .if ${MK_META_MODE} == "yes" # If filemon is used then we can rely on the build being incremental-safe. # The .meta files will also track the build command and rebuild should # it change. .if empty(.MAKE.MODE:Mnofilemon) NO_CLEAN= t .endif .endif LOCAL_TOOL_DIRS?= PACKAGEDIR?= ${DESTDIR}/${DISTDIR} .if empty(SHELL:M*csh*) BUILDENV_SHELL?=${SHELL} .else BUILDENV_SHELL?=/bin/sh .endif .if !defined(SVN) || empty(SVN) . for _P in /usr/bin /usr/local/bin . for _S in svn svnlite . if exists(${_P}/${_S}) SVN= ${_P}/${_S} . endif . endfor . endfor .endif SVNFLAGS?= -r HEAD MAKEOBJDIRPREFIX?= /usr/obj .if !defined(OSRELDATE) .if exists(/usr/include/osreldate.h) OSRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ /usr/include/osreldate.h .else OSRELDATE= 0 .endif .export OSRELDATE .endif # Set VERSION for CTFMERGE to use via the default CTFFLAGS=-L VERSION. .if !defined(_REVISION) _REVISION!= MK_AUTO_OBJ=no ${MAKE} -C ${SRCDIR}/release -V REVISION .export _REVISION .endif .if !defined(_BRANCH) _BRANCH!= MK_AUTO_OBJ=no ${MAKE} -C ${SRCDIR}/release -V BRANCH .export _BRANCH .endif .if !defined(SRCRELDATE) SRCRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ ${SRCDIR}/sys/sys/param.h .export SRCRELDATE .endif .if !defined(VERSION) VERSION= FreeBSD ${_REVISION}-${_BRANCH:C/-p[0-9]+$//} ${TARGET_ARCH} ${SRCRELDATE} .export VERSION .endif .if !defined(PKG_VERSION) .if ${_BRANCH:MSTABLE*} || ${_BRANCH:MCURRENT*} || ${_BRANCH:MALPHA*} TIMENOW= %Y%m%d%H%M%S EXTRA_REVISION= .s${TIMENOW:gmtime} .endif .if ${_BRANCH:M*-p*} EXTRA_REVISION= _${_BRANCH:C/.*-p([0-9]+$)/\1/} .endif PKG_VERSION= ${_REVISION}${EXTRA_REVISION} .endif KNOWN_ARCHES?= aarch64/arm64 \ amd64 \ arm \ armeb/arm \ armv6/arm \ i386 \ i386/pc98 \ mips \ mipsel/mips \ mips64el/mips \ mipsn32el/mips \ mips64/mips \ mipsn32/mips \ powerpc \ powerpc64/powerpc \ riscv64/riscv \ sparc64 .if ${TARGET} == ${TARGET_ARCH} _t= ${TARGET} .else _t= ${TARGET_ARCH}/${TARGET} .endif .for _t in ${_t} .if empty(KNOWN_ARCHES:M${_t}) .error Unknown target ${TARGET_ARCH}:${TARGET}. .endif .endfor .if ${TARGET} == ${MACHINE} TARGET_CPUTYPE?=${CPUTYPE} .else TARGET_CPUTYPE?= .endif .if !empty(TARGET_CPUTYPE) _TARGET_CPUTYPE=${TARGET_CPUTYPE} .else _TARGET_CPUTYPE=dummy .endif _CPUTYPE!= MK_AUTO_OBJ=no MAKEFLAGS= CPUTYPE=${_TARGET_CPUTYPE} ${MAKE} \ -f /dev/null -m ${.CURDIR}/share/mk -V CPUTYPE .if ${_CPUTYPE} != ${_TARGET_CPUTYPE} .error CPUTYPE global should be set with ?=. .endif .if make(buildworld) BUILD_ARCH!= uname -p .if ${MACHINE_ARCH} != ${BUILD_ARCH} .error To cross-build, set TARGET_ARCH. .endif .endif .if ${MACHINE} == ${TARGET} && ${MACHINE_ARCH} == ${TARGET_ARCH} && !defined(CROSS_BUILD_TESTING) OBJTREE= ${MAKEOBJDIRPREFIX} .else OBJTREE= ${MAKEOBJDIRPREFIX}/${TARGET}.${TARGET_ARCH} .endif WORLDTMP= ${OBJTREE}${.CURDIR}/tmp BPATH= ${WORLDTMP}/legacy/usr/sbin:${WORLDTMP}/legacy/usr/bin:${WORLDTMP}/legacy/bin XPATH= ${WORLDTMP}/usr/sbin:${WORLDTMP}/usr/bin STRICTTMPPATH= ${BPATH}:${XPATH} TMPPATH= ${STRICTTMPPATH}:${PATH} # # Avoid running mktemp(1) unless actually needed. # It may not be functional, e.g., due to new ABI # when in the middle of installing over this system. 
# .if make(distributeworld) || make(installworld) || make(stageworld) INSTALLTMP!= /usr/bin/mktemp -d -u -t install .endif .if make(stagekernel) || make(distributekernel) TAGS+= kernel PACKAGE= kernel .endif # # Building a world goes through the following stages # # 1. legacy stage [BMAKE] # This stage is responsible for creating compatibility # shims that are needed by the bootstrap-tools, # build-tools and cross-tools stages. These are generally # APIs that tools from one of those three stages need to # build that aren't present on the host. # 1. bootstrap-tools stage [BMAKE] # This stage is responsible for creating programs that # are needed for backward compatibility reasons. They # are not built as cross-tools. # 2. build-tools stage [TMAKE] # This stage is responsible for creating the object # tree and building any tools that are needed during # the build process. Some programs are listed during # this phase because they build binaries to generate # files needed to build these programs. This stage also # builds the 'build-tools' target rather than 'all'. # 3. cross-tools stage [XMAKE] # This stage is responsible for creating any tools that # are needed for building the system. A cross-compiler is one # of them. This differs from build tools in two ways: # 1. the 'all' target is built rather than 'build-tools' # 2. these tools are installed into TMPPATH for stage 4. # 4. world stage [WMAKE] # This stage actually builds the world. # 5. install stage (optional) [IMAKE] # This stage installs a previously built world. # BOOTSTRAPPING?= 0 # Keep these in sync MINIMUM_SUPPORTED_OSREL?= 900044 MINIMUM_SUPPORTED_REL?= 9.1 # Common environment for world related stages CROSSENV+= MAKEOBJDIRPREFIX=${OBJTREE} \ MACHINE_ARCH=${TARGET_ARCH} \ MACHINE=${TARGET} \ CPUTYPE=${TARGET_CPUTYPE} .if ${MK_META_MODE} != "no" # Don't rebuild build-tools targets during normal build. 
CROSSENV+= BUILD_TOOLS_META=.NOMETA_CMP .endif .if ${MK_GROFF} != "no" CROSSENV+= GROFF_BIN_PATH=${WORLDTMP}/legacy/usr/bin \ GROFF_FONT_PATH=${WORLDTMP}/legacy/usr/share/groff_font \ GROFF_TMAC_PATH=${WORLDTMP}/legacy/usr/share/tmac .endif .if defined(TARGET_CFLAGS) CROSSENV+= ${TARGET_CFLAGS} .endif # bootstrap-tools stage BMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \ TOOLS_PREFIX=${WORLDTMP} \ PATH=${BPATH}:${PATH} \ WORLDTMP=${WORLDTMP} \ MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}" # need to keep this in sync with targets/pseudo/bootstrap-tools/Makefile BSARGS= DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ MK_HTML=no NO_LINT=yes MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_TESTS=no \ MK_INCLUDES=yes BMAKE= MAKEOBJDIRPREFIX=${WORLDTMP} \ ${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ ${BSARGS} # build-tools stage TMAKE= MAKEOBJDIRPREFIX=${OBJTREE} \ ${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ -DNO_LINT \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_TESTS=no # cross-tools stage XMAKE= TOOLS_PREFIX=${WORLDTMP} ${BMAKE} \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ MK_GDB=no MK_TESTS=no # kernel-tools stage KTMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${BPATH}:${PATH} \ WORLDTMP=${WORLDTMP} KTMAKE= TOOLS_PREFIX=${WORLDTMP} MAKEOBJDIRPREFIX=${WORLDTMP} \ ${KTMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ MK_HTML=no -DNO_LINT MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no # world stage WMAKEENV= ${CROSSENV} \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${TMPPATH} # make hierarchy HMAKE= PATH=${TMPPATH} ${MAKE} LOCAL_MTREE=${LOCAL_MTREE:Q} .if defined(NO_ROOT) HMAKE+= PATH=${TMPPATH} METALOG=${METALOG} -DNO_ROOT .endif CROSSENV+= CC="${XCC} ${XCFLAGS}" CXX="${XCXX} ${XCXXFLAGS} ${XCFLAGS}" \ CPP="${XCPP} ${XCFLAGS}" \ AS="${XAS}" AR="${XAR}" LD="${XLD}" NM=${XNM} \ OBJCOPY="${XOBJCOPY}" \ RANLIB=${XRANLIB} STRINGS=${XSTRINGS} \ SIZE="${XSIZE}" .if defined(CROSS_BINUTILS_PREFIX) && exists(${CROSS_BINUTILS_PREFIX}) # In the case of xdev-build tools, CROSS_BINUTILS_PREFIX won't be a # directory, but the compiler will look in the right place for its # tools so we don't need to tell it where to look. BFLAGS+= -B${CROSS_BINUTILS_PREFIX} .endif # External compiler needs sysroot and target flags. .if ${MK_CLANG_BOOTSTRAP} == "no" && ${MK_GCC_BOOTSTRAP} == "no" .if !defined(CROSS_BINUTILS_PREFIX) || !exists(${CROSS_BINUTILS_PREFIX}) BFLAGS+= -B${WORLDTMP}/usr/bin .endif .if ${TARGET} == "arm" .if ${TARGET_ARCH:Marmv6*} != "" && ${TARGET_CPUTYPE:M*soft*} == "" TARGET_ABI= gnueabihf .else TARGET_ABI= gnueabi .endif .endif .if defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc # GCC requires -isystem and -L when using a cross-compiler. --sysroot # won't set header path and -L is used to ensure the base library path # is added before the port PREFIX library path. XCFLAGS+= -isystem ${WORLDTMP}/usr/include -L${WORLDTMP}/usr/lib # Force using libc++ for external GCC. 
# XXX: This should be checking MK_GNUCXX == no .if ${X_COMPILER_VERSION} >= 40800 XCXXFLAGS+= -isystem ${WORLDTMP}/usr/include/c++/v1 -std=c++11 \ -nostdinc++ -L${WORLDTMP}/../lib/libc++ .endif .else TARGET_ABI?= unknown TARGET_TRIPLE?= ${TARGET_ARCH:C/amd64/x86_64/}-${TARGET_ABI}-freebsd12.0 XCFLAGS+= -target ${TARGET_TRIPLE} .endif XCFLAGS+= --sysroot=${WORLDTMP} .endif # ${MK_CLANG_BOOTSTRAP} == "no" && ${MK_GCC_BOOTSTRAP} == "no" .if !empty(BFLAGS) XCFLAGS+= ${BFLAGS} .endif .if ${MK_LIB32} != "no" && (${TARGET_ARCH} == "amd64" || \ ${TARGET_ARCH} == "powerpc64") LIBCOMPAT= 32 .include "Makefile.libcompat" .elif ${MK_LIBSOFT} != "no" && ${TARGET_ARCH} == "armv6" LIBCOMPAT= SOFT .include "Makefile.libcompat" .endif WMAKE= ${WMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 DESTDIR=${WORLDTMP} IMAKEENV= ${CROSSENV} IMAKE= ${IMAKEENV} ${MAKE} -f Makefile.inc1 \ ${IMAKE_INSTALL} ${IMAKE_MTREE} .if empty(.MAKEFLAGS:M-n) IMAKEENV+= PATH=${STRICTTMPPATH}:${INSTALLTMP} \ LD_LIBRARY_PATH=${INSTALLTMP} \ PATH_LOCALE=${INSTALLTMP}/locale IMAKE+= __MAKE_SHELL=${INSTALLTMP}/sh .else IMAKEENV+= PATH=${TMPPATH}:${INSTALLTMP} .endif .if defined(DB_FROM_SRC) INSTALLFLAGS+= -N ${.CURDIR}/etc MTREEFLAGS+= -N ${.CURDIR}/etc .endif _INSTALL_DDIR= ${DESTDIR}/${DISTDIR} INSTALL_DDIR= ${_INSTALL_DDIR:S://:/:g:C:/$::} .if defined(NO_ROOT) METALOG?= ${DESTDIR}/${DISTDIR}/METALOG IMAKE+= -DNO_ROOT METALOG=${METALOG} INSTALLFLAGS+= -U -M ${METALOG} -D ${INSTALL_DDIR} MTREEFLAGS+= -W .endif .if defined(BUILD_PKGS) INSTALLFLAGS+= -h sha256 .endif .if defined(DB_FROM_SRC) || defined(NO_ROOT) IMAKE_INSTALL= INSTALL="install ${INSTALLFLAGS}" IMAKE_MTREE= MTREE_CMD="mtree ${MTREEFLAGS}" .endif # kernel stage KMAKEENV= ${WMAKEENV} KMAKE= ${KMAKEENV} ${MAKE} ${.MAKEFLAGS} ${KERNEL_FLAGS} KERNEL=${INSTKERNNAME} # # buildworld # # Attempt to rebuild the entire system, with reasonable chance of # success, regardless of how old your existing system is. # _worldtmp: .PHONY .if ${.CURDIR:C/[^,]//g} != "" # The m4 build of sendmail files doesn't like it if ',' is used # anywhere in the path of it's files. @echo @echo "*** Error: path to source tree contains a comma ','" @echo false .endif @echo @echo "--------------------------------------------------------------" @echo ">>> Rebuilding the temporary build tree" @echo "--------------------------------------------------------------" .if !defined(NO_CLEAN) rm -rf ${WORLDTMP} .if defined(LIBCOMPAT) rm -rf ${LIBCOMPATTMP} .endif .else rm -rf ${WORLDTMP}/legacy/usr/include # XXX - These can depend on any header file. 
rm -f ${OBJTREE}${.CURDIR}/lib/libsysdecode/ioctl.c rm -f ${OBJTREE}${.CURDIR}/usr.bin/kdump/kdump_subr.c .endif .for _dir in \ lib lib/casper usr legacy/bin legacy/usr mkdir -p ${WORLDTMP}/${_dir} .endfor mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/legacy/usr >/dev/null .if ${MK_GROFF} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.groff.dist \ -p ${WORLDTMP}/legacy/usr >/dev/null .endif mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${WORLDTMP}/legacy/usr/include >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${WORLDTMP}/usr/include >/dev/null ln -sf ${.CURDIR}/sys ${WORLDTMP} .if ${MK_DEBUG_FILES} != "no" # We could instead disable debug files for these build stages mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${WORLDTMP}/legacy/usr/lib >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${WORLDTMP}/usr/lib >/dev/null .endif .if defined(LIBCOMPAT) mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${WORLDTMP}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${WORLDTMP}/legacy/usr/lib/debug/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${WORLDTMP}/usr/lib/debug/usr >/dev/null .endif .endif .if ${MK_TESTS} != "no" mkdir -p ${WORLDTMP}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${WORLDTMP}${TESTSBASE} >/dev/null .if ${MK_DEBUG_FILES} != "no" mkdir -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE} >/dev/null .endif .endif .for _mtree in ${LOCAL_MTREE} mtree -deU -f ${.CURDIR}/${_mtree} -p ${WORLDTMP} > /dev/null .endfor _legacy: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.1: legacy release compatibility shims" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} legacy _bootstrap-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.2: bootstrap tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} bootstrap-tools _cleanobj: .if !defined(NO_CLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} ${CLEANDIR} .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${LIBCOMPATWMAKE} -f Makefile.inc1 ${CLEANDIR} .endif .endif _obj: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} obj _build-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${TMAKE} build-tools _cross-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3: cross tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${XMAKE} cross-tools ${_+_}cd ${.CURDIR}; ${XMAKE} kernel-tools _includes: @echo @echo "--------------------------------------------------------------" @echo 
">>> stage 4.1: building includes" @echo "--------------------------------------------------------------" # Special handling for SUBDIR_OVERRIDE in buildworld as they most likely need # headers from default SUBDIR. Do SUBDIR_OVERRIDE includes last. ${_+_}cd ${.CURDIR}; ${WMAKE} SUBDIR_OVERRIDE= SHARED=symlinks \ MK_INCLUDES=yes includes .if !empty(SUBDIR_OVERRIDE) && make(buildworld) ${_+_}cd ${.CURDIR}; ${WMAKE} MK_INCLUDES=yes SHARED=symlinks includes .endif _libraries: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.2: building libraries" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; \ ${WMAKE} -DNO_FSCHG MK_HTML=no -DNO_LINT MK_MAN=no \ MK_PROFILE=no MK_TESTS=no MK_TESTS_SUPPORT=${MK_TESTS} libraries everything: .PHONY @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.3: building everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; _PARALLEL_SUBDIR_OK=1 ${WMAKE} all WMAKE_TGTS= WMAKE_TGTS+= _worldtmp _legacy .if empty(SUBDIR_OVERRIDE) WMAKE_TGTS+= _bootstrap-tools .endif WMAKE_TGTS+= _cleanobj _obj _build-tools _cross-tools WMAKE_TGTS+= _includes _libraries WMAKE_TGTS+= everything .if defined(LIBCOMPAT) && empty(SUBDIR_OVERRIDE) WMAKE_TGTS+= build${libcompat} .endif buildworld: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue .PHONY .ORDER: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue buildworld_prologue: .PHONY @echo "--------------------------------------------------------------" @echo ">>> World build started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" buildworld_epilogue: .PHONY @echo @echo "--------------------------------------------------------------" @echo ">>> World build completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" # # We need to have this as a target because the indirection between Makefile # and Makefile.inc1 causes the correct PATH to be used, rather than a # modification of the current environment's PATH. In addition, we need # to quote multiword values. # buildenvvars: .PHONY @echo ${WMAKEENV:Q} ${.MAKE.EXPORTED:@v@$v=\"${$v}\"@} .if ${.TARGETS:Mbuildenv} .if ${.MAKEFLAGS:M-j} .error The buildenv target is incompatible with -j .endif .endif BUILDENV_DIR?= ${.CURDIR} buildenv: .PHONY @echo Entering world for ${TARGET_ARCH}:${TARGET} .if ${BUILDENV_SHELL:M*zsh*} @echo For ZSH you must run: export CPUTYPE=${TARGET_CPUTYPE} .endif @cd ${BUILDENV_DIR} && env ${WMAKEENV} BUILDENV=1 ${BUILDENV_SHELL} \ || true TOOLCHAIN_TGTS= ${WMAKE_TGTS:Neverything:Nbuild${libcompat}} toolchain: ${TOOLCHAIN_TGTS} .PHONY kernel-toolchain: ${TOOLCHAIN_TGTS:N_includes:N_libraries} .PHONY # # installcheck # # Checks to be sure system is ready for installworld/installkernel. # installcheck: _installcheck_world _installcheck_kernel .PHONY _installcheck_world: .PHONY _installcheck_kernel: .PHONY # # Require DESTDIR to be set if installing for a different architecture or # using the user/group database in the source tree. 
# .if ${TARGET_ARCH} != ${MACHINE_ARCH} || ${TARGET} != ${MACHINE} || \ defined(DB_FROM_SRC) .if !make(distributeworld) _installcheck_world: __installcheck_DESTDIR _installcheck_kernel: __installcheck_DESTDIR __installcheck_DESTDIR: .PHONY .if !defined(DESTDIR) || empty(DESTDIR) @echo "ERROR: Please set DESTDIR!"; \ false .endif .endif .endif .if !defined(DB_FROM_SRC) # # Check for missing UIDs/GIDs. # CHECK_UIDS= auditdistd CHECK_GIDS= audit .if ${MK_SENDMAIL} != "no" CHECK_UIDS+= smmsp CHECK_GIDS+= smmsp .endif .if ${MK_PF} != "no" CHECK_UIDS+= proxy CHECK_GIDS+= proxy authpf .endif .if ${MK_UNBOUND} != "no" CHECK_UIDS+= unbound CHECK_GIDS+= unbound .endif _installcheck_world: __installcheck_UGID __installcheck_UGID: .PHONY .for uid in ${CHECK_UIDS} @if ! `id -u ${uid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${uid} user is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .for gid in ${CHECK_GIDS} @if ! `find / -prune -group ${gid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${gid} group is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .endif # # Required install tools to be saved in a scratch dir for safety. # .if ${MK_ZONEINFO} != "no" _zoneinfo= zic tzsetup .endif ITOOLS= [ awk cap_mkdb cat chflags chmod chown cmp cp \ date echo egrep find grep id install ${_install-info} \ ln make mkdir mtree mv pwd_mkdb \ rm sed services_mkdb sh strip sysctl test true uname wc ${_zoneinfo} \ ${LOCAL_ITOOLS} # Needed for share/man .if ${MK_MAN_UTILS} != "no" ITOOLS+=makewhatis .endif # # distributeworld # # Distributes everything compiled by a `buildworld'. # # installworld # # Installs everything compiled by a 'buildworld'. # # Non-base distributions produced by the base system EXTRA_DISTRIBUTIONS= doc .if defined(LIBCOMPAT) EXTRA_DISTRIBUTIONS+= lib${libcompat} .endif .if ${MK_TESTS} != "no" EXTRA_DISTRIBUTIONS+= tests .endif DEBUG_DISTRIBUTIONS= .if ${MK_DEBUG_FILES} != "no" DEBUG_DISTRIBUTIONS+= base ${EXTRA_DISTRIBUTIONS:S,doc,,:S,tests,,} .endif MTREE_MAGIC?= mtree 2.0 distributeworld installworld stageworld: _installcheck_world .PHONY mkdir -p ${INSTALLTMP} progs=$$(for prog in ${ITOOLS}; do \ if progpath=`which $$prog`; then \ echo $$progpath; \ else \ echo "Required tool $$prog not found in PATH." >&2; \ exit 1; \ fi; \ done); \ libs=$$(ldd -f "%o %p\n" -f "%o %p\n" $$progs 2>/dev/null | sort -u | \ while read line; do \ set -- $$line; \ if [ "$$2 $$3" != "not found" ]; then \ echo $$2; \ else \ echo "Required library $$1 not found." 
>&2; \ exit 1; \ fi; \ done); \ cp $$libs $$progs ${INSTALLTMP} cp -R $${PATH_LOCALE:-"/usr/share/locale"} ${INSTALLTMP}/locale .if defined(NO_ROOT) -mkdir -p ${METALOG:H} echo "#${MTREE_MAGIC}" > ${METALOG} .endif .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} -mkdir ${DESTDIR}/${DISTDIR}/${dist} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${DESTDIR}/${DISTDIR}/${dist} >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/include >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib >/dev/null .endif .if defined(LIBCOMPAT) mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/usr >/dev/null .endif .endif .if ${MK_TESTS} != "no" && ${dist} == "tests" -mkdir -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/${TESTSBASE} >/dev/null .endif .endif .if defined(NO_ROOT) ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.root.dist | \ sed -e 's#^\./#./${dist}/#' >> ${METALOG} ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.usr.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.include.dist | \ sed -e 's#^\./#./${dist}/usr/include/#' >> ${METALOG} .if defined(LIBCOMPAT) ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} .endif .endif .endfor -mkdir ${DESTDIR}/${DISTDIR}/base ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ METALOG=${METALOG} ${IMAKE_INSTALL} ${IMAKE_MTREE} \ DISTBASE=/base DESTDIR=${DESTDIR}/${DISTDIR}/base \ LOCAL_MTREE=${LOCAL_MTREE:Q} distrib-dirs .endif ${_+_}cd ${.CURDIR}; ${IMAKE} re${.TARGET:S/world$//}; \ ${IMAKEENV} rm -rf ${INSTALLTMP} .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} find ${DESTDIR}/${DISTDIR}/${dist} -mindepth 1 -empty -delete .endfor .if defined(NO_ROOT) .for dist in base ${EXTRA_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediately before @# the relevant mtree line. cd ${DESTDIR}/${DISTDIR}; \ find ./${dist} | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.meta .endfor .for dist in ${DEBUG_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediately before @# the relevant mtree line. 
cd ${DESTDIR}/${DISTDIR}; \ find ./${dist}/usr/lib/debug | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.debug.meta .endfor .endif .endif packageworld: .PHONY .for dist in base ${EXTRA_DISTRIBUTIONS} .if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug \ @${DESTDIR}/${DISTDIR}/${dist}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug . | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .endif .endfor .for dist in ${DEBUG_DISTRIBUTIONS} . if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - @${DESTDIR}/${DISTDIR}/${dist}.debug.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvLf - usr/lib/debug | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . endif .endfor # # reinstall # # If you have a build server, you can NFS mount the source and obj directories # and do a 'make reinstall' on the *client* to install new binaries from the # most recent server build. # restage reinstall: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Making hierarchy" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} hierarchy .if make(restage) @echo "--------------------------------------------------------------" @echo ">>> Making distribution" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} distribution .endif @echo @echo "--------------------------------------------------------------" @echo ">>> Installing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install${libcompat} .endif redistribute: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Distributing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute${libcompat} \ DISTRIBUTION=lib${libcompat} .endif distrib-dirs distribution: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ ${IMAKE_INSTALL} ${IMAKE_MTREE} METALOG=${METALOG} ${.TARGET} .if make(distribution) ${_+_}cd ${.CURDIR}; ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} -f Makefile.inc1 ${IMAKE_INSTALL} \ METALOG=${METALOG} MK_TESTS=no installconfig .endif # # buildkernel and installkernel # # Which kernels to build and/or install is specified by setting # KERNCONF. If not defined a GENERIC kernel is built/installed. # Only the existing (depending TARGET) config files are used # for building kernels and only the first of these is designated # as the one being installed. # # Note that we have to use TARGET instead of TARGET_ARCH when # we're in kernel-land. Since only TARGET_ARCH is (expected) to # be set to cross-build, we have to make sure TARGET is set # properly. 
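# For example (illustrative; MYKERNEL is a placeholder config file under
# sys/${TARGET}/conf), a typical cycle would be
#	make buildkernel KERNCONF=MYKERNEL
#	make installkernel KERNCONF=MYKERNEL
# and, after a first full build, KERNFAST=MYKERNEL reuses the existing
# configured kernel and object tree (skipping the config, clean and obj
# stages), as handled directly below.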
.if defined(KERNFAST) NO_KERNELCLEAN= t NO_KERNELCONFIG= t NO_KERNELOBJ= t # Shortcut for KERNCONF=Blah -DKERNFAST is now KERNFAST=Blah .if !defined(KERNCONF) && ${KERNFAST} != "1" KERNCONF=${KERNFAST} .endif .endif .if ${TARGET_ARCH} == "powerpc64" KERNCONF?= GENERIC64 .else KERNCONF?= GENERIC .endif INSTKERNNAME?= kernel KERNSRCDIR?= ${.CURDIR}/sys KRNLCONFDIR= ${KERNSRCDIR}/${TARGET}/conf KRNLOBJDIR= ${OBJTREE}${KERNSRCDIR} KERNCONFDIR?= ${KRNLCONFDIR} BUILDKERNELS= INSTALLKERNEL= .if defined(NO_INSTALLKERNEL) # All of the BUILDKERNELS loops start at index 1. BUILDKERNELS+= dummy .endif .for _kernel in ${KERNCONF} .if exists(${KERNCONFDIR}/${_kernel}) BUILDKERNELS+= ${_kernel} .if empty(INSTALLKERNEL) && !defined(NO_INSTALLKERNEL) INSTALLKERNEL= ${_kernel} .endif .endif .endfor ${WMAKE_TGTS:N_worldtmp:Nbuild${libcompat}} ${.ALLTARGETS:M_*:N_worldtmp}: .MAKE .PHONY # # buildkernel # # Builds all kernels defined by BUILDKERNELS. # buildkernel: .MAKE .PHONY .if empty(BUILDKERNELS:Ndummy) @echo "ERROR: Missing kernel configuration file(s) (${KERNCONF})."; \ false .endif @echo .for _kernel in ${BUILDKERNELS:Ndummy} @echo "--------------------------------------------------------------" @echo ">>> Kernel build for ${_kernel} started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" @echo "===> ${_kernel}" mkdir -p ${KRNLOBJDIR} .if !defined(NO_KERNELCONFIG) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1: configuring the kernel" @echo "--------------------------------------------------------------" cd ${KRNLCONFDIR}; \ PATH=${TMPPATH} \ config ${CONFIGARGS} -d ${KRNLOBJDIR}/${_kernel} \ -I '${KERNCONFDIR}' '${KERNCONFDIR}/${_kernel}' .endif .if !defined(NO_CLEAN) && !defined(NO_KERNELCLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} ${CLEANDIR} .endif .if !defined(NO_KERNELOBJ) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} obj .endif @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${KTMAKE} kernel-tools @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3.1: building everything" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} all -DNO_MODULES_OBJ @echo "--------------------------------------------------------------" @echo ">>> Kernel build for ${_kernel} completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" .endfor NO_INSTALLEXTRAKERNELS?= yes # # installkernel, etc. 
# # Install the kernel defined by INSTALLKERNEL # installkernel installkernel.debug \ reinstallkernel reinstallkernel.debug: _installcheck_kernel .PHONY .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${INSTALLKERNEL}" @echo "--------------------------------------------------------------" cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME} ${.TARGET:S/kernel//} .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${_kernel}" @echo "--------------------------------------------------------------" cd ${KRNLOBJDIR}/${_kernel}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME}.${_kernel} ${.TARGET:S/kernel//} .endfor .endif distributekernel distributekernel.debug: .PHONY .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif mkdir -p ${DESTDIR}/${DISTDIR} .if defined(NO_ROOT) @echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.premeta .endif cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} KERNEL=${INSTKERNNAME} \ DESTDIR=${INSTALL_DDIR}/kernel \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) @sed -e 's|^./kernel|.|' ${DESTDIR}/${DISTDIR}/kernel.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.meta .endif .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} .if defined(NO_ROOT) @echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta .endif cd ${KRNLOBJDIR}/${_kernel}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.${_kernel}.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} \ KERNEL=${INSTKERNNAME}.${_kernel} \ DESTDIR=${INSTALL_DDIR}/kernel.${_kernel} \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) @sed -e "s|^./kernel.${_kernel}|.|" \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta .endif .endfor .endif packagekernel: .PHONY .if defined(NO_ROOT) .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --exclude '*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endfor .endif .else .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' $$(eval find .) 
| \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --include '*/*/*.debug' $$(eval find .) | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endfor .endif .endif stagekernel: .PHONY ${_+_}${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} distributekernel PORTSDIR?= /usr/ports WSTAGEDIR?= ${MAKEOBJDIRPREFIX}${.CURDIR}/${TARGET}.${TARGET_ARCH}/worldstage KSTAGEDIR?= ${MAKEOBJDIRPREFIX}${.CURDIR}/${TARGET}.${TARGET_ARCH}/kernelstage REPODIR?= ${MAKEOBJDIRPREFIX}${.CURDIR}/repo PKGSIGNKEY?= # empty .ORDER: stage-packages create-packages .ORDER: create-packages create-world-packages .ORDER: create-packages create-kernel-packages .ORDER: create-packages sign-packages _pkgbootstrap: .PHONY .if !exists(${LOCALBASE}/sbin/pkg) @env ASSUME_ALWAYS_YES=YES pkg bootstrap .endif packages: .PHONY ${_+_}${MAKE} -C ${.CURDIR} PKG_VERSION=${PKG_VERSION} real-packages package-pkg: .PHONY rm -rf /tmp/ports.${TARGET} || : env ${WMAKEENV:Q} SRCDIR=${.CURDIR} PORTSDIR=${PORTSDIR} REVISION=${_REVISION} \ PKG_VERSION=${PKG_VERSION} REPODIR=${REPODIR} WSTAGEDIR=${WSTAGEDIR} \ sh ${.CURDIR}/release/scripts/make-pkg-package.sh real-packages: stage-packages create-packages sign-packages .PHONY stage-packages: .PHONY @mkdir -p ${REPODIR} ${WSTAGEDIR} ${KSTAGEDIR} ${_+_}@cd ${.CURDIR}; \ ${MAKE} DESTDIR=${WSTAGEDIR} -DNO_ROOT -B stageworld ; \ ${MAKE} DESTDIR=${KSTAGEDIR} -DNO_ROOT -B stagekernel create-packages: _pkgbootstrap .PHONY @mkdir -p ${REPODIR} ${_+_}@cd ${.CURDIR}; \ ${MAKE} DESTDIR=${WSTAGEDIR} \ PKG_VERSION=${PKG_VERSION} create-world-packages ; \ ${MAKE} DESTDIR=${KSTAGEDIR} \ PKG_VERSION=${PKG_VERSION} DISTDIR=kernel \ create-kernel-packages create-world-packages: _pkgbootstrap .PHONY @rm -f ${WSTAGEDIR}/*.plist 2>/dev/null || : @cd ${WSTAGEDIR} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ ${WSTAGEDIR}/METALOG @for plist in ${WSTAGEDIR}/*.plist; do \ plist=$${plist##*/} ; \ pkgname=$${plist%.plist} ; \ sh ${SRCDIR}/release/packages/generate-ucl.sh -o $${pkgname} \ -s ${SRCDIR} -u ${WSTAGEDIR}/$${pkgname}.ucl ; \ done @for plist in ${WSTAGEDIR}/*.plist; do \ plist=$${plist##*/} ; \ pkgname=$${plist%.plist} ; \ awk -F\" ' \ /^name/ { printf("===> Creating %s-", $$2); next } \ /^version/ { print $$2; next } \ ' ${WSTAGEDIR}/$${pkgname}.ucl ; \ pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh -o ALLOW_BASE_SHLIBS=yes \ create -M ${WSTAGEDIR}/$${pkgname}.ucl \ -p ${WSTAGEDIR}/$${pkgname}.plist \ -r ${WSTAGEDIR} \ -o ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} ; \ done create-kernel-packages: _pkgbootstrap .PHONY .if exists(${KSTAGEDIR}/kernel.meta) .for flavor in "" -debug @cd ${KSTAGEDIR}/${DISTDIR} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ -v kernel=yes -v _kernconf=${INSTALLKERNEL} \ ${KSTAGEDIR}/kernel.meta ; \ cap_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VCAP_MKDB_ENDIAN` ; \ pwd_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VPWD_MKDB_ENDIAN` ; \ sed -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/kernel-${INSTALLKERNEL:tl}${flavor}/" \ -e "s/%COMMENT%/FreeBSD ${INSTALLKERNEL} kernel ${flavor}/" \ -e "s/%DESC%/FreeBSD ${INSTALLKERNEL} kernel ${flavor}/" \ -e "s/%CAP_MKDB_ENDIAN%/$${cap_arg}/g" \ -e "s/%PWD_MKDB_ENDIAN%/$${pwd_arg}/g" \ ${SRCDIR}/release/packages/kernel.ucl \ > 
${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl ; \ awk -F\" ' \ /name/ { printf("===> Creating %s-", $$2); next } \ /version/ {print $$2; next } ' \ ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl ; \ pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh -o ALLOW_BASE_SHLIBS=yes \ create -M ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl \ -p ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.plist \ -r ${KSTAGEDIR}/${DISTDIR} \ -o ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} .endfor .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} .if exists(${KSTAGEDIR}/kernel.${_kernel}.meta) .for flavor in "" -debug @cd ${KSTAGEDIR}/kernel.${_kernel} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ -v kernel=yes -v _kernconf=${_kernel} \ ${KSTAGEDIR}/kernel.${_kernel}.meta ; \ cap_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VCAP_MKDB_ENDIAN` ; \ pwd_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VPWD_MKDB_ENDIAN` ; \ sed -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/kernel-${_kernel:tl}${flavor}/" \ -e "s/%COMMENT%/FreeBSD ${_kernel} kernel ${flavor}/" \ -e "s/%DESC%/FreeBSD ${_kernel} kernel ${flavor}/" \ -e "s/%CAP_MKDB_ENDIAN%/$${cap_arg}/g" \ -e "s/%PWD_MKDB_ENDIAN%/$${pwd_arg}/g" \ ${SRCDIR}/release/packages/kernel.ucl \ > ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl ; \ awk -F\" ' \ /name/ { printf("===> Creating %s-", $$2); next } \ /version/ {print $$2; next } ' \ ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl ; \ pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh -o ALLOW_BASE_SHLIBS=yes \ create -M ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl \ -p ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.plist \ -r ${KSTAGEDIR}/kernel.${_kernel} \ -o ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} .endfor .endif .endfor .endif sign-packages: _pkgbootstrap .PHONY @[ -L "${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/latest" ] && \ unlink ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/latest ; \ pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh repo \ -o ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} \ ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} \ ${PKGSIGNKEY} ; \ ln -s ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} \ ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/latest # # # checkworld # # Run test suite on installed world. # checkworld: .PHONY @if [ ! -x ${LOCALBASE}/bin/kyua ]; then \ echo "You need kyua (devel/kyua) to run the test suite." | /usr/bin/fmt; \ exit 1; \ fi ${_+_}${LOCALBASE}/bin/kyua test -k ${TESTSBASE}/Kyuafile # # # doxygen # # Build the API documentation with doxygen # doxygen: .PHONY @if [ ! -x ${LOCALBASE}/bin/doxygen ]; then \ echo "You need doxygen (devel/doxygen) to generate the API documentation of the kernel." | /usr/bin/fmt; \ exit 1; \ fi ${_+_}cd ${.CURDIR}/tools/kerneldoc/subsys; ${MAKE} obj all # # update # # Update the source tree(s), by running svn/svnup to update to the # latest copy. 
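# For example (illustrative): with SVN_UPDATE defined, e.g.
#	make update SVN_UPDATE=yes
# the target below simply runs ${SVN} update ${SVNFLAGS} in ${.CURDIR}.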
# update: .PHONY .if defined(SVN_UPDATE) @echo "--------------------------------------------------------------" @echo ">>> Updating ${.CURDIR} using Subversion" @echo "--------------------------------------------------------------" @(cd ${.CURDIR}; ${SVN} update ${SVNFLAGS}) .endif # # ------------------------------------------------------------------------ # # From here onwards are utility targets used by the 'make world' and # related targets. If your 'world' breaks, you may like to try to fix # the problem and manually run the following targets to attempt to # complete the build. Beware, this is *not* guaranteed to work, you # need to have a pretty good grip on the current state of the system # to attempt to manually finish it. If in doubt, 'make world' again. # # # legacy: Build compatibility shims for the next three targets. This is a # minimal set of tools and shims necessary to compensate for older systems # which don't have the APIs required by the targets built in bootstrap-tools, # build-tools or cross-tools. # # ELF Tool Chain libraries are needed for ELF tools and dtrace tools. # r296685 fix cross-endian objcopy .if ${BOOTSTRAPPING} < 1100102 _elftoolchain_libs= lib/libelf lib/libdwarf .endif legacy: .PHONY .if ${BOOTSTRAPPING} < ${MINIMUM_SUPPORTED_OSREL} && ${BOOTSTRAPPING} != 0 @echo "ERROR: Source upgrades from versions prior to ${MINIMUM_SUPPORTED_REL} are not supported."; \ false .endif .for _tool in tools/build ${_elftoolchain_libs} ${_+_}@${ECHODIR} "===> ${_tool} (obj,includes,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy includes; \ ${MAKE} DIRPRFX=${_tool}/ MK_INCLUDES=no all; \ ${MAKE} DIRPRFX=${_tool}/ MK_INCLUDES=no \ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install .endfor # # bootstrap-tools: Build tools needed for compatibility. These are binaries that # are built to build other binaries in the system. However, the focus of these # binaries is usually quite narrow. Bootstrap tools use the host's compiler and # libraries, augmented by -legacy. 
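# As a worked example of the version gates below (using only cutoffs that
# already appear in this file): a host at __FreeBSD_version 1000030 would
# bootstrap usr.bin/m4 and usr.bin/lex (BOOTSTRAPPING < 1000033) but not
# bin/cat, whose cutoff of 1000027 is already satisfied.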
# _bt= _bootstrap-tools .if ${MK_GAMES} != "no" _strfile= usr.bin/fortune/strfile .endif .if ${MK_GCC} != "no" && ${MK_CXX} != "no" _gperf= gnu/usr.bin/gperf .endif .if ${MK_GROFF} != "no" _groff= gnu/usr.bin/groff \ usr.bin/soelim .endif .if ${MK_VT} != "no" _vtfontcvt= usr.bin/vtfontcvt .endif .if ${BOOTSTRAPPING} < 900002 _sed= usr.bin/sed .endif .if ${BOOTSTRAPPING} < 1000033 _libopenbsd= lib/libopenbsd _m4= usr.bin/m4 _lex= usr.bin/lex ${_bt}-usr.bin/m4: ${_bt}-lib/libopenbsd ${_bt}-usr.bin/lex: ${_bt}-usr.bin/m4 .endif .if ${BOOTSTRAPPING} < 1000026 _nmtree= lib/libnetbsd \ usr.sbin/nmtree ${_bt}-usr.sbin/nmtree: ${_bt}-lib/libnetbsd .endif .if ${BOOTSTRAPPING} < 1000027 _cat= bin/cat .endif # r264059 support for status= .if ${BOOTSTRAPPING} < 1100017 _dd= bin/dd .endif # r277259 crunchide: Correct 64-bit section header offset # r281674 crunchide: always include both 32- and 64-bit ELF support .if ${BOOTSTRAPPING} < 1100078 _crunchide= usr.sbin/crunch/crunchide .endif # r285986 crunchen: use STRIPBIN rather than STRIP # 1100113: Support MK_AUTO_OBJ .if ${BOOTSTRAPPING} < 1100078 || \ (${MK_AUTO_OBJ} == "yes" && ${BOOTSTRAPPING} < 1100114) _crunchgen= usr.sbin/crunch/crunchgen .endif .if ${BOOTSTRAPPING} >= 900040 && ${BOOTSTRAPPING} < 900041 _awk= usr.bin/awk .endif # r296926 -P keymap search path, MFC to stable/10 in r298297 .if ${BOOTSTRAPPING} < 1003501 || \ (${BOOTSTRAPPING} >= 1100000 && ${BOOTSTRAPPING} < 1100103) _kbdcontrol= usr.sbin/kbdcontrol .endif _yacc= lib/liby \ usr.bin/yacc ${_bt}-usr.bin/yacc: ${_bt}-lib/liby .if ${MK_BSNMP} != "no" _gensnmptree= usr.sbin/bsnmpd/gensnmptree .endif # We need to build tblgen when we're building clang either as # the bootstrap compiler, or as the part of the normal build. .if ${MK_CLANG_BOOTSTRAP} != "no" || ${MK_CLANG} != "no" _clang_tblgen= \ lib/clang/libllvmsupport \ lib/clang/libllvmtablegen \ usr.bin/clang/llvm-tblgen \ usr.bin/clang/clang-tblgen ${_bt}-usr.bin/clang/clang-tblgen: ${_bt}-lib/clang/libllvmtablegen ${_bt}-lib/clang/libllvmsupport ${_bt}-usr.bin/clang/llvm-tblgen: ${_bt}-lib/clang/libllvmtablegen ${_bt}-lib/clang/libllvmsupport .endif # Default to building the GPL DTC, but build the BSDL one if users explicitly # request it. _dtc= usr.bin/dtc .if ${MK_GPL_DTC} != "no" _dtc= gnu/usr.bin/dtc .endif .if ${MK_KERBEROS} != "no" _kerberos5_bootstrap_tools= \ kerberos5/tools/make-roken \ kerberos5/lib/libroken \ kerberos5/lib/libvers \ kerberos5/tools/asn1_compile \ kerberos5/tools/slc \ usr.bin/compile_et .ORDER: ${_kerberos5_bootstrap_tools:C/^/${_bt}-/g} .endif # r283777 makewhatis(1) replaced with mandoc version which builds a database. .if ${MK_MANDOCDB} != "no" && ${BOOTSTRAPPING} < 1100075 _libopenbsd?= lib/libopenbsd _makewhatis= lib/libsqlite3 \ usr.bin/mandoc ${_bt}-usr.bin/mandoc: ${_bt}-lib/libopenbsd ${_bt}-lib/libsqlite3 .endif bootstrap-tools: .PHONY # Please document (add comment) why something is in 'bootstrap-tools'. # Try to bound the building of the bootstrap-tool to just the # FreeBSD versions that need the tool built at this stage of the build. 
.for _tool in \ ${_clang_tblgen} \ ${_kerberos5_bootstrap_tools} \ ${_strfile} \ ${_gperf} \ ${_groff} \ ${_dtc} \ ${_awk} \ ${_cat} \ ${_dd} \ ${_kbdcontrol} \ usr.bin/lorder \ ${_libopenbsd} \ ${_makewhatis} \ usr.bin/rpcgen \ ${_sed} \ ${_yacc} \ ${_m4} \ ${_lex} \ usr.bin/xinstall \ ${_gensnmptree} \ usr.sbin/config \ ${_crunchide} \ ${_crunchgen} \ ${_nmtree} \ ${_vtfontcvt} \ usr.bin/localedef ${_bt}-${_tool}: .PHONY .MAKE ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install bootstrap-tools: ${_bt}-${_tool} .endfor # # build-tools: Build special purpose build tools # .if !defined(NO_SHARE) _share= share/syscons/scrnmaps .endif .if ${MK_GCC} != "no" _gcc_tools= gnu/usr.bin/cc/cc_tools .endif .if ${MK_RESCUE} != "no" # rescue includes programs that have build-tools targets _rescue=rescue/rescue .endif .for _tool in \ bin/csh \ bin/sh \ ${LOCAL_TOOL_DIRS} \ lib/ncurses/ncurses \ lib/ncurses/ncursesw \ ${_rescue} \ ${_share} \ usr.bin/awk \ lib/libmagic \ usr.bin/mkesdb_static \ usr.bin/mkcsmapper_static \ usr.bin/vi/catalog build-tools_${_tool}: .PHONY ${_+_}@${ECHODIR} "===> ${_tool} (obj,build-tools)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ build-tools build-tools: build-tools_${_tool} .endfor .for _tool in \ ${_gcc_tools} build-tools_${_tool}: .PHONY ${_+_}@${ECHODIR} "===> ${_tool} (obj,all)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ all build-tools: build-tools_${_tool} .endfor # # kernel-tools: Build kernel-building tools # kernel-tools: .PHONY mkdir -p ${MAKEOBJDIRPREFIX}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${MAKEOBJDIRPREFIX}/usr >/dev/null # # cross-tools: All the tools needed to build the rest of the system after # we get done with the earlier stages. It is the last set of tools needed # to begin building the target binaries. # .if ${TARGET_ARCH} != ${MACHINE_ARCH} .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386" _btxld= usr.sbin/btxld .endif .endif # Rebuild ctfconvert and ctfmerge to avoid difficult-to-diagnose failures # resulting from missing bug fixes or ELF Toolchain updates. .if ${MK_CDDL} != "no" _dtrace_tools= cddl/lib/libctf cddl/usr.bin/ctfconvert \ cddl/usr.bin/ctfmerge .endif # If we're given an XAS, don't build binutils. .if ${XAS:M/*} == "" .if ${MK_BINUTILS_BOOTSTRAP} != "no" _binutils= gnu/usr.bin/binutils .endif .if ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" _elftctools= lib/libelftc \ lib/libpe \ usr.bin/elfcopy \ usr.bin/nm \ usr.bin/size \ usr.bin/strings # These are not required by the build, but can be useful for developers who # cross-build on a FreeBSD 10 host: _elftctools+= usr.bin/addr2line .endif .elif ${TARGET_ARCH} != ${MACHINE_ARCH} && ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" # If cross-building with an external binutils we still need to build strip for # the target (for at least crunchide). 
_elftctools= lib/libelftc \ lib/libpe \ usr.bin/elfcopy .endif .if ${MK_CLANG_BOOTSTRAP} != "no" _clang= usr.bin/clang _clang_libs= lib/clang .endif .if ${MK_GCC_BOOTSTRAP} != "no" _cc= gnu/usr.bin/cc .endif .if ${MK_USB} != "no" _usb_tools= sys/boot/usb/tools .endif cross-tools: .MAKE .PHONY .for _tool in \ ${LOCAL_XTOOL_DIRS} \ ${_clang_libs} \ ${_clang} \ ${_binutils} \ ${_elftctools} \ ${_dtrace_tools} \ ${_cc} \ ${_btxld} \ ${_usb_tools} ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX} install .endfor NXBDESTDIR= ${OBJTREE}/nxb-bin NXBENV= MAKEOBJDIRPREFIX=${OBJTREE}/nxb \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${PATH}:${OBJTREE}/gperf_for_gcc/usr/bin NXBMAKE= ${NXBENV} ${MAKE} \ LLVM_TBLGEN=${NXBDESTDIR}/usr/bin/llvm-tblgen \ CLANG_TBLGEN=${NXBDESTDIR}/usr/bin/clang-tblgen \ MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} \ MK_GDB=no MK_TESTS=no \ SSP_CFLAGS= \ MK_HTML=no NO_LINT=yes MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_DEBUG_FILES=no # native-xtools is the current target for qemu-user cross builds of ports # via poudriere and the imgact_binmisc kernel module. # For non-clang enabled targets that are still using the in tree gcc # we must build a gperf binary for one instance of its Makefiles. On # clang-enabled systems, the gperf binary is obsolete. native-xtools: .PHONY .if ${MK_GCC_BOOTSTRAP} != "no" mkdir -p ${OBJTREE}/gperf_for_gcc/usr/bin ${_+_}@${ECHODIR} "===> ${_gperf} (obj,all,install)"; \ cd ${.CURDIR}/${_gperf}; \ ${NXBMAKE} DIRPRFX=${_gperf}/ obj; \ ${NXBMAKE} DIRPRFX=${_gperf}/ all; \ ${NXBMAKE} DIRPRFX=${_gperf}/ DESTDIR=${OBJTREE}/gperf_for_gcc install .endif mkdir -p ${NXBDESTDIR}/bin ${NXBDESTDIR}/sbin ${NXBDESTDIR}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${NXBDESTDIR}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${NXBDESTDIR}/usr/include >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${NXBDESTDIR}/usr/lib >/dev/null .endif .for _tool in \ bin/cat \ bin/chmod \ bin/cp \ bin/csh \ bin/echo \ bin/expr \ bin/hostname \ bin/ln \ bin/ls \ bin/mkdir \ bin/mv \ bin/ps \ bin/realpath \ bin/rm \ bin/rmdir \ bin/sh \ bin/sleep \ ${_clang_tblgen} \ usr.bin/ar \ ${_binutils} \ ${_elftctools} \ ${_cc} \ ${_gcc_tools} \ ${_clang_libs} \ ${_clang} \ sbin/md5 \ sbin/sysctl \ gnu/usr.bin/diff \ usr.bin/awk \ usr.bin/basename \ usr.bin/bmake \ usr.bin/bzip2 \ usr.bin/cmp \ usr.bin/dirname \ usr.bin/env \ usr.bin/fetch \ usr.bin/find \ usr.bin/grep \ usr.bin/gzip \ usr.bin/id \ usr.bin/lex \ usr.bin/lorder \ usr.bin/mktemp \ usr.bin/mt \ usr.bin/patch \ usr.bin/sed \ usr.bin/sort \ usr.bin/tar \ usr.bin/touch \ usr.bin/tr \ usr.bin/true \ usr.bin/uniq \ usr.bin/unzip \ usr.bin/xargs \ usr.bin/xinstall \ usr.bin/xz \ usr.bin/yacc \ usr.sbin/chown ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${NXBMAKE} DIRPRFX=${_tool}/ obj; \ ${NXBMAKE} DIRPRFX=${_tool}/ all; \ ${NXBMAKE} DIRPRFX=${_tool}/ DESTDIR=${NXBDESTDIR} install .endfor # # hierarchy - ensure that all the needed directories are present # hierarchy hier: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${HMAKE} distrib-dirs # # libraries - build all libraries, and install them under ${DESTDIR}. 
# # The list of libraries with dependents (${_prebuild_libs}) and their # interdependencies (__L) are built automatically by the # ${.CURDIR}/tools/make_libdeps.sh script. # libraries: .MAKE .PHONY ${_+_}cd ${.CURDIR}; \ ${MAKE} -f Makefile.inc1 _prereq_libs; \ ${MAKE} -f Makefile.inc1 _startup_libs; \ ${MAKE} -f Makefile.inc1 _prebuild_libs; \ ${MAKE} -f Makefile.inc1 _generic_libs # # static libgcc.a prerequisite for shared libc # _prereq_libs= gnu/lib/libssp/libssp_nonshared gnu/lib/libgcc lib/libcompiler_rt # These dependencies are not automatically generated: # # gnu/lib/csu, gnu/lib/libgcc, lib/csu and lib/libc must be built before # all shared libraries for ELF. # _startup_libs= gnu/lib/csu _startup_libs+= lib/csu _startup_libs+= gnu/lib/libgcc _startup_libs+= lib/libcompiler_rt _startup_libs+= lib/libc _startup_libs+= lib/libc_nonshared .if ${MK_LIBCPLUSPLUS} != "no" _startup_libs+= lib/libcxxrt .endif gnu/lib/libgcc__L: lib/libc__L gnu/lib/libgcc__L: lib/libc_nonshared__L .if ${MK_LIBCPLUSPLUS} != "no" lib/libcxxrt__L: gnu/lib/libgcc__L .endif _prebuild_libs= ${_kerberos5_lib_libasn1} \ ${_kerberos5_lib_libhdb} \ ${_kerberos5_lib_libheimbase} \ ${_kerberos5_lib_libheimntlm} \ ${_libsqlite3} \ ${_kerberos5_lib_libheimipcc} \ ${_kerberos5_lib_libhx509} ${_kerberos5_lib_libkrb5} \ ${_kerberos5_lib_libroken} \ ${_kerberos5_lib_libwind} \ lib/libbz2 ${_libcom_err} lib/libcrypt \ lib/libelf lib/libexpat \ lib/libfigpar \ ${_lib_libgssapi} \ lib/libkiconv lib/libkvm lib/liblzma lib/libmd lib/libnv \ ${_lib_casper} \ lib/ncurses/ncurses lib/ncurses/ncursesw \ lib/libopie lib/libpam/libpam ${_lib_libthr} \ ${_lib_libradius} lib/libsbuf lib/libtacplus \ lib/libgeom \ ${_cddl_lib_libumem} ${_cddl_lib_libnvpair} \ ${_cddl_lib_libuutil} \ ${_cddl_lib_libavl} \ ${_cddl_lib_libzfs_core} \ ${_cddl_lib_libctf} \ lib/libutil lib/libpjdlog ${_lib_libypclnt} lib/libz lib/msun \ ${_secure_lib_libcrypto} ${_lib_libldns} \ ${_secure_lib_libssh} ${_secure_lib_libssl} \ gnu/lib/libdialog .if ${MK_GNUCXX} != "no" _prebuild_libs+= gnu/lib/libstdc++ gnu/lib/libsupc++ gnu/lib/libstdc++__L: lib/msun__L gnu/lib/libsupc++__L: gnu/lib/libstdc++__L .endif .if ${MK_LIBCPLUSPLUS} != "no" _prebuild_libs+= lib/libc++ .endif lib/libgeom__L: lib/libexpat__L lib/libkvm__L: lib/libelf__L .if ${MK_LIBTHR} != "no" _lib_libthr= lib/libthr .endif .if ${MK_RADIUS_SUPPORT} != "no" _lib_libradius= lib/libradius .endif .if ${MK_OFED} != "no" _ofed_lib= contrib/ofed/usr.lib _prebuild_libs+= contrib/ofed/usr.lib/libosmcomp _prebuild_libs+= contrib/ofed/usr.lib/libopensm _prebuild_libs+= contrib/ofed/usr.lib/libibcommon _prebuild_libs+= contrib/ofed/usr.lib/libibverbs _prebuild_libs+= contrib/ofed/usr.lib/libibumad contrib/ofed/usr.lib/libopensm__L: lib/libthr__L contrib/ofed/usr.lib/libosmcomp__L: lib/libthr__L contrib/ofed/usr.lib/libibumad__L: contrib/ofed/usr.lib/libibcommon__L .endif .if ${MK_CASPER} != "no" _lib_casper= lib/libcasper .endif lib/libpjdlog__L: lib/libutil__L lib/libcasper__L: lib/libnv__L lib/liblzma__L: lib/libthr__L _generic_libs= ${_cddl_lib} gnu/lib ${_kerberos5_lib} lib ${_secure_lib} usr.bin/lex/lib ${_ofed_lib} .for _DIR in ${LOCAL_LIB_DIRS} .if exists(${.CURDIR}/${_DIR}/Makefile) && empty(_generic_libs:M${_DIR}) _generic_libs+= ${_DIR} .endif .endfor lib/libopie__L lib/libtacplus__L: lib/libmd__L .if ${MK_CDDL} != "no" _cddl_lib_libumem= cddl/lib/libumem _cddl_lib_libnvpair= cddl/lib/libnvpair _cddl_lib_libavl= cddl/lib/libavl _cddl_lib_libuutil= cddl/lib/libuutil _cddl_lib_libzfs_core= 
cddl/lib/libzfs_core _cddl_lib_libctf= cddl/lib/libctf _cddl_lib= cddl/lib cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L cddl/lib/libzfs__L: lib/libgeom__L cddl/lib/libctf__L: lib/libz__L .endif # cddl/lib/libdtrace requires lib/libproc and lib/librtld_db; it's only built # on select architectures though (see cddl/lib/Makefile) .if ${MACHINE_CPUARCH} != "sparc64" -_prebuild_libs+= lib/libproc lib/librtld_db +_prebuild_libs+= lib/libprocstat lib/libproc lib/librtld_db +lib/libprocstat__L: lib/libelf__L lib/libkvm__L lib/libutil__L +lib/libproc__L: lib/libprocstat__L +lib/librtld_db__L: lib/libprocstat__L .endif .if ${MK_CRYPT} != "no" .if ${MK_OPENSSL} != "no" _secure_lib_libcrypto= secure/lib/libcrypto _secure_lib_libssl= secure/lib/libssl lib/libradius__L secure/lib/libssl__L: secure/lib/libcrypto__L .if ${MK_LDNS} != "no" _lib_libldns= lib/libldns lib/libldns__L: secure/lib/libcrypto__L .endif .if ${MK_OPENSSH} != "no" _secure_lib_libssh= secure/lib/libssh secure/lib/libssh__L: lib/libz__L secure/lib/libcrypto__L lib/libcrypt__L .if ${MK_LDNS} != "no" secure/lib/libssh__L: lib/libldns__L .endif .if ${MK_KERBEROS_SUPPORT} != "no" secure/lib/libssh__L: lib/libgssapi__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libhx509__L kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libmd__L kerberos5/lib/libroken__L .endif .endif .endif _secure_lib= secure/lib .endif .if ${MK_KERBEROS} != "no" kerberos5/lib/libasn1__L: lib/libcom_err__L kerberos5/lib/libroken__L kerberos5/lib/libhdb__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ kerberos5/lib/libkrb5__L kerberos5/lib/libroken__L \ kerberos5/lib/libwind__L lib/libsqlite3__L kerberos5/lib/libheimntlm__L: secure/lib/libcrypto__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libhx509__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ secure/lib/libcrypto__L kerberos5/lib/libroken__L kerberos5/lib/libwind__L kerberos5/lib/libkrb5__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libcrypt__L secure/lib/libcrypto__L kerberos5/lib/libhx509__L \ kerberos5/lib/libroken__L kerberos5/lib/libwind__L \ kerberos5/lib/libheimbase__L kerberos5/lib/libheimipcc__L kerberos5/lib/libroken__L: lib/libcrypt__L kerberos5/lib/libwind__L: kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libheimbase__L: lib/libthr__L kerberos5/lib/libheimipcc__L: kerberos5/lib/libroken__L kerberos5/lib/libheimbase__L lib/libthr__L .endif lib/libsqlite3__L: lib/libthr__L .if ${MK_GSSAPI} != "no" _lib_libgssapi= lib/libgssapi .endif .if ${MK_KERBEROS} != "no" _kerberos5_lib= kerberos5/lib _kerberos5_lib_libasn1= kerberos5/lib/libasn1 _kerberos5_lib_libhdb= kerberos5/lib/libhdb _kerberos5_lib_libheimbase= kerberos5/lib/libheimbase _kerberos5_lib_libkrb5= kerberos5/lib/libkrb5 _kerberos5_lib_libhx509= kerberos5/lib/libhx509 _kerberos5_lib_libroken= kerberos5/lib/libroken _kerberos5_lib_libheimntlm= kerberos5/lib/libheimntlm _libsqlite3= lib/libsqlite3 _kerberos5_lib_libheimipcc= kerberos5/lib/libheimipcc _kerberos5_lib_libwind= kerberos5/lib/libwind _libcom_err= lib/libcom_err .endif .if ${MK_NIS} != "no" _lib_libypclnt= lib/libypclnt .endif .if ${MK_OPENSSL} == "no" lib/libradius__L: lib/libmd__L .endif lib/libproc__L: \ ${_cddl_lib_libctf:D${_cddl_lib_libctf}__L} lib/libelf__L lib/librtld_db__L lib/libutil__L .if ${MK_CXX} != "no" .if ${MK_LIBCPLUSPLUS} != "no" lib/libproc__L: lib/libcxxrt__L .else # This implies MK_GNUCXX != "no"; see lib/libproc lib/libproc__L: gnu/lib/libsupc++__L .endif .endif gnu/lib/libdialog__L: lib/msun__L 
lib/ncurses/ncursesw__L .for _lib in ${_prereq_libs} ${_lib}__PL: .PHONY .MAKE .if exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,all,install)"; \ cd ${.CURDIR}/${_lib}; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ install .endif .endfor .for _lib in ${_startup_libs} ${_prebuild_libs} ${_generic_libs} ${_lib}__L: .PHONY .MAKE .if exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,all,install)"; \ cd ${.CURDIR}/${_lib}; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ install .endif .endfor _prereq_libs: ${_prereq_libs:S/$/__PL/} _startup_libs: ${_startup_libs:S/$/__L/} _prebuild_libs: ${_prebuild_libs:S/$/__L/} _generic_libs: ${_generic_libs:S/$/__L/} # Enable SUBDIR_PARALLEL when not calling 'make all', unless called from # 'everything' with _PARALLEL_SUBDIR_OK set. This is because it is unlikely # that running 'make all' from the top-level, especially with a SUBDIR_OVERRIDE # or LOCAL_DIRS set, will have a reliable build if SUBDIRs are built in # parallel. This is safe for the world stage of buildworld though since it has # already built libraries in a proper order and installed includes into # WORLDTMP. Special handling is done for SUBDIR ordering for 'install*' to # avoid trashing a system if it crashes mid-install. .if !make(all) || defined(_PARALLEL_SUBDIR_OK) SUBDIR_PARALLEL= .endif .include .if make(check-old) || make(check-old-dirs) || \ make(check-old-files) || make(check-old-libs) || \ make(delete-old) || make(delete-old-dirs) || \ make(delete-old-files) || make(delete-old-libs) # # check for / delete old files section # .include "ObsoleteFiles.inc" OLD_LIBS_MESSAGE="Please be sure no application still uses those libraries, \ else you can not start such an application. Consult UPDATING for more \ information regarding how to cope with the removal/revision bump of a \ specific library." .if !defined(BATCH_DELETE_OLD_FILES) RM_I=-i .else RM_I=-v .endif delete-old-files: .PHONY @echo ">>> Removing old files (only deletes safe to delete libs)" # Ask for every old file if the user really wants to remove it. # It's annoying, but better safe than sorry. # NB: We cannot pass the list of OLD_FILES as a parameter because the # argument list will get too long. Using .for/.endfor make "loops" will make # the Makefile parser segfault. @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done # Remove catpages without corresponding manpages. @exec 3<&0; \ find ${DESTDIR}/usr/share/man/cat* ! -type d | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! 
-e "$${manpage}" ]; then \ rm ${RM_I} $${catpage} <&3; \ fi; \ done @echo ">>> Old files removed" check-old-files: .PHONY @echo ">>> Checking for old files" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done # Check for catpages without corresponding manpages. @find ${DESTDIR}/usr/share/man/cat* ! -type d | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! -e "$${manpage}" ]; then \ echo $${catpage}; \ fi; \ done delete-old-libs: .PHONY @echo ">>> Removing old libraries" @echo "${OLD_LIBS_MESSAGE}" | fmt @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done @echo ">>> Old libraries removed" check-old-libs: .PHONY @echo ">>> Checking for old libraries" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done delete-old-dirs: .PHONY @echo ">>> Removing old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | sort -r | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ rmdir -v "${DESTDIR}/$${dir}" || true; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done @echo ">>> Old directories removed" check-old-dirs: .PHONY @echo ">>> Checking for old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir}"; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done delete-old: delete-old-files delete-old-dirs .PHONY @echo "To remove old libraries run '${MAKE} delete-old-libs'." check-old: check-old-files check-old-libs check-old-dirs .PHONY @echo "To remove old files and directories run '${MAKE} delete-old'." @echo "To remove old libraries run '${MAKE} delete-old-libs'." .endif # # showconfig - show build configuration. 
# showconfig: .PHONY @(${MAKE} -n -f ${.CURDIR}/sys/conf/kern.opts.mk -V dummy -dg1; \ ${MAKE} -n -f ${.CURDIR}/share/mk/src.opts.mk -V dummy -dg1) 2>&1 | grep ^MK_ | sort -u .if !empty(KRNLOBJDIR) && !empty(KERNCONF) DTBOUTPUTPATH= ${KRNLOBJDIR}/${KERNCONF}/ .if !defined(FDT_DTS_FILE) || empty(FDT_DTS_FILE) .if exists(${KERNCONFDIR}/${KERNCONF}) FDT_DTS_FILE!= awk 'BEGIN {FS="="} /^makeoptions[[:space:]]+FDT_DTS_FILE/ {print $$2}' \ '${KERNCONFDIR}/${KERNCONF}' ; echo .endif .endif .endif .if !defined(DTBOUTPUTPATH) || !exists(${DTBOUTPUTPATH}) DTBOUTPUTPATH= ${.CURDIR} .endif # # Build 'standalone' Device Tree Blob # builddtb: .PHONY @PATH=${TMPPATH} MACHINE=${TARGET} \ ${.CURDIR}/sys/tools/fdt/make_dtb.sh ${.CURDIR}/sys \ "${FDT_DTS_FILE}" ${DTBOUTPUTPATH} ############### # cleanworld # In the following, the first 'rm' in a series will usually remove all # files and directories. If it does not, then there are probably some # files with file flags set, so this unsets them and tries the 'rm' a # second time. There are situations where this target will be cleaning # some directories via more than one method, but that duplication is # needed to correctly handle all the possible situations. Removing all # files without file flags set in the first 'rm' instance saves time, # because 'chflags' will need to operate on fewer files afterwards. # # It is expected that BW_CANONICALOBJDIR == the CANONICALOBJDIR as would be # created by bsd.obj.mk, except that we don't want to .include that file # in this makefile. # BW_CANONICALOBJDIR:=${OBJTREE}${.CURDIR} cleanworld: .PHONY .if exists(${BW_CANONICALOBJDIR}/) -rm -rf ${BW_CANONICALOBJDIR}/* -chflags -R 0 ${BW_CANONICALOBJDIR} rm -rf ${BW_CANONICALOBJDIR}/* .endif .if ${.CURDIR} == ${.OBJDIR} || ${.CURDIR}/obj == ${.OBJDIR} # To be safe in this case, fall back to a 'make cleandir' ${_+_}@cd ${.CURDIR}; ${MAKE} cleandir .endif .if defined(TARGET) && defined(TARGET_ARCH) .if ${TARGET} == ${MACHINE} && ${TARGET_ARCH} == ${MACHINE_ARCH} XDEV_CPUTYPE?=${CPUTYPE} .else XDEV_CPUTYPE?=${TARGET_CPUTYPE} .endif NOFUN=-DNO_FSCHG MK_HTML=no -DNO_LINT \ MK_MAN=no MK_NLS=no MK_PROFILE=no \ MK_KERBEROS=no MK_RESCUE=no MK_TESTS=no MK_WARNS=no \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ CPUTYPE=${XDEV_CPUTYPE} XDDIR=${TARGET_ARCH}-freebsd XDTP?=/usr/${XDDIR} .if ${XDTP:N/*} .error XDTP variable should be an absolute path .endif CDBENV=MAKEOBJDIRPREFIX=${MAKEOBJDIRPREFIX}/${XDDIR} \ INSTALL="sh ${.CURDIR}/tools/install.sh" CDENV= ${CDBENV} \ TOOLS_PREFIX=${XDTP} CD2CFLAGS=-isystem ${XDDESTDIR}/usr/include -L${XDDESTDIR}/usr/lib \ --sysroot=${XDDESTDIR}/ -B${XDDESTDIR}/usr/libexec \ -B${XDDESTDIR}/usr/bin -B${XDDESTDIR}/usr/lib CD2ENV=${CDENV} CC="${CC} ${CD2CFLAGS}" CXX="${CXX} ${CD2CFLAGS}" \ CPP="${CPP} ${CD2CFLAGS}" \ MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} CDTMP= ${MAKEOBJDIRPREFIX}/${XDDIR}/${.CURDIR}/tmp CDMAKE=${CDENV} PATH=${CDTMP}/usr/bin:${PATH} ${MAKE} ${NOFUN} CD2MAKE=${CD2ENV} PATH=${CDTMP}/usr/bin:${XDDESTDIR}/usr/bin:${PATH} ${MAKE} ${NOFUN} XDDESTDIR=${DESTDIR}/${XDTP} .if !defined(OSREL) OSREL!= uname -r | sed -e 's/[-(].*//' .endif .ORDER: xdev-build xdev-install xdev-links xdev: xdev-build xdev-install .PHONY .ORDER: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools xdev-build: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools .PHONY _xb-worldtmp: .PHONY mkdir -p ${CDTMP}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${CDTMP}/usr >/dev/null _xb-bootstrap-tools: .PHONY .for _tool in \ 
${_clang_tblgen} \ ${_gperf} ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ obj; \ ${CDMAKE} DIRPRFX=${_tool}/ all; \ ${CDMAKE} DIRPRFX=${_tool}/ DESTDIR=${CDTMP} install .endfor _xb-build-tools: .PHONY ${_+_}@cd ${.CURDIR}; \ ${CDBENV} ${MAKE} -f Makefile.inc1 ${NOFUN} build-tools _xb-cross-tools: .PHONY .for _tool in \ ${_binutils} \ ${_elftctools} \ usr.bin/ar \ ${_clang_libs} \ ${_clang} \ ${_cc} ${_+_}@${ECHODIR} "===> xdev ${_tool} (obj,all)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ obj; \ ${CDMAKE} DIRPRFX=${_tool}/ all .endfor _xi-mtree: .PHONY ${_+_}@${ECHODIR} "mtree populating ${XDDESTDIR}" mkdir -p ${XDDESTDIR} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${XDDESTDIR} >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${XDDESTDIR}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${XDDESTDIR}/usr/include >/dev/null .if defined(LIBCOMPAT) mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${XDDESTDIR}/usr >/dev/null .endif .if ${MK_TESTS} != "no" mkdir -p ${XDDESTDIR}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${XDDESTDIR}${TESTSBASE} >/dev/null .endif .ORDER: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries xdev-install: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries .PHONY _xi-cross-tools: .PHONY @echo "_xi-cross-tools" .for _tool in \ ${_binutils} \ ${_elftctools} \ usr.bin/ar \ ${_clang_libs} \ ${_clang} \ ${_cc} ${_+_}@${ECHODIR} "===> xdev ${_tool} (install)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ install DESTDIR=${XDDESTDIR} .endfor _xi-includes: .PHONY ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 includes \ DESTDIR=${XDDESTDIR} _xi-libraries: .PHONY ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 libraries \ DESTDIR=${XDDESTDIR} xdev-links: .PHONY ${_+_}cd ${XDDESTDIR}/usr/bin; \ mkdir -p ../../../../usr/bin; \ for i in *; do \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}-$$i; \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}${OSREL}-$$i; \ done .else xdev xdev-build xdev-install xdev-links: .PHONY @echo "*** Error: Both TARGET and TARGET_ARCH must be defined for \"${.TARGET}\" target" .endif Index: user/alc/PQ_LAUNDRY/bin/sh/expand.c =================================================================== --- user/alc/PQ_LAUNDRY/bin/sh/expand.c (revision 303641) +++ user/alc/PQ_LAUNDRY/bin/sh/expand.c (revision 303642) @@ -1,1544 +1,1545 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 1997-2005 * Herbert Xu . All rights reserved. * Copyright (c) 2010-2015 * Jilles Tjoelker . All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)expand.c 8.5 (Berkeley) 5/15/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Routines to expand arguments to commands. We have to deal with * backquotes, shell variables, and file metacharacters. */ #include "shell.h" #include "main.h" #include "nodes.h" #include "eval.h" #include "expand.h" #include "syntax.h" #include "parser.h" #include "jobs.h" #include "options.h" #include "var.h" #include "input.h" #include "output.h" #include "memalloc.h" #include "error.h" #include "mystring.h" #include "arith.h" #include "show.h" #include "builtins.h" enum wordstate { WORD_IDLE, WORD_WS_DELIMITED, WORD_QUOTEMARK }; struct worddest { struct arglist *list; enum wordstate state; }; static char *expdest; /* output of current string */ static struct nodelist *argbackq; /* list of back quote expressions */ static const char *argstr(const char *, int, struct worddest *); static const char *exptilde(const char *, int); static const char *expari(const char *, int, struct worddest *); static void expbackq(union node *, int, int, struct worddest *); static void subevalvar_trim(const char *, int, int, int); static int subevalvar_misc(const char *, const char *, int, int, int); static const char *evalvar(const char *, int, struct worddest *); static int varisset(const char *, int); static void strtodest(const char *, int, int, int, struct worddest *); static void reprocess(int, int, int, int, struct worddest *); static void varvalue(const char *, int, int, int, struct worddest *); static void expandmeta(char *, struct arglist *); static void expmeta(char *, char *, struct arglist *); static int expsortcmp(const void *, const void *); static int patmatch(const char *, const char *); static void cvtnum(int, char *); static int collate_range_cmp(wchar_t, wchar_t); void emptyarglist(struct arglist *list) { list->args = list->smallarg; list->count = 0; list->capacity = sizeof(list->smallarg) / sizeof(list->smallarg[0]); } void appendarglist(struct arglist *list, char *str) { char **newargs; int newcapacity; if (list->count >= list->capacity) { newcapacity = list->capacity * 2; if (newcapacity < 16) newcapacity = 16; if (newcapacity > INT_MAX / (int)sizeof(newargs[0])) error("Too many entries in arglist"); newargs = stalloc(newcapacity * sizeof(newargs[0])); memcpy(newargs, list->args, list->count * sizeof(newargs[0])); list->args = newargs; list->capacity = newcapacity; } list->args[list->count++] = str; } static int collate_range_cmp(wchar_t c1, wchar_t c2) { static wchar_t s1[2], s2[2]; s1[0] = c1; s2[0] = c2; return (wcscoll(s1, s2)); } static char * 
stputs_quotes(const char *data, const char *syntax, char *p) { while (*data) { CHECKSTRSPACE(2, p); if (syntax[(int)*data] == CCTL) USTPUTC(CTLESC, p); USTPUTC(*data++, p); } return (p); } #define STPUTS_QUOTES(data, syntax, p) p = stputs_quotes((data), syntax, p) static char * nextword(char c, int flag, char *p, struct worddest *dst) { int is_ws; is_ws = c == '\t' || c == '\n' || c == ' '; if (p != stackblock() || (is_ws ? dst->state == WORD_QUOTEMARK : dst->state != WORD_WS_DELIMITED) || c == '\0') { STPUTC('\0', p); if (flag & EXP_GLOB) expandmeta(grabstackstr(p), dst->list); else appendarglist(dst->list, grabstackstr(p)); dst->state = is_ws ? WORD_WS_DELIMITED : WORD_IDLE; } else if (!is_ws && dst->state == WORD_WS_DELIMITED) dst->state = WORD_IDLE; /* Reserve space while the stack string is empty. */ appendarglist(dst->list, NULL); dst->list->count--; STARTSTACKSTR(p); return p; } #define NEXTWORD(c, flag, p, dstlist) p = nextword(c, flag, p, dstlist) static char * stputs_split(const char *data, const char *syntax, int flag, char *p, struct worddest *dst) { const char *ifs; char c; ifs = ifsset() ? ifsval() : " \t\n"; while (*data) { CHECKSTRSPACE(2, p); c = *data++; if (strchr(ifs, c) != NULL) { NEXTWORD(c, flag, p, dst); continue; } if (flag & EXP_GLOB && syntax[(int)c] == CCTL) USTPUTC(CTLESC, p); USTPUTC(c, p); } return (p); } #define STPUTS_SPLIT(data, syntax, flag, p, dst) p = stputs_split((data), syntax, flag, p, dst) /* * Perform expansions on an argument, placing the resulting list of arguments * in arglist. Parameter expansion, command substitution and arithmetic * expansion are always performed; additional expansions can be requested * via flag (EXP_*). * The result is left in the stack string. * When arglist is NULL, perform here document expansion. * * Caution: this function uses global state and is not reentrant. * However, a new invocation after an interrupted invocation is safe * and will reset the global state for the new call. */ void expandarg(union node *arg, struct arglist *arglist, int flag) { struct worddest exparg; if (fflag) flag &= ~EXP_GLOB; argbackq = arg->narg.backquote; exparg.list = arglist; exparg.state = WORD_IDLE; STARTSTACKSTR(expdest); argstr(arg->narg.text, flag, &exparg); if (arglist == NULL) { STACKSTRNUL(expdest); return; /* here document expanded */ } if ((flag & EXP_SPLIT) == 0 || expdest != stackblock() || exparg.state == WORD_QUOTEMARK) { STPUTC('\0', expdest); if (flag & EXP_SPLIT) { if (flag & EXP_GLOB) expandmeta(grabstackstr(expdest), exparg.list); else appendarglist(exparg.list, grabstackstr(expdest)); } } if ((flag & EXP_SPLIT) == 0) appendarglist(arglist, grabstackstr(expdest)); } /* * Perform parameter expansion, command substitution and arithmetic * expansion, and tilde expansion if requested via EXP_TILDE/EXP_VARTILDE. * Processing ends at a CTLENDVAR or CTLENDARI character as well as '\0'. * This is used to expand word in ${var+word} etc. * If EXP_GLOB or EXP_CASE are set, keep and/or generate CTLESC * characters to allow for further processing. * * If EXP_SPLIT is set, dst receives any complete words produced. 
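 *
 * A minimal caller sketch (mirroring casematch() later in this file, not a
 * new interface); dst may be NULL here because EXP_SPLIT is not requested:
 *
 *	argbackq = pattern->narg.backquote;
 *	STARTSTACKSTR(expdest);
 *	argstr(pattern->narg.text, EXP_TILDE | EXP_CASE, NULL);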
*/ static const char * argstr(const char *p, int flag, struct worddest *dst) { char c; int quotes = flag & (EXP_GLOB | EXP_CASE); /* do CTLESC */ int firsteq = 1; int split_lit; int lit_quoted; split_lit = flag & EXP_SPLIT_LIT; lit_quoted = flag & EXP_LIT_QUOTED; flag &= ~(EXP_SPLIT_LIT | EXP_LIT_QUOTED); if (*p == '~' && (flag & (EXP_TILDE | EXP_VARTILDE))) p = exptilde(p, flag); for (;;) { CHECKSTRSPACE(2, expdest); switch (c = *p++) { case '\0': return (p - 1); case CTLENDVAR: case CTLENDARI: return (p); case CTLQUOTEMARK: lit_quoted = 1; /* "$@" syntax adherence hack */ if (p[0] == CTLVAR && (p[1] & VSQUOTE) != 0 && p[2] == '@' && p[3] == '=') break; if ((flag & EXP_SPLIT) != 0 && expdest == stackblock()) dst->state = WORD_QUOTEMARK; break; case CTLQUOTEEND: lit_quoted = 0; break; case CTLESC: c = *p++; if (split_lit && !lit_quoted && strchr(ifsset() ? ifsval() : " \t\n", c) != NULL) { NEXTWORD(c, flag, expdest, dst); break; } if (quotes) USTPUTC(CTLESC, expdest); USTPUTC(c, expdest); break; case CTLVAR: p = evalvar(p, flag, dst); break; case CTLBACKQ: case CTLBACKQ|CTLQUOTE: expbackq(argbackq->n, c & CTLQUOTE, flag, dst); argbackq = argbackq->next; break; case CTLARI: p = expari(p, flag, dst); break; case ':': case '=': /* * sort of a hack - expand tildes in variable * assignments (after the first '=' and after ':'s). */ if (split_lit && !lit_quoted && strchr(ifsset() ? ifsval() : " \t\n", c) != NULL) { NEXTWORD(c, flag, expdest, dst); break; } USTPUTC(c, expdest); if (flag & EXP_VARTILDE && *p == '~' && (c != '=' || firsteq)) { if (c == '=') firsteq = 0; p = exptilde(p, flag); } break; default: if (split_lit && !lit_quoted && strchr(ifsset() ? ifsval() : " \t\n", c) != NULL) { NEXTWORD(c, flag, expdest, dst); break; } USTPUTC(c, expdest); } } } /* * Perform tilde expansion, placing the result in the stack string and * returning the next position in the input string to process. */ static const char * exptilde(const char *p, int flag) { char c; const char *startp = p; const char *user; struct passwd *pw; char *home; int len; for (;;) { c = *p; switch(c) { case CTLESC: /* This means CTL* are always considered quoted. */ case CTLVAR: case CTLBACKQ: case CTLBACKQ | CTLQUOTE: case CTLARI: case CTLENDARI: case CTLQUOTEMARK: return (startp); case ':': if ((flag & EXP_VARTILDE) == 0) break; /* FALLTHROUGH */ case '\0': case '/': case CTLENDVAR: len = p - startp - 1; STPUTBIN(startp + 1, len, expdest); STACKSTRNUL(expdest); user = expdest - len; if (*user == '\0') { home = lookupvar("HOME"); } else { pw = getpwnam(user); home = pw != NULL ? pw->pw_dir : NULL; } STADJUST(-len, expdest); if (home == NULL || *home == '\0') return (startp); strtodest(home, flag, VSNORMAL, 1, NULL); return (p); } p++; } } /* * Expand arithmetic expression. */ static const char * expari(const char *p, int flag, struct worddest *dst) { char *q, *start; arith_t result; int begoff; int quoted; int adj; quoted = *p++ == '"'; begoff = expdest - stackblock(); p = argstr(p, 0, NULL); STPUTC('\0', expdest); start = stackblock() + begoff; q = grabstackstr(expdest); result = arith(start); ungrabstackstr(q, expdest); start = stackblock() + begoff; adj = start - expdest; STADJUST(adj, expdest); CHECKSTRSPACE((int)(DIGITS(result) + 1), expdest); fmtstr(expdest, DIGITS(result), ARITH_FORMAT_STR, result); adj = strlen(expdest); STADJUST(adj, expdest); if (!quoted) reprocess(expdest - adj - stackblock(), flag, VSNORMAL, 0, dst); return p; } /* * Perform command substitution. 
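 *
 * For example (illustrative, default IFS assumed): an unquoted
 * $(printf 'a\nb\n\n') yields the two words "a" and "b" when field
 * splitting is requested, because embedded newlines found in IFS start a
 * new word while the trailing newlines are never copied at all.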
*/ static void expbackq(union node *cmd, int quoted, int flag, struct worddest *dst) { struct backcmd in; int i; char buf[128]; char *p; char *dest = expdest; struct nodelist *saveargbackq; char lastc; char const *syntax = quoted? DQSYNTAX : BASESYNTAX; int quotes = flag & (EXP_GLOB | EXP_CASE); size_t nnl; const char *ifs; INTOFF; saveargbackq = argbackq; p = grabstackstr(dest); evalbackcmd(cmd, &in); ungrabstackstr(p, dest); argbackq = saveargbackq; p = in.buf; nnl = 0; if (!quoted && flag & EXP_SPLIT) ifs = ifsset() ? ifsval() : " \t\n"; else ifs = ""; /* Don't copy trailing newlines */ for (;;) { if (--in.nleft < 0) { if (in.fd < 0) break; - while ((i = read(in.fd, buf, sizeof buf)) < 0 && errno == EINTR); + while ((i = read(in.fd, buf, sizeof buf)) < 0 && errno == EINTR) + ; TRACE(("expbackq: read returns %d\n", i)); if (i <= 0) break; p = buf; in.nleft = i - 1; } lastc = *p++; if (lastc == '\0') continue; if (lastc == '\n') { nnl++; } else { if (nnl > 0) { if (strchr(ifs, '\n') != NULL) { NEXTWORD('\n', flag, dest, dst); nnl = 0; } else { CHECKSTRSPACE(nnl + 2, dest); while (nnl > 0) { nnl--; USTPUTC('\n', dest); } } } if (strchr(ifs, lastc) != NULL) NEXTWORD(lastc, flag, dest, dst); else { CHECKSTRSPACE(2, dest); if (quotes && syntax[(int)lastc] == CCTL) USTPUTC(CTLESC, dest); USTPUTC(lastc, dest); } } } if (in.fd >= 0) close(in.fd); if (in.buf) ckfree(in.buf); if (in.jp) exitstatus = waitforjob(in.jp, (int *)NULL); TRACE(("expbackq: size=%td: \"%.*s\"\n", ((dest - stackblock()) - startloc), (int)((dest - stackblock()) - startloc), stackblock() + startloc)); expdest = dest; INTON; } static void recordleft(const char *str, const char *loc, char *startp) { int amount; amount = ((str - 1) - (loc - startp)) - expdest; STADJUST(amount, expdest); while (loc != str - 1) *startp++ = *loc++; } static void subevalvar_trim(const char *p, int strloc, int subtype, int startloc) { char *startp; char *loc = NULL; char *str; int c = 0; struct nodelist *saveargbackq = argbackq; int amount; argstr(p, EXP_CASE | EXP_TILDE, NULL); STACKSTRNUL(expdest); argbackq = saveargbackq; startp = stackblock() + startloc; str = stackblock() + strloc; switch (subtype) { case VSTRIMLEFT: for (loc = startp; loc < str; loc++) { c = *loc; *loc = '\0'; if (patmatch(str, startp)) { *loc = c; recordleft(str, loc, startp); return; } *loc = c; } break; case VSTRIMLEFTMAX: for (loc = str - 1; loc >= startp;) { c = *loc; *loc = '\0'; if (patmatch(str, startp)) { *loc = c; recordleft(str, loc, startp); return; } *loc = c; loc--; } break; case VSTRIMRIGHT: for (loc = str - 1; loc >= startp;) { if (patmatch(str, loc)) { amount = loc - expdest; STADJUST(amount, expdest); return; } loc--; } break; case VSTRIMRIGHTMAX: for (loc = startp; loc < str - 1; loc++) { if (patmatch(str, loc)) { amount = loc - expdest; STADJUST(amount, expdest); return; } } break; default: abort(); } amount = (expdest - stackblock() - strloc) + 1; STADJUST(-amount, expdest); } static int subevalvar_misc(const char *p, const char *var, int subtype, int startloc, int varflags) { char *startp; struct nodelist *saveargbackq = argbackq; int amount; argstr(p, EXP_TILDE, NULL); STACKSTRNUL(expdest); argbackq = saveargbackq; startp = stackblock() + startloc; switch (subtype) { case VSASSIGN: setvar(var, startp, 0); amount = startp - expdest; STADJUST(amount, expdest); return 1; case VSQUESTION: if (*p != CTLENDVAR) { outfmt(out2, "%s\n", startp); error((char *)NULL); } error("%.*s: parameter %snot set", (int)(p - var - 1), var, (varflags & VSNUL) ? 
"null or " : ""); return 0; default: abort(); } } /* * Expand a variable, and return a pointer to the next character in the * input string. */ static const char * evalvar(const char *p, int flag, struct worddest *dst) { int subtype; int varflags; const char *var; const char *val; int patloc; int c; int set; int special; int startloc; int varlen; int varlenb; char buf[21]; varflags = (unsigned char)*p++; subtype = varflags & VSTYPE; var = p; special = 0; if (! is_name(*p)) special = 1; p = strchr(p, '=') + 1; again: /* jump here after setting a variable with ${var=text} */ if (varflags & VSLINENO) { set = 1; special = 1; val = NULL; } else if (special) { set = varisset(var, varflags & VSNUL); val = NULL; } else { val = bltinlookup(var, 1); if (val == NULL || ((varflags & VSNUL) && val[0] == '\0')) { val = NULL; set = 0; } else set = 1; } varlen = 0; startloc = expdest - stackblock(); if (!set && uflag && *var != '@' && *var != '*') { switch (subtype) { case VSNORMAL: case VSTRIMLEFT: case VSTRIMLEFTMAX: case VSTRIMRIGHT: case VSTRIMRIGHTMAX: case VSLENGTH: error("%.*s: parameter not set", (int)(p - var - 1), var); } } if (set && subtype != VSPLUS) { /* insert the value of the variable */ if (special) { if (varflags & VSLINENO) { if (p - var > (ptrdiff_t)sizeof(buf)) abort(); memcpy(buf, var, p - var - 1); buf[p - var - 1] = '\0'; strtodest(buf, flag, subtype, varflags & VSQUOTE, dst); } else varvalue(var, varflags & VSQUOTE, subtype, flag, dst); if (subtype == VSLENGTH) { varlenb = expdest - stackblock() - startloc; varlen = varlenb; if (localeisutf8) { val = stackblock() + startloc; for (;val != expdest; val++) if ((*val & 0xC0) == 0x80) varlen--; } STADJUST(-varlenb, expdest); } } else { if (subtype == VSLENGTH) { for (;*val; val++) if (!localeisutf8 || (*val & 0xC0) != 0x80) varlen++; } else strtodest(val, flag, subtype, varflags & VSQUOTE, dst); } } if (subtype == VSPLUS) set = ! set; switch (subtype) { case VSLENGTH: cvtnum(varlen, buf); strtodest(buf, flag, VSNORMAL, varflags & VSQUOTE, dst); break; case VSNORMAL: break; case VSPLUS: case VSMINUS: if (!set) { argstr(p, flag | (flag & EXP_SPLIT ? EXP_SPLIT_LIT : 0) | (varflags & VSQUOTE ? EXP_LIT_QUOTED : 0), dst); break; } break; case VSTRIMLEFT: case VSTRIMLEFTMAX: case VSTRIMRIGHT: case VSTRIMRIGHTMAX: if (!set) break; /* * Terminate the string and start recording the pattern * right after it */ STPUTC('\0', expdest); patloc = expdest - stackblock(); subevalvar_trim(p, patloc, subtype, startloc); reprocess(startloc, flag, VSNORMAL, varflags & VSQUOTE, dst); if (flag & EXP_SPLIT && *var == '@' && varflags & VSQUOTE) dst->state = WORD_QUOTEMARK; break; case VSASSIGN: case VSQUESTION: if (!set) { if (subevalvar_misc(p, var, subtype, startloc, varflags)) { varflags &= ~VSNUL; goto again; } break; } break; case VSERROR: c = p - var - 1; error("${%.*s%s}: Bad substitution", c, var, (c > 0 && *p != CTLENDVAR) ? "..." : ""); default: abort(); } if (subtype != VSNORMAL) { /* skip to end of alternative */ int nesting = 1; for (;;) { if ((c = *p++) == CTLESC) p++; else if (c == CTLBACKQ || c == (CTLBACKQ|CTLQUOTE)) { if (set) argbackq = argbackq->next; } else if (c == CTLVAR) { if ((*p++ & VSTYPE) != VSNORMAL) nesting++; } else if (c == CTLENDVAR) { if (--nesting == 0) break; } } } return p; } /* * Test whether a specialized variable is set. 
*/ static int varisset(const char *name, int nulok) { if (*name == '!') return backgndpidset(); else if (*name == '@' || *name == '*') { if (*shellparam.p == NULL) return 0; if (nulok) { char **av; for (av = shellparam.p; *av; av++) if (**av != '\0') return 1; return 0; } } else if (is_digit(*name)) { char *ap; long num; errno = 0; num = strtol(name, NULL, 10); if (errno != 0 || num > shellparam.nparam) return 0; if (num == 0) ap = arg0; else ap = shellparam.p[num - 1]; if (nulok && (ap == NULL || *ap == '\0')) return 0; } return 1; } static void strtodest(const char *p, int flag, int subtype, int quoted, struct worddest *dst) { if (subtype == VSLENGTH || subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX || subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX) STPUTS(p, expdest); else if (flag & EXP_SPLIT && !quoted && dst != NULL) STPUTS_SPLIT(p, BASESYNTAX, flag, expdest, dst); else if (flag & (EXP_GLOB | EXP_CASE)) STPUTS_QUOTES(p, quoted ? DQSYNTAX : BASESYNTAX, expdest); else STPUTS(p, expdest); } static void reprocess(int startloc, int flag, int subtype, int quoted, struct worddest *dst) { static char *buf = NULL; static size_t buflen = 0; char *startp; size_t len, zpos, zlen; startp = stackblock() + startloc; len = expdest - startp; if (len >= SIZE_MAX / 2) abort(); INTOFF; if (len >= buflen) { ckfree(buf); buf = NULL; } if (buflen < 128) buflen = 128; while (len >= buflen) buflen <<= 1; if (buf == NULL) buf = ckmalloc(buflen); INTON; memcpy(buf, startp, len); buf[len] = '\0'; STADJUST(-len, expdest); for (zpos = 0;;) { zlen = strlen(buf + zpos); strtodest(buf + zpos, flag, subtype, quoted, dst); zpos += zlen + 1; if (zpos == len + 1) break; if (flag & EXP_SPLIT && (quoted || (zlen > 0 && zpos < len))) NEXTWORD('\0', flag, expdest, dst); } } /* * Add the value of a specialized variable to the stack string. */ static void varvalue(const char *name, int quoted, int subtype, int flag, struct worddest *dst) { int num; char *p; int i; int splitlater; char sep[2]; char **ap; char buf[(NSHORTOPTS > 10 ? 
NSHORTOPTS : 10) + 1]; if (subtype == VSLENGTH) flag &= ~EXP_FULL; splitlater = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX || subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX; switch (*name) { case '$': num = rootpid; break; case '?': num = oexitstatus; break; case '#': num = shellparam.nparam; break; case '!': num = backgndpidval(); break; case '-': p = buf; for (i = 0 ; i < NSHORTOPTS ; i++) { if (optval[i]) *p++ = optletter[i]; } *p = '\0'; strtodest(buf, flag, subtype, quoted, dst); return; case '@': if (flag & EXP_SPLIT && quoted) { for (ap = shellparam.p ; (p = *ap++) != NULL ; ) { strtodest(p, flag, subtype, quoted, dst); if (*ap) { if (splitlater) STPUTC('\0', expdest); else NEXTWORD('\0', flag, expdest, dst); } } if (shellparam.nparam > 0) dst->state = WORD_QUOTEMARK; return; } /* FALLTHROUGH */ case '*': if (ifsset()) sep[0] = ifsval()[0]; else sep[0] = ' '; sep[1] = '\0'; for (ap = shellparam.p ; (p = *ap++) != NULL ; ) { strtodest(p, flag, subtype, quoted, dst); if (!*ap) break; if (sep[0]) strtodest(sep, flag, subtype, quoted, dst); else if (flag & EXP_SPLIT && !quoted && **ap != '\0') { if (splitlater) STPUTC('\0', expdest); else NEXTWORD('\0', flag, expdest, dst); } } return; default: if (is_digit(*name)) { num = atoi(name); if (num == 0) p = arg0; else if (num > 0 && num <= shellparam.nparam) p = shellparam.p[num - 1]; else return; strtodest(p, flag, subtype, quoted, dst); } return; } cvtnum(num, buf); strtodest(buf, flag, subtype, quoted, dst); } static char expdir[PATH_MAX]; #define expdir_end (expdir + sizeof(expdir)) /* * Perform pathname generation and remove control characters. * At this point, the only control characters should be CTLESC. * The results are stored in the list dstlist. */ static void expandmeta(char *pattern, struct arglist *dstlist) { char *p; int firstmatch; char c; firstmatch = dstlist->count; p = pattern; for (; (c = *p) != '\0'; p++) { /* fast check for meta chars */ if (c == '*' || c == '?' || c == '[') { INTOFF; expmeta(expdir, pattern, dstlist); INTON; break; } } if (dstlist->count == firstmatch) { /* * no matches */ rmescapes(pattern); appendarglist(dstlist, pattern); } else { qsort(&dstlist->args[firstmatch], dstlist->count - firstmatch, sizeof(dstlist->args[0]), expsortcmp); } } /* * Do metacharacter (i.e. *, ?, [...]) expansion. */ static void expmeta(char *enddir, char *name, struct arglist *arglist) { const char *p; const char *q; const char *start; char *endname; int metaflag; struct stat statb; DIR *dirp; struct dirent *dp; int atend; int matchdot; int esc; int namlen; metaflag = 0; start = name; for (p = name; esc = 0, *p; p += esc + 1) { if (*p == '*' || *p == '?') metaflag = 1; else if (*p == '[') { q = p + 1; if (*q == '!' 
|| *q == '^') q++; for (;;) { if (*q == CTLESC) q++; if (*q == '/' || *q == '\0') break; if (*++q == ']') { metaflag = 1; break; } } } else if (*p == '\0') break; else { if (*p == CTLESC) esc++; if (p[esc] == '/') { if (metaflag) break; start = p + esc + 1; } } } if (metaflag == 0) { /* we've reached the end of the file name */ if (enddir != expdir) metaflag++; for (p = name ; ; p++) { if (*p == CTLESC) p++; *enddir++ = *p; if (*p == '\0') break; if (enddir == expdir_end) return; } if (metaflag == 0 || lstat(expdir, &statb) >= 0) appendarglist(arglist, stsavestr(expdir)); return; } endname = name + (p - name); if (start != name) { p = name; while (p < start) { if (*p == CTLESC) p++; *enddir++ = *p++; if (enddir == expdir_end) return; } } if (enddir == expdir) { p = "."; } else if (enddir == expdir + 1 && *expdir == '/') { p = "/"; } else { p = expdir; enddir[-1] = '\0'; } if ((dirp = opendir(p)) == NULL) return; if (enddir != expdir) enddir[-1] = '/'; if (*endname == 0) { atend = 1; } else { atend = 0; *endname = '\0'; endname += esc + 1; } matchdot = 0; p = start; if (*p == CTLESC) p++; if (*p == '.') matchdot++; while (! int_pending() && (dp = readdir(dirp)) != NULL) { if (dp->d_name[0] == '.' && ! matchdot) continue; if (patmatch(start, dp->d_name)) { namlen = dp->d_namlen; if (enddir + namlen + 1 > expdir_end) continue; memcpy(enddir, dp->d_name, namlen + 1); if (atend) appendarglist(arglist, stsavestr(expdir)); else { if (dp->d_type != DT_UNKNOWN && dp->d_type != DT_DIR && dp->d_type != DT_LNK) continue; if (enddir + namlen + 2 > expdir_end) continue; enddir[namlen] = '/'; enddir[namlen + 1] = '\0'; expmeta(enddir + namlen + 1, endname, arglist); } } } closedir(dirp); if (! atend) endname[-esc - 1] = esc ? CTLESC : '/'; } static int expsortcmp(const void *p1, const void *p2) { const char *s1 = *(const char * const *)p1; const char *s2 = *(const char * const *)p2; return (strcoll(s1, s2)); } static wchar_t get_wc(const char **p) { wchar_t c; int chrlen; chrlen = mbtowc(&c, *p, 4); if (chrlen == 0) return 0; else if (chrlen == -1) c = 0; else *p += chrlen; return c; } /* * See if a character matches a character class, starting at the first colon * of "[:class:]". * If a valid character class is recognized, a pointer to the next character * after the final closing bracket is stored into *end, otherwise a null * pointer is stored into *end. */ static int match_charclass(const char *p, wchar_t chr, const char **end) { char name[20]; const char *nameend; wctype_t cclass; *end = NULL; p++; nameend = strstr(p, ":]"); if (nameend == NULL || (size_t)(nameend - p) >= sizeof(name) || nameend == p) return 0; memcpy(name, p, nameend - p); name[nameend - p] = '\0'; *end = nameend + 2; cclass = wctype(name); /* An unknown class matches nothing but is valid nevertheless. */ if (cclass == 0) return 0; return iswctype(chr, cclass); } /* * Returns true if the pattern matches the string. */ static int patmatch(const char *pattern, const char *string) { const char *p, *q, *end; const char *bt_p, *bt_q; char c; wchar_t wc, wc2; p = pattern; q = string; bt_p = NULL; bt_q = NULL; for (;;) { switch (c = *p++) { case '\0': if (*q != '\0') goto backtrack; return 1; case CTLESC: if (*q++ != *p++) goto backtrack; break; case '?': if (*q == '\0') return 0; if (localeisutf8) { wc = get_wc(&q); /* * A '?' does not match invalid UTF-8 but a * '*' does, so backtrack. 
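				 * As an illustrative case: in a UTF-8
				 * locale, against the bytes "a\377b" the
				 * pattern "a?b" fails here, while "a*b"
				 * still matches because the backtracking
				 * below advances over the invalid byte.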
*/ if (wc == 0) goto backtrack; } else q++; break; case '*': c = *p; while (c == '*') c = *++p; /* * If the pattern ends here, we know the string * matches without needing to look at the rest of it. */ if (c == '\0') return 1; /* * First try the shortest match for the '*' that * could work. We can forget any earlier '*' since * there is no way having it match more characters * can help us, given that we are already here. */ bt_p = p; bt_q = q; break; case '[': { const char *savep, *saveq; int invert, found; wchar_t chr; savep = p, saveq = q; invert = 0; if (*p == '!' || *p == '^') { invert++; p++; } found = 0; if (*q == '\0') return 0; if (localeisutf8) { chr = get_wc(&q); if (chr == 0) goto backtrack; } else chr = (unsigned char)*q++; c = *p++; do { if (c == '\0') { p = savep, q = saveq; c = '['; goto dft; } if (c == '[' && *p == ':') { found |= match_charclass(p, chr, &end); if (end != NULL) p = end; } if (c == CTLESC) c = *p++; if (localeisutf8 && c & 0x80) { p--; wc = get_wc(&p); if (wc == 0) /* bad utf-8 */ return 0; } else wc = (unsigned char)c; if (*p == '-' && p[1] != ']') { p++; if (*p == CTLESC) p++; if (localeisutf8) { wc2 = get_wc(&p); if (wc2 == 0) /* bad utf-8 */ return 0; } else wc2 = (unsigned char)*p++; if ( collate_range_cmp(chr, wc) >= 0 && collate_range_cmp(chr, wc2) <= 0 ) found = 1; } else { if (chr == wc) found = 1; } } while ((c = *p++) != ']'); if (found == invert) goto backtrack; break; } dft: default: if (*q == '\0') return 0; if (*q++ == c) break; backtrack: /* * If we have a mismatch (other than hitting the end * of the string), go back to the last '*' seen and * have it match one additional character. */ if (bt_p == NULL) return 0; if (*bt_q == '\0') return 0; bt_q++; p = bt_p; q = bt_q; break; } } } /* * Remove any CTLESC and CTLQUOTEMARK characters from a string. */ void rmescapes(char *str) { char *p, *q; p = str; while (*p != CTLESC && *p != CTLQUOTEMARK && *p != CTLQUOTEEND) { if (*p++ == '\0') return; } q = p; while (*p) { if (*p == CTLQUOTEMARK || *p == CTLQUOTEEND) { p++; continue; } if (*p == CTLESC) p++; *q++ = *p++; } *q = '\0'; } /* * See if a pattern matches in a case statement. */ int casematch(union node *pattern, const char *val) { struct stackmark smark; int result; char *p; setstackmark(&smark); argbackq = pattern->narg.backquote; STARTSTACKSTR(expdest); argstr(pattern->narg.text, EXP_TILDE | EXP_CASE, NULL); STPUTC('\0', expdest); p = grabstackstr(expdest); result = patmatch(p, val); popstackmark(&smark); return result; } /* * Our own itoa(). */ static void cvtnum(int num, char *buf) { char temp[32]; int neg = num < 0; char *p = temp + 31; temp[31] = '\0'; do { *--p = num % 10 + '0'; } while ((num /= 10) != 0); if (neg) *--p = '-'; memcpy(buf, p, temp + 32 - p); } /* * Do most of the work for wordexp(3). */ int wordexpcmd(int argc, char **argv) { size_t len; int i; out1fmt("%08x", argc - 1); for (i = 1, len = 0; i < argc; i++) len += strlen(argv[i]); out1fmt("%08x", (int)len); for (i = 1; i < argc; i++) outbin(argv[i], strlen(argv[i]) + 1, out1); return (0); } /* * Do most of the work for wordexp(3), new version. 
*/ int freebsd_wordexpcmd(int argc __unused, char **argv __unused) { struct arglist arglist; union node *args, *n; size_t len; int ch; int protected = 0; int fd = -1; int i; while ((ch = nextopt("f:p")) != '\0') { switch (ch) { case 'f': fd = number(shoptarg); break; case 'p': protected = 1; break; } } if (*argptr != NULL) error("wrong number of arguments"); if (fd < 0) error("missing fd"); INTOFF; setinputfd(fd, 1); INTON; args = parsewordexp(); popfile(); /* will also close fd */ if (protected) for (n = args; n != NULL; n = n->narg.next) { if (n->narg.backquote != NULL) { outcslow('C', out1); error("command substitution disabled"); } } outcslow(' ', out1); emptyarglist(&arglist); for (n = args; n != NULL; n = n->narg.next) expandarg(n, &arglist, EXP_FULL | EXP_TILDE); for (i = 0, len = 0; i < arglist.count; i++) len += strlen(arglist.args[i]); out1fmt("%016x %016zx", arglist.count, len); for (i = 0; i < arglist.count; i++) outbin(arglist.args[i], strlen(arglist.args[i]) + 1, out1); return (0); } Index: user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libdtrace/arm/dt_isadep.c =================================================================== --- user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libdtrace/arm/dt_isadep.c (revision 303641) +++ user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libdtrace/arm/dt_isadep.c (revision 303642) @@ -1,190 +1,188 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * Copyright 2014 Howard Su * Copyright 2015 George V. 
Neville-Neil * */ #pragma ident "%Z%%M% %I% %E% SMI" #include #include #include #include #include #include #include -#if !defined(sun) -#define PR_MODEL_ILP32 1 -#define PR_MODEL_LP64 2 +#ifdef __FreeBSD__ #include #endif #define OP(x) ((x) >> 30) #define OP2(x) (((x) >> 22) & 0x07) #define COND(x) (((x) >> 25) & 0x0f) #define A(x) (((x) >> 29) & 0x01) #define OP_BRANCH 0 #define OP2_BPcc 0x1 #define OP2_Bicc 0x2 #define OP2_BPr 0x3 #define OP2_FBPfcc 0x5 #define OP2_FBfcc 0x6 /*ARGSUSED*/ int dt_pid_create_entry_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp) { ftp->ftps_type = DTFTP_ENTRY; ftp->ftps_pc = (uintptr_t)symp->st_value; ftp->ftps_size = (size_t)symp->st_size; ftp->ftps_noffs = 1; ftp->ftps_offs[0] = 0; if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { dt_dprintf("fasttrap probe creation ioctl failed: %s\n", strerror(errno)); return (dt_set_errno(dtp, errno)); } return (1); } int dt_pid_create_return_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, uint64_t *stret) { uint32_t *text; int i; int srdepth = 0; dt_dprintf("%s: unimplemented\n", __func__); return (DT_PROC_ERR); if ((text = malloc(symp->st_size + 4)) == NULL) { dt_dprintf("mr sparkle: malloc() failed\n"); return (DT_PROC_ERR); } if (Pread(P, text, symp->st_size, symp->st_value) != symp->st_size) { dt_dprintf("mr sparkle: Pread() failed\n"); free(text); return (DT_PROC_ERR); } /* * Leave a dummy instruction in the last slot to simplify edge * conditions. */ text[symp->st_size / 4] = 0; ftp->ftps_type = DTFTP_RETURN; ftp->ftps_pc = symp->st_value; ftp->ftps_size = symp->st_size; ftp->ftps_noffs = 0; free(text); if (ftp->ftps_noffs > 0) { if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { dt_dprintf("fasttrap probe creation ioctl failed: %s\n", strerror(errno)); return (dt_set_errno(dtp, errno)); } } return (ftp->ftps_noffs); } /*ARGSUSED*/ int dt_pid_create_offset_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, ulong_t off) { if (off & 0x3) return (DT_PROC_ALIGN); ftp->ftps_type = DTFTP_OFFSETS; ftp->ftps_pc = (uintptr_t)symp->st_value; ftp->ftps_size = (size_t)symp->st_size; ftp->ftps_noffs = 1; ftp->ftps_offs[0] = off; if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { dt_dprintf("fasttrap probe creation ioctl failed: %s\n", strerror(errno)); return (dt_set_errno(dtp, errno)); } return (1); } /*ARGSUSED*/ int dt_pid_create_glob_offset_probes(struct ps_prochandle *P, dtrace_hdl_t *dtp, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, const char *pattern) { ulong_t i; ftp->ftps_type = DTFTP_OFFSETS; ftp->ftps_pc = (uintptr_t)symp->st_value; ftp->ftps_size = (size_t)symp->st_size; ftp->ftps_noffs = 0; /* * If we're matching against everything, just iterate through each * instruction in the function, otherwise look for matching offset * names by constructing the string and comparing it against the * pattern. 
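 * As an illustrative example: for a 0x20-byte ARM function, the pattern "*"
 * records the offsets 0, 4, ..., 0x1c, whereas a pattern such as "1*" only
 * matches the offsets whose hex names begin with '1' (0x10 through 0x1c).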
*/ if (strcmp("*", pattern) == 0) { for (i = 0; i < symp->st_size; i += 4) { ftp->ftps_offs[ftp->ftps_noffs++] = i; } } else { char name[sizeof (i) * 2 + 1]; for (i = 0; i < symp->st_size; i += 4) { (void) sprintf(name, "%lx", i); if (gmatch(name, pattern)) ftp->ftps_offs[ftp->ftps_noffs++] = i; } } if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { dt_dprintf("fasttrap probe creation ioctl failed: %s\n", strerror(errno)); return (dt_set_errno(dtp, errno)); } return (ftp->ftps_noffs); } Index: user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libdtrace/i386/dt_isadep.c =================================================================== --- user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libdtrace/i386/dt_isadep.c (revision 303641) +++ user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libdtrace/i386/dt_isadep.c (revision 303642) @@ -1,537 +1,520 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2012 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include -#ifndef illumos -#define PR_MODEL_ILP32 1 -#define PR_MODEL_LP64 2 +#ifdef __FreeBSD__ +#include #include #endif #define DT_POPL_EBP 0x5d #define DT_RET 0xc3 #define DT_RET16 0xc2 #define DT_LEAVE 0xc9 #define DT_JMP32 0xe9 #define DT_JMP8 0xeb #define DT_REP 0xf3 #define DT_MOVL_EBP_ESP 0xe58b #define DT_ISJ32(op16) (((op16) & 0xfff0) == 0x0f80) #define DT_ISJ8(op8) (((op8) & 0xf0) == 0x70) #define DT_MODRM_REG(modrm) (((modrm) >> 3) & 0x7) static int dt_instr_size(uchar_t *, dtrace_hdl_t *, pid_t, uintptr_t, char); /*ARGSUSED*/ int dt_pid_create_entry_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp) { ftp->ftps_type = DTFTP_ENTRY; ftp->ftps_pc = (uintptr_t)symp->st_value; ftp->ftps_size = (size_t)symp->st_size; ftp->ftps_noffs = 1; ftp->ftps_offs[0] = 0; if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { dt_dprintf("fasttrap probe creation ioctl failed: %s\n", strerror(errno)); return (dt_set_errno(dtp, errno)); } return (1); } static int dt_pid_has_jump_table(struct ps_prochandle *P, dtrace_hdl_t *dtp, uint8_t *text, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp) { ulong_t i; int size; #ifdef illumos pid_t pid = Pstatus(P)->pr_pid; char dmodel = Pstatus(P)->pr_dmodel; #else pid_t pid = proc_getpid(P); -#if __i386__ - char dmodel = PR_MODEL_ILP32; -#elif __amd64__ - char dmodel = PR_MODEL_LP64; + char dmodel = proc_getmodel(P); #endif -#endif /* * Take a pass through the function looking for a register-dependant * jmp instruction. This could be a jump table so we have to be * ultra conservative. 
*/ for (i = 0; i < ftp->ftps_size; i += size) { size = dt_instr_size(&text[i], dtp, pid, symp->st_value + i, dmodel); /* * Assume the worst if we hit an illegal instruction. */ if (size <= 0) { dt_dprintf("error at %#lx (assuming jump table)\n", i); return (1); } #ifdef notyet /* * Register-dependant jmp instructions start with a 0xff byte * and have the modrm.reg field set to 4. They can have an * optional REX prefix on the 64-bit ISA. */ if ((text[i] == 0xff && DT_MODRM_REG(text[i + 1]) == 4) || (dmodel == PR_MODEL_LP64 && (text[i] & 0xf0) == 0x40 && text[i + 1] == 0xff && DT_MODRM_REG(text[i + 2]) == 4)) { dt_dprintf("found a suspected jump table at %s:%lx\n", ftp->ftps_func, i); return (1); } #endif } return (0); } /*ARGSUSED*/ int dt_pid_create_return_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, uint64_t *stret) { uint8_t *text; ulong_t i, end; int size; #ifdef illumos pid_t pid = Pstatus(P)->pr_pid; char dmodel = Pstatus(P)->pr_dmodel; #else pid_t pid = proc_getpid(P); -#if __i386__ - char dmodel = PR_MODEL_ILP32; -#elif __amd64__ - char dmodel = PR_MODEL_LP64; + char dmodel = proc_getmodel(P); #endif -#endif /* * We allocate a few extra bytes at the end so we don't have to check * for overrunning the buffer. */ if ((text = calloc(1, symp->st_size + 4)) == NULL) { dt_dprintf("mr sparkle: malloc() failed\n"); return (DT_PROC_ERR); } if (Pread(P, text, symp->st_size, symp->st_value) != symp->st_size) { dt_dprintf("mr sparkle: Pread() failed\n"); free(text); return (DT_PROC_ERR); } ftp->ftps_type = DTFTP_RETURN; ftp->ftps_pc = (uintptr_t)symp->st_value; ftp->ftps_size = (size_t)symp->st_size; ftp->ftps_noffs = 0; /* * If there's a jump table in the function we're only willing to * instrument these specific (and equivalent) instruction sequences: * leave * [rep] ret * and * movl %ebp,%esp * popl %ebp * [rep] ret * * We do this to avoid accidentally interpreting jump table * offsets as actual instructions. 
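 *
 * (Editor's note, not part of the original source.)  The FreeBSD-specific
 * change in this revision replaces the old compile-time PR_MODEL guess,
 * which was keyed to the architecture libdtrace itself was built for, with
 *
 *	char dmodel = proc_getmodel(P);
 *
 * obtained from the victim process via libproc; dt_instr_size() at the end
 * of this file then selects the disassembler mode with
 *
 *	cpu_mode = (dmodel == PR_MODEL_ILP32) ? SIZE32 : SIZE64;
 *
 * presumably so that instruction sizing stays correct when, say, a 64-bit
 * libdtrace inspects a 32-bit victim.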
*/ if (dt_pid_has_jump_table(P, dtp, text, ftp, symp)) { for (i = 0, end = ftp->ftps_size; i < end; i += size) { size = dt_instr_size(&text[i], dtp, pid, symp->st_value + i, dmodel); /* bail if we hit an invalid opcode */ if (size <= 0) break; if (text[i] == DT_LEAVE && text[i + 1] == DT_RET) { dt_dprintf("leave/ret at %lx\n", i + 1); ftp->ftps_offs[ftp->ftps_noffs++] = i + 1; size = 2; } else if (text[i] == DT_LEAVE && text[i + 1] == DT_REP && text[i + 2] == DT_RET) { dt_dprintf("leave/rep ret at %lx\n", i + 1); ftp->ftps_offs[ftp->ftps_noffs++] = i + 1; size = 3; } else if (*(uint16_t *)&text[i] == DT_MOVL_EBP_ESP && text[i + 2] == DT_POPL_EBP && text[i + 3] == DT_RET) { dt_dprintf("movl/popl/ret at %lx\n", i + 3); ftp->ftps_offs[ftp->ftps_noffs++] = i + 3; size = 4; } else if (*(uint16_t *)&text[i] == DT_MOVL_EBP_ESP && text[i + 2] == DT_POPL_EBP && text[i + 3] == DT_REP && text[i + 4] == DT_RET) { dt_dprintf("movl/popl/rep ret at %lx\n", i + 3); ftp->ftps_offs[ftp->ftps_noffs++] = i + 3; size = 5; } } } else { for (i = 0, end = ftp->ftps_size; i < end; i += size) { size = dt_instr_size(&text[i], dtp, pid, symp->st_value + i, dmodel); /* bail if we hit an invalid opcode */ if (size <= 0) break; /* ordinary ret */ if (size == 1 && text[i] == DT_RET) goto is_ret; /* two-byte ret */ if (size == 2 && text[i] == DT_REP && text[i + 1] == DT_RET) goto is_ret; /* ret */ if (size == 3 && text[i] == DT_RET16) goto is_ret; /* two-byte ret */ if (size == 4 && text[i] == DT_REP && text[i + 1] == DT_RET16) goto is_ret; /* 32-bit displacement jmp outside of the function */ if (size == 5 && text[i] == DT_JMP32 && symp->st_size <= (uintptr_t)(i + size + *(int32_t *)&text[i + 1])) goto is_ret; /* 8-bit displacement jmp outside of the function */ if (size == 2 && text[i] == DT_JMP8 && symp->st_size <= (uintptr_t)(i + size + *(int8_t *)&text[i + 1])) goto is_ret; /* 32-bit disp. conditional jmp outside of the func. */ if (size == 6 && DT_ISJ32(*(uint16_t *)&text[i]) && symp->st_size <= (uintptr_t)(i + size + *(int32_t *)&text[i + 2])) goto is_ret; /* 8-bit disp. conditional jmp outside of the func. 
*/ if (size == 2 && DT_ISJ8(text[i]) && symp->st_size <= (uintptr_t)(i + size + *(int8_t *)&text[i + 1])) goto is_ret; continue; is_ret: dt_dprintf("return at offset %lx\n", i); ftp->ftps_offs[ftp->ftps_noffs++] = i; } } free(text); if (ftp->ftps_noffs > 0) { if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { dt_dprintf("fasttrap probe creation ioctl failed: %s\n", strerror(errno)); return (dt_set_errno(dtp, errno)); } } return (ftp->ftps_noffs); } /*ARGSUSED*/ int dt_pid_create_offset_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, ulong_t off) { ftp->ftps_type = DTFTP_OFFSETS; ftp->ftps_pc = (uintptr_t)symp->st_value; ftp->ftps_size = (size_t)symp->st_size; ftp->ftps_noffs = 1; if (strcmp("-", ftp->ftps_func) == 0) { ftp->ftps_offs[0] = off; } else { uint8_t *text; ulong_t i; int size; #ifdef illumos pid_t pid = Pstatus(P)->pr_pid; char dmodel = Pstatus(P)->pr_dmodel; #else pid_t pid = proc_getpid(P); -#if __i386__ - char dmodel = PR_MODEL_ILP32; -#elif __amd64__ - char dmodel = PR_MODEL_LP64; + char dmodel = proc_getmodel(P); #endif -#endif if ((text = malloc(symp->st_size)) == NULL) { dt_dprintf("mr sparkle: malloc() failed\n"); return (DT_PROC_ERR); } if (Pread(P, text, symp->st_size, symp->st_value) != symp->st_size) { dt_dprintf("mr sparkle: Pread() failed\n"); free(text); return (DT_PROC_ERR); } /* * We can't instrument offsets in functions with jump tables * as we might interpret a jump table offset as an * instruction. */ if (dt_pid_has_jump_table(P, dtp, text, ftp, symp)) { free(text); return (0); } for (i = 0; i < symp->st_size; i += size) { if (i == off) { ftp->ftps_offs[0] = i; break; } /* * If we've passed the desired offset without a * match, then the given offset must not lie on a * instruction boundary. */ if (i > off) { free(text); return (DT_PROC_ALIGN); } size = dt_instr_size(&text[i], dtp, pid, symp->st_value + i, dmodel); /* * If we hit an invalid instruction, bail as if we * couldn't find the offset. */ if (size <= 0) { free(text); return (DT_PROC_ALIGN); } } free(text); } if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { dt_dprintf("fasttrap probe creation ioctl failed: %s\n", strerror(errno)); return (dt_set_errno(dtp, errno)); } return (ftp->ftps_noffs); } /*ARGSUSED*/ int dt_pid_create_glob_offset_probes(struct ps_prochandle *P, dtrace_hdl_t *dtp, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, const char *pattern) { uint8_t *text; int size; ulong_t i, end = symp->st_size; #ifdef illumos pid_t pid = Pstatus(P)->pr_pid; char dmodel = Pstatus(P)->pr_dmodel; #else pid_t pid = proc_getpid(P); -#if __i386__ - char dmodel = PR_MODEL_ILP32; -#elif __amd64__ - char dmodel = PR_MODEL_LP64; -#endif + char dmodel = proc_getmodel(P); #endif ftp->ftps_type = DTFTP_OFFSETS; ftp->ftps_pc = (uintptr_t)symp->st_value; ftp->ftps_size = (size_t)symp->st_size; ftp->ftps_noffs = 0; if ((text = malloc(symp->st_size)) == NULL) { dt_dprintf("mr sparkle: malloc() failed\n"); return (DT_PROC_ERR); } if (Pread(P, text, symp->st_size, symp->st_value) != symp->st_size) { dt_dprintf("mr sparkle: Pread() failed\n"); free(text); return (DT_PROC_ERR); } /* * We can't instrument offsets in functions with jump tables as * we might interpret a jump table offset as an instruction. 
*/ if (dt_pid_has_jump_table(P, dtp, text, ftp, symp)) { free(text); return (0); } if (strcmp("*", pattern) == 0) { for (i = 0; i < end; i += size) { ftp->ftps_offs[ftp->ftps_noffs++] = i; size = dt_instr_size(&text[i], dtp, pid, symp->st_value + i, dmodel); /* bail if we hit an invalid opcode */ if (size <= 0) break; } } else { char name[sizeof (i) * 2 + 1]; for (i = 0; i < end; i += size) { (void) snprintf(name, sizeof (name), "%lx", i); if (gmatch(name, pattern)) ftp->ftps_offs[ftp->ftps_noffs++] = i; size = dt_instr_size(&text[i], dtp, pid, symp->st_value + i, dmodel); /* bail if we hit an invalid opcode */ if (size <= 0) break; } } free(text); if (ftp->ftps_noffs > 0) { if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { dt_dprintf("fasttrap probe creation ioctl failed: %s\n", strerror(errno)); return (dt_set_errno(dtp, errno)); } } return (ftp->ftps_noffs); } typedef struct dtrace_dis { uchar_t *instr; dtrace_hdl_t *dtp; pid_t pid; uintptr_t addr; } dtrace_dis_t; static int dt_getbyte(void *data) { dtrace_dis_t *dis = data; int ret = *dis->instr; if (ret == FASTTRAP_INSTR) { fasttrap_instr_query_t instr; instr.ftiq_pid = dis->pid; instr.ftiq_pc = dis->addr; /* * If we hit a byte that looks like the fasttrap provider's * trap instruction (which doubles as the breakpoint * instruction for debuggers) we need to query the kernel * for the real value. This may just be part of an immediate * value so there's no need to return an error if the * kernel doesn't know about this address. */ if (ioctl(dis->dtp->dt_ftfd, FASTTRAPIOC_GETINSTR, &instr) == 0) ret = instr.ftiq_instr; } dis->addr++; dis->instr++; return (ret); } static int dt_instr_size(uchar_t *instr, dtrace_hdl_t *dtp, pid_t pid, uintptr_t addr, char dmodel) { dtrace_dis_t data; dis86_t x86dis; uint_t cpu_mode; data.instr = instr; data.dtp = dtp; data.pid = pid; data.addr = addr; x86dis.d86_data = &data; x86dis.d86_get_byte = dt_getbyte; x86dis.d86_check_func = NULL; cpu_mode = (dmodel == PR_MODEL_ILP32) ? SIZE32 : SIZE64; if (dtrace_disx86(&x86dis, cpu_mode) != 0) return (-1); /* * If the instruction was a single-byte breakpoint, there may be * another debugger attached to this process. The original instruction * can't be recovered so this must fail. */ if (x86dis.d86_len == 1 && (uchar_t)x86dis.d86_bytes[0] == FASTTRAP_INSTR) return (-1); return (x86dis.d86_len); } Index: user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libzpool/common/util.c =================================================================== --- user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libzpool/common/util.c (revision 303641) +++ user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libzpool/common/util.c (revision 303642) @@ -1,155 +1,155 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ #include #include #include #include #include #include #include #include #include /* * Routines needed by more than one client of libzpool. */ void nicenum(uint64_t num, char *buf) { uint64_t n = num; int index = 0; char u; while (n >= 1024) { n = (n + (1024 / 2)) / 1024; /* Round up or down */ index++; } u = " KMGTPE"[index]; if (index == 0) { (void) sprintf(buf, "%llu", (u_longlong_t)n); } else if (n < 10 && (num & (num - 1)) != 0) { (void) sprintf(buf, "%.2f%c", (double)num / (1ULL << 10 * index), u); } else if (n < 100 && (num & (num - 1)) != 0) { (void) sprintf(buf, "%.1f%c", (double)num / (1ULL << 10 * index), u); } else { (void) sprintf(buf, "%llu%c", (u_longlong_t)n, u); } } static void show_vdev_stats(const char *desc, const char *ctype, nvlist_t *nv, int indent) { vdev_stat_t *vs; vdev_stat_t v0 = { 0 }; uint64_t sec; uint64_t is_log = 0; nvlist_t **child; uint_t c, children; char used[6], avail[6]; char rops[6], wops[6], rbytes[6], wbytes[6], rerr[6], werr[6], cerr[6]; char *prefix = ""; if (indent == 0 && desc != NULL) { (void) printf(" " " capacity operations bandwidth ---- errors ----\n"); (void) printf("description " "used avail read write read write read write cksum\n"); } if (desc != NULL) { (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log); if (is_log) prefix = "log "; if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) != 0) vs = &v0; sec = MAX(1, vs->vs_timestamp / NANOSEC); nicenum(vs->vs_alloc, used); nicenum(vs->vs_space - vs->vs_alloc, avail); nicenum(vs->vs_ops[ZIO_TYPE_READ] / sec, rops); nicenum(vs->vs_ops[ZIO_TYPE_WRITE] / sec, wops); nicenum(vs->vs_bytes[ZIO_TYPE_READ] / sec, rbytes); nicenum(vs->vs_bytes[ZIO_TYPE_WRITE] / sec, wbytes); nicenum(vs->vs_read_errors, rerr); nicenum(vs->vs_write_errors, werr); nicenum(vs->vs_checksum_errors, cerr); (void) printf("%*s%s%*s%*s%*s %5s %5s %5s %5s %5s %5s %5s\n", indent, "", prefix, - indent + strlen(prefix) - 25 - (vs->vs_space ? 0 : 12), + (int)(indent + strlen(prefix) - 25 - (vs->vs_space ? 0 : 12)), desc, vs->vs_space ? 6 : 0, vs->vs_space ? used : "", vs->vs_space ? 6 : 0, vs->vs_space ? 
avail : "", rops, wops, rbytes, wbytes, rerr, werr, cerr); } if (nvlist_lookup_nvlist_array(nv, ctype, &child, &children) != 0) return; for (c = 0; c < children; c++) { nvlist_t *cnv = child[c]; char *cname, *tname; uint64_t np; if (nvlist_lookup_string(cnv, ZPOOL_CONFIG_PATH, &cname) && nvlist_lookup_string(cnv, ZPOOL_CONFIG_TYPE, &cname)) cname = ""; tname = calloc(1, strlen(cname) + 2); (void) strcpy(tname, cname); if (nvlist_lookup_uint64(cnv, ZPOOL_CONFIG_NPARITY, &np) == 0) tname[strlen(tname)] = '0' + np; show_vdev_stats(tname, ctype, cnv, indent + 2); free(tname); } } void show_pool_stats(spa_t *spa) { nvlist_t *config, *nvroot; char *name; VERIFY(spa_get_stats(spa_name(spa), &config, NULL, 0) == 0); VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &name) == 0); show_vdev_stats(name, ZPOOL_CONFIG_CHILDREN, nvroot, 0); show_vdev_stats(NULL, ZPOOL_CONFIG_L2CACHE, nvroot, 0); show_vdev_stats(NULL, ZPOOL_CONFIG_SPARES, nvroot, 0); nvlist_free(config); } Index: user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris =================================================================== --- user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris (revision 303641) +++ user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris (revision 303642) Property changes on: user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl/contrib/opensolaris:r303205-303641 Index: user/alc/PQ_LAUNDRY/cddl =================================================================== --- user/alc/PQ_LAUNDRY/cddl (revision 303641) +++ user/alc/PQ_LAUNDRY/cddl (revision 303642) Property changes on: user/alc/PQ_LAUNDRY/cddl ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl:r303205-303641 Index: user/alc/PQ_LAUNDRY/contrib/blacklist/lib/bl.c =================================================================== --- user/alc/PQ_LAUNDRY/contrib/blacklist/lib/bl.c (revision 303641) +++ user/alc/PQ_LAUNDRY/contrib/blacklist/lib/bl.c (revision 303642) @@ -1,524 +1,527 @@ /* $NetBSD: bl.c,v 1.27 2015/12/30 16:42:48 christos Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Christos Zoulas. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include __RCSID("$NetBSD: bl.c,v 1.27 2015/12/30 16:42:48 christos Exp $"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _REENTRANT #include #endif #include "bl.h" typedef struct { uint32_t bl_len; uint32_t bl_version; uint32_t bl_type; uint32_t bl_salen; struct sockaddr_storage bl_ss; char bl_data[]; } bl_message_t; struct blacklist { #ifdef _REENTRANT pthread_mutex_t b_mutex; # define BL_INIT(b) pthread_mutex_init(&b->b_mutex, NULL) # define BL_LOCK(b) pthread_mutex_lock(&b->b_mutex) # define BL_UNLOCK(b) pthread_mutex_unlock(&b->b_mutex) #else # define BL_INIT(b) do {} while(/*CONSTCOND*/0) # define BL_LOCK(b) BL_INIT(b) # define BL_UNLOCK(b) BL_INIT(b) #endif int b_fd; int b_connected; struct sockaddr_un b_sun; void (*b_fun)(int, const char *, va_list); bl_info_t b_info; }; #define BL_VERSION 1 bool bl_isconnected(bl_t b) { return b->b_connected == 0; } int bl_getfd(bl_t b) { return b->b_fd; } static void bl_reset(bl_t b, bool locked) { int serrno = errno; if (!locked) BL_LOCK(b); close(b->b_fd); errno = serrno; b->b_fd = -1; b->b_connected = -1; if (!locked) BL_UNLOCK(b); } static void bl_log(void (*fun)(int, const char *, va_list), int level, const char *fmt, ...) { va_list ap; int serrno = errno; va_start(ap, fmt); (*fun)(level, fmt, ap); va_end(ap); errno = serrno; } static int bl_init(bl_t b, bool srv) { static int one = 1; /* AF_UNIX address of local logger */ mode_t om; int rv, serrno; struct sockaddr_un *sun = &b->b_sun; #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK 0 #endif #ifndef SOCK_CLOEXEC #define SOCK_CLOEXEC 0 #endif #ifndef SOCK_NOSIGPIPE #define SOCK_NOSIGPIPE 0 #endif BL_LOCK(b); if (b->b_fd == -1) { b->b_fd = socket(PF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK|SOCK_NOSIGPIPE, 0); if (b->b_fd == -1) { - bl_log(b->b_fun, LOG_ERR, "%s: socket failed (%m)", - __func__); + bl_log(b->b_fun, LOG_ERR, "%s: socket failed (%s)", + __func__, strerror(errno)); BL_UNLOCK(b); return -1; } #if SOCK_CLOEXEC == 0 fcntl(b->b_fd, F_SETFD, FD_CLOEXEC); #endif #if SOCK_NONBLOCK == 0 fcntl(b->b_fd, F_SETFL, fcntl(b->b_fd, F_GETFL) | O_NONBLOCK); #endif #if SOCK_NOSIGPIPE == 0 #ifdef SO_NOSIGPIPE int o = 1; setsockopt(b->b_fd, SOL_SOCKET, SO_NOSIGPIPE, &o, sizeof(o)); #else signal(SIGPIPE, SIG_IGN); #endif #endif } if (bl_isconnected(b)) { BL_UNLOCK(b); return 0; } /* * We try to connect anyway even when we are a server to verify * that no other server is listening to the socket. If we succeed * to connect and we are a server, someone else owns it. */ rv = connect(b->b_fd, (const void *)sun, (socklen_t)sizeof(*sun)); if (rv == 0) { if (srv) { bl_log(b->b_fun, LOG_ERR, "%s: another daemon is handling `%s'", __func__, sun->sun_path); goto out; } } else { if (!srv) { /* * If the daemon is not running, we just try a * connect, so leave the socket alone until it does * and only log once. 
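 *
 * (Editor's note, not part of the original source.)  The (%m) -> (%s) plus
 * strerror(errno) conversions throughout this file appear to be because %m
 * is a syslog(3) formatting extension: it is only expanded when b_fun ends
 * up calling vsyslog(3), not when the caller supplies its own
 * vfprintf(3)-style logger, so the errno text is now formatted explicitly.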
*/ if (b->b_connected != 1) { bl_log(b->b_fun, LOG_DEBUG, - "%s: connect failed for `%s' (%m)", - __func__, sun->sun_path); + "%s: connect failed for `%s' (%s)", + __func__, sun->sun_path, strerror(errno)); b->b_connected = 1; } BL_UNLOCK(b); return -1; } bl_log(b->b_fun, LOG_DEBUG, "Connected to blacklist server", __func__); } if (srv) { (void)unlink(sun->sun_path); om = umask(0); rv = bind(b->b_fd, (const void *)sun, (socklen_t)sizeof(*sun)); serrno = errno; (void)umask(om); errno = serrno; if (rv == -1) { bl_log(b->b_fun, LOG_ERR, - "%s: bind failed for `%s' (%m)", - __func__, sun->sun_path); + "%s: bind failed for `%s' (%s)", + __func__, sun->sun_path, strerror(errno)); goto out; } } b->b_connected = 0; #define GOT_FD 1 #if defined(LOCAL_CREDS) #define CRED_LEVEL 0 #define CRED_NAME LOCAL_CREDS #define CRED_SC_UID sc_euid #define CRED_SC_GID sc_egid #define CRED_MESSAGE SCM_CREDS #define CRED_SIZE SOCKCREDSIZE(NGROUPS_MAX) #define CRED_TYPE struct sockcred #define GOT_CRED 2 #elif defined(SO_PASSCRED) #define CRED_LEVEL SOL_SOCKET #define CRED_NAME SO_PASSCRED #define CRED_SC_UID uid #define CRED_SC_GID gid #define CRED_MESSAGE SCM_CREDENTIALS #define CRED_SIZE sizeof(struct ucred) #define CRED_TYPE struct ucred #define GOT_CRED 2 #else #define GOT_CRED 0 /* * getpeereid() and LOCAL_PEERCRED don't help here * because we are not a stream socket! */ #define CRED_SIZE 0 #define CRED_TYPE void * __unused #endif #ifdef CRED_LEVEL if (setsockopt(b->b_fd, CRED_LEVEL, CRED_NAME, &one, (socklen_t)sizeof(one)) == -1) { bl_log(b->b_fun, LOG_ERR, "%s: setsockopt %s " - "failed (%m)", __func__, __STRING(CRED_NAME)); + "failed (%s)", __func__, __STRING(CRED_NAME), + strerror(errno)); goto out; } #endif BL_UNLOCK(b); return 0; out: bl_reset(b, true); BL_UNLOCK(b); return -1; } bl_t bl_create(bool srv, const char *path, void (*fun)(int, const char *, va_list)) { bl_t b = calloc(1, sizeof(*b)); if (b == NULL) goto out; b->b_fun = fun == NULL ? vsyslog : fun; b->b_fd = -1; b->b_connected = -1; BL_INIT(b); memset(&b->b_sun, 0, sizeof(b->b_sun)); b->b_sun.sun_family = AF_LOCAL; #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN b->b_sun.sun_len = sizeof(b->b_sun); #endif strlcpy(b->b_sun.sun_path, path ? 
path : _PATH_BLSOCK, sizeof(b->b_sun.sun_path)); bl_init(b, srv); return b; out: free(b); - bl_log(fun, LOG_ERR, "%s: malloc failed (%m)", __func__); + bl_log(fun, LOG_ERR, "%s: malloc failed (%s)", __func__, + strerror(errno)); return NULL; } void bl_destroy(bl_t b) { bl_reset(b, false); free(b); } static int bl_getsock(bl_t b, struct sockaddr_storage *ss, const struct sockaddr *sa, socklen_t slen, const char *ctx) { uint8_t family; memset(ss, 0, sizeof(*ss)); switch (slen) { case 0: return 0; case sizeof(struct sockaddr_in): family = AF_INET; break; case sizeof(struct sockaddr_in6): family = AF_INET6; break; default: bl_log(b->b_fun, LOG_ERR, "%s: invalid socket len %u (%s)", __func__, (unsigned)slen, ctx); errno = EINVAL; return -1; } memcpy(ss, sa, slen); if (ss->ss_family != family) { bl_log(b->b_fun, LOG_INFO, "%s: correcting socket family %d to %d (%s)", __func__, ss->ss_family, family, ctx); ss->ss_family = family; } #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN if (ss->ss_len != slen) { bl_log(b->b_fun, LOG_INFO, "%s: correcting socket len %u to %u (%s)", __func__, ss->ss_len, (unsigned)slen, ctx); ss->ss_len = (uint8_t)slen; } #endif return 0; } int bl_send(bl_t b, bl_type_t e, int pfd, const struct sockaddr *sa, socklen_t slen, const char *ctx) { struct msghdr msg; struct iovec iov; union { char ctrl[CMSG_SPACE(sizeof(int))]; uint32_t fd; } ua; struct cmsghdr *cmsg; union { bl_message_t bl; char buf[512]; } ub; size_t ctxlen, tried; #define NTRIES 5 ctxlen = strlen(ctx); if (ctxlen > 128) ctxlen = 128; iov.iov_base = ub.buf; iov.iov_len = sizeof(bl_message_t) + ctxlen; ub.bl.bl_len = (uint32_t)iov.iov_len; ub.bl.bl_version = BL_VERSION; ub.bl.bl_type = (uint32_t)e; if (bl_getsock(b, &ub.bl.bl_ss, sa, slen, ctx) == -1) return -1; ub.bl.bl_salen = slen; memcpy(ub.bl.bl_data, ctx, ctxlen); msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_flags = 0; msg.msg_control = ua.ctrl; msg.msg_controllen = sizeof(ua.ctrl); cmsg = CMSG_FIRSTHDR(&msg); cmsg->cmsg_len = CMSG_LEN(sizeof(int)); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; memcpy(CMSG_DATA(cmsg), &pfd, sizeof(pfd)); tried = 0; again: if (bl_init(b, false) == -1) return -1; if ((sendmsg(b->b_fd, &msg, 0) == -1) && tried++ < NTRIES) { bl_reset(b, false); goto again; } return tried >= NTRIES ? 
-1 : 0; } bl_info_t * bl_recv(bl_t b) { struct msghdr msg; struct iovec iov; union { char ctrl[CMSG_SPACE(sizeof(int)) + CMSG_SPACE(CRED_SIZE)]; uint32_t fd; CRED_TYPE sc; } ua; struct cmsghdr *cmsg; CRED_TYPE *sc; union { bl_message_t bl; char buf[512]; } ub; int got; ssize_t rlen; bl_info_t *bi = &b->b_info; got = 0; memset(bi, 0, sizeof(*bi)); iov.iov_base = ub.buf; iov.iov_len = sizeof(ub); msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_flags = 0; msg.msg_control = ua.ctrl; msg.msg_controllen = sizeof(ua.ctrl) + 100; rlen = recvmsg(b->b_fd, &msg, 0); if (rlen == -1) { - bl_log(b->b_fun, LOG_ERR, "%s: recvmsg failed (%m)", __func__); + bl_log(b->b_fun, LOG_ERR, "%s: recvmsg failed (%s)", __func__, + strerror(errno)); return NULL; } for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { if (cmsg->cmsg_level != SOL_SOCKET) { bl_log(b->b_fun, LOG_ERR, "%s: unexpected cmsg_level %d", __func__, cmsg->cmsg_level); continue; } switch (cmsg->cmsg_type) { case SCM_RIGHTS: if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { bl_log(b->b_fun, LOG_ERR, "%s: unexpected cmsg_len %d != %zu", __func__, cmsg->cmsg_len, CMSG_LEN(2 * sizeof(int))); continue; } memcpy(&bi->bi_fd, CMSG_DATA(cmsg), sizeof(bi->bi_fd)); got |= GOT_FD; break; #ifdef CRED_MESSAGE case CRED_MESSAGE: sc = (void *)CMSG_DATA(cmsg); bi->bi_uid = sc->CRED_SC_UID; bi->bi_gid = sc->CRED_SC_GID; got |= GOT_CRED; break; #endif default: bl_log(b->b_fun, LOG_ERR, "%s: unexpected cmsg_type %d", __func__, cmsg->cmsg_type); continue; } } if (got != (GOT_CRED|GOT_FD)) { bl_log(b->b_fun, LOG_ERR, "message missing %s %s", #if GOT_CRED != 0 (got & GOT_CRED) == 0 ? "cred" : #endif "", (got & GOT_FD) == 0 ? "fd" : ""); return NULL; } if ((size_t)rlen <= sizeof(ub.bl)) { bl_log(b->b_fun, LOG_ERR, "message too short %zd", rlen); return NULL; } if (ub.bl.bl_version != BL_VERSION) { bl_log(b->b_fun, LOG_ERR, "bad version %d", ub.bl.bl_version); return NULL; } bi->bi_type = ub.bl.bl_type; bi->bi_slen = ub.bl.bl_salen; bi->bi_ss = ub.bl.bl_ss; #ifndef CRED_MESSAGE bi->bi_uid = -1; bi->bi_gid = -1; #endif strlcpy(bi->bi_msg, ub.bl.bl_data, MIN(sizeof(bi->bi_msg), ((size_t)rlen - sizeof(ub.bl) + 1))); return bi; } Index: user/alc/PQ_LAUNDRY/contrib/libexecinfo/backtrace.c =================================================================== --- user/alc/PQ_LAUNDRY/contrib/libexecinfo/backtrace.c (revision 303641) +++ user/alc/PQ_LAUNDRY/contrib/libexecinfo/backtrace.c (revision 303642) @@ -1,250 +1,249 @@ /* $NetBSD: backtrace.c,v 1.3 2013/08/29 14:58:56 christos Exp $ */ /*- * Copyright (c) 2012 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Christos Zoulas. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __RCSID("$NetBSD: backtrace.c,v 1.3 2013/08/29 14:58:56 christos Exp $"); #include #include -#define _WITH_DPRINTF #include #include #include #include #include #include #include #include #include #include #include "execinfo.h" #include "symtab.h" #ifdef __linux__ #define SELF "/proc/self/exe" #else #include #define SELF "/proc/curproc/file" #endif static int open_self(int flags) { const char *pathname = SELF; #ifdef KERN_PROC_PATHNAME static const int name[] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1, }; char path[MAXPATHLEN]; size_t len; len = sizeof(path); if (sysctl(name, 4, path, &len, NULL, 0) != -1) pathname = path; #endif return open(pathname, flags); } static int __printflike(4, 5) rasprintf(char **buf, size_t *bufsiz, size_t offs, const char *fmt, ...) { for (;;) { size_t nbufsiz; char *nbuf; if (*buf && offs < *bufsiz) { va_list ap; int len; va_start(ap, fmt); len = vsnprintf(*buf + offs, *bufsiz - offs, fmt, ap); va_end(ap); if (len < 0 || (size_t)len + 1 < *bufsiz - offs) return len; nbufsiz = MAX(*bufsiz + 512, (size_t)len + 1); } else nbufsiz = MAX(offs, *bufsiz) + 512; nbuf = realloc(*buf, nbufsiz); if (nbuf == NULL) return -1; *buf = nbuf; *bufsiz = nbufsiz; } } /* * format specifiers: * %a = address * %n = symbol_name * %d = symbol_address - address * %D = if symbol_address == address "" else +%d * %f = filename */ static ssize_t format_string(char **buf, size_t *bufsiz, size_t offs, const char *fmt, Dl_info *dli, const void *addr) { ptrdiff_t diff = (const char *)addr - (const char *)dli->dli_saddr; size_t o = offs; int len; for (; *fmt; fmt++) { if (*fmt != '%') goto printone; switch (*++fmt) { case 'a': len = rasprintf(buf, bufsiz, o, "%p", addr); break; case 'n': len = rasprintf(buf, bufsiz, o, "%s", dli->dli_sname); break; case 'D': if (diff) len = rasprintf(buf, bufsiz, o, "+0x%tx", diff); else len = 0; break; case 'd': len = rasprintf(buf, bufsiz, o, "0x%tx", diff); break; case 'f': len = rasprintf(buf, bufsiz, o, "%s", dli->dli_fname); break; default: printone: len = rasprintf(buf, bufsiz, o, "%c", *fmt); break; } if (len == -1) return -1; o += len; } return o - offs; } static ssize_t format_address(symtab_t *st, char **buf, size_t *bufsiz, size_t offs, const char *fmt, const void *addr) { Dl_info dli; memset(&dli, 0, sizeof(dli)); (void)dladdr(addr, &dli); if (st) symtab_find(st, addr, &dli); if (dli.dli_sname == NULL) dli.dli_sname = "???"; if (dli.dli_fname == NULL) dli.dli_fname = "???"; if (dli.dli_saddr == NULL) dli.dli_saddr = (void *)(intptr_t)addr; return format_string(buf, bufsiz, offs, fmt, &dli, addr); } char ** backtrace_symbols_fmt(void *const *trace, size_t len, const char *fmt) { static const size_t slen = sizeof(char *) + 64; /* estimate */ char *ptr; symtab_t *st; int fd; if ((fd = open_self(O_RDONLY)) != -1) st = symtab_create(fd, -1, STT_FUNC); else st = NULL; if ((ptr = calloc(len, slen)) == NULL) goto out; size_t psize = len * slen; size_t offs = len * sizeof(char *); /* We store only offsets in the first pass 
because of realloc */ for (size_t i = 0; i < len; i++) { ssize_t x; ((char **)(void *)ptr)[i] = (void *)offs; x = format_address(st, &ptr, &psize, offs, fmt, trace[i]); if (x == -1) { free(ptr); ptr = NULL; goto out; } offs += x; ptr[offs++] = '\0'; assert(offs < psize); } /* Change offsets to pointers */ for (size_t j = 0; j < len; j++) ((char **)(void *)ptr)[j] += (intptr_t)ptr; out: symtab_destroy(st); if (fd != -1) (void)close(fd); return (void *)ptr; } int backtrace_symbols_fd_fmt(void *const *trace, size_t len, int fd, const char *fmt) { char **s = backtrace_symbols_fmt(trace, len, fmt); if (s == NULL) return -1; for (size_t i = 0; i < len; i++) if (dprintf(fd, "%s\n", s[i]) < 0) break; free(s); return 0; } static const char fmt[] = "%a <%n%D> at %f"; char ** backtrace_symbols(void *const *trace, size_t len) { return backtrace_symbols_fmt(trace, len, fmt); } int backtrace_symbols_fd(void *const *trace, size_t len, int fd) { return backtrace_symbols_fd_fmt(trace, len, fd, fmt); } Index: user/alc/PQ_LAUNDRY/contrib/libexecinfo =================================================================== --- user/alc/PQ_LAUNDRY/contrib/libexecinfo (revision 303641) +++ user/alc/PQ_LAUNDRY/contrib/libexecinfo (revision 303642) Property changes on: user/alc/PQ_LAUNDRY/contrib/libexecinfo ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/libexecinfo:r293456-303641 Index: user/alc/PQ_LAUNDRY/contrib/llvm/projects/libunwind/src/Unwind-EHABI.cpp =================================================================== --- user/alc/PQ_LAUNDRY/contrib/llvm/projects/libunwind/src/Unwind-EHABI.cpp (revision 303641) +++ user/alc/PQ_LAUNDRY/contrib/llvm/projects/libunwind/src/Unwind-EHABI.cpp (revision 303642) @@ -1,977 +1,977 @@ //===--------------------------- Unwind-EHABI.cpp -------------------------===// // // The LLVM Compiler Infrastructure // // This file is dual licensed under the MIT and the University of Illinois Open // Source Licenses. See LICENSE.TXT for details. // // // Implements ARM zero-cost C++ exceptions // //===----------------------------------------------------------------------===// #include "Unwind-EHABI.h" #if _LIBUNWIND_ARM_EHABI #include #include #include #include #include #include #include "config.h" #include "libunwind.h" #include "libunwind_ext.h" #include "unwind.h" namespace { // Strange order: take words in order, but inside word, take from most to least // signinficant byte. 
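// (Editor's note, illustrative only -- not part of the original source.)
// Concretely, for unwind data words[] = { 0x80a1b2c3, 0x04050607 } on a
// little-endian target, getByte(words, 0), getByte(words, 1), ... return
// 0x80, 0xa1, 0xb2, 0xc3, 0x04, 0x05, 0x06, 0x07: the words are taken in
// order, and each word is walked from its most significant byte down.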
uint8_t getByte(const uint32_t* data, size_t offset) { const uint8_t* byteData = reinterpret_cast(data); return byteData[(offset & ~(size_t)0x03) + (3 - (offset & (size_t)0x03))]; } const char* getNextWord(const char* data, uint32_t* out) { *out = *reinterpret_cast(data); return data + 4; } const char* getNextNibble(const char* data, uint32_t* out) { *out = *reinterpret_cast(data); return data + 2; } struct Descriptor { // See # 9.2 typedef enum { SU16 = 0, // Short descriptor, 16-bit entries LU16 = 1, // Long descriptor, 16-bit entries LU32 = 3, // Long descriptor, 32-bit entries RESERVED0 = 4, RESERVED1 = 5, RESERVED2 = 6, RESERVED3 = 7, RESERVED4 = 8, RESERVED5 = 9, RESERVED6 = 10, RESERVED7 = 11, RESERVED8 = 12, RESERVED9 = 13, RESERVED10 = 14, RESERVED11 = 15 } Format; // See # 9.2 typedef enum { CLEANUP = 0x0, FUNC = 0x1, CATCH = 0x2, INVALID = 0x4 } Kind; }; _Unwind_Reason_Code ProcessDescriptors( _Unwind_State state, _Unwind_Control_Block* ucbp, struct _Unwind_Context* context, Descriptor::Format format, const char* descriptorStart, uint32_t flags) { // EHT is inlined in the index using compact form. No descriptors. #5 if (flags & 0x1) return _URC_CONTINUE_UNWIND; // TODO: We should check the state here, and determine whether we need to // perform phase1 or phase2 unwinding. (void)state; const char* descriptor = descriptorStart; uint32_t descriptorWord; getNextWord(descriptor, &descriptorWord); while (descriptorWord) { // Read descriptor based on # 9.2. uint32_t length; uint32_t offset; switch (format) { case Descriptor::LU32: descriptor = getNextWord(descriptor, &length); descriptor = getNextWord(descriptor, &offset); case Descriptor::LU16: descriptor = getNextNibble(descriptor, &length); descriptor = getNextNibble(descriptor, &offset); default: assert(false); return _URC_FAILURE; } // See # 9.2 table for decoding the kind of descriptor. It's a 2-bit value. Descriptor::Kind kind = static_cast((length & 0x1) | ((offset & 0x1) << 1)); // Clear off flag from last bit. length &= ~1u; offset &= ~1u; uintptr_t scopeStart = ucbp->pr_cache.fnstart + offset; uintptr_t scopeEnd = scopeStart + length; uintptr_t pc = _Unwind_GetIP(context); bool isInScope = (scopeStart <= pc) && (pc < scopeEnd); switch (kind) { case Descriptor::CLEANUP: { // TODO(ajwong): Handle cleanup descriptors. break; } case Descriptor::FUNC: { // TODO(ajwong): Handle function descriptors. break; } case Descriptor::CATCH: { // Catch descriptors require gobbling one more word. uint32_t landing_pad; descriptor = getNextWord(descriptor, &landing_pad); if (isInScope) { // TODO(ajwong): This is only phase1 compatible logic. Implement // phase2. 
landing_pad = signExtendPrel31(landing_pad & ~0x80000000); if (landing_pad == 0xffffffff) { return _URC_HANDLER_FOUND; } else if (landing_pad == 0xfffffffe) { return _URC_FAILURE; } else { /* bool is_reference_type = landing_pad & 0x80000000; void* matched_object; if (__cxxabiv1::__cxa_type_match( ucbp, reinterpret_cast(landing_pad), is_reference_type, &matched_object) != __cxxabiv1::ctm_failed) return _URC_HANDLER_FOUND; */ _LIBUNWIND_ABORT("Type matching not implemented"); } } break; } default: _LIBUNWIND_ABORT("Invalid descriptor kind found."); } getNextWord(descriptor, &descriptorWord); } return _URC_CONTINUE_UNWIND; } static _Unwind_Reason_Code unwindOneFrame(_Unwind_State state, _Unwind_Control_Block* ucbp, struct _Unwind_Context* context) { // Read the compact model EHT entry's header # 6.3 const uint32_t* unwindingData = ucbp->pr_cache.ehtp; assert((*unwindingData & 0xf0000000) == 0x80000000 && "Must be a compact entry"); Descriptor::Format format = static_cast((*unwindingData & 0x0f000000) >> 24); const char *lsda = reinterpret_cast(_Unwind_GetLanguageSpecificData(context)); // Handle descriptors before unwinding so they are processed in the context // of the correct stack frame. _Unwind_Reason_Code result = ProcessDescriptors(state, ucbp, context, format, lsda, ucbp->pr_cache.additional); if (result != _URC_CONTINUE_UNWIND) return result; if (unw_step(reinterpret_cast(context)) != UNW_STEP_SUCCESS) return _URC_FAILURE; return _URC_CONTINUE_UNWIND; } // Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_CORE / // _UVRSD_UINT32. uint32_t RegisterMask(uint8_t start, uint8_t count_minus_one) { return ((1U << (count_minus_one + 1)) - 1) << start; } // Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_VFP / // _UVRSD_DOUBLE. uint32_t RegisterRange(uint8_t start, uint8_t count_minus_one) { return ((uint32_t)start << 16) | ((uint32_t)count_minus_one + 1); } } // end anonymous namespace /** * Decodes an EHT entry. * * @param data Pointer to EHT. * @param[out] off Offset from return value (in bytes) to begin interpretation. * @param[out] len Number of bytes in unwind code. * @return Pointer to beginning of unwind code. */ extern "C" const uint32_t* decode_eht_entry(const uint32_t* data, size_t* off, size_t* len) { if ((*data & 0x80000000) == 0) { // 6.2: Generic Model // // EHT entry is a prel31 pointing to the PR, followed by data understood // only by the personality routine. Fortunately, all existing assembler // implementations, including GNU assembler, LLVM integrated assembler, // and ARM assembler, assume that the unwind opcodes come after the // personality rountine address. *off = 1; // First byte is size data. *len = (((data[1] >> 24) & 0xff) + 1) * 4; data++; // Skip the first word, which is the prel31 offset. 
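    // (Editor's note, illustrative only -- not part of the original source.)
    // E.g. a size byte of 3 makes this branch return data + 1 with *off = 1
    // and *len = (3 + 1) * 4 = 16: the word carrying the size byte plus three
    // more opcode words, interpretation starting just past the size byte.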
} else { // 6.3: ARM Compact Model // // EHT entries here correspond to the __aeabi_unwind_cpp_pr[012] PRs indeded // by format: Descriptor::Format format = static_cast((*data & 0x0f000000) >> 24); switch (format) { case Descriptor::SU16: *len = 4; *off = 1; break; case Descriptor::LU16: case Descriptor::LU32: *len = 4 + 4 * ((*data & 0x00ff0000) >> 16); *off = 2; break; default: return nullptr; } } return data; } _Unwind_Reason_Code _Unwind_VRS_Interpret( _Unwind_Context* context, const uint32_t* data, size_t offset, size_t len) { bool wrotePC = false; bool finish = false; while (offset < len && !finish) { uint8_t byte = getByte(data, offset++); if ((byte & 0x80) == 0) { uint32_t sp; _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); if (byte & 0x40) sp -= (((uint32_t)byte & 0x3f) << 2) + 4; else sp += ((uint32_t)byte << 2) + 4; _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); } else { switch (byte & 0xf0) { case 0x80: { if (offset >= len) return _URC_FAILURE; uint32_t registers = (((uint32_t)byte & 0x0f) << 12) | (((uint32_t)getByte(data, offset++)) << 4); if (!registers) return _URC_FAILURE; if (registers & (1 << 15)) wrotePC = true; _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); break; } case 0x90: { uint8_t reg = byte & 0x0f; if (reg == 13 || reg == 15) return _URC_FAILURE; uint32_t sp; _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_R0 + reg, _UVRSD_UINT32, &sp); _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); break; } case 0xa0: { uint32_t registers = RegisterMask(4, byte & 0x07); if (byte & 0x08) registers |= 1 << 14; _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); break; } case 0xb0: { switch (byte) { case 0xb0: finish = true; break; case 0xb1: { if (offset >= len) return _URC_FAILURE; uint8_t registers = getByte(data, offset++); if (registers & 0xf0 || !registers) return _URC_FAILURE; _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); break; } case 0xb2: { uint32_t addend = 0; uint32_t shift = 0; // This decodes a uleb128 value. while (true) { if (offset >= len) return _URC_FAILURE; uint32_t v = getByte(data, offset++); addend |= (v & 0x7f) << shift; if ((v & 0x80) == 0) break; shift += 7; } uint32_t sp; _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); sp += 0x204 + (addend << 2); _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); break; } case 0xb3: { uint8_t v = getByte(data, offset++); _Unwind_VRS_Pop(context, _UVRSC_VFP, RegisterRange(static_cast(v >> 4), v & 0x0f), _UVRSD_VFPX); break; } case 0xb4: case 0xb5: case 0xb6: case 0xb7: return _URC_FAILURE; default: _Unwind_VRS_Pop(context, _UVRSC_VFP, RegisterRange(8, byte & 0x07), _UVRSD_VFPX); break; } break; } case 0xc0: { switch (byte) { case 0xc0: case 0xc1: case 0xc2: case 0xc3: case 0xc4: case 0xc5: _Unwind_VRS_Pop(context, _UVRSC_WMMXD, RegisterRange(10, byte & 0x7), _UVRSD_DOUBLE); break; case 0xc6: { uint8_t v = getByte(data, offset++); uint8_t start = static_cast(v >> 4); uint8_t count_minus_one = v & 0xf; if (start + count_minus_one >= 16) return _URC_FAILURE; _Unwind_VRS_Pop(context, _UVRSC_WMMXD, RegisterRange(start, count_minus_one), _UVRSD_DOUBLE); break; } case 0xc7: { uint8_t v = getByte(data, offset++); if (!v || v & 0xf0) return _URC_FAILURE; _Unwind_VRS_Pop(context, _UVRSC_WMMXC, v, _UVRSD_DOUBLE); break; } case 0xc8: case 0xc9: { uint8_t v = getByte(data, offset++); uint8_t start = static_cast(((byte == 0xc8) ? 
16 : 0) + (v >> 4)); uint8_t count_minus_one = v & 0xf; if (start + count_minus_one >= 32) return _URC_FAILURE; _Unwind_VRS_Pop(context, _UVRSC_VFP, RegisterRange(start, count_minus_one), _UVRSD_DOUBLE); break; } default: return _URC_FAILURE; } break; } case 0xd0: { if (byte & 0x08) return _URC_FAILURE; _Unwind_VRS_Pop(context, _UVRSC_VFP, RegisterRange(8, byte & 0x7), _UVRSD_DOUBLE); break; } default: return _URC_FAILURE; } } } if (!wrotePC) { uint32_t lr; _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_LR, _UVRSD_UINT32, &lr); _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_IP, _UVRSD_UINT32, &lr); } return _URC_CONTINUE_UNWIND; } extern "C" _Unwind_Reason_Code __aeabi_unwind_cpp_pr0( _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context) { return unwindOneFrame(state, ucbp, context); } extern "C" _Unwind_Reason_Code __aeabi_unwind_cpp_pr1( _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context) { return unwindOneFrame(state, ucbp, context); } extern "C" _Unwind_Reason_Code __aeabi_unwind_cpp_pr2( _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context) { return unwindOneFrame(state, ucbp, context); } static _Unwind_Reason_Code unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) { // EHABI #7.3 discusses preserving the VRS in a "temporary VRS" during // phase 1 and then restoring it to the "primary VRS" for phase 2. The // effect is phase 2 doesn't see any of the VRS manipulations from phase 1. // In this implementation, the phases don't share the VRS backing store. // Instead, they are passed the original |uc| and they create a new VRS // from scratch thus achieving the same effect. unw_init_local(cursor, uc); // Walk each frame looking for a place to stop. for (bool handlerNotFound = true; handlerNotFound;) { // See if frame has code to run (has personality routine). unw_proc_info_t frameInfo; if (unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): unw_get_proc_info " "failed => _URC_FATAL_PHASE1_ERROR\n", static_cast(exception_object)); return _URC_FATAL_PHASE1_ERROR; } // When tracing, print state information. if (_LIBUNWIND_TRACING_UNWINDING) { char functionBuf[512]; const char *functionName = functionBuf; unw_word_t offset; if ((unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), &offset) != UNW_ESUCCESS) || (frameInfo.start_ip + offset > frameInfo.end_ip)) functionName = ".anonymous."; unw_word_t pc; unw_get_reg(cursor, UNW_REG_IP, &pc); _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): pc=0x%llX, start_ip=0x%llX, func=%s, " "lsda=0x%llX, personality=0x%llX\n", static_cast(exception_object), (long long)pc, (long long)frameInfo.start_ip, functionName, (long long)frameInfo.lsda, (long long)frameInfo.handler); } // If there is a personality routine, ask it if it will want to stop at // this frame. 
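    // (Editor's note, not part of the original source.)  For ARM compact-model
    // index entries frameInfo.handler resolves to one of the
    // __aeabi_unwind_cpp_pr[012] routines defined above; generic-model entries
    // instead carry a prel31 pointer to their own personality routine (for C++
    // typically __gxx_personality_v0), and that is the function invoked below.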
if (frameInfo.handler != 0) { __personality_routine p = (__personality_routine)(long)(frameInfo.handler); _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): calling personality function %p\n", static_cast(exception_object), reinterpret_cast(reinterpret_cast(p))); struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor); exception_object->pr_cache.fnstart = frameInfo.start_ip; exception_object->pr_cache.ehtp = (_Unwind_EHT_Header *)frameInfo.unwind_info; exception_object->pr_cache.additional = frameInfo.flags; _Unwind_Reason_Code personalityResult = (*p)(_US_VIRTUAL_UNWIND_FRAME, exception_object, context); _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): personality result %d start_ip %x ehtp %p " "additional %x\n", static_cast(exception_object), personalityResult, exception_object->pr_cache.fnstart, static_cast(exception_object->pr_cache.ehtp), exception_object->pr_cache.additional); switch (personalityResult) { case _URC_HANDLER_FOUND: // found a catch clause or locals that need destructing in this frame // stop search and remember stack pointer at the frame handlerNotFound = false; // p should have initialized barrier_cache. EHABI #7.3.5 _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): _URC_HANDLER_FOUND \n", static_cast(exception_object)); return _URC_NO_REASON; case _URC_CONTINUE_UNWIND: _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): _URC_CONTINUE_UNWIND\n", static_cast(exception_object)); // continue unwinding break; // EHABI #7.3.3 case _URC_FAILURE: return _URC_FAILURE; default: // something went wrong _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR\n", static_cast(exception_object)); return _URC_FATAL_PHASE1_ERROR; } } } return _URC_NO_REASON; } static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object, bool resume) { // See comment at the start of unwind_phase1 regarding VRS integrity. unw_init_local(cursor, uc); _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)\n", static_cast(exception_object)); int frame_count = 0; // Walk each frame until we reach where search phase said to stop. while (true) { // Ask libuwind to get next frame (skip over first which is // _Unwind_RaiseException or _Unwind_Resume). // // Resume only ever makes sense for 1 frame. _Unwind_State state = resume ? _US_UNWIND_FRAME_RESUME : _US_UNWIND_FRAME_STARTING; if (resume && frame_count == 1) { // On a resume, first unwind the _Unwind_Resume() frame. The next frame // is now the landing pad for the cleanup from a previous execution of // phase2. To continue unwindingly correctly, replace VRS[15] with the // IP of the frame that the previous run of phase2 installed the context // for. After this, continue unwinding as if normal. // // See #7.4.6 for details. unw_set_reg(cursor, UNW_REG_IP, exception_object->unwinder_cache.reserved2); resume = false; } // Get info about this frame. unw_word_t sp; unw_proc_info_t frameInfo; unw_get_reg(cursor, UNW_REG_SP, &sp); if (unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): unw_get_proc_info " - "failed => _URC_FATAL_PHASE1_ERROR\n", + "failed => _URC_FATAL_PHASE2_ERROR\n", static_cast(exception_object)); return _URC_FATAL_PHASE2_ERROR; } // When tracing, print state information. 
if (_LIBUNWIND_TRACING_UNWINDING) { char functionBuf[512]; const char *functionName = functionBuf; unw_word_t offset; if ((unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), &offset) != UNW_ESUCCESS) || (frameInfo.start_ip + offset > frameInfo.end_ip)) functionName = ".anonymous."; _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): start_ip=0x%llX, func=%s, sp=0x%llX, " "lsda=0x%llX, personality=0x%llX\n", static_cast(exception_object), (long long)frameInfo.start_ip, functionName, (long long)sp, (long long)frameInfo.lsda, (long long)frameInfo.handler); } // If there is a personality routine, tell it we are unwinding. if (frameInfo.handler != 0) { __personality_routine p = (__personality_routine)(long)(frameInfo.handler); struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor); // EHABI #7.2 exception_object->pr_cache.fnstart = frameInfo.start_ip; exception_object->pr_cache.ehtp = (_Unwind_EHT_Header *)frameInfo.unwind_info; exception_object->pr_cache.additional = frameInfo.flags; _Unwind_Reason_Code personalityResult = (*p)(state, exception_object, context); switch (personalityResult) { case _URC_CONTINUE_UNWIND: // Continue unwinding _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND\n", static_cast(exception_object)); // EHABI #7.2 if (sp == exception_object->barrier_cache.sp) { // Phase 1 said we would stop at this frame, but we did not... _LIBUNWIND_ABORT("during phase1 personality function said it would " "stop here, but now in phase2 it did not stop here"); } break; case _URC_INSTALL_CONTEXT: _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): _URC_INSTALL_CONTEXT\n", static_cast(exception_object)); // Personality routine says to transfer control to landing pad. // We may get control back if landing pad calls _Unwind_Resume(). if (_LIBUNWIND_TRACING_UNWINDING) { unw_word_t pc; unw_get_reg(cursor, UNW_REG_IP, &pc); unw_get_reg(cursor, UNW_REG_SP, &sp); _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering " "user code with ip=0x%llX, sp=0x%llX\n", static_cast(exception_object), (long long)pc, (long long)sp); } { // EHABI #7.4.1 says we need to preserve pc for when _Unwind_Resume // is called back, to find this same frame. unw_word_t pc; unw_get_reg(cursor, UNW_REG_IP, &pc); exception_object->unwinder_cache.reserved2 = (uint32_t)pc; } unw_resume(cursor); // unw_resume() only returns if there was an error. return _URC_FATAL_PHASE2_ERROR; // # EHABI #7.4.3 case _URC_FAILURE: abort(); default: // Personality routine returned an unknown result code. _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d", personalityResult); return _URC_FATAL_PHASE2_ERROR; } } frame_count++; } // Clean up phase did not resume at the frame that the search phase // said it would... return _URC_FATAL_PHASE2_ERROR; } /// Called by __cxa_throw. Only returns if there is a fatal error. _LIBUNWIND_EXPORT _Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_RaiseException(ex_obj=%p)\n", static_cast(exception_object)); unw_context_t uc; unw_cursor_t cursor; unw_getcontext(&uc); // This field for is for compatibility with GCC to say this isn't a forced // unwind. 
EHABI #7.2 exception_object->unwinder_cache.reserved1 = 0; // phase 1: the search phase _Unwind_Reason_Code phase1 = unwind_phase1(&uc, &cursor, exception_object); if (phase1 != _URC_NO_REASON) return phase1; // phase 2: the clean up phase return unwind_phase2(&uc, &cursor, exception_object, false); } _LIBUNWIND_EXPORT void _Unwind_Complete(_Unwind_Exception* exception_object) { // This is to be called when exception handling completes to give us a chance // to perform any housekeeping. EHABI #7.2. But we have nothing to do here. (void)exception_object; } /// When _Unwind_RaiseException() is in phase2, it hands control /// to the personality function at each frame. The personality /// may force a jump to a landing pad in that function, the landing /// pad code may then call _Unwind_Resume() to continue with the /// unwinding. Note: the call to _Unwind_Resume() is from compiler /// geneated user code. All other _Unwind_* routines are called /// by the C++ runtime __cxa_* routines. /// /// Note: re-throwing an exception (as opposed to continuing the unwind) /// is implemented by having the code call __cxa_rethrow() which /// in turn calls _Unwind_Resume_or_Rethrow(). _LIBUNWIND_EXPORT void _Unwind_Resume(_Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_Resume(ex_obj=%p)\n", static_cast(exception_object)); unw_context_t uc; unw_cursor_t cursor; unw_getcontext(&uc); // _Unwind_RaiseException on EHABI will always set the reserved1 field to 0, // which is in the same position as private_1 below. // TODO(ajwong): Who wronte the above? Why is it true? unwind_phase2(&uc, &cursor, exception_object, true); // Clients assume _Unwind_Resume() does not return, so all we can do is abort. _LIBUNWIND_ABORT("_Unwind_Resume() can't return"); } /// Called by personality handler during phase 2 to get LSDA for current frame. _LIBUNWIND_EXPORT uintptr_t _Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { unw_cursor_t *cursor = (unw_cursor_t *)context; unw_proc_info_t frameInfo; uintptr_t result = 0; if (unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) result = (uintptr_t)frameInfo.lsda; _LIBUNWIND_TRACE_API( "_Unwind_GetLanguageSpecificData(context=%p) => 0x%llx\n", static_cast(context), (long long)result); return result; } static uint64_t ValueAsBitPattern(_Unwind_VRS_DataRepresentation representation, void* valuep) { uint64_t value = 0; switch (representation) { case _UVRSD_UINT32: case _UVRSD_FLOAT: memcpy(&value, valuep, sizeof(uint32_t)); break; case _UVRSD_VFPX: case _UVRSD_UINT64: case _UVRSD_DOUBLE: memcpy(&value, valuep, sizeof(uint64_t)); break; } return value; } _Unwind_VRS_Result _Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, uint32_t regno, _Unwind_VRS_DataRepresentation representation, void *valuep) { _LIBUNWIND_TRACE_API("_Unwind_VRS_Set(context=%p, regclass=%d, reg=%d, " "rep=%d, value=0x%llX)\n", static_cast(context), regclass, regno, representation, ValueAsBitPattern(representation, valuep)); unw_cursor_t *cursor = (unw_cursor_t *)context; switch (regclass) { case _UVRSC_CORE: if (representation != _UVRSD_UINT32 || regno > 15) return _UVRSR_FAILED; return unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno), *(unw_word_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; case _UVRSC_WMMXC: if (representation != _UVRSD_UINT32 || regno > 3) return _UVRSR_FAILED; return unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_WC0 + regno), *(unw_word_t *)valuep) == UNW_ESUCCESS ? 
_UVRSR_OK : _UVRSR_FAILED; case _UVRSC_VFP: if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) return _UVRSR_FAILED; if (representation == _UVRSD_VFPX) { // Can only touch d0-15 with FSTMFDX. if (regno > 15) return _UVRSR_FAILED; unw_save_vfp_as_X(cursor); } else { if (regno > 31) return _UVRSR_FAILED; } return unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno), *(unw_fpreg_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; case _UVRSC_WMMXD: if (representation != _UVRSD_DOUBLE || regno > 31) return _UVRSR_FAILED; return unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno), *(unw_fpreg_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; } _LIBUNWIND_ABORT("unsupported register class"); } static _Unwind_VRS_Result _Unwind_VRS_Get_Internal(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, uint32_t regno, _Unwind_VRS_DataRepresentation representation, void *valuep) { unw_cursor_t *cursor = (unw_cursor_t *)context; switch (regclass) { case _UVRSC_CORE: if (representation != _UVRSD_UINT32 || regno > 15) return _UVRSR_FAILED; return unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno), (unw_word_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; case _UVRSC_WMMXC: if (representation != _UVRSD_UINT32 || regno > 3) return _UVRSR_FAILED; return unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_WC0 + regno), (unw_word_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; case _UVRSC_VFP: if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) return _UVRSR_FAILED; if (representation == _UVRSD_VFPX) { // Can only touch d0-15 with FSTMFDX. if (regno > 15) return _UVRSR_FAILED; unw_save_vfp_as_X(cursor); } else { if (regno > 31) return _UVRSR_FAILED; } return unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno), (unw_fpreg_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; case _UVRSC_WMMXD: if (representation != _UVRSD_DOUBLE || regno > 31) return _UVRSR_FAILED; return unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno), (unw_fpreg_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; } _LIBUNWIND_ABORT("unsupported register class"); } _Unwind_VRS_Result _Unwind_VRS_Get( _Unwind_Context *context, _Unwind_VRS_RegClass regclass, uint32_t regno, _Unwind_VRS_DataRepresentation representation, void *valuep) { _Unwind_VRS_Result result = _Unwind_VRS_Get_Internal(context, regclass, regno, representation, valuep); _LIBUNWIND_TRACE_API("_Unwind_VRS_Get(context=%p, regclass=%d, reg=%d, " "rep=%d, value=0x%llX, result = %d)\n", static_cast(context), regclass, regno, representation, ValueAsBitPattern(representation, valuep), result); return result; } _Unwind_VRS_Result _Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, uint32_t discriminator, _Unwind_VRS_DataRepresentation representation) { _LIBUNWIND_TRACE_API("_Unwind_VRS_Pop(context=%p, regclass=%d, " "discriminator=%d, representation=%d)\n", static_cast(context), regclass, discriminator, representation); switch (regclass) { case _UVRSC_CORE: case _UVRSC_WMMXC: { if (representation != _UVRSD_UINT32) return _UVRSR_FAILED; // When popping SP from the stack, we don't want to override it from the // computed new stack location. See EHABI #7.5.4 table 3. 
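    // For example, a discriminator of 0x2010 has bits 4 and 13 set, so the
    // loop below pops r4 and then r13.  Because r13 is SP itself, poppedSP
    // becomes true and the final write-back of the computed vsp is skipped.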
bool poppedSP = false; uint32_t* sp; if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp) != _UVRSR_OK) { return _UVRSR_FAILED; } for (uint32_t i = 0; i < 16; ++i) { if (!(discriminator & static_cast(1 << i))) continue; uint32_t value = *sp++; if (regclass == _UVRSC_CORE && i == 13) poppedSP = true; if (_Unwind_VRS_Set(context, regclass, i, _UVRSD_UINT32, &value) != _UVRSR_OK) { return _UVRSR_FAILED; } } if (!poppedSP) { return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); } return _UVRSR_OK; } case _UVRSC_VFP: case _UVRSC_WMMXD: { if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) return _UVRSR_FAILED; uint32_t first = discriminator >> 16; uint32_t count = discriminator & 0xffff; uint32_t end = first+count; uint32_t* sp; if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp) != _UVRSR_OK) { return _UVRSR_FAILED; } // For _UVRSD_VFPX, we're assuming the data is stored in FSTMX "standard // format 1", which is equivalent to FSTMD + a padding word. for (uint32_t i = first; i < end; ++i) { // SP is only 32-bit aligned so don't copy 64-bit at a time. uint64_t value = *sp++; value |= ((uint64_t)(*sp++)) << 32; if (_Unwind_VRS_Set(context, regclass, i, representation, &value) != _UVRSR_OK) return _UVRSR_FAILED; } if (representation == _UVRSD_VFPX) ++sp; return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); } } _LIBUNWIND_ABORT("unsupported register class"); } /// Called by personality handler during phase 2 to find the start of the /// function. _LIBUNWIND_EXPORT uintptr_t _Unwind_GetRegionStart(struct _Unwind_Context *context) { unw_cursor_t *cursor = (unw_cursor_t *)context; unw_proc_info_t frameInfo; uintptr_t result = 0; if (unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) result = (uintptr_t)frameInfo.start_ip; _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%llX\n", static_cast(context), (long long)result); return result; } /// Called by personality handler during phase 2 if a foreign exception // is caught. 
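/// If the foreign runtime registered an exception_cleanup callback in the
/// exception object, the function below invokes it with
/// _URC_FOREIGN_EXCEPTION_CAUGHT so that runtime can release the object.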
_LIBUNWIND_EXPORT void _Unwind_DeleteException(_Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)\n", static_cast(exception_object)); if (exception_object->exception_cleanup != NULL) (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT, exception_object); } extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code __gnu_unwind_frame(_Unwind_Exception *exception_object, struct _Unwind_Context *context) { unw_cursor_t *cursor = (unw_cursor_t *)context; if (unw_step(cursor) != UNW_STEP_SUCCESS) return _URC_FAILURE; return _URC_OK; } #endif // _LIBUNWIND_ARM_EHABI Index: user/alc/PQ_LAUNDRY/contrib/llvm/projects/libunwind =================================================================== --- user/alc/PQ_LAUNDRY/contrib/llvm/projects/libunwind (revision 303641) +++ user/alc/PQ_LAUNDRY/contrib/llvm/projects/libunwind (revision 303642) Property changes on: user/alc/PQ_LAUNDRY/contrib/llvm/projects/libunwind ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/llvm/projects/libunwind:r303205-303641 Index: user/alc/PQ_LAUNDRY/contrib/llvm =================================================================== --- user/alc/PQ_LAUNDRY/contrib/llvm (revision 303641) +++ user/alc/PQ_LAUNDRY/contrib/llvm (revision 303642) Property changes on: user/alc/PQ_LAUNDRY/contrib/llvm ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/llvm:r303205-303641 Index: user/alc/PQ_LAUNDRY/contrib/ofed/usr.lib/libibverbs/config.h =================================================================== --- user/alc/PQ_LAUNDRY/contrib/ofed/usr.lib/libibverbs/config.h (revision 303641) +++ user/alc/PQ_LAUNDRY/contrib/ofed/usr.lib/libibverbs/config.h (revision 303642) @@ -1,2 +1 @@ -#define _WITH_GETLINE #include Index: user/alc/PQ_LAUNDRY/include/grp.h =================================================================== --- user/alc/PQ_LAUNDRY/include/grp.h (revision 303641) +++ user/alc/PQ_LAUNDRY/include/grp.h (revision 303642) @@ -1,93 +1,92 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)grp.h 8.2 (Berkeley) 1/21/94 * $FreeBSD$ */ #ifndef _GRP_H_ #define _GRP_H_ #include #include #define _PATH_GROUP "/etc/group" #ifndef _GID_T_DECLARED typedef __gid_t gid_t; #define _GID_T_DECLARED #endif #ifndef _SIZE_T_DECLARED typedef __size_t size_t; #define _SIZE_T_DECLARED #endif struct group { char *gr_name; /* group name */ char *gr_passwd; /* group password */ gid_t gr_gid; /* group id */ char **gr_mem; /* group members */ }; __BEGIN_DECLS #if __POSIX_VISIBLE >= 200112 || __XSI_VISIBLE void endgrent(void); struct group *getgrent(void); #endif struct group *getgrgid(gid_t); struct group *getgrnam(const char *); #if __BSD_VISIBLE const char *group_from_gid(gid_t, int); int gid_from_group(const char *, gid_t *); int pwcache_groupdb(int (*)(int), void (*)(void), struct group * (*)(const char *), struct group * (*)(gid_t)); #endif #if __XSI_VISIBLE -/* XXX IEEE Std 1003.1, 2003 specifies `void setgrent(void)' */ -int setgrent(void); +void setgrent(void); #endif #if __POSIX_VISIBLE >= 200112 || __XSI_VISIBLE int getgrgid_r(gid_t, struct group *, char *, size_t, struct group **); int getgrnam_r(const char *, struct group *, char *, size_t, struct group **); #endif #if __BSD_VISIBLE int getgrent_r(struct group *, char *, size_t, struct group **); int setgroupent(int); #endif __END_DECLS #endif /* !_GRP_H_ */ Index: user/alc/PQ_LAUNDRY/include/stdio.h =================================================================== --- user/alc/PQ_LAUNDRY/include/stdio.h (revision 303641) +++ user/alc/PQ_LAUNDRY/include/stdio.h (revision 303642) @@ -1,544 +1,508 @@ /*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Chris Torek. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)stdio.h 8.5 (Berkeley) 4/29/95 * $FreeBSD$ */ #ifndef _STDIO_H_ #define _STDIO_H_ #include #include #include typedef __off_t fpos_t; #ifndef _SIZE_T_DECLARED typedef __size_t size_t; #define _SIZE_T_DECLARED #endif #if __POSIX_VISIBLE >= 200809 #ifndef _OFF_T_DECLARED #define _OFF_T_DECLARED typedef __off_t off_t; #endif #ifndef _SSIZE_T_DECLARED #define _SSIZE_T_DECLARED typedef __ssize_t ssize_t; #endif #endif #ifndef _OFF64_T_DECLARED #define _OFF64_T_DECLARED typedef __off64_t off64_t; #endif #if __POSIX_VISIBLE >= 200112 || __XSI_VISIBLE #ifndef _VA_LIST_DECLARED typedef __va_list va_list; #define _VA_LIST_DECLARED #endif #endif #define _FSTDIO /* Define for new stdio with functions. */ /* * NB: to fit things in six character monocase externals, the stdio * code uses the prefix `__s' for stdio objects, typically followed * by a three-character attempt at a mnemonic. */ /* stdio buffers */ struct __sbuf { unsigned char *_base; int _size; }; /* * stdio state variables. * * The following always hold: * * if (_flags&(__SLBF|__SWR)) == (__SLBF|__SWR), * _lbfsize is -_bf._size, else _lbfsize is 0 * if _flags&__SRD, _w is 0 * if _flags&__SWR, _r is 0 * * This ensures that the getc and putc macros (or inline functions) never * try to write or read from a file that is in `read' or `write' mode. * (Moreover, they can, and do, automatically switch from read mode to * write mode, and back, on "r+" and "w+" files.) * * _lbfsize is used only to make the inline line-buffered output stream * code as compact as possible. * * _ub, _up, and _ur are used when ungetc() pushes back more characters * than fit in the current _bf, or when ungetc() pushes back a character * that does not match the previous one in _bf. When this happens, * _ub._base becomes non-nil (i.e., a stream has ungetc() data iff * _ub._base!=NULL) and _up and _ur save the current values of _p and _r. * * Certain members of __sFILE are accessed directly via macros or * inline functions. To preserve ABI compat, these members must not * be disturbed. These members are marked below with (*). 
*/ struct __sFILE { unsigned char *_p; /* (*) current position in (some) buffer */ int _r; /* (*) read space left for getc() */ int _w; /* (*) write space left for putc() */ short _flags; /* (*) flags, below; this FILE is free if 0 */ short _file; /* (*) fileno, if Unix descriptor, else -1 */ struct __sbuf _bf; /* (*) the buffer (at least 1 byte, if !NULL) */ int _lbfsize; /* (*) 0 or -_bf._size, for inline putc */ /* operations */ void *_cookie; /* (*) cookie passed to io functions */ int (*_close)(void *); int (*_read)(void *, char *, int); fpos_t (*_seek)(void *, fpos_t, int); int (*_write)(void *, const char *, int); /* separate buffer for long sequences of ungetc() */ struct __sbuf _ub; /* ungetc buffer */ unsigned char *_up; /* saved _p when _p is doing ungetc data */ int _ur; /* saved _r when _r is counting ungetc data */ /* tricks to meet minimum requirements even when malloc() fails */ unsigned char _ubuf[3]; /* guarantee an ungetc() buffer */ unsigned char _nbuf[1]; /* guarantee a getc() buffer */ /* separate buffer for fgetln() when line crosses buffer boundary */ struct __sbuf _lb; /* buffer for fgetln() */ /* Unix stdio files get aligned to block boundaries on fseek() */ int _blksize; /* stat.st_blksize (may be != _bf._size) */ fpos_t _offset; /* current lseek offset */ struct pthread_mutex *_fl_mutex; /* used for MT-safety */ struct pthread *_fl_owner; /* current owner */ int _fl_count; /* recursive lock count */ int _orientation; /* orientation for fwide() */ __mbstate_t _mbstate; /* multibyte conversion state */ int _flags2; /* additional flags */ }; #ifndef _STDFILE_DECLARED #define _STDFILE_DECLARED typedef struct __sFILE FILE; #endif #ifndef _STDSTREAM_DECLARED __BEGIN_DECLS extern FILE *__stdinp; extern FILE *__stdoutp; extern FILE *__stderrp; __END_DECLS #define _STDSTREAM_DECLARED #endif #define __SLBF 0x0001 /* line buffered */ #define __SNBF 0x0002 /* unbuffered */ #define __SRD 0x0004 /* OK to read */ #define __SWR 0x0008 /* OK to write */ /* RD and WR are never simultaneously asserted */ #define __SRW 0x0010 /* open for reading & writing */ #define __SEOF 0x0020 /* found EOF */ #define __SERR 0x0040 /* found error */ #define __SMBF 0x0080 /* _bf._base is from malloc */ #define __SAPP 0x0100 /* fdopen()ed in append mode */ #define __SSTR 0x0200 /* this is an sprintf/snprintf string */ #define __SOPT 0x0400 /* do fseek() optimization */ #define __SNPT 0x0800 /* do not do fseek() optimization */ #define __SOFF 0x1000 /* set iff _offset is in fact correct */ #define __SMOD 0x2000 /* true => fgetln modified _p text */ #define __SALC 0x4000 /* allocate string space dynamically */ #define __SIGN 0x8000 /* ignore this file in _fwalk */ #define __S2OAP 0x0001 /* O_APPEND mode is set */ /* * The following three definitions are for ANSI C, which took them * from System V, which brilliantly took internal interface macros and * made them official arguments to setvbuf(), without renaming them. * Hence, these ugly _IOxxx names are *supposed* to appear in user code. * * Although numbered as their counterparts above, the implementation * does not rely on this. */ #define _IOFBF 0 /* setvbuf should set fully buffered */ #define _IOLBF 1 /* setvbuf should set line buffered */ #define _IONBF 2 /* setvbuf should set unbuffered */ #define BUFSIZ 1024 /* size of buffer used by setbuf */ #define EOF (-1) /* * FOPEN_MAX is a minimum maximum, and is the number of streams that * stdio can provide without attempting to allocate further resources * (which could fail). 
Do not use this for anything. */ /* must be == _POSIX_STREAM_MAX */ #ifndef FOPEN_MAX #define FOPEN_MAX 20 /* must be <= OPEN_MAX */ #endif #define FILENAME_MAX 1024 /* must be <= PATH_MAX */ /* System V/ANSI C; this is the wrong way to do this, do *not* use these. */ #if __XSI_VISIBLE #define P_tmpdir "/tmp/" #endif #define L_tmpnam 1024 /* XXX must be == PATH_MAX */ #define TMP_MAX 308915776 #ifndef SEEK_SET #define SEEK_SET 0 /* set file offset to offset */ #endif #ifndef SEEK_CUR #define SEEK_CUR 1 /* set file offset to current plus offset */ #endif #ifndef SEEK_END #define SEEK_END 2 /* set file offset to EOF plus offset */ #endif #define stdin __stdinp #define stdout __stdoutp #define stderr __stderrp __BEGIN_DECLS #ifdef _XLOCALE_H_ #include #endif /* * Functions defined in ANSI C standard. */ void clearerr(FILE *); int fclose(FILE *); int feof(FILE *); int ferror(FILE *); int fflush(FILE *); int fgetc(FILE *); int fgetpos(FILE * __restrict, fpos_t * __restrict); char *fgets(char * __restrict, int, FILE * __restrict); FILE *fopen(const char * __restrict, const char * __restrict); int fprintf(FILE * __restrict, const char * __restrict, ...); int fputc(int, FILE *); int fputs(const char * __restrict, FILE * __restrict); size_t fread(void * __restrict, size_t, size_t, FILE * __restrict); FILE *freopen(const char * __restrict, const char * __restrict, FILE * __restrict); int fscanf(FILE * __restrict, const char * __restrict, ...); int fseek(FILE *, long, int); int fsetpos(FILE *, const fpos_t *); long ftell(FILE *); size_t fwrite(const void * __restrict, size_t, size_t, FILE * __restrict); int getc(FILE *); int getchar(void); char *gets(char *); void perror(const char *); int printf(const char * __restrict, ...); int putc(int, FILE *); int putchar(int); int puts(const char *); int remove(const char *); int rename(const char *, const char *); void rewind(FILE *); int scanf(const char * __restrict, ...); void setbuf(FILE * __restrict, char * __restrict); int setvbuf(FILE * __restrict, char * __restrict, int, size_t); int sprintf(char * __restrict, const char * __restrict, ...); int sscanf(const char * __restrict, const char * __restrict, ...); FILE *tmpfile(void); char *tmpnam(char *); int ungetc(int, FILE *); int vfprintf(FILE * __restrict, const char * __restrict, __va_list); int vprintf(const char * __restrict, __va_list); int vsprintf(char * __restrict, const char * __restrict, __va_list); #if __ISO_C_VISIBLE >= 1999 int snprintf(char * __restrict, size_t, const char * __restrict, ...) __printflike(3, 4); int vfscanf(FILE * __restrict, const char * __restrict, __va_list) __scanflike(2, 0); int vscanf(const char * __restrict, __va_list) __scanflike(1, 0); int vsnprintf(char * __restrict, size_t, const char * __restrict, __va_list) __printflike(3, 0); int vsscanf(const char * __restrict, const char * __restrict, __va_list) __scanflike(2, 0); #endif /* * Functions defined in all versions of POSIX 1003.1. 
*/ #if __BSD_VISIBLE || (__POSIX_VISIBLE && __POSIX_VISIBLE <= 199506) #define L_cuserid 17 /* size for cuserid(3); MAXLOGNAME, legacy */ #endif #if __POSIX_VISIBLE #define L_ctermid 1024 /* size for ctermid(3); PATH_MAX */ char *ctermid(char *); FILE *fdopen(int, const char *); int fileno(FILE *); #endif /* __POSIX_VISIBLE */ #if __POSIX_VISIBLE >= 199209 int pclose(FILE *); FILE *popen(const char *, const char *); #endif #if __POSIX_VISIBLE >= 199506 int ftrylockfile(FILE *); void flockfile(FILE *); void funlockfile(FILE *); /* * These are normally used through macros as defined below, but POSIX * requires functions as well. */ int getc_unlocked(FILE *); int getchar_unlocked(void); int putc_unlocked(int, FILE *); int putchar_unlocked(int); #endif #if __BSD_VISIBLE void clearerr_unlocked(FILE *); int feof_unlocked(FILE *); int ferror_unlocked(FILE *); int fileno_unlocked(FILE *); #endif #if __POSIX_VISIBLE >= 200112 int fseeko(FILE *, __off_t, int); __off_t ftello(FILE *); #endif #if __BSD_VISIBLE || __XSI_VISIBLE > 0 && __XSI_VISIBLE < 600 int getw(FILE *); int putw(int, FILE *); #endif /* BSD or X/Open before issue 6 */ #if __XSI_VISIBLE char *tempnam(const char *, const char *); #endif #if __POSIX_VISIBLE >= 200809 FILE *fmemopen(void * __restrict, size_t, const char * __restrict); ssize_t getdelim(char ** __restrict, size_t * __restrict, int, FILE * __restrict); FILE *open_memstream(char **, size_t *); int renameat(int, const char *, int, const char *); int vdprintf(int, const char * __restrict, __va_list); - -/* - * Every programmer and his dog wrote functions called getline() and dprintf() - * before POSIX.1-2008 came along and decided to usurp the names, so we - * don't prototype them by default unless one of the following is true: - * a) the app has requested them specifically by defining _WITH_GETLINE or - * _WITH_DPRINTF, respectively - * b) the app has requested a POSIX.1-2008 environment via _POSIX_C_SOURCE - * c) the app defines a GNUism such as _BSD_SOURCE or _GNU_SOURCE - */ -#ifndef _WITH_GETLINE -#if defined(_BSD_SOURCE) || defined(_GNU_SOURCE) -#define _WITH_GETLINE -#elif defined(_POSIX_C_SOURCE) -#if _POSIX_C_SOURCE >= 200809 -#define _WITH_GETLINE -#endif -#endif -#endif - -#ifdef _WITH_GETLINE ssize_t getline(char ** __restrict, size_t * __restrict, FILE * __restrict); -#endif - -#ifndef _WITH_DPRINTF -#if defined(_BSD_SOURCE) || defined(_GNU_SOURCE) -#define _WITH_DPRINTF -#elif defined(_POSIX_C_SOURCE) -#if _POSIX_C_SOURCE >= 200809 -#define _WITH_DPRINTF -#endif -#endif -#endif - -#ifdef _WITH_DPRINTF -int (dprintf)(int, const char * __restrict, ...); -#endif - +int dprintf(int, const char * __restrict, ...); #endif /* __POSIX_VISIBLE >= 200809 */ /* * Routines that are purely local. */ #if __BSD_VISIBLE int asprintf(char **, const char *, ...) __printflike(2, 3); char *ctermid_r(char *); void fcloseall(void); int fdclose(FILE *, int *); char *fgetln(FILE *, size_t *); const char *fmtcheck(const char *, const char *) __format_arg(2); int fpurge(FILE *); void setbuffer(FILE *, char *, int); int setlinebuf(FILE *); int vasprintf(char **, const char *, __va_list) __printflike(2, 0); /* * The system error table contains messages for the first sys_nerr * positive errno values. Use strerror() or strerror_r() from * instead. */ extern const int sys_nerr; extern const char * const sys_errlist[]; /* * Stdio function-access interface. 
*/ FILE *funopen(const void *, int (*)(void *, char *, int), int (*)(void *, const char *, int), fpos_t (*)(void *, fpos_t, int), int (*)(void *)); #define fropen(cookie, fn) funopen(cookie, fn, 0, 0, 0) #define fwopen(cookie, fn) funopen(cookie, 0, fn, 0, 0) typedef __ssize_t cookie_read_function_t(void *, char *, size_t); typedef __ssize_t cookie_write_function_t(void *, const char *, size_t); typedef int cookie_seek_function_t(void *, off64_t *, int); typedef int cookie_close_function_t(void *); typedef struct { cookie_read_function_t *read; cookie_write_function_t *write; cookie_seek_function_t *seek; cookie_close_function_t *close; } cookie_io_functions_t; FILE *fopencookie(void *, const char *, cookie_io_functions_t); /* * Portability hacks. See . */ #ifndef _FTRUNCATE_DECLARED #define _FTRUNCATE_DECLARED int ftruncate(int, __off_t); #endif #ifndef _LSEEK_DECLARED #define _LSEEK_DECLARED __off_t lseek(int, __off_t, int); #endif #ifndef _MMAP_DECLARED #define _MMAP_DECLARED void *mmap(void *, size_t, int, int, int, __off_t); #endif #ifndef _TRUNCATE_DECLARED #define _TRUNCATE_DECLARED int truncate(const char *, __off_t); #endif #endif /* __BSD_VISIBLE */ /* * Functions internal to the implementation. */ int __srget(FILE *); int __swbuf(int, FILE *); /* * The __sfoo macros are here so that we can * define function versions in the C library. */ #define __sgetc(p) (--(p)->_r < 0 ? __srget(p) : (int)(*(p)->_p++)) #if defined(__GNUC__) && defined(__STDC__) static __inline int __sputc(int _c, FILE *_p) { if (--_p->_w >= 0 || (_p->_w >= _p->_lbfsize && (char)_c != '\n')) return (*_p->_p++ = _c); else return (__swbuf(_c, _p)); } #else /* * This has been tuned to generate reasonable code on the vax using pcc. */ #define __sputc(c, p) \ (--(p)->_w < 0 ? \ (p)->_w >= (p)->_lbfsize ? \ (*(p)->_p = (c)), *(p)->_p != '\n' ? \ (int)*(p)->_p++ : \ __swbuf('\n', p) : \ __swbuf((int)(c), p) : \ (*(p)->_p = (c), (int)*(p)->_p++)) #endif extern int __isthreaded; #ifndef __cplusplus #define __sfeof(p) (((p)->_flags & __SEOF) != 0) #define __sferror(p) (((p)->_flags & __SERR) != 0) #define __sclearerr(p) ((void)((p)->_flags &= ~(__SERR|__SEOF))) #define __sfileno(p) ((p)->_file) #define feof(p) (!__isthreaded ? __sfeof(p) : (feof)(p)) #define ferror(p) (!__isthreaded ? __sferror(p) : (ferror)(p)) #define clearerr(p) (!__isthreaded ? __sclearerr(p) : (clearerr)(p)) #if __POSIX_VISIBLE #define fileno(p) (!__isthreaded ? __sfileno(p) : (fileno)(p)) #endif #define getc(fp) (!__isthreaded ? __sgetc(fp) : (getc)(fp)) #define putc(x, fp) (!__isthreaded ? __sputc(x, fp) : (putc)(x, fp)) #define getchar() getc(stdin) #define putchar(x) putc(x, stdout) #if __BSD_VISIBLE /* * See ISO/IEC 9945-1 ANSI/IEEE Std 1003.1 Second Edition 1996-07-12 * B.8.2.7 for the rationale behind the *_unlocked() macros. 
*/ #define feof_unlocked(p) __sfeof(p) #define ferror_unlocked(p) __sferror(p) #define clearerr_unlocked(p) __sclearerr(p) #define fileno_unlocked(p) __sfileno(p) #endif #if __POSIX_VISIBLE >= 199506 #define getc_unlocked(fp) __sgetc(fp) #define putc_unlocked(x, fp) __sputc(x, fp) #define getchar_unlocked() getc_unlocked(stdin) #define putchar_unlocked(x) putc_unlocked(x, stdout) #endif #endif /* __cplusplus */ __END_DECLS #endif /* !_STDIO_H_ */ Index: user/alc/PQ_LAUNDRY/lib/libc/gen/getgrent.3 =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/gen/getgrent.3 (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/gen/getgrent.3 (revision 303642) @@ -1,287 +1,286 @@ .\" Copyright (c) 1989, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 4. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" From: @(#)getgrent.3 8.2 (Berkeley) 4/19/94 .\" $FreeBSD$ .\" -.Dd April 16, 2003 +.Dd July 31, 2016 .Dt GETGRENT 3 .Os .Sh NAME .Nm getgrent , .Nm getgrent_r , .Nm getgrnam , .Nm getgrnam_r , .Nm getgrgid , .Nm getgrgid_r , .Nm setgroupent , .Nm setgrent , .Nm endgrent .Nd group database operations .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In grp.h .Ft struct group * .Fn getgrent void .Ft int .Fn getgrent_r "struct group *grp" "char *buffer" "size_t bufsize" "struct group **result" .Ft struct group * .Fn getgrnam "const char *name" .Ft int .Fn getgrnam_r "const char *name" "struct group *grp" "char *buffer" "size_t bufsize" "struct group **result" .Ft struct group * .Fn getgrgid "gid_t gid" .Ft int .Fn getgrgid_r "gid_t gid" "struct group *grp" "char *buffer" "size_t bufsize" "struct group **result" .Ft int .Fn setgroupent "int stayopen" -.Ft int +.Ft void .Fn setgrent void .Ft void .Fn endgrent void .Sh DESCRIPTION These functions operate on the group database file .Pa /etc/group which is described in .Xr group 5 . 
Each line of the database is defined by the structure .Vt group found in the include file .In grp.h : .Bd -literal -offset indent struct group { char *gr_name; /* group name */ char *gr_passwd; /* group password */ gid_t gr_gid; /* group id */ char **gr_mem; /* group members */ }; .Ed .Pp The functions .Fn getgrnam and .Fn getgrgid search the group database for the given group name pointed to by .Fa name or the group id pointed to by .Fa gid , respectively, returning the first one encountered. Identical group names or group gids may result in undefined behavior. .Pp The .Fn getgrent function sequentially reads the group database and is intended for programs that wish to step through the complete list of groups. .Pp The functions .Fn getgrent_r , .Fn getgrnam_r , and .Fn getgrgid_r are thread-safe versions of .Fn getgrent , .Fn getgrnam , and .Fn getgrgid , respectively. The caller must provide storage for the results of the search in the .Fa grp , .Fa buffer , .Fa bufsize , and .Fa result arguments. When these functions are successful, the .Fa grp argument will be filled-in, and a pointer to that argument will be stored in .Fa result . If an entry is not found or an error occurs, .Fa result will be set to .Dv NULL . .Pp These functions will open the group file for reading, if necessary. .Pp The .Fn setgroupent function opens the file, or rewinds it if it is already open. If .Fa stayopen is non-zero, file descriptors are left open, significantly speeding functions subsequent calls. This functionality is unnecessary for .Fn getgrent as it does not close its file descriptors by default. It should also be noted that it is dangerous for long-running programs to use this functionality as the group file may be updated. .Pp The .Fn setgrent function is identical to .Fn setgroupent with an argument of zero. .Pp The .Fn endgrent function closes any open files. .Sh RETURN VALUES The functions .Fn getgrent , .Fn getgrnam , and .Fn getgrgid , return a pointer to a group structure on success or .Dv NULL if the entry is not found or if an error occurs. If an error does occur, .Va errno will be set. Note that programs must explicitly set .Va errno to zero before calling any of these functions if they need to distinguish between a non-existent entry and an error. The functions .Fn getgrent_r , .Fn getgrnam_r , and .Fn getgrgid_r return 0 if no error occurred, or an error number to indicate failure. It is not an error if a matching entry is not found. (Thus, if .Fa result is set to .Dv NULL and the return value is 0, no matching entry exists.) .Pp -The functions +The function .Fn setgroupent -and -.Fn setgrent -return the value 1 if successful, otherwise the value +returns the value 1 if successful, otherwise the value 0 is returned. The functions -.Fn endgrent +.Fn endgrent , +.Fn setgrent and .Fn setgrfile have no return value. .Sh FILES .Bl -tag -width /etc/group -compact .It Pa /etc/group group database file .El .Sh COMPATIBILITY The historic function .Fn setgrfile , which allowed the specification of alternate password databases, has been deprecated and is no longer available. .Sh SEE ALSO .Xr getpwent 3 , .Xr group 5 , .Xr nsswitch.conf 5 , .Xr yp 8 .Sh STANDARDS The .Fn getgrent , .Fn getgrnam , .Fn getgrnam_r , .Fn getgrgid , .Fn getgrgid_r and .Fn endgrent functions conform to .St -p1003.1-96 . The .Fn setgrent function differs from that standard in that its return type is .Vt int rather than .Vt void . 
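Based on the description of the _r interfaces above (caller-supplied group structure, scratch buffer, and result pointer, with ERANGE reported when the buffer is too small), the following is a minimal usage sketch; the group name "wheel" and the 1024-byte starting size are arbitrary illustrations, not values taken from this page:

#include <errno.h>
#include <grp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	struct group grp, *res;
	char *buf;
	size_t bufsize = 1024;		/* arbitrary starting size */
	int rv;

	for (;;) {
		if ((buf = malloc(bufsize)) == NULL)
			return (1);
		rv = getgrnam_r("wheel", &grp, buf, bufsize, &res);
		if (rv != ERANGE)
			break;
		/* Scratch buffer too small: free it and retry, doubled. */
		free(buf);
		bufsize *= 2;
	}
	if (rv != 0)
		fprintf(stderr, "getgrnam_r: %s\n", strerror(rv));
	else if (res == NULL)
		printf("no entry for \"wheel\"\n");
	else
		printf("wheel: gid %u\n", (unsigned)grp.gr_gid);
	free(buf);
	return (rv != 0);
}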
.Sh HISTORY The functions .Fn endgrent , .Fn getgrent , .Fn getgrnam , .Fn getgrgid , and .Fn setgrent appeared in .At v7 . The functions .Fn setgrfile and .Fn setgroupent appeared in .Bx 4.3 Reno . The functions .Fn getgrent_r , .Fn getgrnam_r , and .Fn getgrgid_r appeared in .Fx 5.1 . .Sh BUGS The functions .Fn getgrent , .Fn getgrnam , .Fn getgrgid , .Fn setgroupent and .Fn setgrent leave their results in an internal static object and return a pointer to that object. Subsequent calls to the same function will modify the same object. .Pp The functions .Fn getgrent , .Fn getgrent_r , .Fn endgrent , .Fn setgroupent , and .Fn setgrent are fairly useless in a networked environment and should be avoided, if possible. The .Fn getgrent and .Fn getgrent_r functions make no attempt to suppress duplicate information if multiple sources are specified in .Xr nsswitch.conf 5 . Index: user/alc/PQ_LAUNDRY/lib/libc/gen/getgrent.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/gen/getgrent.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/gen/getgrent.c (revision 303642) @@ -1,1554 +1,1552 @@ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by * Jacques A. Vidrine, Safeport Network Services, and Network * Associates Laboratories, the Security Research Division of Network * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 * ("CBOSS"), as part of the DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include "namespace.h" #include #ifdef YP #include #include #include #endif #include #include #include #ifdef HESIOD #include #endif #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "libc_private.h" #include "nss_tls.h" #ifdef NS_CACHING #include "nscache.h" #endif enum constants { GRP_STORAGE_INITIAL = 1 << 10, /* 1 KByte */ GRP_STORAGE_MAX = 1 << 20, /* 1 MByte */ SETGRENT = 1, ENDGRENT = 2, HESIOD_NAME_MAX = 256, }; static const ns_src defaultsrc[] = { { NSSRC_COMPAT, NS_SUCCESS }, { NULL, 0 } }; int __getgroupmembership(const char *, gid_t, gid_t *, int, int *); int __gr_match_entry(const char *, size_t, enum nss_lookup_type, const char *, gid_t); int __gr_parse_entry(char *, size_t, struct group *, char *, size_t, int *); static int is_comment_line(const char *, size_t); union key { const char *name; gid_t gid; }; static struct group *getgr(int (*)(union key, struct group *, char *, size_t, struct group **), union key); static int wrap_getgrnam_r(union key, struct group *, char *, size_t, struct group **); static int wrap_getgrgid_r(union key, struct group *, char *, size_t, struct group **); static int wrap_getgrent_r(union key, struct group *, char *, size_t, struct group **); struct files_state { FILE *fp; int stayopen; }; static void files_endstate(void *); NSS_TLS_HANDLING(files); static int files_setgrent(void *, void *, va_list); static int files_group(void *, void *, va_list); #ifdef HESIOD struct dns_state { long counter; }; static void dns_endstate(void *); NSS_TLS_HANDLING(dns); static int dns_setgrent(void *, void *, va_list); static int dns_group(void *, void *, va_list); #endif #ifdef YP struct nis_state { char domain[MAXHOSTNAMELEN]; int done; char *key; int keylen; }; static void nis_endstate(void *); NSS_TLS_HANDLING(nis); static int nis_setgrent(void *, void *, va_list); static int nis_group(void *, void *, va_list); #endif struct compat_state { FILE *fp; int stayopen; char *name; enum _compat { COMPAT_MODE_OFF = 0, COMPAT_MODE_ALL, COMPAT_MODE_NAME } compat; }; static void compat_endstate(void *); NSS_TLS_HANDLING(compat); static int compat_setgrent(void *, void *, va_list); static int compat_group(void *, void *, va_list); static int gr_addgid(gid_t, gid_t *, int, int *); static int getgroupmembership_fallback(void *, void *, va_list); #ifdef NS_CACHING static int grp_id_func(char *, size_t *, va_list, void *); static int grp_marshal_func(char *, size_t *, void *, va_list, void *); static int grp_unmarshal_func(char *, size_t, void *, va_list, void *); static int grp_id_func(char *buffer, size_t *buffer_size, va_list ap, void *cache_mdata) { char *name; gid_t gid; size_t desired_size, size; int res = NS_UNAVAIL; enum nss_lookup_type lookup_type; lookup_type = (enum nss_lookup_type)cache_mdata; switch (lookup_type) { case nss_lt_name: name = va_arg(ap, char *); size = strlen(name); desired_size = sizeof(enum nss_lookup_type) + size + 1; if (desired_size > *buffer_size) { res = NS_RETURN; goto fin; } memcpy(buffer, &lookup_type, sizeof(enum nss_lookup_type)); memcpy(buffer + sizeof(enum nss_lookup_type), name, size + 1); res = NS_SUCCESS; break; case nss_lt_id: gid = va_arg(ap, gid_t); desired_size = sizeof(enum nss_lookup_type) + sizeof(gid_t); if (desired_size > *buffer_size) { res = NS_RETURN; goto fin; } memcpy(buffer, &lookup_type, sizeof(enum nss_lookup_type)); memcpy(buffer + sizeof(enum nss_lookup_type), &gid, sizeof(gid_t)); res = NS_SUCCESS; break; 
default: /* should be unreachable */ return (NS_UNAVAIL); } fin: *buffer_size = desired_size; return (res); } static int grp_marshal_func(char *buffer, size_t *buffer_size, void *retval, va_list ap, void *cache_mdata) { char *name; gid_t gid; struct group *grp; char *orig_buf; size_t orig_buf_size; struct group new_grp; size_t desired_size, size, mem_size; char *p, **mem; switch ((enum nss_lookup_type)cache_mdata) { case nss_lt_name: name = va_arg(ap, char *); break; case nss_lt_id: gid = va_arg(ap, gid_t); break; case nss_lt_all: break; default: /* should be unreachable */ return (NS_UNAVAIL); } grp = va_arg(ap, struct group *); orig_buf = va_arg(ap, char *); orig_buf_size = va_arg(ap, size_t); desired_size = _ALIGNBYTES + sizeof(struct group) + sizeof(char *); if (grp->gr_name != NULL) desired_size += strlen(grp->gr_name) + 1; if (grp->gr_passwd != NULL) desired_size += strlen(grp->gr_passwd) + 1; if (grp->gr_mem != NULL) { mem_size = 0; for (mem = grp->gr_mem; *mem; ++mem) { desired_size += strlen(*mem) + 1; ++mem_size; } desired_size += _ALIGNBYTES + (mem_size + 1) * sizeof(char *); } if (desired_size > *buffer_size) { /* this assignment is here for future use */ *buffer_size = desired_size; return (NS_RETURN); } memcpy(&new_grp, grp, sizeof(struct group)); memset(buffer, 0, desired_size); *buffer_size = desired_size; p = buffer + sizeof(struct group) + sizeof(char *); memcpy(buffer + sizeof(struct group), &p, sizeof(char *)); p = (char *)_ALIGN(p); if (new_grp.gr_name != NULL) { size = strlen(new_grp.gr_name); memcpy(p, new_grp.gr_name, size); new_grp.gr_name = p; p += size + 1; } if (new_grp.gr_passwd != NULL) { size = strlen(new_grp.gr_passwd); memcpy(p, new_grp.gr_passwd, size); new_grp.gr_passwd = p; p += size + 1; } if (new_grp.gr_mem != NULL) { p = (char *)_ALIGN(p); memcpy(p, new_grp.gr_mem, sizeof(char *) * mem_size); new_grp.gr_mem = (char **)p; p += sizeof(char *) * (mem_size + 1); for (mem = new_grp.gr_mem; *mem; ++mem) { size = strlen(*mem); memcpy(p, *mem, size); *mem = p; p += size + 1; } } memcpy(buffer, &new_grp, sizeof(struct group)); return (NS_SUCCESS); } static int grp_unmarshal_func(char *buffer, size_t buffer_size, void *retval, va_list ap, void *cache_mdata) { char *name; gid_t gid; struct group *grp; char *orig_buf; size_t orig_buf_size; int *ret_errno; char *p; char **mem; switch ((enum nss_lookup_type)cache_mdata) { case nss_lt_name: name = va_arg(ap, char *); break; case nss_lt_id: gid = va_arg(ap, gid_t); break; case nss_lt_all: break; default: /* should be unreachable */ return (NS_UNAVAIL); } grp = va_arg(ap, struct group *); orig_buf = va_arg(ap, char *); orig_buf_size = va_arg(ap, size_t); ret_errno = va_arg(ap, int *); if (orig_buf_size < buffer_size - sizeof(struct group) - sizeof(char *)) { *ret_errno = ERANGE; return (NS_RETURN); } memcpy(grp, buffer, sizeof(struct group)); memcpy(&p, buffer + sizeof(struct group), sizeof(char *)); orig_buf = (char *)_ALIGN(orig_buf); memcpy(orig_buf, buffer + sizeof(struct group) + sizeof(char *) + _ALIGN(p) - (size_t)p, buffer_size - sizeof(struct group) - sizeof(char *) - _ALIGN(p) + (size_t)p); p = (char *)_ALIGN(p); NS_APPLY_OFFSET(grp->gr_name, orig_buf, p, char *); NS_APPLY_OFFSET(grp->gr_passwd, orig_buf, p, char *); if (grp->gr_mem != NULL) { NS_APPLY_OFFSET(grp->gr_mem, orig_buf, p, char **); for (mem = grp->gr_mem; *mem; ++mem) NS_APPLY_OFFSET(*mem, orig_buf, p, char *); } if (retval != NULL) *((struct group **)retval) = grp; return (NS_SUCCESS); } NSS_MP_CACHE_HANDLING(group); #endif /* NS_CACHING */ 
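grp_marshal_func() and grp_unmarshal_func() above flatten a struct group, whose fields are pointers, into a single contiguous cache buffer and later rebuild those pointers against the caller's storage. A stripped-down sketch of the same pattern for a hypothetical two-field record; it omits the alignment and member-array handling the real functions perform:

#include <stdio.h>
#include <string.h>

struct rec {				/* stand-in for struct group */
	const char *name;
	unsigned id;
};

/* Flatten the struct followed by its string into one buffer. */
static size_t
rec_marshal(const struct rec *r, char *buf, size_t bufsize)
{
	size_t need = sizeof(*r) + strlen(r->name) + 1;

	if (need > bufsize)
		return (0);		/* caller must retry with a larger buffer */
	memcpy(buf, r, sizeof(*r));
	memcpy(buf + sizeof(*r), r->name, strlen(r->name) + 1);
	return (need);
}

/* Rebuild the struct so its pointer refers into the flat buffer. */
static void
rec_unmarshal(struct rec *r, const char *buf)
{
	memcpy(r, buf, sizeof(*r));
	r->name = buf + sizeof(*r);
}

int
main(void)
{
	struct rec in = { "wheel", 0 }, out;
	char buf[64];

	if (rec_marshal(&in, buf, sizeof(buf)) == 0)
		return (1);
	rec_unmarshal(&out, buf);
	printf("%s has id %u\n", out.name, out.id);
	return (0);
}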
#ifdef NS_CACHING static const nss_cache_info setgrent_cache_info = NS_MP_CACHE_INFO_INITIALIZER( group, (void *)nss_lt_all, NULL, NULL); #endif static const ns_dtab setgrent_dtab[] = { { NSSRC_FILES, files_setgrent, (void *)SETGRENT }, #ifdef HESIOD { NSSRC_DNS, dns_setgrent, (void *)SETGRENT }, #endif #ifdef YP { NSSRC_NIS, nis_setgrent, (void *)SETGRENT }, #endif { NSSRC_COMPAT, compat_setgrent, (void *)SETGRENT }, #ifdef NS_CACHING NS_CACHE_CB(&setgrent_cache_info) #endif { NULL, NULL, NULL } }; #ifdef NS_CACHING static const nss_cache_info endgrent_cache_info = NS_MP_CACHE_INFO_INITIALIZER( group, (void *)nss_lt_all, NULL, NULL); #endif static const ns_dtab endgrent_dtab[] = { { NSSRC_FILES, files_setgrent, (void *)ENDGRENT }, #ifdef HESIOD { NSSRC_DNS, dns_setgrent, (void *)ENDGRENT }, #endif #ifdef YP { NSSRC_NIS, nis_setgrent, (void *)ENDGRENT }, #endif { NSSRC_COMPAT, compat_setgrent, (void *)ENDGRENT }, #ifdef NS_CACHING NS_CACHE_CB(&endgrent_cache_info) #endif { NULL, NULL, NULL } }; #ifdef NS_CACHING static const nss_cache_info getgrent_r_cache_info = NS_MP_CACHE_INFO_INITIALIZER( group, (void *)nss_lt_all, grp_marshal_func, grp_unmarshal_func); #endif static const ns_dtab getgrent_r_dtab[] = { { NSSRC_FILES, files_group, (void *)nss_lt_all }, #ifdef HESIOD { NSSRC_DNS, dns_group, (void *)nss_lt_all }, #endif #ifdef YP { NSSRC_NIS, nis_group, (void *)nss_lt_all }, #endif { NSSRC_COMPAT, compat_group, (void *)nss_lt_all }, #ifdef NS_CACHING NS_CACHE_CB(&getgrent_r_cache_info) #endif { NULL, NULL, NULL } }; static int gr_addgid(gid_t gid, gid_t *groups, int maxgrp, int *grpcnt) { int ret, dupc; for (dupc = 0; dupc < MIN(maxgrp, *grpcnt); dupc++) { if (groups[dupc] == gid) return 1; } ret = 1; if (*grpcnt < maxgrp) groups[*grpcnt] = gid; else ret = 0; (*grpcnt)++; return ret; } static int getgroupmembership_fallback(void *retval, void *mdata, va_list ap) { const ns_src src[] = { { mdata, NS_SUCCESS }, { NULL, 0} }; struct group grp; struct group *grp_p; char *buf; size_t bufsize; const char *uname; gid_t *groups; gid_t agroup; int maxgrp, *grpcnt; int i, rv, ret_errno; /* * As this is a fallback method, only provided src * list will be respected during methods search. 
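 * The loop below walks the whole group database for that source through the
 * setgrent/getgrent_r/endgrent dispatch tables, doubling its scratch buffer
 * on ERANGE (up to GRP_STORAGE_MAX) and recording the gid of every group
 * whose member list contains uname.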
*/ assert(src[0].name != NULL); uname = va_arg(ap, const char *); agroup = va_arg(ap, gid_t); groups = va_arg(ap, gid_t *); maxgrp = va_arg(ap, int); grpcnt = va_arg(ap, int *); rv = NS_UNAVAIL; buf = malloc(GRP_STORAGE_INITIAL); if (buf == NULL) goto out; bufsize = GRP_STORAGE_INITIAL; gr_addgid(agroup, groups, maxgrp, grpcnt); _nsdispatch(NULL, setgrent_dtab, NSDB_GROUP, "setgrent", src, 0); for (;;) { do { ret_errno = 0; grp_p = NULL; rv = _nsdispatch(&grp_p, getgrent_r_dtab, NSDB_GROUP, "getgrent_r", src, &grp, buf, bufsize, &ret_errno); if (grp_p == NULL && ret_errno == ERANGE) { free(buf); if ((bufsize << 1) > GRP_STORAGE_MAX) { buf = NULL; errno = ERANGE; goto out; } bufsize <<= 1; buf = malloc(bufsize); if (buf == NULL) { goto out; } } } while (grp_p == NULL && ret_errno == ERANGE); if (ret_errno != 0) { errno = ret_errno; goto out; } if (grp_p == NULL) break; for (i = 0; grp.gr_mem[i]; i++) { if (strcmp(grp.gr_mem[i], uname) == 0) gr_addgid(grp.gr_gid, groups, maxgrp, grpcnt); } } _nsdispatch(NULL, endgrent_dtab, NSDB_GROUP, "endgrent", src); out: free(buf); return (rv); } -/* XXX IEEE Std 1003.1, 2003 specifies `void setgrent(void)' */ -int +void setgrent(void) { (void)_nsdispatch(NULL, setgrent_dtab, NSDB_GROUP, "setgrent", defaultsrc, 0); - return (1); } int setgroupent(int stayopen) { (void)_nsdispatch(NULL, setgrent_dtab, NSDB_GROUP, "setgrent", defaultsrc, stayopen); return (1); } void endgrent(void) { (void)_nsdispatch(NULL, endgrent_dtab, NSDB_GROUP, "endgrent", defaultsrc); } int getgrent_r(struct group *grp, char *buffer, size_t bufsize, struct group **result) { int rv, ret_errno; ret_errno = 0; *result = NULL; rv = _nsdispatch(result, getgrent_r_dtab, NSDB_GROUP, "getgrent_r", defaultsrc, grp, buffer, bufsize, &ret_errno); if (rv == NS_SUCCESS) return (0); else return (ret_errno); } int getgrnam_r(const char *name, struct group *grp, char *buffer, size_t bufsize, struct group **result) { #ifdef NS_CACHING static const nss_cache_info cache_info = NS_COMMON_CACHE_INFO_INITIALIZER( group, (void *)nss_lt_name, grp_id_func, grp_marshal_func, grp_unmarshal_func); #endif static const ns_dtab dtab[] = { { NSSRC_FILES, files_group, (void *)nss_lt_name }, #ifdef HESIOD { NSSRC_DNS, dns_group, (void *)nss_lt_name }, #endif #ifdef YP { NSSRC_NIS, nis_group, (void *)nss_lt_name }, #endif { NSSRC_COMPAT, compat_group, (void *)nss_lt_name }, #ifdef NS_CACHING NS_CACHE_CB(&cache_info) #endif { NULL, NULL, NULL } }; int rv, ret_errno; ret_errno = 0; *result = NULL; rv = _nsdispatch(result, dtab, NSDB_GROUP, "getgrnam_r", defaultsrc, name, grp, buffer, bufsize, &ret_errno); if (rv == NS_SUCCESS) return (0); else return (ret_errno); } int getgrgid_r(gid_t gid, struct group *grp, char *buffer, size_t bufsize, struct group **result) { #ifdef NS_CACHING static const nss_cache_info cache_info = NS_COMMON_CACHE_INFO_INITIALIZER( group, (void *)nss_lt_id, grp_id_func, grp_marshal_func, grp_unmarshal_func); #endif static const ns_dtab dtab[] = { { NSSRC_FILES, files_group, (void *)nss_lt_id }, #ifdef HESIOD { NSSRC_DNS, dns_group, (void *)nss_lt_id }, #endif #ifdef YP { NSSRC_NIS, nis_group, (void *)nss_lt_id }, #endif { NSSRC_COMPAT, compat_group, (void *)nss_lt_id }, #ifdef NS_CACHING NS_CACHE_CB(&cache_info) #endif { NULL, NULL, NULL } }; int rv, ret_errno; ret_errno = 0; *result = NULL; rv = _nsdispatch(result, dtab, NSDB_GROUP, "getgrgid_r", defaultsrc, gid, grp, buffer, bufsize, &ret_errno); if (rv == NS_SUCCESS) return (0); else return (ret_errno); } int __getgroupmembership(const char 
*uname, gid_t agroup, gid_t *groups, int maxgrp, int *grpcnt) { static const ns_dtab dtab[] = { NS_FALLBACK_CB(getgroupmembership_fallback) { NULL, NULL, NULL } }; assert(uname != NULL); /* groups may be NULL if just sizing when invoked with maxgrp = 0 */ assert(grpcnt != NULL); *grpcnt = 0; (void)_nsdispatch(NULL, dtab, NSDB_GROUP, "getgroupmembership", defaultsrc, uname, agroup, groups, maxgrp, grpcnt); /* too many groups found? */ return (*grpcnt > maxgrp ? -1 : 0); } static struct group grp; static char *grp_storage; static size_t grp_storage_size; static struct group * getgr(int (*fn)(union key, struct group *, char *, size_t, struct group **), union key key) { int rv; struct group *res; if (grp_storage == NULL) { grp_storage = malloc(GRP_STORAGE_INITIAL); if (grp_storage == NULL) return (NULL); grp_storage_size = GRP_STORAGE_INITIAL; } do { rv = fn(key, &grp, grp_storage, grp_storage_size, &res); if (res == NULL && rv == ERANGE) { free(grp_storage); if ((grp_storage_size << 1) > GRP_STORAGE_MAX) { grp_storage = NULL; errno = ERANGE; return (NULL); } grp_storage_size <<= 1; grp_storage = malloc(grp_storage_size); if (grp_storage == NULL) return (NULL); } } while (res == NULL && rv == ERANGE); if (rv != 0) errno = rv; return (res); } static int wrap_getgrnam_r(union key key, struct group *grp, char *buffer, size_t bufsize, struct group **res) { return (getgrnam_r(key.name, grp, buffer, bufsize, res)); } static int wrap_getgrgid_r(union key key, struct group *grp, char *buffer, size_t bufsize, struct group **res) { return (getgrgid_r(key.gid, grp, buffer, bufsize, res)); } static int wrap_getgrent_r(union key key __unused, struct group *grp, char *buffer, size_t bufsize, struct group **res) { return (getgrent_r(grp, buffer, bufsize, res)); } struct group * getgrnam(const char *name) { union key key; key.name = name; return (getgr(wrap_getgrnam_r, key)); } struct group * getgrgid(gid_t gid) { union key key; key.gid = gid; return (getgr(wrap_getgrgid_r, key)); } struct group * getgrent(void) { union key key; key.gid = 0; /* not used */ return (getgr(wrap_getgrent_r, key)); } static int is_comment_line(const char *s, size_t n) { const char *eom; eom = &s[n]; for (; s < eom; s++) if (*s == '#' || !isspace((unsigned char)*s)) break; return (*s == '#' || s == eom); } /* * files backend */ static void files_endstate(void *p) { if (p == NULL) return; if (((struct files_state *)p)->fp != NULL) fclose(((struct files_state *)p)->fp); free(p); } static int files_setgrent(void *retval, void *mdata, va_list ap) { struct files_state *st; int rv, stayopen; rv = files_getstate(&st); if (rv != 0) return (NS_UNAVAIL); switch ((enum constants)mdata) { case SETGRENT: stayopen = va_arg(ap, int); if (st->fp != NULL) rewind(st->fp); else if (stayopen) st->fp = fopen(_PATH_GROUP, "re"); break; case ENDGRENT: if (st->fp != NULL) { fclose(st->fp); st->fp = NULL; } break; default: break; } return (NS_UNAVAIL); } static int files_group(void *retval, void *mdata, va_list ap) { struct files_state *st; enum nss_lookup_type how; const char *name, *line; struct group *grp; gid_t gid; char *buffer; size_t bufsize, linesize; off_t pos; int rv, stayopen, *errnop; name = NULL; gid = (gid_t)-1; how = (enum nss_lookup_type)mdata; switch (how) { case nss_lt_name: name = va_arg(ap, const char *); break; case nss_lt_id: gid = va_arg(ap, gid_t); break; case nss_lt_all: break; default: return (NS_NOTFOUND); } grp = va_arg(ap, struct group *); buffer = va_arg(ap, char *); bufsize = va_arg(ap, size_t); errnop = va_arg(ap, int *); 
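	/*
	 * The rest of this function opens /etc/group on demand, scans it
	 * line by line with fgetln(), copies a matching line into the
	 * caller's buffer for __gr_parse_entry(), and on ERANGE seeks back
	 * to the start of that line so a retry with a larger buffer resumes
	 * at the same entry.
	 */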
*errnop = files_getstate(&st); if (*errnop != 0) return (NS_UNAVAIL); if (st->fp == NULL && ((st->fp = fopen(_PATH_GROUP, "re")) == NULL)) { *errnop = errno; return (NS_UNAVAIL); } if (how == nss_lt_all) stayopen = 1; else { rewind(st->fp); stayopen = st->stayopen; } rv = NS_NOTFOUND; pos = ftello(st->fp); while ((line = fgetln(st->fp, &linesize)) != NULL) { if (line[linesize-1] == '\n') linesize--; rv = __gr_match_entry(line, linesize, how, name, gid); if (rv != NS_SUCCESS) continue; /* We need room at least for the line, a string NUL * terminator, alignment padding, and one (char *) * pointer for the member list terminator. */ if (bufsize <= linesize + _ALIGNBYTES + sizeof(char *)) { *errnop = ERANGE; rv = NS_RETURN; break; } memcpy(buffer, line, linesize); buffer[linesize] = '\0'; rv = __gr_parse_entry(buffer, linesize, grp, &buffer[linesize + 1], bufsize - linesize - 1, errnop); if (rv & NS_TERMINATE) break; pos = ftello(st->fp); } if (st->fp != NULL && !stayopen) { fclose(st->fp); st->fp = NULL; } if (rv == NS_SUCCESS && retval != NULL) *(struct group **)retval = grp; else if (rv == NS_RETURN && *errnop == ERANGE && st->fp != NULL) fseeko(st->fp, pos, SEEK_SET); return (rv); } #ifdef HESIOD /* * dns backend */ static void dns_endstate(void *p) { free(p); } static int dns_setgrent(void *retval, void *cb_data, va_list ap) { struct dns_state *st; int rv; rv = dns_getstate(&st); if (rv != 0) return (NS_UNAVAIL); st->counter = 0; return (NS_UNAVAIL); } static int dns_group(void *retval, void *mdata, va_list ap) { char buf[HESIOD_NAME_MAX]; struct dns_state *st; struct group *grp; const char *name, *label; void *ctx; char *buffer, **hes; size_t bufsize, adjsize, linesize; gid_t gid; enum nss_lookup_type how; int rv, *errnop; ctx = NULL; hes = NULL; name = NULL; gid = (gid_t)-1; how = (enum nss_lookup_type)mdata; switch (how) { case nss_lt_name: name = va_arg(ap, const char *); break; case nss_lt_id: gid = va_arg(ap, gid_t); break; case nss_lt_all: break; } grp = va_arg(ap, struct group *); buffer = va_arg(ap, char *); bufsize = va_arg(ap, size_t); errnop = va_arg(ap, int *); *errnop = dns_getstate(&st); if (*errnop != 0) return (NS_UNAVAIL); if (hesiod_init(&ctx) != 0) { *errnop = errno; rv = NS_UNAVAIL; goto fin; } do { rv = NS_NOTFOUND; switch (how) { case nss_lt_name: label = name; break; case nss_lt_id: if (snprintf(buf, sizeof(buf), "%lu", (unsigned long)gid) >= sizeof(buf)) goto fin; label = buf; break; case nss_lt_all: if (st->counter < 0) goto fin; if (snprintf(buf, sizeof(buf), "group-%ld", st->counter++) >= sizeof(buf)) goto fin; label = buf; break; } hes = hesiod_resolve(ctx, label, how == nss_lt_id ? "gid" : "group"); if ((how == nss_lt_id && hes == NULL && (hes = hesiod_resolve(ctx, buf, "group")) == NULL) || hes == NULL) { if (how == nss_lt_all) st->counter = -1; if (errno != ENOENT) *errnop = errno; goto fin; } rv = __gr_match_entry(hes[0], strlen(hes[0]), how, name, gid); if (rv != NS_SUCCESS) { hesiod_free_list(ctx, hes); hes = NULL; continue; } /* We need room at least for the line, a string NUL * terminator, alignment padding, and one (char *) * pointer for the member list terminator. 
*/ adjsize = bufsize - _ALIGNBYTES - sizeof(char *); linesize = strlcpy(buffer, hes[0], adjsize); if (linesize >= adjsize) { *errnop = ERANGE; rv = NS_RETURN; goto fin; } hesiod_free_list(ctx, hes); hes = NULL; rv = __gr_parse_entry(buffer, linesize, grp, &buffer[linesize + 1], bufsize - linesize - 1, errnop); } while (how == nss_lt_all && !(rv & NS_TERMINATE)); fin: if (hes != NULL) hesiod_free_list(ctx, hes); if (ctx != NULL) hesiod_end(ctx); if (rv == NS_SUCCESS && retval != NULL) *(struct group **)retval = grp; return (rv); } #endif /* HESIOD */ #ifdef YP /* * nis backend */ static void nis_endstate(void *p) { if (p == NULL) return; free(((struct nis_state *)p)->key); free(p); } static int nis_setgrent(void *retval, void *cb_data, va_list ap) { struct nis_state *st; int rv; rv = nis_getstate(&st); if (rv != 0) return (NS_UNAVAIL); st->done = 0; free(st->key); st->key = NULL; return (NS_UNAVAIL); } static int nis_group(void *retval, void *mdata, va_list ap) { char *map; struct nis_state *st; struct group *grp; const char *name; char *buffer, *key, *result; size_t bufsize; gid_t gid; enum nss_lookup_type how; int *errnop, keylen, resultlen, rv; name = NULL; gid = (gid_t)-1; how = (enum nss_lookup_type)mdata; switch (how) { case nss_lt_name: name = va_arg(ap, const char *); map = "group.byname"; break; case nss_lt_id: gid = va_arg(ap, gid_t); map = "group.bygid"; break; case nss_lt_all: map = "group.byname"; break; } grp = va_arg(ap, struct group *); buffer = va_arg(ap, char *); bufsize = va_arg(ap, size_t); errnop = va_arg(ap, int *); *errnop = nis_getstate(&st); if (*errnop != 0) return (NS_UNAVAIL); if (st->domain[0] == '\0') { if (getdomainname(st->domain, sizeof(st->domain)) != 0) { *errnop = errno; return (NS_UNAVAIL); } } result = NULL; do { rv = NS_NOTFOUND; switch (how) { case nss_lt_name: if (strlcpy(buffer, name, bufsize) >= bufsize) goto erange; break; case nss_lt_id: if (snprintf(buffer, bufsize, "%lu", (unsigned long)gid) >= bufsize) goto erange; break; case nss_lt_all: if (st->done) goto fin; break; } result = NULL; if (how == nss_lt_all) { if (st->key == NULL) rv = yp_first(st->domain, map, &st->key, &st->keylen, &result, &resultlen); else { key = st->key; keylen = st->keylen; st->key = NULL; rv = yp_next(st->domain, map, key, keylen, &st->key, &st->keylen, &result, &resultlen); free(key); } if (rv != 0) { free(result); free(st->key); st->key = NULL; if (rv == YPERR_NOMORE) { st->done = 1; rv = NS_NOTFOUND; } else rv = NS_UNAVAIL; goto fin; } } else { rv = yp_match(st->domain, map, buffer, strlen(buffer), &result, &resultlen); if (rv == YPERR_KEY) { rv = NS_NOTFOUND; continue; } else if (rv != 0) { free(result); rv = NS_UNAVAIL; continue; } } /* We need room at least for the line, a string NUL * terminator, alignment padding, and one (char *) * pointer for the member list terminator. 
*/ if (resultlen >= bufsize - _ALIGNBYTES - sizeof(char *)) { free(result); goto erange; } memcpy(buffer, result, resultlen); buffer[resultlen] = '\0'; free(result); rv = __gr_match_entry(buffer, resultlen, how, name, gid); if (rv == NS_SUCCESS) rv = __gr_parse_entry(buffer, resultlen, grp, &buffer[resultlen+1], bufsize - resultlen - 1, errnop); } while (how == nss_lt_all && !(rv & NS_TERMINATE)); fin: if (rv == NS_SUCCESS && retval != NULL) *(struct group **)retval = grp; return (rv); erange: *errnop = ERANGE; return (NS_RETURN); } #endif /* YP */ /* * compat backend */ static void compat_endstate(void *p) { struct compat_state *st; if (p == NULL) return; st = (struct compat_state *)p; free(st->name); if (st->fp != NULL) fclose(st->fp); free(p); } static int compat_setgrent(void *retval, void *mdata, va_list ap) { static const ns_src compatsrc[] = { #ifdef YP { NSSRC_NIS, NS_SUCCESS }, #endif { NULL, 0 } }; ns_dtab dtab[] = { #ifdef HESIOD { NSSRC_DNS, dns_setgrent, NULL }, #endif #ifdef YP { NSSRC_NIS, nis_setgrent, NULL }, #endif { NULL, NULL, NULL } }; struct compat_state *st; int rv, stayopen; #define set_setent(x, y) do { \ int i; \ for (i = 0; i < (int)(nitems(x) - 1); i++) \ x[i].mdata = (void *)y; \ } while (0) rv = compat_getstate(&st); if (rv != 0) return (NS_UNAVAIL); switch ((enum constants)mdata) { case SETGRENT: stayopen = va_arg(ap, int); if (st->fp != NULL) rewind(st->fp); else if (stayopen) st->fp = fopen(_PATH_GROUP, "re"); set_setent(dtab, mdata); (void)_nsdispatch(NULL, dtab, NSDB_GROUP_COMPAT, "setgrent", compatsrc, 0); break; case ENDGRENT: if (st->fp != NULL) { fclose(st->fp); st->fp = NULL; } set_setent(dtab, mdata); (void)_nsdispatch(NULL, dtab, NSDB_GROUP_COMPAT, "endgrent", compatsrc, 0); break; default: break; } st->compat = COMPAT_MODE_OFF; free(st->name); st->name = NULL; return (NS_UNAVAIL); #undef set_setent } static int compat_group(void *retval, void *mdata, va_list ap) { static const ns_src compatsrc[] = { #ifdef YP { NSSRC_NIS, NS_SUCCESS }, #endif { NULL, 0 } }; ns_dtab dtab[] = { #ifdef YP { NSSRC_NIS, nis_group, NULL }, #endif #ifdef HESIOD { NSSRC_DNS, dns_group, NULL }, #endif { NULL, NULL, NULL } }; struct compat_state *st; enum nss_lookup_type how; const char *name, *line; struct group *grp; gid_t gid; char *buffer, *p; void *discard; size_t bufsize, linesize; off_t pos; int rv, stayopen, *errnop; #define set_lookup_type(x, y) do { \ int i; \ for (i = 0; i < (int)(nitems(x) - 1); i++) \ x[i].mdata = (void *)y; \ } while (0) name = NULL; gid = (gid_t)-1; how = (enum nss_lookup_type)mdata; switch (how) { case nss_lt_name: name = va_arg(ap, const char *); break; case nss_lt_id: gid = va_arg(ap, gid_t); break; case nss_lt_all: break; default: return (NS_NOTFOUND); } grp = va_arg(ap, struct group *); buffer = va_arg(ap, char *); bufsize = va_arg(ap, size_t); errnop = va_arg(ap, int *); *errnop = compat_getstate(&st); if (*errnop != 0) return (NS_UNAVAIL); if (st->fp == NULL && ((st->fp = fopen(_PATH_GROUP, "re")) == NULL)) { *errnop = errno; rv = NS_UNAVAIL; goto fin; } if (how == nss_lt_all) stayopen = 1; else { rewind(st->fp); stayopen = st->stayopen; } docompat: switch (st->compat) { case COMPAT_MODE_ALL: set_lookup_type(dtab, how); switch (how) { case nss_lt_all: rv = _nsdispatch(&discard, dtab, NSDB_GROUP_COMPAT, "getgrent_r", compatsrc, grp, buffer, bufsize, errnop); break; case nss_lt_id: rv = _nsdispatch(&discard, dtab, NSDB_GROUP_COMPAT, "getgrgid_r", compatsrc, gid, grp, buffer, bufsize, errnop); break; case nss_lt_name: rv = 
_nsdispatch(&discard, dtab, NSDB_GROUP_COMPAT, "getgrnam_r", compatsrc, name, grp, buffer, bufsize, errnop); break; } if (rv & NS_TERMINATE) goto fin; st->compat = COMPAT_MODE_OFF; break; case COMPAT_MODE_NAME: set_lookup_type(dtab, nss_lt_name); rv = _nsdispatch(&discard, dtab, NSDB_GROUP_COMPAT, "getgrnam_r", compatsrc, st->name, grp, buffer, bufsize, errnop); switch (rv) { case NS_SUCCESS: switch (how) { case nss_lt_name: if (strcmp(name, grp->gr_name) != 0) rv = NS_NOTFOUND; break; case nss_lt_id: if (gid != grp->gr_gid) rv = NS_NOTFOUND; break; default: break; } break; case NS_RETURN: goto fin; default: break; } free(st->name); st->name = NULL; st->compat = COMPAT_MODE_OFF; if (rv == NS_SUCCESS) goto fin; break; default: break; } rv = NS_NOTFOUND; pos = ftello(st->fp); while ((line = fgetln(st->fp, &linesize)) != NULL) { if (line[linesize-1] == '\n') linesize--; if (linesize > 2 && line[0] == '+') { p = memchr(&line[1], ':', linesize); if (p == NULL || p == &line[1]) st->compat = COMPAT_MODE_ALL; else { st->name = malloc(p - line); if (st->name == NULL) { syslog(LOG_ERR, "getgrent memory allocation failure"); *errnop = ENOMEM; rv = NS_UNAVAIL; break; } memcpy(st->name, &line[1], p - line - 1); st->name[p - line - 1] = '\0'; st->compat = COMPAT_MODE_NAME; } goto docompat; } rv = __gr_match_entry(line, linesize, how, name, gid); if (rv != NS_SUCCESS) continue; /* We need room at least for the line, a string NUL * terminator, alignment padding, and one (char *) * pointer for the member list terminator. */ if (bufsize <= linesize + _ALIGNBYTES + sizeof(char *)) { *errnop = ERANGE; rv = NS_RETURN; break; } memcpy(buffer, line, linesize); buffer[linesize] = '\0'; rv = __gr_parse_entry(buffer, linesize, grp, &buffer[linesize + 1], bufsize - linesize - 1, errnop); if (rv & NS_TERMINATE) break; pos = ftello(st->fp); } fin: if (st->fp != NULL && !stayopen) { fclose(st->fp); st->fp = NULL; } if (rv == NS_SUCCESS && retval != NULL) *(struct group **)retval = grp; else if (rv == NS_RETURN && *errnop == ERANGE && st->fp != NULL) fseeko(st->fp, pos, SEEK_SET); return (rv); #undef set_lookup_type } /* * common group line matching and parsing */ int __gr_match_entry(const char *line, size_t linesize, enum nss_lookup_type how, const char *name, gid_t gid) { size_t namesize; const char *p, *eol; char *q; unsigned long n; int i, needed; if (linesize == 0 || is_comment_line(line, linesize)) return (NS_NOTFOUND); switch (how) { case nss_lt_name: needed = 1; break; case nss_lt_id: needed = 2; break; default: needed = 2; break; } eol = &line[linesize]; for (p = line, i = 0; i < needed && p < eol; p++) if (*p == ':') i++; if (i < needed) return (NS_NOTFOUND); switch (how) { case nss_lt_name: namesize = strlen(name); if (namesize + 1 == (size_t)(p - line) && memcmp(line, name, namesize) == 0) return (NS_SUCCESS); break; case nss_lt_id: n = strtoul(p, &q, 10); if (q < eol && *q == ':' && gid == (gid_t)n) return (NS_SUCCESS); break; case nss_lt_all: return (NS_SUCCESS); default: break; } return (NS_NOTFOUND); } int __gr_parse_entry(char *line, size_t linesize, struct group *grp, char *membuf, size_t membufsize, int *errnop) { char *s_gid, *s_mem, *p, **members; unsigned long n; int maxmembers; memset(grp, 0, sizeof(*grp)); members = (char **)_ALIGN(membuf); membufsize -= (char *)members - membuf; maxmembers = membufsize / sizeof(*members); if (maxmembers <= 0 || (grp->gr_name = strsep(&line, ":")) == NULL || grp->gr_name[0] == '\0' || (grp->gr_passwd = strsep(&line, ":")) == NULL || (s_gid = strsep(&line, ":")) 
== NULL || s_gid[0] == '\0') return (NS_NOTFOUND); s_mem = line; n = strtoul(s_gid, &s_gid, 10); if (s_gid[0] != '\0') return (NS_NOTFOUND); grp->gr_gid = (gid_t)n; grp->gr_mem = members; while (maxmembers > 1 && s_mem != NULL) { p = strsep(&s_mem, ","); if (p != NULL && *p != '\0') { *members++ = p; maxmembers--; } } *members = NULL; if (s_mem == NULL) return (NS_SUCCESS); else { *errnop = ERANGE; return (NS_RETURN); } } Index: user/alc/PQ_LAUNDRY/lib/libc/gen/glob.3 =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/gen/glob.3 (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/gen/glob.3 (revision 303642) @@ -1,457 +1,457 @@ .\" Copyright (c) 1989, 1991, 1993, 1994 .\" The Regents of the University of California. All rights reserved. .\" .\" This code is derived from software contributed to Berkeley by .\" Guido van Rossum. .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 4. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)glob.3 8.3 (Berkeley) 4/16/94 .\" $FreeBSD$ .\" .Dd December 20, 2011 .Dt GLOB 3 .Os .Sh NAME .Nm glob , .Nm globfree .Nd generate pathnames matching a pattern .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In glob.h .Ft int .Fn glob "const char * restrict pattern" "int flags" "int (*errfunc)(const char *, int)" "glob_t * restrict pglob" .Ft void .Fn globfree "glob_t *pglob" .Sh DESCRIPTION The .Fn glob function is a pathname generator that implements the rules for file name pattern matching used by the shell. .Pp The include file .In glob.h defines the structure type .Fa glob_t , which contains at least the following fields: .Bd -literal typedef struct { size_t gl_pathc; /* count of total paths so far */ size_t gl_matchc; /* count of paths matching pattern */ size_t gl_offs; /* reserved at beginning of gl_pathv */ int gl_flags; /* returned flags */ char **gl_pathv; /* list of paths matching pattern */ } glob_t; .Ed .Pp The argument .Fa pattern is a pointer to a pathname pattern to be expanded. The .Fn glob argument matches all accessible pathnames against the pattern and creates a list of the pathnames that match. 
In order to have access to a pathname, .Fn glob requires search permission on every component of a path except the last and read permission on each directory of any filename component of .Fa pattern that contains any of the special characters .Ql * , .Ql ?\& or .Ql \&[ . .Pp The .Fn glob argument stores the number of matched pathnames into the .Fa gl_pathc field, and a pointer to a list of pointers to pathnames into the .Fa gl_pathv field. The first pointer after the last pathname is .Dv NULL . If the pattern does not match any pathnames, the returned number of matched paths is set to zero. .Pp It is the caller's responsibility to create the structure pointed to by .Fa pglob . The .Fn glob function allocates other space as needed, including the memory pointed to by .Fa gl_pathv . .Pp The argument .Fa flags is used to modify the behavior of .Fn glob . The value of .Fa flags is the bitwise inclusive .Tn OR of any of the following values defined in .In glob.h : .Bl -tag -width GLOB_ALTDIRFUNC .It Dv GLOB_APPEND Append pathnames generated to the ones from a previous call (or calls) to .Fn glob . The value of .Fa gl_pathc will be the total matches found by this call and the previous call(s). The pathnames are appended to, not merged with the pathnames returned by the previous call(s). Between calls, the caller must not change the setting of the .Dv GLOB_DOOFFS flag, nor change the value of .Fa gl_offs when .Dv GLOB_DOOFFS is set, nor (obviously) call .Fn globfree for .Fa pglob . .It Dv GLOB_DOOFFS Make use of the .Fa gl_offs field. If this flag is set, .Fa gl_offs is used to specify how many .Dv NULL pointers to prepend to the beginning of the .Fa gl_pathv field. In other words, .Fa gl_pathv will point to .Fa gl_offs .Dv NULL pointers, followed by .Fa gl_pathc pathname pointers, followed by a .Dv NULL pointer. .It Dv GLOB_ERR Causes .Fn glob to return when it encounters a directory that it cannot open or read. Ordinarily, .Fn glob continues to find matches. .It Dv GLOB_MARK Each pathname that is a directory that matches .Fa pattern has a slash appended. .It Dv GLOB_NOCHECK If .Fa pattern does not match any pathname, then .Fn glob returns a list consisting of only .Fa pattern , with the number of total pathnames set to 1, and the number of matched pathnames set to 0. The effect of backslash escaping is present in the pattern returned. .It Dv GLOB_NOESCAPE By default, a backslash .Pq Ql \e character is used to escape the following character in the pattern, avoiding any special interpretation of the character. If .Dv GLOB_NOESCAPE is set, backslash escaping is disabled. .It Dv GLOB_NOSORT By default, the pathnames are sorted in ascending collation order; this flag prevents that sorting (speeding up .Fn glob ) . .El .Pp The following values may also be included in .Fa flags , however, they are non-standard extensions to .St -p1003.2 . .Bl -tag -width GLOB_ALTDIRFUNC .It Dv GLOB_ALTDIRFUNC The following additional fields in the pglob structure have been initialized with alternate functions for glob to use to open, read, and close directories and to get stat information on names found in those directories. .Bd -literal void *(*gl_opendir)(const char * name); struct dirent *(*gl_readdir)(void *); void (*gl_closedir)(void *); int (*gl_lstat)(const char *name, struct stat *st); int (*gl_stat)(const char *name, struct stat *st); .Ed .Pp This extension is provided to allow programs such as .Xr restore 8 to provide globbing from directories stored on tape. 
.It Dv GLOB_BRACE Pre-process the pattern string to expand .Ql {pat,pat,...} strings like .Xr csh 1 . The pattern .Ql {} is left unexpanded for historical reasons (and .Xr csh 1 does the same thing to ease typing of .Xr find 1 patterns). .It Dv GLOB_MAGCHAR Set by the .Fn glob function if the pattern included globbing characters. See the description of the usage of the .Fa gl_matchc structure member for more details. .It Dv GLOB_NOMAGIC Is the same as .Dv GLOB_NOCHECK but it only appends the .Fa pattern if it does not contain any of the special characters ``*'', ``?'' or ``[''. .Dv GLOB_NOMAGIC is provided to simplify implementing the historic .Xr csh 1 globbing behavior and should probably not be used anywhere else. .It Dv GLOB_TILDE Expand patterns that start with .Ql ~ to user name home directories. .It Dv GLOB_LIMIT Limit the total number of returned pathnames to the value provided in .Fa gl_matchc (default .Dv ARG_MAX ) . This option should be set for programs that can be coerced into a denial of service attack via patterns that expand to a very large number of matches, such as a long string of .Ql */../*/.. . .El .Pp If, during the search, a directory is encountered that cannot be opened or read and .Fa errfunc is .Pf non- Dv NULL , .Fn glob calls .Fa \*(lp*errfunc\*(rp Ns ( Fa path , errno ) , however, the .Dv GLOB_ERR flag will cause an immediate return when this happens. .Pp If .Fa errfunc returns non-zero, .Fn glob stops the scan and returns .Dv GLOB_ABORTED after setting .Fa gl_pathc and .Fa gl_pathv to reflect any paths already matched. This also happens if an error is encountered and .Dv GLOB_ERR is set in .Fa flags , regardless of the return value of .Fa errfunc , if called. If .Dv GLOB_ERR is not set and either .Fa errfunc is .Dv NULL or .Fa errfunc returns zero, the error is ignored. .Pp The .Fn globfree function frees any space associated with .Fa pglob from a previous call(s) to .Fn glob . .Sh RETURN VALUES On successful completion, .Fn glob returns zero. In addition the fields of .Fa pglob contain the values described below: .Bl -tag -width GLOB_NOCHECK .It Fa gl_pathc contains the total number of matched pathnames so far. This includes other matches from previous invocations of .Fn glob if .Dv GLOB_APPEND was specified. .It Fa gl_matchc contains the number of matched pathnames in the current invocation of .Fn glob . .It Fa gl_flags contains a copy of the .Fa flags argument with the bit .Dv GLOB_MAGCHAR set if .Fa pattern contained any of the special characters ``*'', ``?'' or ``['', cleared if not. .It Fa gl_pathv contains a pointer to a .Dv NULL Ns -terminated list of matched pathnames. However, if .Fa gl_pathc is zero, the contents of .Fa gl_pathv are undefined. .El .Pp If .Fn glob terminates due to an error, it sets errno and returns one of the following non-zero constants, which are defined in the include file .In glob.h : .Bl -tag -width GLOB_NOCHECK .It Dv GLOB_NOSPACE An attempt to allocate memory failed, or if .Fa errno -was 0 +was E2BIG, .Dv GLOB_LIMIT was specified in the flags and .Fa pglob\->gl_matchc or more patterns were matched. .It Dv GLOB_ABORTED The scan was stopped because an error was encountered and either .Dv GLOB_ERR was set or .Fa \*(lp*errfunc\*(rp\*(lp\*(rp returned non-zero. .It Dv GLOB_NOMATCH The pattern did not match a pathname and .Dv GLOB_NOCHECK was not set. .El .Pp The arguments .Fa pglob\->gl_pathc and .Fa pglob\->gl_pathv are still set as specified above. 
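.Pp
For illustration only (the pattern and the
.Dv GLOB_ERR
flag below are arbitrary choices, not requirements), a caller may
distinguish the error returns described above as in the following sketch:
.Bd -literal -offset indent
glob_t g;
int rv;

/* "*.c" and GLOB_ERR are illustrative choices only. */
rv = glob("*.c", GLOB_ERR, NULL, &g);
switch (rv) {
case 0:
	/* g.gl_pathc pathnames are available in g.gl_pathv. */
	break;
case GLOB_NOMATCH:
	/* Nothing matched and GLOB_NOCHECK was not set. */
	break;
case GLOB_ABORTED:
	/* A directory could not be read and GLOB_ERR was set
	   or errfunc returned non-zero. */
	break;
case GLOB_NOSPACE:
	/* Memory allocation failed or a GLOB_LIMIT limit was reached;
	   errno gives the detail. */
	break;
}
globfree(&g);
.Ed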
.Sh EXAMPLES A rough equivalent of .Ql "ls -l *.c *.h" can be obtained with the following code: .Bd -literal -offset indent glob_t g; g.gl_offs = 2; glob("*.c", GLOB_DOOFFS, NULL, &g); glob("*.h", GLOB_DOOFFS | GLOB_APPEND, NULL, &g); g.gl_pathv[0] = "ls"; g.gl_pathv[1] = "-l"; execvp("ls", g.gl_pathv); .Ed .Sh SEE ALSO .Xr sh 1 , .Xr fnmatch 3 , .Xr regex 3 .Sh STANDARDS The current implementation of the .Fn glob function .Em does not conform to .St -p1003.2 . Collating symbol expressions, equivalence class expressions and character class expressions are not supported. .Pp The flags .Dv GLOB_ALTDIRFUNC , .Dv GLOB_BRACE , .Dv GLOB_LIMIT , .Dv GLOB_MAGCHAR , .Dv GLOB_NOMAGIC , and .Dv GLOB_TILDE , and the fields .Fa gl_matchc and .Fa gl_flags are extensions to the .Tn POSIX standard and should not be used by applications striving for strict conformance. .Sh HISTORY The .Fn glob and .Fn globfree functions first appeared in .Bx 4.4 . .Sh BUGS Patterns longer than .Dv MAXPATHLEN may cause unchecked errors. .Pp The .Fn glob argument may fail and set errno for any of the errors specified for the library routines .Xr stat 2 , .Xr closedir 3 , .Xr opendir 3 , .Xr readdir 3 , .Xr malloc 3 , and .Xr free 3 . Index: user/alc/PQ_LAUNDRY/lib/libc/gen/glob.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/gen/glob.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/gen/glob.c (revision 303642) @@ -1,1081 +1,1101 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Guido van Rossum. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)glob.c 8.3 (Berkeley) 10/13/93"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); /* * glob(3) -- a superset of the one defined in POSIX 1003.2. * * The [!...] 
convention to negate a range is supported (SysV, Posix, ksh). * * Optional extra services, controlled by flags not defined by POSIX: * * GLOB_QUOTE: * Escaping convention: \ inhibits any special meaning the following * character might have (except \ at end of string is retained). * GLOB_MAGCHAR: * Set in gl_flags if pattern contained a globbing character. * GLOB_NOMAGIC: * Same as GLOB_NOCHECK, but it will only append pattern if it did * not contain any magic characters. [Used in csh style globbing] * GLOB_ALTDIRFUNC: * Use alternately specified directory access functions. * GLOB_TILDE: * expand ~user/foo to the /home/dir/of/user/foo * GLOB_BRACE: * expand {1,2}{a,b} to 1a 1b 2a 2b * gl_matchc: * Number of matches in the current invocation of glob. */ /* * Some notes on multibyte character support: * 1. Patterns with illegal byte sequences match nothing - even if * GLOB_NOCHECK is specified. * 2. Illegal byte sequences in filenames are handled by treating them as * single-byte characters with a values of such bytes of the sequence * cast to wchar_t. * 3. State-dependent encodings are not currently supported. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "collate.h" /* * glob(3) expansion limits. Stop the expansion if any of these limits * is reached. This caps the runtime in the face of DoS attacks. See * also CVE-2010-2632 */ #define GLOB_LIMIT_BRACE 128 /* number of brace calls */ #define GLOB_LIMIT_PATH 65536 /* number of path elements */ #define GLOB_LIMIT_READDIR 16384 /* number of readdirs */ #define GLOB_LIMIT_STAT 1024 /* number of stat system calls */ #define GLOB_LIMIT_STRING ARG_MAX /* maximum total size for paths */ struct glob_limit { size_t l_brace_cnt; size_t l_path_lim; size_t l_readdir_cnt; size_t l_stat_cnt; size_t l_string_cnt; }; #define DOT L'.' #define EOS L'\0' #define LBRACKET L'[' #define NOT L'!' #define QUESTION L'?' 
#define QUOTE L'\\' #define RANGE L'-' #define RBRACKET L']' #define SEP L'/' #define STAR L'*' #define TILDE L'~' #define LBRACE L'{' #define RBRACE L'}' #define COMMA L',' #define M_QUOTE 0x8000000000ULL #define M_PROTECT 0x4000000000ULL #define M_MASK 0xffffffffffULL #define M_CHAR 0x00ffffffffULL typedef uint_fast64_t Char; #define CHAR(c) ((Char)((c)&M_CHAR)) #define META(c) ((Char)((c)|M_QUOTE)) #define UNPROT(c) ((c) & ~M_PROTECT) #define M_ALL META(L'*') #define M_END META(L']') #define M_NOT META(L'!') #define M_ONE META(L'?') #define M_RNG META(L'-') #define M_SET META(L'[') #define ismeta(c) (((c)&M_QUOTE) != 0) #ifdef DEBUG #define isprot(c) (((c)&M_PROTECT) != 0) #endif static int compare(const void *, const void *); static int g_Ctoc(const Char *, char *, size_t); static int g_lstat(Char *, struct stat *, glob_t *); static DIR *g_opendir(Char *, glob_t *); static const Char *g_strchr(const Char *, wchar_t); #ifdef notdef static Char *g_strcat(Char *, const Char *); #endif static int g_stat(Char *, struct stat *, glob_t *); static int glob0(const Char *, glob_t *, struct glob_limit *, const char *); static int glob1(Char *, glob_t *, struct glob_limit *); static int glob2(Char *, Char *, Char *, Char *, glob_t *, struct glob_limit *); static int glob3(Char *, Char *, Char *, Char *, Char *, glob_t *, struct glob_limit *); static int globextend(const Char *, glob_t *, struct glob_limit *, const char *); static const Char * globtilde(const Char *, Char *, size_t, glob_t *); static int globexp0(const Char *, glob_t *, struct glob_limit *, const char *); static int globexp1(const Char *, glob_t *, struct glob_limit *); static int globexp2(const Char *, const Char *, glob_t *, struct glob_limit *); static int globfinal(glob_t *, struct glob_limit *, size_t, const char *); static int match(Char *, Char *, Char *); #ifdef DEBUG static void qprintf(const char *, Char *); #endif int glob(const char * __restrict pattern, int flags, int (*errfunc)(const char *, int), glob_t * __restrict pglob) { struct glob_limit limit = { 0, 0, 0, 0, 0 }; const char *patnext; Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot; mbstate_t mbs; wchar_t wc; size_t clen; int too_long; patnext = pattern; if (!(flags & GLOB_APPEND)) { pglob->gl_pathc = 0; pglob->gl_pathv = NULL; if (!(flags & GLOB_DOOFFS)) pglob->gl_offs = 0; } if (flags & GLOB_LIMIT) { limit.l_path_lim = pglob->gl_matchc; if (limit.l_path_lim == 0) limit.l_path_lim = GLOB_LIMIT_PATH; } pglob->gl_flags = flags & ~GLOB_MAGCHAR; pglob->gl_errfunc = errfunc; pglob->gl_matchc = 0; bufnext = patbuf; bufend = bufnext + MAXPATHLEN - 1; too_long = 1; if (flags & GLOB_NOESCAPE) { memset(&mbs, 0, sizeof(mbs)); while (bufnext <= bufend) { clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs); if (clen == (size_t)-1 || clen == (size_t)-2) return (globfinal(pglob, &limit, pglob->gl_pathc, pattern)); else if (clen == 0) { too_long = 0; break; } *bufnext++ = wc; patnext += clen; } } else { /* Protect the quoted characters. 
*/ memset(&mbs, 0, sizeof(mbs)); while (bufnext <= bufend) { if (*patnext == '\\') { if (*++patnext == '\0') { *bufnext++ = QUOTE; continue; } prot = M_PROTECT; } else prot = 0; clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs); if (clen == (size_t)-1 || clen == (size_t)-2) return (globfinal(pglob, &limit, pglob->gl_pathc, pattern)); else if (clen == 0) { too_long = 0; break; } *bufnext++ = wc | prot; patnext += clen; } } if (too_long) return (globfinal(pglob, &limit, pglob->gl_pathc, pattern)); *bufnext = EOS; if (flags & GLOB_BRACE) return (globexp0(patbuf, pglob, &limit, pattern)); else return (glob0(patbuf, pglob, &limit, pattern)); } static int globexp0(const Char *pattern, glob_t *pglob, struct glob_limit *limit, const char *origpat) { int rv; size_t oldpathc; /* Protect a single {}, for find(1), like csh */ if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS) { if ((pglob->gl_flags & GLOB_LIMIT) && limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } return (glob0(pattern, pglob, limit, origpat)); } oldpathc = pglob->gl_pathc; if ((rv = globexp1(pattern, pglob, limit)) != 0) return rv; return (globfinal(pglob, limit, oldpathc, origpat)); } /* * Expand recursively a glob {} pattern. When there is no more expansion * invoke the standard globbing routine to glob the rest of the magic * characters */ static int globexp1(const Char *pattern, glob_t *pglob, struct glob_limit *limit) { const Char* ptr; if ((ptr = g_strchr(pattern, LBRACE)) != NULL) { if ((pglob->gl_flags & GLOB_LIMIT) && limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } return (globexp2(ptr, pattern, pglob, limit)); } return (glob0(pattern, pglob, limit, NULL)); } /* * Recursive brace globbing helper. Tries to expand a single brace. * If it succeeds then it invokes globexp1 with the new pattern. * If it fails then it tries to glob the rest of the pattern and returns. */ static int globexp2(const Char *ptr, const Char *pattern, glob_t *pglob, struct glob_limit *limit) { int i, rv; Char *lm, *ls; const Char *pe, *pm, *pm1, *pl; Char patbuf[MAXPATHLEN]; /* copy part up to the brace */ for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++) continue; *lm = EOS; ls = lm; /* Find the balanced brace */ for (i = 0, pe = ++ptr; *pe != EOS; pe++) if (*pe == LBRACKET) { /* Ignore everything between [] */ for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++) continue; if (*pe == EOS) { /* * We could not find a matching RBRACKET. * Ignore and just look for RBRACE */ pe = pm; } } else if (*pe == LBRACE) i++; else if (*pe == RBRACE) { if (i == 0) break; i--; } /* Non matching braces; just glob the pattern */ if (i != 0 || *pe == EOS) return (glob0(pattern, pglob, limit, NULL)); for (i = 0, pl = pm = ptr; pm <= pe; pm++) switch (*pm) { case LBRACKET: /* Ignore everything between [] */ for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++) continue; if (*pm == EOS) { /* * We could not find a matching RBRACKET. 
* Ignore and just look for RBRACE */ pm = pm1; } break; case LBRACE: i++; break; case RBRACE: if (i) { i--; break; } /* FALLTHROUGH */ case COMMA: if (i && *pm == COMMA) break; else { /* Append the current string */ for (lm = ls; (pl < pm); *lm++ = *pl++) continue; /* * Append the rest of the pattern after the * closing brace */ for (pl = pe + 1; (*lm++ = *pl++) != EOS;) continue; /* Expand the current pattern */ #ifdef DEBUG qprintf("globexp2:", patbuf); #endif rv = globexp1(patbuf, pglob, limit); if (rv) return (rv); /* move after the comma, to the next string */ pl = pm + 1; } break; default: break; } return (0); } /* * expand tilde from the passwd file. */ static const Char * globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob) { struct passwd *pwd; char *h, *sc; const Char *p; Char *b, *eb; wchar_t wc; wchar_t wbuf[MAXPATHLEN]; wchar_t *wbufend, *dc; size_t clen; mbstate_t mbs; int too_long; if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE)) return (pattern); /* * Copy up to the end of the string or / */ eb = &patbuf[patbuf_len - 1]; for (p = pattern + 1, b = patbuf; b < eb && *p != EOS && UNPROT(*p) != SEP; *b++ = *p++) continue; if (*p != EOS && UNPROT(*p) != SEP) return (NULL); *b = EOS; h = NULL; if (patbuf[0] == EOS) { /* * handle a plain ~ or ~/ by expanding $HOME first (iff * we're not running setuid or setgid) and then trying * the password file */ if (issetugid() != 0 || (h = getenv("HOME")) == NULL) { if (((h = getlogin()) != NULL && (pwd = getpwnam(h)) != NULL) || (pwd = getpwuid(getuid())) != NULL) h = pwd->pw_dir; else return (pattern); } } else { /* * Expand a ~user */ if (g_Ctoc(patbuf, (char *)wbuf, sizeof(wbuf))) return (NULL); if ((pwd = getpwnam((char *)wbuf)) == NULL) return (pattern); else h = pwd->pw_dir; } /* Copy the home directory */ dc = wbuf; sc = h; wbufend = wbuf + MAXPATHLEN - 1; too_long = 1; memset(&mbs, 0, sizeof(mbs)); while (dc <= wbufend) { clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs); if (clen == (size_t)-1 || clen == (size_t)-2) { /* XXX See initial comment #2. */ wc = (unsigned char)*sc; clen = 1; memset(&mbs, 0, sizeof(mbs)); } if ((*dc++ = wc) == EOS) { too_long = 0; break; } sc += clen; } if (too_long) return (NULL); dc = wbuf; for (b = patbuf; b < eb && *dc != EOS; *b++ = *dc++ | M_PROTECT) continue; if (*dc != EOS) return (NULL); /* Append the rest of the pattern */ if (*p != EOS) { too_long = 1; while (b <= eb) { if ((*b++ = *p++) == EOS) { too_long = 0; break; } } if (too_long) return (NULL); } else *b = EOS; return (patbuf); } /* * The main glob() routine: compiles the pattern (optionally processing * quotes), calls glob1() to do the real pattern matching, and finally * sorts the list (unless unsorted operation is requested). Returns 0 * if things went well, nonzero if errors occurred. */ static int glob0(const Char *pattern, glob_t *pglob, struct glob_limit *limit, const char *origpat) { const Char *qpatnext; int err; size_t oldpathc; Char *bufnext, c, patbuf[MAXPATHLEN]; qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob); if (qpatnext == NULL) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } oldpathc = pglob->gl_pathc; bufnext = patbuf; /* We don't need to check for buffer overflow any more. 
*/ while ((c = *qpatnext++) != EOS) { switch (c) { case LBRACKET: c = *qpatnext; if (c == NOT) ++qpatnext; if (*qpatnext == EOS || g_strchr(qpatnext+1, RBRACKET) == NULL) { *bufnext++ = LBRACKET; if (c == NOT) --qpatnext; break; } *bufnext++ = M_SET; if (c == NOT) *bufnext++ = M_NOT; c = *qpatnext++; do { *bufnext++ = CHAR(c); if (*qpatnext == RANGE && (c = qpatnext[1]) != RBRACKET) { *bufnext++ = M_RNG; *bufnext++ = CHAR(c); qpatnext += 2; } } while ((c = *qpatnext++) != RBRACKET); pglob->gl_flags |= GLOB_MAGCHAR; *bufnext++ = M_END; break; case QUESTION: pglob->gl_flags |= GLOB_MAGCHAR; *bufnext++ = M_ONE; break; case STAR: pglob->gl_flags |= GLOB_MAGCHAR; /* collapse adjacent stars to one, * to avoid exponential behavior */ if (bufnext == patbuf || bufnext[-1] != M_ALL) *bufnext++ = M_ALL; break; default: *bufnext++ = CHAR(c); break; } } *bufnext = EOS; #ifdef DEBUG qprintf("glob0:", patbuf); #endif if ((err = glob1(patbuf, pglob, limit)) != 0) return(err); if (origpat != NULL) return (globfinal(pglob, limit, oldpathc, origpat)); return (0); } static int globfinal(glob_t *pglob, struct glob_limit *limit, size_t oldpathc, const char *origpat) { /* * If there was no match we are going to append the origpat * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified * and the origpat did not contain any magic characters * GLOB_NOMAGIC is there just for compatibility with csh. */ if (pglob->gl_pathc == oldpathc) { if ((pglob->gl_flags & GLOB_NOCHECK) || ((pglob->gl_flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR))) return (globextend(NULL, pglob, limit, origpat)); else return (GLOB_NOMATCH); } if (!(pglob->gl_flags & GLOB_NOSORT)) qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, pglob->gl_pathc - oldpathc, sizeof(char *), compare); return (0); } static int compare(const void *p, const void *q) { return (strcoll(*(char **)p, *(char **)q)); } static int glob1(Char *pattern, glob_t *pglob, struct glob_limit *limit) { Char pathbuf[MAXPATHLEN]; /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ if (*pattern == EOS) return (0); return (glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1, pattern, pglob, limit)); } /* * The functions glob2 and glob3 are mutually recursive; there is one level * of recursion for each segment in the pattern that contains one or more * meta characters. */ static int glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, glob_t *pglob, struct glob_limit *limit) { struct stat sb; Char *p, *q; int anymeta; /* * Loop over pattern segments until end of pattern or until * segment with meta character found. */ for (anymeta = 0;;) { if (*pattern == EOS) { /* End of pattern? */ *pathend = EOS; if (g_lstat(pathbuf, &sb, pglob)) return (0); if ((pglob->gl_flags & GLOB_LIMIT) && limit->l_stat_cnt++ >= GLOB_LIMIT_STAT) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } if ((pglob->gl_flags & GLOB_MARK) && UNPROT(pathend[-1]) != SEP && (S_ISDIR(sb.st_mode) || (S_ISLNK(sb.st_mode) && g_stat(pathbuf, &sb, pglob) == 0 && S_ISDIR(sb.st_mode)))) { if (pathend + 1 > pathend_last) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } *pathend++ = SEP; *pathend = EOS; } ++pglob->gl_matchc; return (globextend(pathbuf, pglob, limit, NULL)); } /* Find end of next segment, copy tentatively to pathend. */ q = pathend; p = pattern; while (*p != EOS && UNPROT(*p) != SEP) { if (ismeta(*p)) anymeta = 1; if (q + 1 > pathend_last) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } *q++ = *p++; } if (!anymeta) { /* No expansion, do next segment. 
*/ pathend = q; pattern = p; while (UNPROT(*pattern) == SEP) { if (pathend + 1 > pathend_last) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } *pathend++ = *pattern++; } } else /* Need expansion, recurse. */ return (glob3(pathbuf, pathend, pathend_last, pattern, p, pglob, limit)); } /* NOTREACHED */ } static int glob3(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, Char *restpattern, glob_t *pglob, struct glob_limit *limit) { struct dirent *dp; DIR *dirp; - int err, too_long, saverrno; + int err, too_long, saverrno, saverrno2; char buf[MAXPATHLEN + MB_LEN_MAX - 1]; struct dirent *(*readdirfunc)(DIR *); if (pathend > pathend_last) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } *pathend = EOS; if (pglob->gl_errfunc != NULL && g_Ctoc(pathbuf, buf, sizeof(buf))) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } + saverrno = errno; errno = 0; if ((dirp = g_opendir(pathbuf, pglob)) == NULL) { if (errno == ENOENT || errno == ENOTDIR) return (0); if ((pglob->gl_errfunc != NULL && pglob->gl_errfunc(buf, errno)) || - (pglob->gl_flags & GLOB_ERR)) + (pglob->gl_flags & GLOB_ERR)) { + if (errno == 0) + errno = saverrno; return (GLOB_ABORTED); + } + if (errno == 0) + errno = saverrno; return (0); } err = 0; /* pglob->gl_readdir takes a void *, fix this manually */ if (pglob->gl_flags & GLOB_ALTDIRFUNC) readdirfunc = (struct dirent *(*)(DIR *))pglob->gl_readdir; else readdirfunc = readdir; errno = 0; /* Search directory for matching names. */ while ((dp = (*readdirfunc)(dirp)) != NULL) { char *sc; Char *dc; wchar_t wc; size_t clen; mbstate_t mbs; if ((pglob->gl_flags & GLOB_LIMIT) && limit->l_readdir_cnt++ >= GLOB_LIMIT_READDIR) { - errno = 0; + errno = E2BIG; err = GLOB_NOSPACE; break; } /* Initial DOT must be matched literally. */ - if (dp->d_name[0] == '.' && UNPROT(*pattern) != DOT) + if (dp->d_name[0] == '.' && UNPROT(*pattern) != DOT) { + errno = 0; continue; + } memset(&mbs, 0, sizeof(mbs)); dc = pathend; sc = dp->d_name; too_long = 1; while (dc <= pathend_last) { clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs); if (clen == (size_t)-1 || clen == (size_t)-2) { /* XXX See initial comment #2. */ wc = (unsigned char)*sc; clen = 1; memset(&mbs, 0, sizeof(mbs)); } if ((*dc++ = wc) == EOS) { too_long = 0; break; } sc += clen; } + if (too_long && ((pglob->gl_errfunc != NULL && + pglob->gl_errfunc(buf, ENAMETOOLONG)) || + (pglob->gl_flags & GLOB_ERR))) { + errno = ENAMETOOLONG; + err = GLOB_ABORTED; + break; + } if (too_long || !match(pathend, pattern, restpattern)) { *pathend = EOS; + errno = 0; continue; } + if (errno == 0) + errno = saverrno; err = glob2(pathbuf, --dc, pathend_last, restpattern, pglob, limit); if (err) break; errno = 0; } - saverrno = errno; + saverrno2 = errno; if (pglob->gl_flags & GLOB_ALTDIRFUNC) (*pglob->gl_closedir)(dirp); else closedir(dirp); - errno = saverrno; + errno = saverrno2; if (err) return (err); if (dp == NULL && errno != 0 && ((pglob->gl_errfunc != NULL && pglob->gl_errfunc(buf, errno)) || (pglob->gl_flags & GLOB_ERR))) return (GLOB_ABORTED); + if (errno == 0) + errno = saverrno; return (0); } /* * Extend the gl_pathv member of a glob_t structure to accommodate a new item, * add the new item, and update gl_pathc. * * This assumes the BSD realloc, which only copies the block when its size * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic * behavior. * * Return 0 if new item added, error code if memory couldn't be allocated. 
* * Invariant of the glob_t structure: * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and * gl_pathv points to (gl_offs + gl_pathc + 1) items. */ static int globextend(const Char *path, glob_t *pglob, struct glob_limit *limit, const char *origpat) { char **pathv; size_t i, newsize, len; char *copy; const Char *p; if ((pglob->gl_flags & GLOB_LIMIT) && pglob->gl_matchc > limit->l_path_lim) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); /* realloc(NULL, newsize) is equivalent to malloc(newsize). */ pathv = realloc((void *)pglob->gl_pathv, newsize); if (pathv == NULL) return (GLOB_NOSPACE); if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { /* first time around -- clear initial gl_offs items */ pathv += pglob->gl_offs; for (i = pglob->gl_offs + 1; --i > 0; ) *--pathv = NULL; } pglob->gl_pathv = pathv; if (origpat != NULL) copy = strdup(origpat); else { for (p = path; *p++ != EOS;) continue; len = MB_CUR_MAX * (size_t)(p - path); /* XXX overallocation */ if ((copy = malloc(len)) != NULL) { if (g_Ctoc(path, copy, len)) { free(copy); - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } } } if (copy != NULL) { limit->l_string_cnt += strlen(copy) + 1; if ((pglob->gl_flags & GLOB_LIMIT) && limit->l_string_cnt >= GLOB_LIMIT_STRING) { free(copy); - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; } pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; return (copy == NULL ? GLOB_NOSPACE : 0); } /* * pattern matching function for filenames. Each occurrence of the * * pattern causes a recursion level. */ static int match(Char *name, Char *pat, Char *patend) { int ok, negate_range; Char c, k; struct xlocale_collate *table = (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; while (pat < patend) { c = *pat++; switch (c & M_MASK) { case M_ALL: if (pat == patend) return (1); do if (match(name, pat, patend)) return (1); while (*name++ != EOS); return (0); case M_ONE: if (*name++ == EOS) return (0); break; case M_SET: ok = 0; if ((k = *name++) == EOS) return (0); if ((negate_range = ((*pat & M_MASK) == M_NOT)) != 0) ++pat; while (((c = *pat++) & M_MASK) != M_END) if ((*pat & M_MASK) == M_RNG) { if (table->__collate_load_error ? CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]) : __wcollate_range_cmp(CHAR(c), CHAR(k)) <= 0 && __wcollate_range_cmp(CHAR(k), CHAR(pat[1])) <= 0) ok = 1; pat += 2; } else if (c == k) ok = 1; if (ok == negate_range) return (0); break; default: if (*name++ != c) return (0); break; } } return (*name == EOS); } /* Free allocated data belonging to a glob_t structure. 
*/ void globfree(glob_t *pglob) { size_t i; char **pp; if (pglob->gl_pathv != NULL) { pp = pglob->gl_pathv + pglob->gl_offs; for (i = pglob->gl_pathc; i--; ++pp) if (*pp) free(*pp); free(pglob->gl_pathv); pglob->gl_pathv = NULL; } } static DIR * g_opendir(Char *str, glob_t *pglob) { char buf[MAXPATHLEN + MB_LEN_MAX - 1]; if (*str == EOS) strcpy(buf, "."); else { if (g_Ctoc(str, buf, sizeof(buf))) { errno = ENAMETOOLONG; return (NULL); } } if (pglob->gl_flags & GLOB_ALTDIRFUNC) return ((*pglob->gl_opendir)(buf)); return (opendir(buf)); } static int g_lstat(Char *fn, struct stat *sb, glob_t *pglob) { char buf[MAXPATHLEN + MB_LEN_MAX - 1]; if (g_Ctoc(fn, buf, sizeof(buf))) { errno = ENAMETOOLONG; return (-1); } if (pglob->gl_flags & GLOB_ALTDIRFUNC) return((*pglob->gl_lstat)(buf, sb)); return (lstat(buf, sb)); } static int g_stat(Char *fn, struct stat *sb, glob_t *pglob) { char buf[MAXPATHLEN + MB_LEN_MAX - 1]; if (g_Ctoc(fn, buf, sizeof(buf))) { errno = ENAMETOOLONG; return (-1); } if (pglob->gl_flags & GLOB_ALTDIRFUNC) return ((*pglob->gl_stat)(buf, sb)); return (stat(buf, sb)); } static const Char * g_strchr(const Char *str, wchar_t ch) { do { if (*str == ch) return (str); } while (*str++); return (NULL); } static int g_Ctoc(const Char *str, char *buf, size_t len) { mbstate_t mbs; size_t clen; memset(&mbs, 0, sizeof(mbs)); while (len >= MB_CUR_MAX) { clen = wcrtomb(buf, CHAR(*str), &mbs); if (clen == (size_t)-1) { /* XXX See initial comment #2. */ *buf = (char)CHAR(*str); clen = 1; memset(&mbs, 0, sizeof(mbs)); } if (CHAR(*str) == EOS) return (0); str++; buf += clen; len -= clen; } return (1); } #ifdef DEBUG static void qprintf(const char *str, Char *s) { Char *p; (void)printf("%s\n", str); if (s != NULL) { for (p = s; *p != EOS; p++) (void)printf("%c", (char)CHAR(*p)); (void)printf("\n"); for (p = s; *p != EOS; p++) (void)printf("%c", (isprot(*p) ? '\\' : ' ')); (void)printf("\n"); for (p = s; *p != EOS; p++) (void)printf("%c", (ismeta(*p) ? '_' : ' ')); (void)printf("\n"); } } #endif Index: user/alc/PQ_LAUNDRY/lib/libc/stdio/dprintf.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/stdio/dprintf.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/stdio/dprintf.c (revision 303642) @@ -1,46 +1,45 @@ /*- * Copyright (c) 2009 David Schultz * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); -#define _WITH_DPRINTF #include "namespace.h" #include #include #include "un-namespace.h" int dprintf(int fd, const char * __restrict fmt, ...) { va_list ap; int ret; va_start(ap, fmt); ret = vdprintf(fd, fmt, ap); va_end(ap); return (ret); } Index: user/alc/PQ_LAUNDRY/lib/libc/stdio/getline.3 =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/stdio/getline.3 (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/stdio/getline.3 (revision 303642) @@ -1,166 +1,137 @@ .\" Copyright (c) 2009 David Schultz .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd November 30, 2012 +.Dd July 30, 2016 .Dt GETLINE 3 .Os .Sh NAME .Nm getdelim , .Nm getline .Nd get a line from a stream .Sh LIBRARY .Lb libc .Sh SYNOPSIS -.Fd "#define _WITH_GETLINE" .In stdio.h .Ft ssize_t .Fn getdelim "char ** restrict linep" "size_t * restrict linecapp" "int delimiter" " FILE * restrict stream" .Ft ssize_t .Fn getline "char ** restrict linep" "size_t * restrict linecapp" " FILE * restrict stream" .Sh DESCRIPTION The .Fn getdelim function reads a line from .Fa stream , delimited by the character .Fa delimiter . The .Fn getline function is equivalent to .Fn getdelim with the newline character as the delimiter. The delimiter character is included as part of the line, unless the end of the file is reached. .Pp The caller may provide a pointer to a malloced buffer for the line in .Fa *linep , and the capacity of that buffer in .Fa *linecapp . These functions expand the buffer as needed, as if via .Fn realloc . If .Fa linep points to a .Dv NULL pointer, a new buffer will be allocated. In either case, .Fa *linep and .Fa *linecapp will be updated accordingly. 
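.Pp
As an illustration (the choice of a
.Dv NUL
delimiter and the variable names are arbitrary), records separated by
.Dv NUL
bytes can be read with
.Fn getdelim
as follows:
.Bd -literal -offset indent
char *rec = NULL;
size_t cap = 0;
ssize_t len;

/* The '\e0' delimiter is only an example; any byte value works. */
while ((len = getdelim(&rec, &cap, '\e0', stdin)) > 0)
	fwrite(rec, len, 1, stdout);
free(rec);
.Ed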
.Sh RETURN VALUES The .Fn getdelim and .Fn getline functions return the number of characters stored in the buffer, excluding the terminating .Dv NUL character. The value \-1 is returned if an error occurs, or if end-of-file is reached. .Sh EXAMPLES The following code fragment reads lines from a file and writes them to standard output. The .Fn fwrite function is used in case the line contains embedded .Dv NUL characters. .Bd -literal -offset indent char *line = NULL; size_t linecap = 0; ssize_t linelen; while ((linelen = getline(&line, &linecap, fp)) > 0) fwrite(line, linelen, 1, stdout); free(line); .Ed -.Sh COMPATIBILITY -Many application writers used the name -.Va getline -before the -.Fn getline -function was introduced in -.St -p1003.1 , -so a prototype is not provided by default in order to avoid -compatibility problems. -Applications that wish to use the -.Fn getline -function described herein should either request a strict -.St -p1003.1-2008 -environment by defining the macro -.Dv _POSIX_C_SOURCE -to the value 200809 or greater, or by defining the macro -.Dv _WITH_GETLINE , -prior to the inclusion of -.In stdio.h . -For compatibility with GNU libc, defining either -.Dv _BSD_SOURCE -or -.Dv _GNU_SOURCE -prior to the inclusion of -.In stdio.h -will also make -.Fn getline -available. .Sh ERRORS These functions may fail if: .Bl -tag -width Er .It Bq Er EINVAL Either .Fa linep or .Fa linecapp is .Dv NULL . .It Bq Er EOVERFLOW No delimiter was found in the first .Dv SSIZE_MAX characters. .El .Pp These functions may also fail due to any of the errors specified for .Fn fgets and .Fn malloc . .Sh SEE ALSO .Xr fgetln 3 , .Xr fgets 3 , .Xr malloc 3 .Sh STANDARDS The .Fn getdelim and .Fn getline functions conform to .St -p1003.1-2008 . .Sh HISTORY These routines first appeared in .Fx 8.0 . .Sh BUGS There are no wide character versions of .Fn getdelim or .Fn getline . Index: user/alc/PQ_LAUNDRY/lib/libc/stdio/getline.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/stdio/getline.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/stdio/getline.c (revision 303642) @@ -1,39 +1,38 @@ /*- * Copyright (c) 2009 David Schultz * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); -#define _WITH_GETLINE #include ssize_t getline(char ** __restrict linep, size_t * __restrict linecapp, FILE * __restrict fp) { return (getdelim(linep, linecapp, '\n', fp)); } Index: user/alc/PQ_LAUNDRY/lib/libc/stdio/printf.3 =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/stdio/printf.3 (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/stdio/printf.3 (revision 303642) @@ -1,920 +1,891 @@ .\" Copyright (c) 1990, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" This code is derived from software contributed to Berkeley by .\" Chris Torek and the American National Standards Committee X3, .\" on Information Processing Systems. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 4. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)printf.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd December 2, 2009 +.Dd July 30, 2016 .Dt PRINTF 3 .Os .Sh NAME .Nm printf , fprintf , sprintf , snprintf , asprintf , dprintf , .Nm vprintf , vfprintf, vsprintf , vsnprintf , vasprintf, vdprintf .Nd formatted output conversion .Sh LIBRARY .Lb libc .Sh SYNOPSIS -.Fd "#define _WITH_DPRINTF" .In stdio.h .Ft int .Fn printf "const char * restrict format" ... .Ft int .Fn fprintf "FILE * restrict stream" "const char * restrict format" ... .Ft int .Fn sprintf "char * restrict str" "const char * restrict format" ... .Ft int .Fn snprintf "char * restrict str" "size_t size" "const char * restrict format" ... .Ft int .Fn asprintf "char **ret" "const char *format" ... .Ft int .Fn dprintf "int fd" "const char * restrict format" ... 
.In stdarg.h .Ft int .Fn vprintf "const char * restrict format" "va_list ap" .Ft int .Fn vfprintf "FILE * restrict stream" "const char * restrict format" "va_list ap" .Ft int .Fn vsprintf "char * restrict str" "const char * restrict format" "va_list ap" .Ft int .Fn vsnprintf "char * restrict str" "size_t size" "const char * restrict format" "va_list ap" .Ft int .Fn vasprintf "char **ret" "const char *format" "va_list ap" .Ft int .Fn vdprintf "int fd" "const char * restrict format" "va_list ap" .Sh DESCRIPTION The .Fn printf family of functions produces output according to a .Fa format as described below. The .Fn printf and .Fn vprintf functions write output to .Dv stdout , the standard output stream; .Fn fprintf and .Fn vfprintf write output to the given output .Fa stream ; .Fn dprintf and .Fn vdprintf write output to the given file descriptor; .Fn sprintf , .Fn snprintf , .Fn vsprintf , and .Fn vsnprintf write to the character string .Fa str ; and .Fn asprintf and .Fn vasprintf dynamically allocate a new string with .Xr malloc 3 . .Pp These functions write the output under the control of a .Fa format string that specifies how subsequent arguments (or arguments accessed via the variable-length argument facilities of .Xr stdarg 3 ) are converted for output. .Pp The .Fn asprintf and .Fn vasprintf functions set .Fa *ret to be a pointer to a buffer sufficiently large to hold the formatted string. This pointer should be passed to .Xr free 3 to release the allocated storage when it is no longer needed. If sufficient space cannot be allocated, .Fn asprintf and .Fn vasprintf will return \-1 and set .Fa ret to be a .Dv NULL pointer. .Pp The .Fn snprintf and .Fn vsnprintf functions will write at most .Fa size Ns \-1 of the characters printed into the output string (the .Fa size Ns 'th character then gets the terminating .Ql \e0 ) ; if the return value is greater than or equal to the .Fa size argument, the string was too short and some of the printed characters were discarded. The output is always null-terminated, unless .Fa size is 0. .Pp The .Fn sprintf and .Fn vsprintf functions effectively assume a .Fa size of .Dv INT_MAX + 1. .Pp The format string is composed of zero or more directives: ordinary .\" multibyte characters (not .Cm % ) , which are copied unchanged to the output stream; and conversion specifications, each of which results in fetching zero or more subsequent arguments. Each conversion specification is introduced by the .Cm % character. The arguments must correspond properly (after type promotion) with the conversion specifier. After the .Cm % , the following appear in sequence: .Bl -bullet .It An optional field, consisting of a decimal digit string followed by a .Cm $ , specifying the next argument to access. If this field is not provided, the argument following the last argument accessed will be used. Arguments are numbered starting at .Cm 1 . If unaccessed arguments in the format string are interspersed with ones that are accessed the results will be indeterminate. .It Zero or more of the following flags: .Bl -tag -width ".So \ Sc (space)" .It Sq Cm # The value should be converted to an .Dq alternate form . For .Cm c , d , i , n , p , s , and .Cm u conversions, this option has no effect. For .Cm o conversions, the precision of the number is increased to force the first character of the output string to a zero. For .Cm x and .Cm X conversions, a non-zero result has the string .Ql 0x (or .Ql 0X for .Cm X conversions) prepended to it. 
For .Cm a , A , e , E , f , F , g , and .Cm G conversions, the result will always contain a decimal point, even if no digits follow it (normally, a decimal point appears in the results of those conversions only if a digit follows). For .Cm g and .Cm G conversions, trailing zeros are not removed from the result as they would otherwise be. .It So Cm 0 Sc (zero) Zero padding. For all conversions except .Cm n , the converted value is padded on the left with zeros rather than blanks. If a precision is given with a numeric conversion .Cm ( d , i , o , u , i , x , and .Cm X ) , the .Cm 0 flag is ignored. .It Sq Cm \- A negative field width flag; the converted value is to be left adjusted on the field boundary. Except for .Cm n conversions, the converted value is padded on the right with blanks, rather than on the left with blanks or zeros. A .Cm \- overrides a .Cm 0 if both are given. .It So "\ " Sc (space) A blank should be left before a positive number produced by a signed conversion .Cm ( a , A , d , e , E , f , F , g , G , or .Cm i ) . .It Sq Cm + A sign must always be placed before a number produced by a signed conversion. A .Cm + overrides a space if both are used. .It So "'" Sc (apostrophe) Decimal conversions .Cm ( d , u , or .Cm i ) or the integral portion of a floating point conversion .Cm ( f or .Cm F ) should be grouped and separated by thousands using the non-monetary separator returned by .Xr localeconv 3 . .El .It An optional decimal digit string specifying a minimum field width. If the converted value has fewer characters than the field width, it will be padded with spaces on the left (or right, if the left-adjustment flag has been given) to fill out the field width. .It An optional precision, in the form of a period .Cm \&. followed by an optional digit string. If the digit string is omitted, the precision is taken as zero. This gives the minimum number of digits to appear for .Cm d , i , o , u , x , and .Cm X conversions, the number of digits to appear after the decimal-point for .Cm a , A , e , E , f , and .Cm F conversions, the maximum number of significant digits for .Cm g and .Cm G conversions, or the maximum number of characters to be printed from a string for .Cm s conversions. .It An optional length modifier, that specifies the size of the argument. The following length modifiers are valid for the .Cm d , i , n , o , u , x , or .Cm X conversion: .Bl -column ".Cm q Em (deprecated)" ".Vt signed char" ".Vt unsigned long long" ".Vt long long *" .It Sy Modifier Ta Cm d , i Ta Cm o , u , x , X Ta Cm n .It Cm hh Ta Vt "signed char" Ta Vt "unsigned char" Ta Vt "signed char *" .It Cm h Ta Vt short Ta Vt "unsigned short" Ta Vt "short *" .It Cm l No (ell) Ta Vt long Ta Vt "unsigned long" Ta Vt "long *" .It Cm ll No (ell ell) Ta Vt "long long" Ta Vt "unsigned long long" Ta Vt "long long *" .It Cm j Ta Vt intmax_t Ta Vt uintmax_t Ta Vt "intmax_t *" .It Cm t Ta Vt ptrdiff_t Ta (see note) Ta Vt "ptrdiff_t *" .It Cm z Ta (see note) Ta Vt size_t Ta (see note) .It Cm q Em (deprecated) Ta Vt quad_t Ta Vt u_quad_t Ta Vt "quad_t *" .El .Pp Note: the .Cm t modifier, when applied to a .Cm o , u , x , or .Cm X conversion, indicates that the argument is of an unsigned type equivalent in size to a .Vt ptrdiff_t . The .Cm z modifier, when applied to a .Cm d or .Cm i conversion, indicates that the argument is of a signed type equivalent in size to a .Vt size_t . 
Similarly, when applied to an .Cm n conversion, it indicates that the argument is a pointer to a signed type equivalent in size to a .Vt size_t . .Pp The following length modifier is valid for the .Cm a , A , e , E , f , F , g , or .Cm G conversion: .Bl -column ".Sy Modifier" ".Cm a , A , e , E , f , F , g , G" .It Sy Modifier Ta Cm a , A , e , E , f , F , g , G .It Cm l No (ell) Ta Vt double (ignored, same behavior as without it) .It Cm L Ta Vt "long double" .El .Pp The following length modifier is valid for the .Cm c or .Cm s conversion: .Bl -column ".Sy Modifier" ".Vt wint_t" ".Vt wchar_t *" .It Sy Modifier Ta Cm c Ta Cm s .It Cm l No (ell) Ta Vt wint_t Ta Vt "wchar_t *" .El .It A character that specifies the type of conversion to be applied. .El .Pp A field width or precision, or both, may be indicated by an asterisk .Ql * or an asterisk followed by one or more decimal digits and a .Ql $ instead of a digit string. In this case, an .Vt int argument supplies the field width or precision. A negative field width is treated as a left adjustment flag followed by a positive field width; a negative precision is treated as though it were missing. If a single format directive mixes positional .Pq Li nn$ and non-positional arguments, the results are undefined. .Pp The conversion specifiers and their meanings are: .Bl -tag -width ".Cm diouxX" .It Cm diouxX The .Vt int (or appropriate variant) argument is converted to signed decimal .Cm ( d and .Cm i ) , unsigned octal .Pq Cm o , unsigned decimal .Pq Cm u , or unsigned hexadecimal .Cm ( x and .Cm X ) notation. The letters .Dq Li abcdef are used for .Cm x conversions; the letters .Dq Li ABCDEF are used for .Cm X conversions. The precision, if any, gives the minimum number of digits that must appear; if the converted value requires fewer digits, it is padded on the left with zeros. .It Cm DOU The .Vt "long int" argument is converted to signed decimal, unsigned octal, or unsigned decimal, as if the format had been .Cm ld , lo , or .Cm lu respectively. These conversion characters are deprecated, and will eventually disappear. .It Cm eE The .Vt double argument is rounded and converted in the style .Sm off .Oo \- Oc Ar d Li \&. Ar ddd Li e \(+- Ar dd .Sm on where there is one digit before the decimal-point character and the number of digits after it is equal to the precision; if the precision is missing, it is taken as 6; if the precision is zero, no decimal-point character appears. An .Cm E conversion uses the letter .Ql E (rather than .Ql e ) to introduce the exponent. The exponent always contains at least two digits; if the value is zero, the exponent is 00. .Pp For .Cm a , A , e , E , f , F , g , and .Cm G conversions, positive and negative infinity are represented as .Li inf and .Li -inf respectively when using the lowercase conversion character, and .Li INF and .Li -INF respectively when using the uppercase conversion character. Similarly, NaN is represented as .Li nan when using the lowercase conversion, and .Li NAN when using the uppercase conversion. .It Cm fF The .Vt double argument is rounded and converted to decimal notation in the style .Sm off .Oo \- Oc Ar ddd Li \&. Ar ddd , .Sm on where the number of digits after the decimal-point character is equal to the precision specification. If the precision is missing, it is taken as 6; if the precision is explicitly zero, no decimal-point character appears. If a decimal point appears, at least one digit appears before it. 
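.Pp
As a brief, purely illustrative sketch of the precision rules described
above for the
.Cm e
and
.Cm f
conversions (the commented output is what those rules imply for an
IEEE double):
.Bd -literal -offset indent
#include <stdio.h>

int
main(void)
{
	printf("%e\en", 123.456);	/* 1.234560e+02 (default precision 6) */
	printf("%.2e\en", 123.456);	/* 1.23e+02 */
	printf("%f\en", 123.456);	/* 123.456000 */
	printf("%.0f\en", 123.456);	/* 123 (no decimal-point character) */
	return (0);
}
.Ed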
.It Cm gG The .Vt double argument is converted in style .Cm f or .Cm e (or .Cm F or .Cm E for .Cm G conversions). The precision specifies the number of significant digits. If the precision is missing, 6 digits are given; if the precision is zero, it is treated as 1. Style .Cm e is used if the exponent from its conversion is less than \-4 or greater than or equal to the precision. Trailing zeros are removed from the fractional part of the result; a decimal point appears only if it is followed by at least one digit. .It Cm aA The .Vt double argument is rounded and converted to hexadecimal notation in the style .Sm off .Oo \- Oc Li 0x Ar h Li \&. Ar hhhp Oo \(+- Oc Ar d , .Sm on where the number of digits after the hexadecimal-point character is equal to the precision specification. If the precision is missing, it is taken as enough to represent the floating-point number exactly, and no rounding occurs. If the precision is zero, no hexadecimal-point character appears. The .Cm p is a literal character .Ql p , and the exponent consists of a positive or negative sign followed by a decimal number representing an exponent of 2. The .Cm A conversion uses the prefix .Dq Li 0X (rather than .Dq Li 0x ) , the letters .Dq Li ABCDEF (rather than .Dq Li abcdef ) to represent the hex digits, and the letter .Ql P (rather than .Ql p ) to separate the mantissa and exponent. .Pp Note that there may be multiple valid ways to represent floating-point numbers in this hexadecimal format. For example, .Li 0x1.92p+1 , 0x3.24p+0 , 0x6.48p-1 , and .Li 0xc.9p-2 are all equivalent. .Fx 8.0 and later always prints finite non-zero numbers using .Ql 1 as the digit before the hexadecimal point. Zeroes are always represented with a mantissa of 0 (preceded by a .Ql - if appropriate) and an exponent of .Li +0 . .It Cm C Treated as .Cm c with the .Cm l (ell) modifier. .It Cm c The .Vt int argument is converted to an .Vt "unsigned char" , and the resulting character is written. .Pp If the .Cm l (ell) modifier is used, the .Vt wint_t argument shall be converted to a .Vt wchar_t , and the (potentially multi-byte) sequence representing the single wide character is written, including any shift sequences. If a shift sequence is used, the shift state is also restored to the original state after the character. .It Cm S Treated as .Cm s with the .Cm l (ell) modifier. .It Cm s The .Vt "char *" argument is expected to be a pointer to an array of character type (pointer to a string). Characters from the array are written up to (but not including) a terminating .Dv NUL character; if a precision is specified, no more than the number specified are written. If a precision is given, no null character need be present; if the precision is not specified, or is greater than the size of the array, the array must contain a terminating .Dv NUL character. .Pp If the .Cm l (ell) modifier is used, the .Vt "wchar_t *" argument is expected to be a pointer to an array of wide characters (pointer to a wide string). For each wide character in the string, the (potentially multi-byte) sequence representing the wide character is written, including any shift sequences. If any shift sequence is used, the shift state is also restored to the original state after the string. Wide characters from the array are written up to (but not including) a terminating wide .Dv NUL character; if a precision is specified, no more than the number of bytes specified are written (including shift sequences). Partial characters are never written. 
If a precision is given, no null character need be present; if the precision is not specified, or is greater than the number of bytes required to render the multibyte representation of the string, the array must contain a terminating wide .Dv NUL character. .It Cm p The .Vt "void *" pointer argument is printed in hexadecimal (as if by .Ql %#x or .Ql %#lx ) . .It Cm n The number of characters written so far is stored into the integer indicated by the .Vt "int *" (or variant) pointer argument. No argument is converted. .It Cm % A .Ql % is written. No argument is converted. The complete conversion specification is .Ql %% . .El .Pp The decimal point character is defined in the program's locale (category .Dv LC_NUMERIC ) . .Pp In no case does a non-existent or small field width cause truncation of a numeric field; if the result of a conversion is wider than the field width, the field is expanded to contain the conversion result. .Sh RETURN VALUES These functions return the number of characters printed (not including the trailing .Ql \e0 used to end output to strings), except for .Fn snprintf and .Fn vsnprintf , which return the number of characters that would have been printed if the .Fa size were unlimited (again, not including the final .Ql \e0 ) . These functions return a negative value if an error occurs. .Sh EXAMPLES To print a date and time in the form .Dq Li "Sunday, July 3, 10:02" , where .Fa weekday and .Fa month are pointers to strings: .Bd -literal -offset indent #include fprintf(stdout, "%s, %s %d, %.2d:%.2d\en", weekday, month, day, hour, min); .Ed .Pp To print \*(Pi to five decimal places: .Bd -literal -offset indent #include #include fprintf(stdout, "pi = %.5f\en", 4 * atan(1.0)); .Ed .Pp To allocate a 128 byte string and print into it: .Bd -literal -offset indent #include #include #include char *newfmt(const char *fmt, ...) { char *p; va_list ap; if ((p = malloc(128)) == NULL) return (NULL); va_start(ap, fmt); (void) vsnprintf(p, 128, fmt, ap); va_end(ap); return (p); } .Ed .Sh COMPATIBILITY -Many application writers used the name -.Va dprintf -before the -.Fn dprintf -function was introduced in -.St -p1003.1 , -so a prototype is not provided by default in order to avoid -compatibility problems. -Applications that wish to use the -.Fn dprintf -function described herein should either request a strict -.St -p1003.1-2008 -environment by defining the macro -.Dv _POSIX_C_SOURCE -to the value 200809 or greater, or by defining the macro -.Dv _WITH_DPRINTF , -prior to the inclusion of -.In stdio.h . -For compatibility with GNU libc, defining either -.Dv _BSD_SOURCE -or -.Dv _GNU_SOURCE -prior to the inclusion of -.In stdio.h -will also make -.Fn dprintf -available. -.Pp The conversion formats .Cm \&%D , \&%O , and .Cm \&%U are not standard and are provided only for backward compatibility. The effect of padding the .Cm %p format with zeros (either by the .Cm 0 flag or by specifying a precision), and the benign effect (i.e., none) of the .Cm # flag on .Cm %n and .Cm %p conversions, as well as other nonsensical combinations such as .Cm %Ld , are not standard; such combinations should be avoided. .Sh ERRORS In addition to the errors documented for the .Xr write 2 system call, the .Fn printf family of functions may fail if: .Bl -tag -width Er .It Bq Er EILSEQ An invalid wide character code was encountered. .It Bq Er ENOMEM Insufficient storage space is available. 
.It Bq Er EOVERFLOW The .Fa size argument exceeds .Dv INT_MAX + 1 , or the return value would be too large to be represented by an .Vt int . .El .Sh SEE ALSO .Xr printf 1 , .Xr fmtcheck 3 , .Xr scanf 3 , .Xr setlocale 3 , .Xr wprintf 3 .Sh STANDARDS Subject to the caveats noted in the .Sx BUGS section below, the .Fn fprintf , .Fn printf , .Fn sprintf , .Fn vprintf , .Fn vfprintf , and .Fn vsprintf functions conform to .St -ansiC and .St -isoC-99 . With the same reservation, the .Fn snprintf and .Fn vsnprintf functions conform to .St -isoC-99 , while .Fn dprintf and .Fn vdprintf conform to .St -p1003.1-2008 . .Sh HISTORY The functions .Fn asprintf and .Fn vasprintf first appeared in the .Tn GNU C library. These were implemented by .An Peter Wemm Aq Mt peter@FreeBSD.org in .Fx 2.2 , but were later replaced with a different implementation from .Ox 2.3 by .An Todd C. Miller Aq Mt Todd.Miller@courtesan.com . The .Fn dprintf and .Fn vdprintf functions were added in .Fx 8.0 . .Sh BUGS The .Nm family of functions do not correctly handle multibyte characters in the .Fa format argument. .Sh SECURITY CONSIDERATIONS The .Fn sprintf and .Fn vsprintf functions are easily misused in a manner which enables malicious users to arbitrarily change a running program's functionality through a buffer overflow attack. Because .Fn sprintf and .Fn vsprintf assume an infinitely long string, callers must be careful not to overflow the actual space; this is often hard to assure. For safety, programmers should use the .Fn snprintf interface instead. For example: .Bd -literal void foo(const char *arbitrary_string, const char *and_another) { char onstack[8]; #ifdef BAD /* * This first sprintf is bad behavior. Do not use sprintf! */ sprintf(onstack, "%s, %s", arbitrary_string, and_another); #else /* * The following two lines demonstrate better use of * snprintf(). */ snprintf(onstack, sizeof(onstack), "%s, %s", arbitrary_string, and_another); #endif } .Ed .Pp The .Fn printf and .Fn sprintf family of functions are also easily misused in a manner allowing malicious users to arbitrarily change a running program's functionality by either causing the program to print potentially sensitive data .Dq "left on the stack" , or causing it to generate a memory fault or bus error by dereferencing an invalid pointer. .Pp .Cm %n can be used to write arbitrary data to potentially carefully-selected addresses. Programmers are therefore strongly advised to never pass untrusted strings as the .Fa format argument, as an attacker can put format specifiers in the string to mangle your stack, leading to a possible security hole. This holds true even if the string was built using a function like .Fn snprintf , as the resulting string may still contain user-supplied conversion specifiers for later interpolation by .Fn printf . .Pp Always use the proper secure idiom: .Pp .Dl "snprintf(buffer, sizeof(buffer), \*q%s\*q, string);" Index: user/alc/PQ_LAUNDRY/lib/libc/tests/stdio/getdelim_test.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/tests/stdio/getdelim_test.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/tests/stdio/getdelim_test.c (revision 303642) @@ -1,236 +1,235 @@ /*- * Copyright (c) 2009 David Schultz * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); -#define _WITH_GETLINE #include #include #include #include #include #define CHUNK_MAX 10 /* The assertions depend on this string. */ char apothegm[] = "All work and no play\0 makes Jack a dull boy.\n"; /* * This is a neurotic reader function designed to give getdelim() a * hard time. It reads through the string `apothegm' and returns a * random number of bytes up to the requested length. */ static int _reader(void *cookie, char *buf, int len) { size_t *offp = cookie; size_t r; r = random() % CHUNK_MAX + 1; if (len > r) len = r; if (len > sizeof(apothegm) - *offp) len = sizeof(apothegm) - *offp; memcpy(buf, apothegm + *offp, len); *offp += len; return (len); } static FILE * mkfilebuf(void) { size_t *offp; offp = malloc(sizeof(*offp)); /* XXX leak */ *offp = 0; return (fropen(offp, _reader)); } ATF_TC_WITHOUT_HEAD(getline_basic); ATF_TC_BODY(getline_basic, tc) { FILE *fp; char *line; size_t linecap; int i; srandom(0); /* * Test multiple times with different buffer sizes * and different _reader() return values. */ errno = 0; for (i = 0; i < 8; i++) { fp = mkfilebuf(); linecap = i; line = malloc(i); /* First line: the full apothegm */ ATF_REQUIRE(getline(&line, &linecap, fp) == sizeof(apothegm) - 1); ATF_REQUIRE(memcmp(line, apothegm, sizeof(apothegm)) == 0); ATF_REQUIRE(linecap >= sizeof(apothegm)); /* Second line: the NUL terminator following the newline */ ATF_REQUIRE(getline(&line, &linecap, fp) == 1); ATF_REQUIRE(line[0] == '\0' && line[1] == '\0'); /* Third line: EOF */ line[0] = 'X'; ATF_REQUIRE(getline(&line, &linecap, fp) == -1); ATF_REQUIRE(line[0] == '\0'); free(line); line = NULL; ATF_REQUIRE(feof(fp)); ATF_REQUIRE(!ferror(fp)); fclose(fp); } ATF_REQUIRE(errno == 0); } ATF_TC_WITHOUT_HEAD(stream_error); ATF_TC_BODY(stream_error, tc) { char *line; size_t linecap; /* Make sure read errors are handled properly. */ line = NULL; linecap = 0; errno = 0; ATF_REQUIRE(getline(&line, &linecap, stdout) == -1); ATF_REQUIRE(errno == EBADF); errno = 0; ATF_REQUIRE(getdelim(&line, &linecap, 'X', stdout) == -1); ATF_REQUIRE(errno == EBADF); ATF_REQUIRE(ferror(stdout)); } ATF_TC_WITHOUT_HEAD(invalid_params); ATF_TC_BODY(invalid_params, tc) { FILE *fp; char *line; size_t linecap; /* Make sure NULL linep or linecapp pointers are handled. 
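	 * The assertions below expect the implementation to reject a NULL
	 * linep or linecapp by returning -1 with errno set to EINVAL and
	 * by setting the stream's error indicator.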
*/ fp = mkfilebuf(); ATF_REQUIRE(getline(NULL, &linecap, fp) == -1); ATF_REQUIRE(errno == EINVAL); ATF_REQUIRE(getline(&line, NULL, fp) == -1); ATF_REQUIRE(errno == EINVAL); ATF_REQUIRE(ferror(fp)); fclose(fp); } ATF_TC_WITHOUT_HEAD(eof); ATF_TC_BODY(eof, tc) { FILE *fp; char *line; size_t linecap; /* Make sure getline() allocates memory as needed if fp is at EOF. */ errno = 0; fp = mkfilebuf(); while (!feof(fp)) /* advance to EOF; can't fseek this stream */ getc(fp); line = NULL; linecap = 0; printf("getline\n"); ATF_REQUIRE(getline(&line, &linecap, fp) == -1); ATF_REQUIRE(line[0] == '\0'); ATF_REQUIRE(linecap > 0); ATF_REQUIRE(errno == 0); printf("feof\n"); ATF_REQUIRE(feof(fp)); ATF_REQUIRE(!ferror(fp)); fclose(fp); } ATF_TC_WITHOUT_HEAD(nul); ATF_TC_BODY(nul, tc) { FILE *fp; char *line; size_t linecap, n; errno = 0; line = NULL; linecap = 0; /* Make sure a NUL delimiter works. */ fp = mkfilebuf(); n = strlen(apothegm); printf("getdelim\n"); ATF_REQUIRE(getdelim(&line, &linecap, '\0', fp) == n + 1); ATF_REQUIRE(strcmp(line, apothegm) == 0); ATF_REQUIRE(line[n + 1] == '\0'); ATF_REQUIRE(linecap > n + 1); n = strlen(apothegm + n + 1); printf("getdelim 2\n"); ATF_REQUIRE(getdelim(&line, &linecap, '\0', fp) == n + 1); ATF_REQUIRE(line[n + 1] == '\0'); ATF_REQUIRE(linecap > n + 1); ATF_REQUIRE(errno == 0); ATF_REQUIRE(!ferror(fp)); fclose(fp); } ATF_TC_WITHOUT_HEAD(empty_NULL_buffer); ATF_TC_BODY(empty_NULL_buffer, tc) { FILE *fp; char *line; size_t linecap; /* Make sure NULL *linep and zero *linecapp are handled. */ fp = mkfilebuf(); line = NULL; linecap = 42; ATF_REQUIRE(getline(&line, &linecap, fp) == sizeof(apothegm) - 1); ATF_REQUIRE(memcmp(line, apothegm, sizeof(apothegm)) == 0); fp = mkfilebuf(); free(line); line = malloc(100); linecap = 0; ATF_REQUIRE(getline(&line, &linecap, fp) == sizeof(apothegm) - 1); ATF_REQUIRE(memcmp(line, apothegm, sizeof(apothegm)) == 0); free(line); ATF_REQUIRE(!ferror(fp)); fclose(fp); } ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, getline_basic); ATF_TP_ADD_TC(tp, stream_error); ATF_TP_ADD_TC(tp, eof); ATF_TP_ADD_TC(tp, invalid_params); ATF_TP_ADD_TC(tp, nul); ATF_TP_ADD_TC(tp, empty_NULL_buffer); return (atf_no_error()); } Index: user/alc/PQ_LAUNDRY/lib/libproc/Makefile =================================================================== --- user/alc/PQ_LAUNDRY/lib/libproc/Makefile (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libproc/Makefile (revision 303642) @@ -1,47 +1,47 @@ # $FreeBSD$ .include PACKAGE=lib${LIB} LIB= proc SRCS= proc_bkpt.c \ proc_create.c \ proc_regs.c \ proc_sym.c \ proc_rtld.c \ proc_util.c INCS= libproc.h CFLAGS+= -I${.CURDIR} .if ${MK_CXX} == "no" CFLAGS+= -DNO_CXA_DEMANGLE .elif ${MK_LIBCPLUSPLUS} != "no" LIBADD+= cxxrt .else LIBADD+= supcplusplus .endif -LIBADD+= elf rtld_db util +LIBADD+= elf procstat rtld_db util .if ${MK_CDDL} != "no" LIBADD+= ctf IGNORE_PRAGMA= YES CFLAGS+= -I${.CURDIR}/../../cddl/contrib/opensolaris/lib/libctf/common \ -I${.CURDIR}/../../sys/cddl/contrib/opensolaris/uts/common \ -I${.CURDIR}/../../sys/cddl/compat/opensolaris .else CFLAGS+= -DNO_CTF .endif SHLIB_MAJOR= 3 MAN= .if ${MK_TESTS} != "no" SUBDIR+= tests .endif .include Index: user/alc/PQ_LAUNDRY/lib/libproc/_libproc.h =================================================================== --- user/alc/PQ_LAUNDRY/lib/libproc/_libproc.h (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libproc/_libproc.h (revision 303642) @@ -1,59 +1,60 @@ /*- * Copyright (c) 2008 John Birrell (jb@freebsd.org) * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ -#include -#include #include -#include #include + #include #include "libproc.h" +struct procstat; + struct proc_handle { pid_t pid; /* Process ID. */ - int kq; /* Kernel event queue ID. */ int flags; /* Process flags. */ int status; /* Process status (PS_*). */ int wstat; /* Process wait status. */ + int model; /* Process data model. */ rd_agent_t *rdap; /* librtld_db agent */ - rd_loadobj_t *rdobjs; - size_t rdobjsz; - size_t nobjs; - struct lwpstatus lwps; - rd_loadobj_t *rdexec; /* rdobj index of program executable. */ - char execname[MAXPATHLEN]; /* Path to program executable. */ + rd_loadobj_t *rdobjs; /* Array of loaded objects. */ + size_t rdobjsz; /* Array size. */ + size_t nobjs; /* Num. objects currently loaded. */ + rd_loadobj_t *rdexec; /* rdobj for program executable. */ + struct lwpstatus lwps; /* Process status. */ + struct procstat *procstat; /* libprocstat handle. */ + char execpath[MAXPATHLEN]; /* Path to program executable. */ }; #ifdef DEBUG #define DPRINTF(...) warn(__VA_ARGS__) #define DPRINTFX(...) warnx(__VA_ARGS__) #else #define DPRINTF(...) do { } while (0) #define DPRINTFX(...) do { } while (0) #endif Index: user/alc/PQ_LAUNDRY/lib/libproc/libproc.h =================================================================== --- user/alc/PQ_LAUNDRY/lib/libproc/libproc.h (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libproc/libproc.h (revision 303642) @@ -1,157 +1,161 @@ /*- * Copyright (c) 2010 The FreeBSD Foundation * Copyright (c) 2008 John Birrell (jb@freebsd.org) * All rights reserved. * * Portions of this software were developed by Rui Paulo under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LIBPROC_H_ #define _LIBPROC_H_ #include #include #include struct ctf_file; struct proc_handle; typedef void (*proc_child_func)(void *); /* Values returned by proc_state(). */ #define PS_IDLE 1 #define PS_STOP 2 #define PS_RUN 3 #define PS_UNDEAD 4 #define PS_DEAD 5 #define PS_LOST 6 /* Reason values for proc_detach(). */ #define PRELEASE_HANG 1 #define PRELEASE_KILL 2 typedef struct prmap { uintptr_t pr_vaddr; /* Virtual address. */ size_t pr_size; /* Mapping size in bytes */ size_t pr_offset; /* Mapping offset in object */ char pr_mapname[PATH_MAX]; /* Mapping filename */ uint8_t pr_mflags; /* Protection flags */ #define MA_READ 0x01 #define MA_WRITE 0x02 #define MA_EXEC 0x04 #define MA_COW 0x08 #define MA_NEEDS_COPY 0x10 #define MA_NOCOREDUMP 0x20 } prmap_t; typedef struct prsyminfo { u_int prs_lmid; /* Map id. */ u_int prs_id; /* Symbol id. */ } prsyminfo_t; typedef int proc_map_f(void *, const prmap_t *, const char *); typedef int proc_sym_f(void *, const GElf_Sym *, const char *); /* Values for ELF sections */ #define PR_SYMTAB 1 #define PR_DYNSYM 2 /* Values for the 'mask' parameter in the iteration functions */ #define BIND_LOCAL 0x0001 #define BIND_GLOBAL 0x0002 #define BIND_WEAK 0x0004 #define BIND_ANY (BIND_LOCAL|BIND_GLOBAL|BIND_WEAK) #define TYPE_NOTYPE 0x0100 #define TYPE_OBJECT 0x0200 #define TYPE_FUNC 0x0400 #define TYPE_SECTION 0x0800 #define TYPE_FILE 0x1000 #define TYPE_ANY (TYPE_NOTYPE|TYPE_OBJECT|TYPE_FUNC|TYPE_SECTION|\ TYPE_FILE) typedef enum { REG_PC, REG_SP, REG_RVAL1, REG_RVAL2 } proc_reg_t; #define SIG2STR_MAX 8 typedef struct lwpstatus { int pr_why; #define PR_REQUESTED 1 #define PR_FAULTED 2 #define PR_SYSENTRY 3 #define PR_SYSEXIT 4 #define PR_SIGNALLED 5 int pr_what; #define FLTBPT -1 } lwpstatus_t; +#define PR_MODEL_ILP32 1 +#define PR_MODEL_LP64 2 + /* Function prototype definitions. 
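 *
 * proc_getmodel() reports the data model of the traced process as
 * PR_MODEL_ILP32 or PR_MODEL_LP64 (defined above).  A minimal,
 * illustrative sketch of attaching to a process and querying its
 * model; "pid" is a hypothetical pid_t and error handling is elided:
 *
 *	struct proc_handle *phdl;
 *
 *	if (proc_attach(pid, 0, &phdl) == 0) {
 *		if (proc_getmodel(phdl) == PR_MODEL_LP64)
 *			printf("%d is an LP64 process\n", (int)pid);
 *		proc_detach(phdl, PRELEASE_HANG);
 *		proc_free(phdl);
 *	}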
*/ __BEGIN_DECLS prmap_t *proc_addr2map(struct proc_handle *, uintptr_t); prmap_t *proc_name2map(struct proc_handle *, const char *); char *proc_objname(struct proc_handle *, uintptr_t, char *, size_t); prmap_t *proc_obj2map(struct proc_handle *, const char *); int proc_iter_objs(struct proc_handle *, proc_map_f *, void *); int proc_iter_symbyaddr(struct proc_handle *, const char *, int, int, proc_sym_f *, void *); int proc_addr2sym(struct proc_handle *, uintptr_t, char *, size_t, GElf_Sym *); int proc_attach(pid_t pid, int flags, struct proc_handle **pphdl); int proc_continue(struct proc_handle *); int proc_clearflags(struct proc_handle *, int); int proc_create(const char *, char * const *, proc_child_func *, void *, struct proc_handle **); int proc_detach(struct proc_handle *, int); int proc_getflags(struct proc_handle *); int proc_name2sym(struct proc_handle *, const char *, const char *, GElf_Sym *, prsyminfo_t *); struct ctf_file *proc_name2ctf(struct proc_handle *, const char *); int proc_setflags(struct proc_handle *, int); int proc_state(struct proc_handle *); +int proc_getmodel(struct proc_handle *); pid_t proc_getpid(struct proc_handle *); int proc_wstatus(struct proc_handle *); int proc_getwstat(struct proc_handle *); char * proc_signame(int, char *, size_t); int proc_read(struct proc_handle *, void *, size_t, size_t); const lwpstatus_t *proc_getlwpstatus(struct proc_handle *); void proc_free(struct proc_handle *); rd_agent_t *proc_rdagent(struct proc_handle *); void proc_updatesyms(struct proc_handle *); int proc_bkptset(struct proc_handle *, uintptr_t, unsigned long *); int proc_bkptdel(struct proc_handle *, uintptr_t, unsigned long); void proc_bkptregadj(unsigned long *); int proc_bkptexec(struct proc_handle *, unsigned long); int proc_regget(struct proc_handle *, proc_reg_t, unsigned long *); int proc_regset(struct proc_handle *, proc_reg_t, unsigned long); __END_DECLS #endif /* !_LIBPROC_H_ */ Index: user/alc/PQ_LAUNDRY/lib/libproc/proc_create.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/libproc/proc_create.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libproc/proc_create.c (revision 303642) @@ -1,189 +1,236 @@ /*- * Copyright (c) 2008 John Birrell (jb@freebsd.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include #include +#include #include #include #include #include #include #include #include #include +#include +#include + #include "_libproc.h" -static int proc_init(pid_t, int, int, struct proc_handle *); +static int getelfclass(int); +static int proc_init(pid_t, int, int, struct proc_handle **); static int -proc_init(pid_t pid, int flags, int status, struct proc_handle *phdl) +getelfclass(int fd) { - int mib[4], error; - size_t len; + GElf_Ehdr ehdr; + Elf *e; + int class; + class = ELFCLASSNONE; + + if ((e = elf_begin(fd, ELF_C_READ, NULL)) == NULL) + goto out; + if (gelf_getehdr(e, &ehdr) == NULL) + goto out; + class = ehdr.e_ident[EI_CLASS]; +out: + (void)elf_end(e); + return (class); +} + +static int +proc_init(pid_t pid, int flags, int status, struct proc_handle **pphdl) +{ + struct kinfo_proc *kp; + struct proc_handle *phdl; + int error, class, count, fd; + + *pphdl = NULL; + if ((phdl = malloc(sizeof(*phdl))) == NULL) + return (ENOMEM); + memset(phdl, 0, sizeof(*phdl)); phdl->pid = pid; phdl->flags = flags; phdl->status = status; + phdl->procstat = procstat_open_sysctl(); + if (phdl->procstat == NULL) + return (ENOMEM); - mib[0] = CTL_KERN; - mib[1] = KERN_PROC; - mib[2] = KERN_PROC_PATHNAME; - mib[3] = pid; - len = sizeof(phdl->execname); - if (sysctl(mib, 4, phdl->execname, &len, NULL, 0) != 0) { - error = errno; - DPRINTF("ERROR: cannot get pathname for child process %d", pid); + /* Obtain a path to the executable. */ + if ((kp = procstat_getprocs(phdl->procstat, KERN_PROC_PID, pid, + &count)) == NULL) + return (ENOMEM); + error = procstat_getpathname(phdl->procstat, kp, phdl->execpath, + sizeof(phdl->execpath)); + procstat_freeprocs(phdl->procstat, kp); + if (error != 0) return (error); + + /* Use it to determine the data model for the process. */ + if ((fd = open(phdl->execpath, O_RDONLY)) < 0) { + error = errno; + goto out; } - if (len == 0) - phdl->execname[0] = '\0'; + class = getelfclass(fd); + switch (class) { + case ELFCLASS64: + phdl->model = PR_MODEL_LP64; + break; + case ELFCLASS32: + phdl->model = PR_MODEL_ILP32; + break; + case ELFCLASSNONE: + default: + error = EINVAL; + break; + } + (void)close(fd); - return (0); +out: + *pphdl = phdl; + return (error); } int proc_attach(pid_t pid, int flags, struct proc_handle **pphdl) { struct proc_handle *phdl; - int error = 0; - int status; + int error, status; if (pid == 0 || pid == getpid()) return (EINVAL); + if (elf_version(EV_CURRENT) == EV_NONE) + return (ENOENT); /* * Allocate memory for the process handle, a structure containing * all things related to the process. */ - if ((phdl = malloc(sizeof(struct proc_handle))) == NULL) - return (ENOMEM); - - elf_version(EV_CURRENT); - - error = proc_init(pid, flags, PS_RUN, phdl); + error = proc_init(pid, flags, PS_RUN, &phdl); if (error != 0) goto out; if (ptrace(PT_ATTACH, phdl->pid, 0, 0) != 0) { error = errno; DPRINTF("ERROR: cannot ptrace child process %d", pid); goto out; } /* Wait for the child process to stop. */ if (waitpid(pid, &status, WUNTRACED) == -1) { error = errno; DPRINTF("ERROR: child process %d didn't stop as expected", pid); goto out; } /* Check for an unexpected status. 
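	 * PT_ATTACH followed by waitpid() should leave the child stopped;
	 * if it is not, the condition is logged but the attach is still
	 * treated as successful.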
*/ - if (WIFSTOPPED(status) == 0) + if (!WIFSTOPPED(status)) DPRINTFX("ERROR: child process %d status 0x%x", pid, status); else phdl->status = PS_STOP; out: - if (error) + if (error && phdl != NULL) { proc_free(phdl); - else - *pphdl = phdl; + phdl = NULL; + } + *pphdl = phdl; return (error); } int proc_create(const char *file, char * const *argv, proc_child_func *pcf, void *child_arg, struct proc_handle **pphdl) { struct proc_handle *phdl; int error = 0; int status; pid_t pid; - /* - * Allocate memory for the process handle, a structure containing - * all things related to the process. - */ - if ((phdl = malloc(sizeof(struct proc_handle))) == NULL) - return (ENOMEM); + if (elf_version(EV_CURRENT) == EV_NONE) + return (ENOENT); - elf_version(EV_CURRENT); - /* Fork a new process. */ if ((pid = vfork()) == -1) error = errno; else if (pid == 0) { /* The child expects to be traced. */ if (ptrace(PT_TRACE_ME, 0, 0, 0) != 0) _exit(1); if (pcf != NULL) (*pcf)(child_arg); /* Execute the specified file: */ execvp(file, argv); /* Couldn't execute the file. */ _exit(2); + /* NOTREACHED */ } else { /* The parent owns the process handle. */ - error = proc_init(pid, 0, PS_IDLE, phdl); + error = proc_init(pid, 0, PS_IDLE, &phdl); if (error != 0) goto bad; /* Wait for the child process to stop. */ if (waitpid(pid, &status, WUNTRACED) == -1) { error = errno; DPRINTF("ERROR: child process %d didn't stop as expected", pid); goto bad; } /* Check for an unexpected status. */ - if (WIFSTOPPED(status) == 0) { + if (!WIFSTOPPED(status)) { error = errno; DPRINTFX("ERROR: child process %d status 0x%x", pid, status); goto bad; } else phdl->status = PS_STOP; } bad: - if (error) + if (error && phdl != NULL) { proc_free(phdl); - else - *pphdl = phdl; + phdl = NULL; + } + *pphdl = phdl; return (error); } void proc_free(struct proc_handle *phdl) { + + if (phdl->procstat != NULL) + procstat_close(phdl->procstat); free(phdl); } Index: user/alc/PQ_LAUNDRY/lib/libproc/proc_rtld.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/libproc/proc_rtld.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libproc/proc_rtld.c (revision 303642) @@ -1,84 +1,84 @@ /* * Copyright (c) 2010 The FreeBSD Foundation * All rights reserved. * * This software was developed by Rui Paulo under sponsorship from the * FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include "libproc.h" #include "_libproc.h" static int map_iter(const rd_loadobj_t *lop, void *arg) { struct proc_handle *phdl = arg; if (phdl->nobjs >= phdl->rdobjsz) { phdl->rdobjsz *= 2; phdl->rdobjs = reallocf(phdl->rdobjs, sizeof(*phdl->rdobjs) * phdl->rdobjsz); if (phdl->rdobjs == NULL) return (-1); } - if (strcmp(lop->rdl_path, phdl->execname) == 0 && + if (strcmp(lop->rdl_path, phdl->execpath) == 0 && (lop->rdl_prot & RD_RDL_X) != 0) phdl->rdexec = &phdl->rdobjs[phdl->nobjs]; memcpy(&phdl->rdobjs[phdl->nobjs++], lop, sizeof(*lop)); return (0); } rd_agent_t * proc_rdagent(struct proc_handle *phdl) { if (phdl->rdap == NULL && phdl->status != PS_UNDEAD && phdl->status != PS_IDLE) { if ((phdl->rdap = rd_new(phdl)) != NULL) { phdl->rdobjs = malloc(sizeof(*phdl->rdobjs) * 64); phdl->rdobjsz = 64; if (phdl->rdobjs == NULL) return (phdl->rdap); rd_loadobj_iter(phdl->rdap, map_iter, phdl); } } return (phdl->rdap); } void proc_updatesyms(struct proc_handle *phdl) { memset(phdl->rdobjs, 0, sizeof(*phdl->rdobjs) * phdl->rdobjsz); phdl->nobjs = 0; rd_loadobj_iter(phdl->rdap, map_iter, phdl); } Index: user/alc/PQ_LAUNDRY/lib/libproc/proc_util.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/libproc/proc_util.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libproc/proc_util.c (revision 303642) @@ -1,230 +1,240 @@ /*- * Copyright (c) 2010 The FreeBSD Foundation * Copyright (c) 2008 John Birrell (jb@freebsd.org) * All rights reserved. * * Portions of this software were developed by Rui Paulo under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include "_libproc.h" int proc_clearflags(struct proc_handle *phdl, int mask) { if (phdl == NULL) return (EINVAL); phdl->flags &= ~mask; return (0); } /* * NB: we return -1 as the Solaris libproc Psetrun() function. */ int proc_continue(struct proc_handle *phdl) { int pending = 0; if (phdl == NULL) return (-1); if (phdl->status == PS_STOP && WSTOPSIG(phdl->wstat) != SIGTRAP) pending = WSTOPSIG(phdl->wstat); if (ptrace(PT_CONTINUE, phdl->pid, (caddr_t)(uintptr_t)1, pending) != 0) return (-1); phdl->status = PS_RUN; return (0); } int proc_detach(struct proc_handle *phdl, int reason) { int status; if (phdl == NULL) return (EINVAL); if (reason == PRELEASE_KILL) { kill(phdl->pid, SIGKILL); return (0); } if (ptrace(PT_DETACH, phdl->pid, 0, 0) != 0 && errno == ESRCH) return (0); if (errno == EBUSY) { kill(phdl->pid, SIGSTOP); waitpid(phdl->pid, &status, WUNTRACED); ptrace(PT_DETACH, phdl->pid, 0, 0); kill(phdl->pid, SIGCONT); return (0); } return (0); } int proc_getflags(struct proc_handle *phdl) { if (phdl == NULL) return (-1); return(phdl->flags); } int proc_setflags(struct proc_handle *phdl, int mask) { if (phdl == NULL) return (EINVAL); phdl->flags |= mask; return (0); } int proc_state(struct proc_handle *phdl) { if (phdl == NULL) return (-1); return (phdl->status); } pid_t proc_getpid(struct proc_handle *phdl) { if (phdl == NULL) return (-1); return (phdl->pid); } int +proc_getmodel(struct proc_handle *phdl) +{ + + if (phdl == NULL) + return (-1); + + return (phdl->model); +} + +int proc_wstatus(struct proc_handle *phdl) { int status; if (phdl == NULL) return (-1); if (waitpid(phdl->pid, &status, WUNTRACED) < 0) { if (errno != EINTR) DPRINTF("waitpid"); return (-1); } if (WIFSTOPPED(status)) phdl->status = PS_STOP; if (WIFEXITED(status) || WIFSIGNALED(status)) phdl->status = PS_UNDEAD; phdl->wstat = status; return (phdl->status); } int proc_getwstat(struct proc_handle *phdl) { if (phdl == NULL) return (-1); return (phdl->wstat); } char * proc_signame(int sig, char *name, size_t namesz) { strlcpy(name, strsignal(sig), namesz); return (name); } int proc_read(struct proc_handle *phdl, void *buf, size_t size, size_t addr) { struct ptrace_io_desc piod; if (phdl == NULL) return (-1); piod.piod_op = PIOD_READ_D; piod.piod_len = size; piod.piod_addr = (void *)buf; piod.piod_offs = (void *)addr; if (ptrace(PT_IO, phdl->pid, (caddr_t)&piod, 0) < 0) return (-1); return (piod.piod_len); } const lwpstatus_t * proc_getlwpstatus(struct proc_handle *phdl) { struct ptrace_lwpinfo lwpinfo; lwpstatus_t *psp = &phdl->lwps; siginfo_t *siginfo; if (phdl == NULL) return (NULL); if (ptrace(PT_LWPINFO, phdl->pid, (caddr_t)&lwpinfo, sizeof(lwpinfo)) < 0) return (NULL); siginfo = &lwpinfo.pl_siginfo; if (lwpinfo.pl_event == PL_EVENT_SIGNAL && (lwpinfo.pl_flags & PL_FLAG_SI) != 0) { if (siginfo->si_signo == SIGTRAP && (siginfo->si_code == TRAP_BRKPT || siginfo->si_code == TRAP_TRACE)) { psp->pr_why = PR_FAULTED; psp->pr_what = FLTBPT; } else { psp->pr_why = PR_SIGNALLED; psp->pr_what = siginfo->si_signo; } } else if (lwpinfo.pl_flags & PL_FLAG_SCE) { psp->pr_why = PR_SYSENTRY; } else if (lwpinfo.pl_flags & PL_FLAG_SCX) { psp->pr_why = PR_SYSEXIT; } return (psp); } Index: user/alc/PQ_LAUNDRY/lib/librtld_db/Makefile =================================================================== --- user/alc/PQ_LAUNDRY/lib/librtld_db/Makefile (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/librtld_db/Makefile (revision 303642) @@ -1,17 +1,19 @@ # 
$FreeBSD$ .include PACKAGE=lib${LIB} LIB= rtld_db SHLIB_MAJOR= 2 MAN= librtld_db.3 SRCS= rtld_db.c INCS= rtld_db.h CFLAGS+= -I${.CURDIR} # Avoid circular dependency, we only need the libproc.h header here. CFLAGS+= -I${.CURDIR:H}/libproc +LIBADD+= elf procstat + .include Index: user/alc/PQ_LAUNDRY/lib/librtld_db/rtld_db.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/librtld_db/rtld_db.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/librtld_db/rtld_db.c (revision 303642) @@ -1,254 +1,389 @@ /* * Copyright (c) 2010 The FreeBSD Foundation * All rights reserved. * * This software was developed by Rui Paulo under sponsorship from the * FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - */ + */ + #include __FBSDID("$FreeBSD$"); -#include -#include +#include +#include +#include #include +#include #include +#include +#include #include #include #include -#include +#include + +#include + +#include #include +#include #include #include "rtld_db.h" static int _librtld_db_debug = 0; #define DPRINTF(...) 
do { \ if (_librtld_db_debug) { \ fprintf(stderr, "librtld_db: DEBUG: "); \ fprintf(stderr, __VA_ARGS__); \ } \ } while (0) void rd_delete(rd_agent_t *rdap) { + if (rdap->rda_procstat != NULL) + procstat_close(rdap->rda_procstat); free(rdap); } const char * rd_errstr(rd_err_e rderr) { switch (rderr) { case RD_ERR: return "generic error"; case RD_OK: return "no error"; case RD_NOCAPAB: return "capability not supported"; case RD_DBERR: return "database error"; case RD_NOBASE: return "NOBASE"; case RD_NOMAPS: return "NOMAPS"; default: return "unknown error"; } } rd_err_e rd_event_addr(rd_agent_t *rdap, rd_event_e event, rd_notify_t *notify) { rd_err_e ret; DPRINTF("%s rdap %p event %d notify %p\n", __func__, rdap, event, notify); ret = RD_OK; switch (event) { case RD_NONE: break; case RD_PREINIT: notify->type = RD_NOTIFY_BPT; notify->u.bptaddr = rdap->rda_preinit_addr; break; case RD_POSTINIT: notify->type = RD_NOTIFY_BPT; notify->u.bptaddr = rdap->rda_postinit_addr; break; case RD_DLACTIVITY: notify->type = RD_NOTIFY_BPT; notify->u.bptaddr = rdap->rda_dlactivity_addr; break; default: ret = RD_ERR; break; } return (ret); } rd_err_e rd_event_enable(rd_agent_t *rdap __unused, int onoff) { DPRINTF("%s onoff %d\n", __func__, onoff); return (RD_OK); } rd_err_e rd_event_getmsg(rd_agent_t *rdap __unused, rd_event_msg_t *msg) { DPRINTF("%s\n", __func__); msg->type = RD_POSTINIT; msg->u.state = RD_CONSISTENT; return (RD_OK); } rd_err_e rd_init(int version) { char *debug = NULL; if (version == RD_VERSION) { debug = getenv("LIBRTLD_DB_DEBUG"); _librtld_db_debug = debug ? atoi(debug) : 0; return (RD_OK); } else return (RD_NOCAPAB); } rd_err_e rd_loadobj_iter(rd_agent_t *rdap, rl_iter_f *cb, void *clnt_data) { - int cnt, i, lastvn = 0; - rd_loadobj_t rdl; struct kinfo_vmentry *kves, *kve; + rd_loadobj_t rdl; + rd_err_e ret; + int cnt, i, lastvn; DPRINTF("%s\n", __func__); if ((kves = kinfo_getvmmap(proc_getpid(rdap->rda_php), &cnt)) == NULL) { warn("ERROR: kinfo_getvmmap() failed"); return (RD_ERR); } + + ret = RD_OK; + lastvn = 0; for (i = 0; i < cnt; i++) { kve = kves + i; if (kve->kve_type == KVME_TYPE_VNODE) lastvn = i; memset(&rdl, 0, sizeof(rdl)); /* * Map the kinfo_vmentry struct to the rd_loadobj structure. 
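		 * Only KVME_TYPE_VNODE entries carry a backing file path, so
		 * anonymous mappings are attributed to the most recently seen
		 * vnode entry (tracked by lastvn above).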
*/ rdl.rdl_saddr = kve->kve_start; rdl.rdl_eaddr = kve->kve_end; rdl.rdl_offset = kve->kve_offset; if (kve->kve_protection & KVME_PROT_READ) rdl.rdl_prot |= RD_RDL_R; if (kve->kve_protection & KVME_PROT_WRITE) rdl.rdl_prot |= RD_RDL_W; if (kve->kve_protection & KVME_PROT_EXEC) rdl.rdl_prot |= RD_RDL_X; strlcpy(rdl.rdl_path, kves[lastvn].kve_path, - sizeof(rdl.rdl_path)); - (*cb)(&rdl, clnt_data); + sizeof(rdl.rdl_path)); + if ((*cb)(&rdl, clnt_data) != 0) { + ret = RD_ERR; + break; + } } free(kves); - - return (RD_OK); + return (ret); } void rd_log(const int onoff) { DPRINTF("%s\n", __func__); (void)onoff; } rd_agent_t * rd_new(struct proc_handle *php) { rd_agent_t *rdap; - rdap = malloc(sizeof(rd_agent_t)); - if (rdap) { - memset(rdap, 0, sizeof(rd_agent_t)); - rdap->rda_php = php; - rd_reset(rdap); - } + rdap = malloc(sizeof(*rdap)); + if (rdap == NULL) + return (NULL); + memset(rdap, 0, sizeof(rd_agent_t)); + rdap->rda_php = php; + rdap->rda_procstat = procstat_open_sysctl(); + + if (rd_reset(rdap) != RD_OK) { + rd_delete(rdap); + rdap = NULL; + } return (rdap); } rd_err_e rd_objpad_enable(rd_agent_t *rdap, size_t padsize) { DPRINTF("%s\n", __func__); (void)rdap; (void)padsize; return (RD_ERR); } rd_err_e rd_plt_resolution(rd_agent_t *rdap, uintptr_t pc, struct proc *proc, uintptr_t plt_base, rd_plt_info_t *rpi) { DPRINTF("%s\n", __func__); (void)rdap; (void)pc; (void)proc; (void)plt_base; (void)rpi; return (RD_ERR); } +static int +rtld_syms(rd_agent_t *rdap, const char *rtldpath, u_long base) +{ + GElf_Shdr shdr; + GElf_Sym sym; + Elf *e; + Elf_Data *data; + Elf_Scn *scn; + const char *symname; + Elf64_Word strscnidx; + int fd, i, ret; + + ret = 1; + e = NULL; + + fd = open(rtldpath, O_RDONLY); + if (fd < 0) + goto err; + + if (elf_version(EV_CURRENT) == EV_NONE) + goto err; + e = elf_begin(fd, ELF_C_READ, NULL); + if (e == NULL) { + close(fd); + goto err; + } + + scn = NULL; + while ((scn = elf_nextscn(e, scn)) != NULL) { + gelf_getshdr(scn, &shdr); + if (shdr.sh_type == SHT_DYNSYM) + break; + } + if (scn == NULL) + goto err; + + strscnidx = shdr.sh_link; + data = elf_getdata(scn, NULL); + if (data == NULL) + goto err; + + for (i = 0; gelf_getsym(data, i, &sym) != NULL; i++) { + if (GELF_ST_TYPE(sym.st_info) != STT_FUNC || + GELF_ST_BIND(sym.st_info) != STB_GLOBAL) + continue; + symname = elf_strptr(e, strscnidx, sym.st_name); + if (symname == NULL) + continue; + + if (strcmp(symname, "r_debug_state") == 0) { + rdap->rda_preinit_addr = sym.st_value + base; + rdap->rda_dlactivity_addr = sym.st_value + base; + } else if (strcmp(symname, "_r_debug_postinit") == 0) { + rdap->rda_postinit_addr = sym.st_value + base; + } + } + + if (rdap->rda_preinit_addr != 0 && + rdap->rda_postinit_addr != 0 && + rdap->rda_dlactivity_addr != 0) + ret = 0; + +err: + if (e != NULL) + (void)elf_end(e); + if (fd >= 0) + (void)close(fd); + return (ret); +} + rd_err_e rd_reset(rd_agent_t *rdap) { - GElf_Sym sym; + struct kinfo_proc *kp; + struct kinfo_vmentry *kve; + Elf_Auxinfo *auxv; + const char *rtldpath; + u_long base; + rd_err_e rderr; + int count, i; - if (proc_name2sym(rdap->rda_php, "ld-elf.so.1", "r_debug_state", - &sym, NULL) < 0) - return (RD_ERR); - DPRINTF("found r_debug_state at 0x%lx\n", (unsigned long)sym.st_value); - rdap->rda_preinit_addr = sym.st_value; - rdap->rda_dlactivity_addr = sym.st_value; + kp = NULL; + auxv = NULL; + kve = NULL; + rderr = RD_ERR; - if (proc_name2sym(rdap->rda_php, "ld-elf.so.1", "_r_debug_postinit", - &sym, NULL) < 0) + kp = procstat_getprocs(rdap->rda_procstat, 
KERN_PROC_PID, + proc_getpid(rdap->rda_php), &count); + if (kp == NULL) return (RD_ERR); - DPRINTF("found _r_debug_postinit at 0x%lx\n", - (unsigned long)sym.st_value); - rdap->rda_postinit_addr = sym.st_value; + assert(count == 1); - return (RD_OK); + auxv = procstat_getauxv(rdap->rda_procstat, kp, &count); + if (auxv == NULL) + goto err; + + base = 0; + for (i = 0; i < count; i++) { + if (auxv[i].a_type == AT_BASE) { + base = auxv[i].a_un.a_val; + break; + } + } + if (i == count) + goto err; + + rtldpath = NULL; + kve = procstat_getvmmap(rdap->rda_procstat, kp, &count); + if (kve == NULL) + goto err; + for (i = 0; i < count; i++) { + if (kve[i].kve_start == base) { + rtldpath = kve[i].kve_path; + break; + } + } + if (i == count) + goto err; + + if (rtld_syms(rdap, rtldpath, base) != 0) + goto err; + + rderr = RD_OK; + +err: + if (kve != NULL) + procstat_freevmmap(rdap->rda_procstat, kve); + if (auxv != NULL) + procstat_freeauxv(rdap->rda_procstat, auxv); + if (kp != NULL) + procstat_freeprocs(rdap->rda_procstat, kp); + return (rderr); } Index: user/alc/PQ_LAUNDRY/lib/librtld_db/rtld_db.h =================================================================== --- user/alc/PQ_LAUNDRY/lib/librtld_db/rtld_db.h (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/librtld_db/rtld_db.h (revision 303642) @@ -1,152 +1,155 @@ /* * Copyright (c) 2010 The FreeBSD Foundation * All rights reserved. * * This software was developed by Rui Paulo under sponsorship from the * FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _RTLD_DB_H_ #define _RTLD_DB_H_ #include -#include -#include - #define RD_VERSION 1 typedef enum { RD_OK, RD_ERR, RD_DBERR, RD_NOCAPAB, RD_NODYNAM, RD_NOBASE, RD_NOMAPS } rd_err_e; +/* XXX struct rd_agent should be private. 
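 * rda_procstat holds a libprocstat handle; rd_reset() uses it to read
 * the target's auxiliary vector and VM map when locating the runtime
 * linker and its event symbols.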
*/ +struct procstat; + typedef struct rd_agent { struct proc_handle *rda_php; + uintptr_t rda_dlactivity_addr; uintptr_t rda_preinit_addr; uintptr_t rda_postinit_addr; + + struct procstat *rda_procstat; } rd_agent_t; typedef struct rd_loadobj { uintptr_t rdl_saddr; /* start address */ uintptr_t rdl_eaddr; /* end address */ uint32_t rdl_offset; uint8_t rdl_prot; #define RD_RDL_R 0x01 #define RD_RDL_W 0x02 #define RD_RDL_X 0x04 enum { RDL_TYPE_NONE = 0, RDL_TYPE_DEF, RDL_TYPE_VNODE, RDL_TYPE_SWAP, RDL_TYPE_DEV, /* XXX some types missing */ RDL_TYPE_UNKNOWN = 255 } rdl_type; unsigned char rdl_path[PATH_MAX]; } rd_loadobj_t; typedef enum { RD_NONE = 0, RD_PREINIT, RD_POSTINIT, RD_DLACTIVITY } rd_event_e; typedef enum { RD_NOTIFY_BPT, RD_NOTIFY_AUTOBPT, RD_NOTIFY_SYSCALL } rd_notify_e; typedef struct rd_notify { rd_notify_e type; union { uintptr_t bptaddr; long syscallno; } u; } rd_notify_t; typedef enum { RD_NOSTATE = 0, RD_CONSISTENT, RD_ADD, RD_DELETE } rd_state_e; typedef struct rd_event_msg { rd_event_e type; union { rd_state_e state; } u; } rd_event_msg_t; typedef enum { RD_RESOLVE_NONE, RD_RESOLVE_STEP, RD_RESOLVE_TARGET, RD_RESOLVE_TARGET_STEP } rd_skip_e; typedef struct rd_plt_info { rd_skip_e pi_skip_method; long pi_nstep; uintptr_t pi_target; uintptr_t pi_baddr; unsigned int pi_flags; } rd_plt_info_t; #define RD_FLG_PI_PLTBOUND 0x0001 __BEGIN_DECLS struct proc_handle; void rd_delete(rd_agent_t *); const char *rd_errstr(rd_err_e); rd_err_e rd_event_addr(rd_agent_t *, rd_event_e, rd_notify_t *); rd_err_e rd_event_enable(rd_agent_t *, int); rd_err_e rd_event_getmsg(rd_agent_t *, rd_event_msg_t *); rd_err_e rd_init(int); typedef int rl_iter_f(const rd_loadobj_t *, void *); rd_err_e rd_loadobj_iter(rd_agent_t *, rl_iter_f *, void *); void rd_log(const int); rd_agent_t *rd_new(struct proc_handle *); rd_err_e rd_objpad_enable(rd_agent_t *, size_t); struct proc; rd_err_e rd_plt_resolution(rd_agent_t *, uintptr_t, struct proc *, uintptr_t, rd_plt_info_t *); rd_err_e rd_reset(rd_agent_t *); __END_DECLS #endif /* _RTLD_DB_H_ */ Index: user/alc/PQ_LAUNDRY/sbin/fsdb/fsdb.c =================================================================== --- user/alc/PQ_LAUNDRY/sbin/fsdb/fsdb.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sbin/fsdb/fsdb.c (revision 303642) @@ -1,1210 +1,1210 @@ /* $NetBSD: fsdb.c,v 1.2 1995/10/08 23:18:10 thorpej Exp $ */ /* * Copyright (c) 1995 John T. Kohl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef lint static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fsdb.h" #include "fsck.h" static void usage(void) __dead2; int cmdloop(void); static int compare_blk32(uint32_t *wantedblk, uint32_t curblk); static int compare_blk64(uint64_t *wantedblk, uint64_t curblk); static int founddatablk(uint64_t blk); static int find_blks32(uint32_t *buf, int size, uint32_t *blknum); static int find_blks64(uint64_t *buf, int size, uint64_t *blknum); static int find_indirblks32(uint32_t blk, int ind_level, uint32_t *blknum); static int find_indirblks64(uint64_t blk, int ind_level, uint64_t *blknum); static void usage(void) { fprintf(stderr, "usage: fsdb [-d] [-f] [-r] fsname\n"); exit(1); } int returntosingle; char nflag; /* * We suck in lots of fsck code, and just pick & choose the stuff we want. * * fsreadfd is set up to read from the file system, fswritefd to write to * the file system. */ int main(int argc, char *argv[]) { int ch, rval; char *fsys = NULL; while (-1 != (ch = getopt(argc, argv, "fdr"))) { switch (ch) { case 'f': /* The -f option is left for historical * reasons and has no meaning. */ break; case 'd': debug++; break; case 'r': nflag++; /* "no" in fsck, readonly for us */ break; default: usage(); } } argc -= optind; argv += optind; if (argc != 1) usage(); else fsys = argv[0]; sblock_init(); if (!setup(fsys)) errx(1, "cannot set up file system `%s'", fsys); printf("%s file system `%s'\nLast Mounted on %s\n", nflag? 
"Examining": "Editing", fsys, sblock.fs_fsmnt); rval = cmdloop(); if (!nflag) { sblock.fs_clean = 0; /* mark it dirty */ sbdirty(); ckfini(0); printf("*** FILE SYSTEM MARKED DIRTY\n"); printf("*** BE SURE TO RUN FSCK TO CLEAN UP ANY DAMAGE\n"); printf("*** IF IT WAS MOUNTED, RE-MOUNT WITH -u -o reload\n"); } exit(rval); } #define CMDFUNC(func) int func(int argc, char *argv[]) #define CMDFUNCSTART(func) int func(int argc, char *argv[]) CMDFUNC(helpfn); CMDFUNC(focus); /* focus on inode */ CMDFUNC(active); /* print active inode */ CMDFUNC(blocks); /* print blocks for active inode */ CMDFUNC(focusname); /* focus by name */ CMDFUNC(zapi); /* clear inode */ CMDFUNC(uplink); /* incr link */ CMDFUNC(downlink); /* decr link */ CMDFUNC(linkcount); /* set link count */ CMDFUNC(quit); /* quit */ CMDFUNC(findblk); /* find block */ CMDFUNC(ls); /* list directory */ CMDFUNC(rm); /* remove name */ CMDFUNC(ln); /* add name */ CMDFUNC(newtype); /* change type */ CMDFUNC(chmode); /* change mode */ CMDFUNC(chlen); /* change length */ CMDFUNC(chaflags); /* change flags */ CMDFUNC(chgen); /* change generation */ CMDFUNC(chowner); /* change owner */ CMDFUNC(chgroup); /* Change group */ CMDFUNC(back); /* pop back to last ino */ CMDFUNC(chbtime); /* Change btime */ CMDFUNC(chmtime); /* Change mtime */ CMDFUNC(chctime); /* Change ctime */ CMDFUNC(chatime); /* Change atime */ CMDFUNC(chinum); /* Change inode # of dirent */ CMDFUNC(chname); /* Change dirname of dirent */ struct cmdtable cmds[] = { { "help", "Print out help", 1, 1, FL_RO, helpfn }, { "?", "Print out help", 1, 1, FL_RO, helpfn }, { "inode", "Set active inode to INUM", 2, 2, FL_RO, focus }, { "clri", "Clear inode INUM", 2, 2, FL_WR, zapi }, { "lookup", "Set active inode by looking up NAME", 2, 2, FL_RO | FL_ST, focusname }, { "cd", "Set active inode by looking up NAME", 2, 2, FL_RO | FL_ST, focusname }, { "back", "Go to previous active inode", 1, 1, FL_RO, back }, { "active", "Print active inode", 1, 1, FL_RO, active }, { "print", "Print active inode", 1, 1, FL_RO, active }, { "blocks", "Print block numbers of active inode", 1, 1, FL_RO, blocks }, { "uplink", "Increment link count", 1, 1, FL_WR, uplink }, { "downlink", "Decrement link count", 1, 1, FL_WR, downlink }, { "linkcount", "Set link count to COUNT", 2, 2, FL_WR, linkcount }, { "findblk", "Find inode owning disk block(s)", 2, 33, FL_RO, findblk}, { "ls", "List current inode as directory", 1, 1, FL_RO, ls }, { "rm", "Remove NAME from current inode directory", 2, 2, FL_WR | FL_ST, rm }, { "del", "Remove NAME from current inode directory", 2, 2, FL_WR | FL_ST, rm }, { "ln", "Hardlink INO into current inode directory as NAME", 3, 3, FL_WR | FL_ST, ln }, { "chinum", "Change dir entry number INDEX to INUM", 3, 3, FL_WR, chinum }, { "chname", "Change dir entry number INDEX to NAME", 3, 3, FL_WR | FL_ST, chname }, { "chtype", "Change type of current inode to TYPE", 2, 2, FL_WR, newtype }, { "chmod", "Change mode of current inode to MODE", 2, 2, FL_WR, chmode }, { "chlen", "Change length of current inode to LENGTH", 2, 2, FL_WR, chlen }, { "chown", "Change owner of current inode to OWNER", 2, 2, FL_WR, chowner }, { "chgrp", "Change group of current inode to GROUP", 2, 2, FL_WR, chgroup }, { "chflags", "Change flags of current inode to FLAGS", 2, 2, FL_WR, chaflags }, { "chgen", "Change generation number of current inode to GEN", 2, 2, FL_WR, chgen }, { "btime", "Change btime of current inode to BTIME", 2, 2, FL_WR, chbtime }, { "mtime", "Change mtime of current inode to MTIME", 2, 2, FL_WR, chmtime }, { 
"ctime", "Change ctime of current inode to CTIME", 2, 2, FL_WR, chctime }, { "atime", "Change atime of current inode to ATIME", 2, 2, FL_WR, chatime }, { "quit", "Exit", 1, 1, FL_RO, quit }, { "q", "Exit", 1, 1, FL_RO, quit }, { "exit", "Exit", 1, 1, FL_RO, quit }, { NULL, 0, 0, 0, 0, NULL }, }; int helpfn(int argc, char *argv[]) { struct cmdtable *cmdtp; printf("Commands are:\n%-10s %5s %5s %s\n", "command", "min args", "max args", "what"); for (cmdtp = cmds; cmdtp->cmd; cmdtp++) printf("%-10s %5u %5u %s\n", cmdtp->cmd, cmdtp->minargc-1, cmdtp->maxargc-1, cmdtp->helptxt); return 0; } char * prompt(EditLine *el) { static char pstring[64]; snprintf(pstring, sizeof(pstring), "fsdb (inum: %ju)> ", (uintmax_t)curinum); return pstring; } int cmdloop(void) { char *line; const char *elline; int cmd_argc, rval = 0, known; #define scratch known char **cmd_argv; struct cmdtable *cmdp; History *hist; EditLine *elptr; HistEvent he; curinode = ginode(ROOTINO); curinum = ROOTINO; printactive(0); hist = history_init(); history(hist, &he, H_SETSIZE, 100); /* 100 elt history buffer */ elptr = el_init("fsdb", stdin, stdout, stderr); el_set(elptr, EL_EDITOR, "emacs"); el_set(elptr, EL_PROMPT, prompt); el_set(elptr, EL_HIST, history, hist); el_source(elptr, NULL); while ((elline = el_gets(elptr, &scratch)) != NULL && scratch != 0) { if (debug) printf("command `%s'\n", elline); history(hist, &he, H_ENTER, elline); line = strdup(elline); cmd_argv = crack(line, &cmd_argc); /* * el_parse returns -1 to signal that it's not been handled * internally. */ if (el_parse(elptr, cmd_argc, (const char **)cmd_argv) != -1) continue; if (cmd_argc) { known = 0; for (cmdp = cmds; cmdp->cmd; cmdp++) { if (!strcmp(cmdp->cmd, cmd_argv[0])) { if ((cmdp->flags & FL_WR) == FL_WR && nflag) warnx("`%s' requires write access", cmd_argv[0]), rval = 1; else if (cmd_argc >= cmdp->minargc && cmd_argc <= cmdp->maxargc) rval = (*cmdp->handler)(cmd_argc, cmd_argv); else if (cmd_argc >= cmdp->minargc && (cmdp->flags & FL_ST) == FL_ST) { strcpy(line, elline); cmd_argv = recrack(line, &cmd_argc, cmdp->maxargc); rval = (*cmdp->handler)(cmd_argc, cmd_argv); } else rval = argcount(cmdp, cmd_argc, cmd_argv); known = 1; break; } } if (!known) warnx("unknown command `%s'", cmd_argv[0]), rval = 1; } else rval = 0; free(line); if (rval < 0) /* user typed "quit" */ return 0; if (rval) warnx("rval was %d", rval); } el_end(elptr); history_end(hist); return rval; } union dinode *curinode; ino_t curinum, ocurrent; #define GETINUM(ac,inum) inum = strtoul(argv[ac], &cp, 0); \ if (inum < ROOTINO || inum > maxino || cp == argv[ac] || *cp != '\0' ) { \ printf("inode %ju out of range; range is [%ju,%ju]\n", \ (uintmax_t)inum, (uintmax_t)ROOTINO, (uintmax_t)maxino); \ return 1; \ } /* * Focus on given inode number */ CMDFUNCSTART(focus) { ino_t inum; char *cp; GETINUM(1,inum); curinode = ginode(inum); ocurrent = curinum; curinum = inum; printactive(0); return 0; } CMDFUNCSTART(back) { curinum = ocurrent; curinode = ginode(curinum); printactive(0); return 0; } CMDFUNCSTART(zapi) { ino_t inum; union dinode *dp; char *cp; GETINUM(1,inum); dp = ginode(inum); clearinode(dp); inodirty(); if (curinode) /* re-set after potential change */ curinode = ginode(curinum); return 0; } CMDFUNCSTART(active) { printactive(0); return 0; } CMDFUNCSTART(blocks) { printactive(1); return 0; } CMDFUNCSTART(quit) { return -1; } CMDFUNCSTART(uplink) { if (!checkactive()) return 1; DIP_SET(curinode, di_nlink, DIP(curinode, di_nlink) + 1); printf("inode %ju link count now %d\n", 
(uintmax_t)curinum, DIP(curinode, di_nlink)); inodirty(); return 0; } CMDFUNCSTART(downlink) { if (!checkactive()) return 1; DIP_SET(curinode, di_nlink, DIP(curinode, di_nlink) - 1); printf("inode %ju link count now %d\n", (uintmax_t)curinum, DIP(curinode, di_nlink)); inodirty(); return 0; } const char *typename[] = { "unknown", "fifo", "char special", "unregistered #3", "directory", "unregistered #5", "blk special", "unregistered #7", "regular", "unregistered #9", "symlink", "unregistered #11", "socket", "unregistered #13", "whiteout", }; int diroff; int slot; int scannames(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; printf("slot %d off %d ino %d reclen %d: %s, `%.*s'\n", slot++, diroff, dirp->d_ino, dirp->d_reclen, typename[dirp->d_type], dirp->d_namlen, dirp->d_name); diroff += dirp->d_reclen; return (KEEPON); } CMDFUNCSTART(ls) { struct inodesc idesc; checkactivedir(); /* let it go on anyway */ slot = 0; diroff = 0; idesc.id_number = curinum; idesc.id_func = scannames; idesc.id_type = DATA; idesc.id_fix = IGNORE; ckinode(curinode, &idesc); curinode = ginode(curinum); return 0; } static int findblk_numtofind; static int wantedblksize; CMDFUNCSTART(findblk) { ino_t inum, inosused; uint32_t *wantedblk32; uint64_t *wantedblk64; struct bufarea *cgbp; struct cg *cgp; int c, i, is_ufs2; wantedblksize = (argc - 1); is_ufs2 = sblock.fs_magic == FS_UFS2_MAGIC; ocurrent = curinum; if (is_ufs2) { wantedblk64 = calloc(wantedblksize, sizeof(uint64_t)); if (wantedblk64 == NULL) err(1, "malloc"); for (i = 1; i < argc; i++) wantedblk64[i - 1] = dbtofsb(&sblock, strtoull(argv[i], NULL, 0)); } else { wantedblk32 = calloc(wantedblksize, sizeof(uint32_t)); if (wantedblk32 == NULL) err(1, "malloc"); for (i = 1; i < argc; i++) wantedblk32[i - 1] = dbtofsb(&sblock, strtoull(argv[i], NULL, 0)); } findblk_numtofind = wantedblksize; /* * sblock.fs_ncg holds a number of cylinder groups. * Iterate over all cylinder groups. */ for (c = 0; c < sblock.fs_ncg; c++) { /* * sblock.fs_ipg holds a number of inodes per cylinder group. * Calculate a highest inode number for a given cylinder group. */ inum = c * sblock.fs_ipg; /* Read cylinder group. */ cgbp = cgget(c); cgp = cgbp->b_un.b_cg; /* * Get a highest used inode number for a given cylinder group. * For UFS1 all inodes initialized at the newfs stage. */ if (is_ufs2) inosused = cgp->cg_initediblk; else inosused = sblock.fs_ipg; for (; inosused > 0; inum++, inosused--) { /* Skip magic inodes: 0, WINO, ROOTINO. */ if (inum < ROOTINO) continue; /* * Check if the block we are looking for is just an inode block. * * ino_to_fsba() - get block containing inode from its number. * INOPB() - get a number of inodes in one disk block. */ if (is_ufs2 ? compare_blk64(wantedblk64, ino_to_fsba(&sblock, inum)) : compare_blk32(wantedblk32, ino_to_fsba(&sblock, inum))) { printf("block %llu: inode block (%ju-%ju)\n", (unsigned long long)fsbtodb(&sblock, ino_to_fsba(&sblock, inum)), (uintmax_t)(inum / INOPB(&sblock)) * INOPB(&sblock), (uintmax_t)(inum / INOPB(&sblock) + 1) * INOPB(&sblock)); findblk_numtofind--; if (findblk_numtofind == 0) goto end; } /* Get on-disk inode aka dinode. */ curinum = inum; curinode = ginode(inum); /* Find IFLNK dinode with allocated data blocks. 
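Short symlinks are stored in the inode itself and have no data blocks, so they are skipped below.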
*/ switch (DIP(curinode, di_mode) & IFMT) { case IFDIR: case IFREG: if (DIP(curinode, di_blocks) == 0) continue; break; case IFLNK: { uint64_t size = DIP(curinode, di_size); if (size > 0 && size < sblock.fs_maxsymlinklen && DIP(curinode, di_blocks) == 0) continue; else break; } default: continue; } /* Look through direct data blocks. */ if (is_ufs2 ? find_blks64(curinode->dp2.di_db, NDADDR, wantedblk64) : find_blks32(curinode->dp1.di_db, NDADDR, wantedblk32)) goto end; for (i = 0; i < NIADDR; i++) { /* * Does the block we are looking for belongs to the * indirect blocks? */ if (is_ufs2 ? compare_blk64(wantedblk64, curinode->dp2.di_ib[i]) : compare_blk32(wantedblk32, curinode->dp1.di_ib[i])) if (founddatablk(is_ufs2 ? curinode->dp2.di_ib[i] : curinode->dp1.di_ib[i])) goto end; /* * Search through indirect, double and triple indirect * data blocks. */ if (is_ufs2 ? (curinode->dp2.di_ib[i] != 0) : (curinode->dp1.di_ib[i] != 0)) if (is_ufs2 ? find_indirblks64(curinode->dp2.di_ib[i], i, wantedblk64) : find_indirblks32(curinode->dp1.di_ib[i], i, wantedblk32)) goto end; } } } end: curinum = ocurrent; curinode = ginode(curinum); return 0; } static int compare_blk32(uint32_t *wantedblk, uint32_t curblk) { int i; for (i = 0; i < wantedblksize; i++) { if (wantedblk[i] != 0 && wantedblk[i] == curblk) { wantedblk[i] = 0; return 1; } } return 0; } static int compare_blk64(uint64_t *wantedblk, uint64_t curblk) { int i; for (i = 0; i < wantedblksize; i++) { if (wantedblk[i] != 0 && wantedblk[i] == curblk) { wantedblk[i] = 0; return 1; } } return 0; } static int founddatablk(uint64_t blk) { printf("%llu: data block of inode %ju\n", (unsigned long long)fsbtodb(&sblock, blk), (uintmax_t)curinum); findblk_numtofind--; if (findblk_numtofind == 0) return 1; return 0; } static int find_blks32(uint32_t *buf, int size, uint32_t *wantedblk) { int blk; for (blk = 0; blk < size; blk++) { if (buf[blk] == 0) continue; if (compare_blk32(wantedblk, buf[blk])) { if (founddatablk(buf[blk])) return 1; } } return 0; } static int find_indirblks32(uint32_t blk, int ind_level, uint32_t *wantedblk) { #define MAXNINDIR (MAXBSIZE / sizeof(uint32_t)) uint32_t idblk[MAXNINDIR]; int i; blread(fsreadfd, (char *)idblk, fsbtodb(&sblock, blk), (int)sblock.fs_bsize); if (ind_level <= 0) { if (find_blks32(idblk, sblock.fs_bsize / sizeof(uint32_t), wantedblk)) return 1; } else { ind_level--; for (i = 0; i < sblock.fs_bsize / sizeof(uint32_t); i++) { if (compare_blk32(wantedblk, idblk[i])) { if (founddatablk(idblk[i])) return 1; } if (idblk[i] != 0) if (find_indirblks32(idblk[i], ind_level, wantedblk)) return 1; } } #undef MAXNINDIR return 0; } static int find_blks64(uint64_t *buf, int size, uint64_t *wantedblk) { int blk; for (blk = 0; blk < size; blk++) { if (buf[blk] == 0) continue; if (compare_blk64(wantedblk, buf[blk])) { if (founddatablk(buf[blk])) return 1; } } return 0; } static int find_indirblks64(uint64_t blk, int ind_level, uint64_t *wantedblk) { #define MAXNINDIR (MAXBSIZE / sizeof(uint64_t)) uint64_t idblk[MAXNINDIR]; int i; blread(fsreadfd, (char *)idblk, fsbtodb(&sblock, blk), (int)sblock.fs_bsize); if (ind_level <= 0) { if (find_blks64(idblk, sblock.fs_bsize / sizeof(uint64_t), wantedblk)) return 1; } else { ind_level--; for (i = 0; i < sblock.fs_bsize / sizeof(uint64_t); i++) { if (compare_blk64(wantedblk, idblk[i])) { if (founddatablk(idblk[i])) return 1; } if (idblk[i] != 0) if (find_indirblks64(idblk[i], ind_level, wantedblk)) return 1; } } #undef MAXNINDIR return 0; } int findino(struct inodesc *idesc); /* from fsck */ 
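As an aside on the recursion above: find_indirblks32()/find_indirblks64() descend one indirection level per call, and each level multiplies the number of reachable data blocks by NINDIR (fs_bsize divided by the size of a block pointer). A small illustrative program, assuming a hypothetical 32 KB block size and UFS2's 8-byte pointers (the real values come from the superblock):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	const uint64_t bsize = 32768;				/* assumed fs_bsize */
	const uint64_t nindir = bsize / sizeof(uint64_t);	/* UFS2 pointers */
	uint64_t covered = 1;
	int level;

	/* NIADDR == 3: single, double and triple indirect blocks. */
	for (level = 0; level < 3; level++) {
		covered *= nindir;
		printf("level %d indirect block: %ju data blocks (%ju bytes)\n",
		    level, (uintmax_t)covered, (uintmax_t)(covered * bsize));
	}
	return (0);
}

With UFS1's 4-byte block pointers NINDIR doubles for the same block size.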
static int dolookup(char *name); static int dolookup(char *name) { struct inodesc idesc; if (!checkactivedir()) return 0; idesc.id_number = curinum; idesc.id_func = findino; idesc.id_name = name; idesc.id_type = DATA; idesc.id_fix = IGNORE; if (ckinode(curinode, &idesc) & FOUND) { curinum = idesc.id_parent; curinode = ginode(curinum); printactive(0); return 1; } else { warnx("name `%s' not found in current inode directory", name); return 0; } } CMDFUNCSTART(focusname) { char *p, *val; if (!checkactive()) return 1; ocurrent = curinum; if (argv[1][0] == '/') { curinum = ROOTINO; curinode = ginode(ROOTINO); } else { if (!checkactivedir()) return 1; } for (p = argv[1]; p != NULL;) { while ((val = strsep(&p, "/")) != NULL && *val == '\0'); if (val) { printf("component `%s': ", val); fflush(stdout); if (!dolookup(val)) { curinode = ginode(curinum); return(1); } } } return 0; } CMDFUNCSTART(ln) { ino_t inum; int rval; char *cp; GETINUM(1,inum); if (!checkactivedir()) return 1; rval = makeentry(curinum, inum, argv[2]); if (rval) printf("Ino %ju entered as `%s'\n", (uintmax_t)inum, argv[2]); else printf("could not enter name? weird.\n"); curinode = ginode(curinum); return rval; } CMDFUNCSTART(rm) { int rval; if (!checkactivedir()) return 1; rval = changeino(curinum, argv[1], 0); if (rval & ALTERED) { printf("Name `%s' removed\n", argv[1]); return 0; } else { printf("could not remove name ('%s')? weird.\n", argv[1]); return 1; } } long slotcount, desired; int chinumfunc(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; if (slotcount++ == desired) { dirp->d_ino = idesc->id_parent; return STOP|ALTERED|FOUND; } return KEEPON; } CMDFUNCSTART(chinum) { char *cp; ino_t inum; struct inodesc idesc; slotcount = 0; if (!checkactivedir()) return 1; GETINUM(2,inum); desired = strtol(argv[1], &cp, 0); if (cp == argv[1] || *cp != '\0' || desired < 0) { printf("invalid slot number `%s'\n", argv[1]); return 1; } idesc.id_number = curinum; idesc.id_func = chinumfunc; idesc.id_fix = IGNORE; idesc.id_type = DATA; idesc.id_parent = inum; /* XXX convenient hiding place */ if (ckinode(curinode, &idesc) & FOUND) return 0; else { warnx("no %sth slot in current directory", argv[1]); return 1; } } int chnamefunc(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; struct direct testdir; if (slotcount++ == desired) { /* will name fit? 
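(the replacement name must fit within the existing entry's d_reclen)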
*/ testdir.d_namlen = strlen(idesc->id_name); if (DIRSIZ(NEWDIRFMT, &testdir) <= dirp->d_reclen) { dirp->d_namlen = testdir.d_namlen; strcpy(dirp->d_name, idesc->id_name); return STOP|ALTERED|FOUND; } else return STOP|FOUND; /* won't fit, so give up */ } return KEEPON; } CMDFUNCSTART(chname) { int rval; char *cp; struct inodesc idesc; slotcount = 0; if (!checkactivedir()) return 1; desired = strtoul(argv[1], &cp, 0); if (cp == argv[1] || *cp != '\0') { printf("invalid slot number `%s'\n", argv[1]); return 1; } idesc.id_number = curinum; idesc.id_func = chnamefunc; idesc.id_fix = IGNORE; idesc.id_type = DATA; idesc.id_name = argv[2]; rval = ckinode(curinode, &idesc); if ((rval & (FOUND|ALTERED)) == (FOUND|ALTERED)) return 0; else if (rval & FOUND) { warnx("new name `%s' does not fit in slot %s\n", argv[2], argv[1]); return 1; } else { warnx("no %sth slot in current directory", argv[1]); return 1; } } struct typemap { const char *typename; int typebits; } typenamemap[] = { {"file", IFREG}, {"dir", IFDIR}, {"socket", IFSOCK}, {"fifo", IFIFO}, }; CMDFUNCSTART(newtype) { int type; struct typemap *tp; if (!checkactive()) return 1; type = DIP(curinode, di_mode) & IFMT; for (tp = typenamemap; - tp < &typenamemap[sizeof(typenamemap)/sizeof(*typenamemap)]; + tp < &typenamemap[nitems(typenamemap)]; tp++) { if (!strcmp(argv[1], tp->typename)) { printf("setting type to %s\n", tp->typename); type = tp->typebits; break; } } - if (tp == &typenamemap[sizeof(typenamemap)/sizeof(*typenamemap)]) { + if (tp == &typenamemap[nitems(typenamemap)]) { warnx("type `%s' not known", argv[1]); warnx("try one of `file', `dir', `socket', `fifo'"); return 1; } DIP_SET(curinode, di_mode, DIP(curinode, di_mode) & ~IFMT); DIP_SET(curinode, di_mode, DIP(curinode, di_mode) | type); inodirty(); printactive(0); return 0; } CMDFUNCSTART(chlen) { int rval = 1; long len; char *cp; if (!checkactive()) return 1; len = strtol(argv[1], &cp, 0); if (cp == argv[1] || *cp != '\0' || len < 0) { warnx("bad length `%s'", argv[1]); return 1; } DIP_SET(curinode, di_size, len); inodirty(); printactive(0); return rval; } CMDFUNCSTART(chmode) { int rval = 1; long modebits; char *cp; if (!checkactive()) return 1; modebits = strtol(argv[1], &cp, 8); if (cp == argv[1] || *cp != '\0' || (modebits & ~07777)) { warnx("bad modebits `%s'", argv[1]); return 1; } DIP_SET(curinode, di_mode, DIP(curinode, di_mode) & ~07777); DIP_SET(curinode, di_mode, DIP(curinode, di_mode) | modebits); inodirty(); printactive(0); return rval; } CMDFUNCSTART(chaflags) { int rval = 1; u_long flags; char *cp; if (!checkactive()) return 1; flags = strtoul(argv[1], &cp, 0); if (cp == argv[1] || *cp != '\0' ) { warnx("bad flags `%s'", argv[1]); return 1; } if (flags > UINT_MAX) { warnx("flags set beyond 32-bit range of field (%lx)\n", flags); return(1); } DIP_SET(curinode, di_flags, flags); inodirty(); printactive(0); return rval; } CMDFUNCSTART(chgen) { int rval = 1; long gen; char *cp; if (!checkactive()) return 1; gen = strtol(argv[1], &cp, 0); if (cp == argv[1] || *cp != '\0' ) { warnx("bad gen `%s'", argv[1]); return 1; } if (gen > INT_MAX || gen < INT_MIN) { warnx("gen set beyond 32-bit range of field (%lx)\n", gen); return(1); } DIP_SET(curinode, di_gen, gen); inodirty(); printactive(0); return rval; } CMDFUNCSTART(linkcount) { int rval = 1; int lcnt; char *cp; if (!checkactive()) return 1; lcnt = strtol(argv[1], &cp, 0); if (cp == argv[1] || *cp != '\0' ) { warnx("bad link count `%s'", argv[1]); return 1; } if (lcnt > USHRT_MAX || lcnt < 0) { warnx("max link count is 
%d\n", USHRT_MAX); return 1; } DIP_SET(curinode, di_nlink, lcnt); inodirty(); printactive(0); return rval; } CMDFUNCSTART(chowner) { int rval = 1; unsigned long uid; char *cp; struct passwd *pwd; if (!checkactive()) return 1; uid = strtoul(argv[1], &cp, 0); if (cp == argv[1] || *cp != '\0' ) { /* try looking up name */ if ((pwd = getpwnam(argv[1]))) { uid = pwd->pw_uid; } else { warnx("bad uid `%s'", argv[1]); return 1; } } DIP_SET(curinode, di_uid, uid); inodirty(); printactive(0); return rval; } CMDFUNCSTART(chgroup) { int rval = 1; unsigned long gid; char *cp; struct group *grp; if (!checkactive()) return 1; gid = strtoul(argv[1], &cp, 0); if (cp == argv[1] || *cp != '\0' ) { if ((grp = getgrnam(argv[1]))) { gid = grp->gr_gid; } else { warnx("bad gid `%s'", argv[1]); return 1; } } DIP_SET(curinode, di_gid, gid); inodirty(); printactive(0); return rval; } int dotime(char *name, time_t *secp, int32_t *nsecp) { char *p, *val; struct tm t; int32_t nsec; p = strchr(name, '.'); if (p) { *p = '\0'; nsec = strtoul(++p, &val, 0); if (val == p || *val != '\0' || nsec >= 1000000000 || nsec < 0) { warnx("invalid nanoseconds"); goto badformat; } } else nsec = 0; if (strlen(name) != 14) { badformat: warnx("date format: YYYYMMDDHHMMSS[.nsec]"); return 1; } *nsecp = nsec; for (p = name; *p; p++) if (*p < '0' || *p > '9') goto badformat; p = name; #define VAL() ((*p++) - '0') t.tm_year = VAL(); t.tm_year = VAL() + t.tm_year * 10; t.tm_year = VAL() + t.tm_year * 10; t.tm_year = VAL() + t.tm_year * 10 - 1900; t.tm_mon = VAL(); t.tm_mon = VAL() + t.tm_mon * 10 - 1; t.tm_mday = VAL(); t.tm_mday = VAL() + t.tm_mday * 10; t.tm_hour = VAL(); t.tm_hour = VAL() + t.tm_hour * 10; t.tm_min = VAL(); t.tm_min = VAL() + t.tm_min * 10; t.tm_sec = VAL(); t.tm_sec = VAL() + t.tm_sec * 10; t.tm_isdst = -1; *secp = mktime(&t); if (*secp == -1) { warnx("date/time out of range"); return 1; } return 0; } CMDFUNCSTART(chbtime) { time_t secs; int32_t nsecs; if (dotime(argv[1], &secs, &nsecs)) return 1; if (sblock.fs_magic == FS_UFS1_MAGIC) return 1; curinode->dp2.di_birthtime = _time_to_time64(secs); curinode->dp2.di_birthnsec = nsecs; inodirty(); printactive(0); return 0; } CMDFUNCSTART(chmtime) { time_t secs; int32_t nsecs; if (dotime(argv[1], &secs, &nsecs)) return 1; if (sblock.fs_magic == FS_UFS1_MAGIC) curinode->dp1.di_mtime = _time_to_time32(secs); else curinode->dp2.di_mtime = _time_to_time64(secs); DIP_SET(curinode, di_mtimensec, nsecs); inodirty(); printactive(0); return 0; } CMDFUNCSTART(chatime) { time_t secs; int32_t nsecs; if (dotime(argv[1], &secs, &nsecs)) return 1; if (sblock.fs_magic == FS_UFS1_MAGIC) curinode->dp1.di_atime = _time_to_time32(secs); else curinode->dp2.di_atime = _time_to_time64(secs); DIP_SET(curinode, di_atimensec, nsecs); inodirty(); printactive(0); return 0; } CMDFUNCSTART(chctime) { time_t secs; int32_t nsecs; if (dotime(argv[1], &secs, &nsecs)) return 1; if (sblock.fs_magic == FS_UFS1_MAGIC) curinode->dp1.di_ctime = _time_to_time32(secs); else curinode->dp2.di_ctime = _time_to_time64(secs); DIP_SET(curinode, di_ctimensec, nsecs); inodirty(); printactive(0); return 0; } Index: user/alc/PQ_LAUNDRY/sbin/ipfw/tables.c =================================================================== --- user/alc/PQ_LAUNDRY/sbin/ipfw/tables.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sbin/ipfw/tables.c (revision 303642) @@ -1,1995 +1,2007 @@ /* * Copyright (c) 2014 Yandex LLC * Copyright (c) 2014 Alexander V. 
Chernikov * * Redistribution and use in source forms, with and without modification, * are permitted provided that this entire comment appears intact. * * Redistribution in binary form may occur without any restrictions. * Obviously, it would be nice if you gave credit where credit is due * but requiring it would be too onerous. * * This software is provided ``AS IS'' without any warranties of any kind. * * in-kernel ipfw tables support. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ipfw2.h" static void table_modify_record(ipfw_obj_header *oh, int ac, char *av[], int add, int quiet, int update, int atomic); static int table_flush(ipfw_obj_header *oh); static int table_destroy(ipfw_obj_header *oh); static int table_do_create(ipfw_obj_header *oh, ipfw_xtable_info *i); static int table_do_modify(ipfw_obj_header *oh, ipfw_xtable_info *i); static int table_do_swap(ipfw_obj_header *oh, char *second); static void table_create(ipfw_obj_header *oh, int ac, char *av[]); static void table_modify(ipfw_obj_header *oh, int ac, char *av[]); static void table_lookup(ipfw_obj_header *oh, int ac, char *av[]); static void table_lock(ipfw_obj_header *oh, int lock); static int table_swap(ipfw_obj_header *oh, char *second); static int table_get_info(ipfw_obj_header *oh, ipfw_xtable_info *i); static int table_show_info(ipfw_xtable_info *i, void *arg); static void table_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint32_t set, uint16_t uidx); static int table_flush_one(ipfw_xtable_info *i, void *arg); static int table_show_one(ipfw_xtable_info *i, void *arg); static int table_do_get_list(ipfw_xtable_info *i, ipfw_obj_header **poh); static void table_show_list(ipfw_obj_header *oh, int need_header); static void table_show_entry(ipfw_xtable_info *i, ipfw_obj_tentry *tent); static void tentry_fill_key(ipfw_obj_header *oh, ipfw_obj_tentry *tent, char *key, int add, uint8_t *ptype, uint32_t *pvmask, ipfw_xtable_info *xi); static void tentry_fill_value(ipfw_obj_header *oh, ipfw_obj_tentry *tent, char *arg, uint8_t type, uint32_t vmask); static void table_show_value(char *buf, size_t bufsize, ipfw_table_value *v, uint32_t vmask, int print_ip); typedef int (table_cb_t)(ipfw_xtable_info *i, void *arg); static int tables_foreach(table_cb_t *f, void *arg, int sort); #ifndef s6_addr32 #define s6_addr32 __u6_addr.__u6_addr32 #endif static struct _s_x tabletypes[] = { { "addr", IPFW_TABLE_ADDR }, { "iface", IPFW_TABLE_INTERFACE }, { "number", IPFW_TABLE_NUMBER }, { "flow", IPFW_TABLE_FLOW }, { NULL, 0 } }; static struct _s_x tablevaltypes[] = { { "skipto", IPFW_VTYPE_SKIPTO }, { "pipe", IPFW_VTYPE_PIPE }, { "fib", IPFW_VTYPE_FIB }, { "nat", IPFW_VTYPE_NAT }, { "dscp", IPFW_VTYPE_DSCP }, { "tag", IPFW_VTYPE_TAG }, { "divert", IPFW_VTYPE_DIVERT }, { "netgraph", IPFW_VTYPE_NETGRAPH }, { "limit", IPFW_VTYPE_LIMIT }, { "ipv4", IPFW_VTYPE_NH4 }, { "ipv6", IPFW_VTYPE_NH6 }, { NULL, 0 } }; static struct _s_x tablecmds[] = { { "add", TOK_ADD }, { "delete", TOK_DEL }, { "create", TOK_CREATE }, { "destroy", TOK_DESTROY }, { "flush", TOK_FLUSH }, { "modify", TOK_MODIFY }, { "swap", TOK_SWAP }, { "info", TOK_INFO }, { "detail", TOK_DETAIL }, { "list", TOK_LIST }, { "lookup", TOK_LOOKUP }, { "atomic", TOK_ATOMIC }, { "lock", TOK_LOCK }, { "unlock", TOK_UNLOCK }, { NULL, 0 } }; static int lookup_host (char *host, struct in_addr *ipaddr) { struct hostent *he; if (!inet_aton(host, ipaddr)) { if ((he = 
gethostbyname(host)) == NULL) return(-1); *ipaddr = *(struct in_addr *)he->h_addr_list[0]; } return(0); } /* * This one handles all table-related commands * ipfw table NAME create ... * ipfw table NAME modify ... * ipfw table NAME destroy * ipfw table NAME swap NAME * ipfw table NAME lock * ipfw table NAME unlock * ipfw table NAME add addr[/masklen] [value] * ipfw table NAME add [addr[/masklen] value] [addr[/masklen] value] .. * ipfw table NAME delete addr[/masklen] [addr[/masklen]] .. * ipfw table NAME lookup addr * ipfw table {NAME | all} flush * ipfw table {NAME | all} list * ipfw table {NAME | all} info * ipfw table {NAME | all} detail */ void ipfw_table_handler(int ac, char *av[]) { int do_add, is_all; int atomic, error, tcmd; ipfw_xtable_info i; ipfw_obj_header oh; char *tablename; uint32_t set; void *arg; memset(&oh, 0, sizeof(oh)); is_all = 0; if (co.use_set != 0) set = co.use_set - 1; else set = 0; ac--; av++; NEED1("table needs name"); tablename = *av; if (table_check_name(tablename) == 0) { table_fill_ntlv(&oh.ntlv, *av, set, 1); oh.idx = 1; } else { if (strcmp(tablename, "all") == 0) is_all = 1; else errx(EX_USAGE, "table name %s is invalid", tablename); } ac--; av++; NEED1("table needs command"); tcmd = get_token(tablecmds, *av, "table command"); /* Check if atomic operation was requested */ atomic = 0; if (tcmd == TOK_ATOMIC) { ac--; av++; NEED1("atomic needs command"); tcmd = get_token(tablecmds, *av, "table command"); switch (tcmd) { case TOK_ADD: break; default: errx(EX_USAGE, "atomic is not compatible with %s", *av); } atomic = 1; } switch (tcmd) { case TOK_LIST: case TOK_INFO: case TOK_DETAIL: case TOK_FLUSH: break; default: if (is_all != 0) errx(EX_USAGE, "table name required"); } switch (tcmd) { case TOK_ADD: case TOK_DEL: do_add = **av == 'a'; ac--; av++; table_modify_record(&oh, ac, av, do_add, co.do_quiet, co.do_quiet, atomic); break; case TOK_CREATE: ac--; av++; table_create(&oh, ac, av); break; case TOK_MODIFY: ac--; av++; table_modify(&oh, ac, av); break; case TOK_DESTROY: - if (table_destroy(&oh) != 0) + if (table_destroy(&oh) == 0) + break; + if (errno != ESRCH) err(EX_OSERR, "failed to destroy table %s", tablename); + /* ESRCH isn't fatal, warn if not quiet mode */ + if (co.do_quiet == 0) + warn("failed to destroy table %s", tablename); break; case TOK_FLUSH: if (is_all == 0) { - if ((error = table_flush(&oh)) != 0) + if ((error = table_flush(&oh)) == 0) + break; + if (errno != ESRCH) err(EX_OSERR, "failed to flush table %s info", tablename); + /* ESRCH isn't fatal, warn if not quiet mode */ + if (co.do_quiet == 0) + warn("failed to flush table %s info", + tablename); } else { error = tables_foreach(table_flush_one, &oh, 1); if (error != 0) err(EX_OSERR, "failed to flush tables list"); + /* XXX: we ignore errors here */ } break; case TOK_SWAP: ac--; av++; NEED1("second table name required"); table_swap(&oh, *av); break; case TOK_LOCK: case TOK_UNLOCK: table_lock(&oh, (tcmd == TOK_LOCK)); break; case TOK_DETAIL: case TOK_INFO: arg = (tcmd == TOK_DETAIL) ? 
(void *)1 : NULL; if (is_all == 0) { if ((error = table_get_info(&oh, &i)) != 0) err(EX_OSERR, "failed to request table info"); table_show_info(&i, arg); } else { error = tables_foreach(table_show_info, arg, 1); if (error != 0) err(EX_OSERR, "failed to request tables list"); } break; case TOK_LIST: if (is_all == 0) { ipfw_xtable_info i; if ((error = table_get_info(&oh, &i)) != 0) err(EX_OSERR, "failed to request table info"); table_show_one(&i, NULL); } else { error = tables_foreach(table_show_one, NULL, 1); if (error != 0) err(EX_OSERR, "failed to request tables list"); } break; case TOK_LOOKUP: ac--; av++; table_lookup(&oh, ac, av); break; } } static void table_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint32_t set, uint16_t uidx) { ntlv->head.type = IPFW_TLV_TBL_NAME; ntlv->head.length = sizeof(ipfw_obj_ntlv); ntlv->idx = uidx; ntlv->set = set; strlcpy(ntlv->name, name, sizeof(ntlv->name)); } static void table_fill_objheader(ipfw_obj_header *oh, ipfw_xtable_info *i) { oh->idx = 1; table_fill_ntlv(&oh->ntlv, i->tablename, i->set, 1); } static struct _s_x tablenewcmds[] = { { "type", TOK_TYPE }, { "valtype", TOK_VALTYPE }, { "algo", TOK_ALGO }, { "limit", TOK_LIMIT }, { "locked", TOK_LOCK }, { NULL, 0 } }; static struct _s_x flowtypecmds[] = { { "src-ip", IPFW_TFFLAG_SRCIP }, { "proto", IPFW_TFFLAG_PROTO }, { "src-port", IPFW_TFFLAG_SRCPORT }, { "dst-ip", IPFW_TFFLAG_DSTIP }, { "dst-port", IPFW_TFFLAG_DSTPORT }, { NULL, 0 } }; int table_parse_type(uint8_t ttype, char *p, uint8_t *tflags) { uint32_t fset, fclear; char *e; /* Parse type options */ switch(ttype) { case IPFW_TABLE_FLOW: fset = fclear = 0; if (fill_flags(flowtypecmds, p, &e, &fset, &fclear) != 0) errx(EX_USAGE, "unable to parse flow option %s", e); *tflags = fset; break; default: return (EX_USAGE); } return (0); } void table_print_type(char *tbuf, size_t size, uint8_t type, uint8_t tflags) { const char *tname; int l; if ((tname = match_value(tabletypes, type)) == NULL) tname = "unknown"; l = snprintf(tbuf, size, "%s", tname); tbuf += l; size -= l; switch(type) { case IPFW_TABLE_FLOW: if (tflags != 0) { *tbuf++ = ':'; l--; print_flags_buffer(tbuf, size, flowtypecmds, tflags); } break; } } /* * Creates new table * * ipfw table NAME create [ type { addr | iface | number | flow } ] * [ algo algoname ] */ static void table_create(ipfw_obj_header *oh, int ac, char *av[]) { ipfw_xtable_info xi; int error, tcmd, val; uint32_t fset, fclear; char *e, *p; char tbuf[128]; memset(&xi, 0, sizeof(xi)); while (ac > 0) { tcmd = get_token(tablenewcmds, *av, "option"); ac--; av++; switch (tcmd) { case TOK_LIMIT: NEED1("limit value required"); xi.limit = strtol(*av, NULL, 10); ac--; av++; break; case TOK_TYPE: NEED1("table type required"); /* Type may have suboptions after ':' */ if ((p = strchr(*av, ':')) != NULL) *p++ = '\0'; val = match_token(tabletypes, *av); if (val == -1) { concat_tokens(tbuf, sizeof(tbuf), tabletypes, ", "); errx(EX_USAGE, "Unknown tabletype: %s. Supported: %s", *av, tbuf); } xi.type = val; if (p != NULL) { error = table_parse_type(val, p, &xi.tflags); if (error != 0) errx(EX_USAGE, "Unsupported suboptions: %s", p); } ac--; av++; break; case TOK_VALTYPE: NEED1("table value type required"); fset = fclear = 0; val = fill_flags(tablevaltypes, *av, &e, &fset, &fclear); if (val != -1) { xi.vmask = fset; ac--; av++; break; } concat_tokens(tbuf, sizeof(tbuf), tablevaltypes, ", "); errx(EX_USAGE, "Unknown value type: %s. 
Supported: %s", e, tbuf); break; case TOK_ALGO: NEED1("table algorithm name required"); if (strlen(*av) > sizeof(xi.algoname)) errx(EX_USAGE, "algorithm name too long"); strlcpy(xi.algoname, *av, sizeof(xi.algoname)); ac--; av++; break; case TOK_LOCK: xi.flags |= IPFW_TGFLAGS_LOCKED; break; } } /* Set some defaults to preserve compatibility. */ if (xi.algoname[0] == '\0' && xi.type == 0) xi.type = IPFW_TABLE_ADDR; if (xi.vmask == 0) xi.vmask = IPFW_VTYPE_LEGACY; if ((error = table_do_create(oh, &xi)) != 0) err(EX_OSERR, "Table creation failed"); } /* * Creates new table * * Request: [ ipfw_obj_header ipfw_xtable_info ] * * Returns 0 on success. */ static int table_do_create(ipfw_obj_header *oh, ipfw_xtable_info *i) { char tbuf[sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info)]; int error; memcpy(tbuf, oh, sizeof(*oh)); memcpy(tbuf + sizeof(*oh), i, sizeof(*i)); oh = (ipfw_obj_header *)tbuf; error = do_set3(IP_FW_TABLE_XCREATE, &oh->opheader, sizeof(tbuf)); return (error); } /* * Modifies existing table * * ipfw table NAME modify [ limit number ] */ static void table_modify(ipfw_obj_header *oh, int ac, char *av[]) { ipfw_xtable_info xi; int tcmd; memset(&xi, 0, sizeof(xi)); while (ac > 0) { tcmd = get_token(tablenewcmds, *av, "option"); ac--; av++; switch (tcmd) { case TOK_LIMIT: NEED1("limit value required"); xi.limit = strtol(*av, NULL, 10); xi.mflags |= IPFW_TMFLAGS_LIMIT; ac--; av++; break; default: errx(EX_USAGE, "cmd is not supported for modificatiob"); } } if (table_do_modify(oh, &xi) != 0) err(EX_OSERR, "Table modification failed"); } /* * Modifies existing table. * * Request: [ ipfw_obj_header ipfw_xtable_info ] * * Returns 0 on success. */ static int table_do_modify(ipfw_obj_header *oh, ipfw_xtable_info *i) { char tbuf[sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info)]; int error; memcpy(tbuf, oh, sizeof(*oh)); memcpy(tbuf + sizeof(*oh), i, sizeof(*i)); oh = (ipfw_obj_header *)tbuf; error = do_set3(IP_FW_TABLE_XMODIFY, &oh->opheader, sizeof(tbuf)); return (error); } /* * Locks or unlocks given table */ static void table_lock(ipfw_obj_header *oh, int lock) { ipfw_xtable_info xi; memset(&xi, 0, sizeof(xi)); xi.mflags |= IPFW_TMFLAGS_LOCK; xi.flags |= (lock != 0) ? IPFW_TGFLAGS_LOCKED : 0; if (table_do_modify(oh, &xi) != 0) err(EX_OSERR, "Table %s failed", lock != 0 ? "lock" : "unlock"); } /* * Destroys given table specified by @oh->ntlv. * Returns 0 on success. */ static int table_destroy(ipfw_obj_header *oh) { if (do_set3(IP_FW_TABLE_XDESTROY, &oh->opheader, sizeof(*oh)) != 0) return (-1); return (0); } /* * Flushes given table specified by @oh->ntlv. * Returns 0 on success. */ static int table_flush(ipfw_obj_header *oh) { if (do_set3(IP_FW_TABLE_XFLUSH, &oh->opheader, sizeof(*oh)) != 0) return (-1); return (0); } static int table_do_swap(ipfw_obj_header *oh, char *second) { char tbuf[sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ntlv)]; int error; memset(tbuf, 0, sizeof(tbuf)); memcpy(tbuf, oh, sizeof(*oh)); oh = (ipfw_obj_header *)tbuf; table_fill_ntlv((ipfw_obj_ntlv *)(oh + 1), second, oh->ntlv.set, 1); error = do_set3(IP_FW_TABLE_XSWAP, &oh->opheader, sizeof(tbuf)); return (error); } /* * Swaps given table with @second one. 
*/ static int table_swap(ipfw_obj_header *oh, char *second) { - int error; if (table_check_name(second) != 0) errx(EX_USAGE, "table name %s is invalid", second); - error = table_do_swap(oh, second); + if (table_do_swap(oh, second) == 0) + return (0); - switch (error) { + switch (errno) { case EINVAL: errx(EX_USAGE, "Unable to swap table: check types"); case EFBIG: errx(EX_USAGE, "Unable to swap table: check limits"); } return (0); } /* * Retrieves table in given table specified by @oh->ntlv. * it inside @i. * Returns 0 on success. */ static int table_get_info(ipfw_obj_header *oh, ipfw_xtable_info *i) { char tbuf[sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info)]; size_t sz; sz = sizeof(tbuf); memset(tbuf, 0, sizeof(tbuf)); memcpy(tbuf, oh, sizeof(*oh)); oh = (ipfw_obj_header *)tbuf; if (do_get3(IP_FW_TABLE_XINFO, &oh->opheader, &sz) != 0) return (errno); if (sz < sizeof(tbuf)) return (EINVAL); *i = *(ipfw_xtable_info *)(oh + 1); return (0); } static struct _s_x tablealgoclass[] = { { "hash", IPFW_TACLASS_HASH }, { "array", IPFW_TACLASS_ARRAY }, { "radix", IPFW_TACLASS_RADIX }, { NULL, 0 } }; struct ta_cldata { uint8_t taclass; uint8_t spare4; uint16_t itemsize; uint16_t itemsize6; uint32_t size; uint32_t count; }; /* * Print global/per-AF table @i algorithm info. */ static void table_show_tainfo(ipfw_xtable_info *i, struct ta_cldata *d, const char *af, const char *taclass) { switch (d->taclass) { case IPFW_TACLASS_HASH: case IPFW_TACLASS_ARRAY: printf(" %salgorithm %s info\n", af, taclass); if (d->itemsize == d->itemsize6) printf(" size: %u items: %u itemsize: %u\n", d->size, d->count, d->itemsize); else printf(" size: %u items: %u " "itemsize4: %u itemsize6: %u\n", d->size, d->count, d->itemsize, d->itemsize6); break; case IPFW_TACLASS_RADIX: printf(" %salgorithm %s info\n", af, taclass); if (d->itemsize == d->itemsize6) printf(" items: %u itemsize: %u\n", d->count, d->itemsize); else printf(" items: %u " "itemsize4: %u itemsize6: %u\n", d->count, d->itemsize, d->itemsize6); break; default: printf(" algo class: %s\n", taclass); } } static void table_print_valheader(char *buf, size_t bufsize, uint32_t vmask) { if (vmask == IPFW_VTYPE_LEGACY) { snprintf(buf, bufsize, "legacy"); return; } memset(buf, 0, bufsize); print_flags_buffer(buf, bufsize, tablevaltypes, vmask); } /* * Prints table info struct @i in human-readable form. 
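* Per-algorithm details are appended only when @arg is non-NULL (the "detail" command).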
*/ static int table_show_info(ipfw_xtable_info *i, void *arg) { const char *vtype; ipfw_ta_tinfo *tainfo; int afdata, afitem; struct ta_cldata d; char ttype[64], tvtype[64]; table_print_type(ttype, sizeof(ttype), i->type, i->tflags); table_print_valheader(tvtype, sizeof(tvtype), i->vmask); printf("--- table(%s), set(%u) ---\n", i->tablename, i->set); if ((i->flags & IPFW_TGFLAGS_LOCKED) != 0) printf(" kindex: %d, type: %s, locked\n", i->kidx, ttype); else printf(" kindex: %d, type: %s\n", i->kidx, ttype); printf(" references: %u, valtype: %s\n", i->refcnt, tvtype); printf(" algorithm: %s\n", i->algoname); printf(" items: %u, size: %u\n", i->count, i->size); if (i->limit > 0) printf(" limit: %u\n", i->limit); /* Print algo-specific info if requested & set */ if (arg == NULL) return (0); if ((i->ta_info.flags & IPFW_TATFLAGS_DATA) == 0) return (0); tainfo = &i->ta_info; afdata = 0; afitem = 0; if (tainfo->flags & IPFW_TATFLAGS_AFDATA) afdata = 1; if (tainfo->flags & IPFW_TATFLAGS_AFITEM) afitem = 1; memset(&d, 0, sizeof(d)); d.taclass = tainfo->taclass4; d.size = tainfo->size4; d.count = tainfo->count4; d.itemsize = tainfo->itemsize4; if (afdata == 0 && afitem != 0) d.itemsize6 = tainfo->itemsize6; else d.itemsize6 = d.itemsize; if ((vtype = match_value(tablealgoclass, d.taclass)) == NULL) vtype = "unknown"; if (afdata == 0) { table_show_tainfo(i, &d, "", vtype); } else { table_show_tainfo(i, &d, "IPv4 ", vtype); memset(&d, 0, sizeof(d)); d.taclass = tainfo->taclass6; if ((vtype = match_value(tablealgoclass, d.taclass)) == NULL) vtype = "unknown"; d.size = tainfo->size6; d.count = tainfo->count6; d.itemsize = tainfo->itemsize6; d.itemsize6 = d.itemsize; table_show_tainfo(i, &d, "IPv6 ", vtype); } return (0); } /* * Function wrappers which can be used either * as is or as foreach function parameter. 
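* Each wrapper matches the table_cb_t signature, so it can be called directly or handed to tables_foreach().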
*/ static int table_show_one(ipfw_xtable_info *i, void *arg) { ipfw_obj_header *oh; int error; if ((error = table_do_get_list(i, &oh)) != 0) { err(EX_OSERR, "Error requesting table %s list", i->tablename); return (error); } table_show_list(oh, 1); free(oh); return (0); } static int table_flush_one(ipfw_xtable_info *i, void *arg) { ipfw_obj_header *oh; oh = (ipfw_obj_header *)arg; table_fill_ntlv(&oh->ntlv, i->tablename, i->set, 1); return (table_flush(oh)); } static int table_do_modify_record(int cmd, ipfw_obj_header *oh, ipfw_obj_tentry *tent, int count, int atomic) { ipfw_obj_ctlv *ctlv; ipfw_obj_tentry *tent_base; caddr_t pbuf; char xbuf[sizeof(*oh) + sizeof(ipfw_obj_ctlv) + sizeof(*tent)]; int error, i; size_t sz; sz = sizeof(*ctlv) + sizeof(*tent) * count; if (count == 1) { memset(xbuf, 0, sizeof(xbuf)); pbuf = xbuf; } else { if ((pbuf = calloc(1, sizeof(*oh) + sz)) == NULL) return (ENOMEM); } memcpy(pbuf, oh, sizeof(*oh)); oh = (ipfw_obj_header *)pbuf; oh->opheader.version = 1; ctlv = (ipfw_obj_ctlv *)(oh + 1); ctlv->count = count; ctlv->head.length = sz; if (atomic != 0) ctlv->flags |= IPFW_CTF_ATOMIC; tent_base = tent; memcpy(ctlv + 1, tent, sizeof(*tent) * count); tent = (ipfw_obj_tentry *)(ctlv + 1); for (i = 0; i < count; i++, tent++) { tent->head.length = sizeof(ipfw_obj_tentry); tent->idx = oh->idx; } sz += sizeof(*oh); error = do_get3(cmd, &oh->opheader, &sz); tent = (ipfw_obj_tentry *)(ctlv + 1); /* Copy result back to provided buffer */ memcpy(tent_base, ctlv + 1, sizeof(*tent) * count); if (pbuf != xbuf) free(pbuf); return (error); } static void table_modify_record(ipfw_obj_header *oh, int ac, char *av[], int add, int quiet, int update, int atomic) { ipfw_obj_tentry *ptent, tent, *tent_buf; ipfw_xtable_info xi; uint8_t type; uint32_t vmask; int cmd, count, error, i, ignored; char *texterr, *etxt, *px; if (ac == 0) errx(EX_USAGE, "address required"); if (add != 0) { cmd = IP_FW_TABLE_XADD; texterr = "Adding record failed"; } else { cmd = IP_FW_TABLE_XDEL; texterr = "Deleting record failed"; } /* * Calculate number of entries: * Assume [key val] x N for add * and * key x N for delete */ count = (add != 0) ? ac / 2 + 1 : ac; if (count <= 1) { /* Adding single entry with/without value */ memset(&tent, 0, sizeof(tent)); tent_buf = &tent; } else { if ((tent_buf = calloc(count, sizeof(tent))) == NULL) errx(EX_OSERR, "Unable to allocate memory for all entries"); } ptent = tent_buf; memset(&xi, 0, sizeof(xi)); count = 0; while (ac > 0) { tentry_fill_key(oh, ptent, *av, add, &type, &vmask, &xi); /* * Compatibility layer: auto-create table if not exists. */ if (xi.tablename[0] == '\0') { xi.type = type; xi.vmask = vmask; strlcpy(xi.tablename, oh->ntlv.name, sizeof(xi.tablename)); if (quiet == 0) warnx("DEPRECATED: inserting data into " "non-existent table %s. (auto-created)", xi.tablename); table_do_create(oh, &xi); } oh->ntlv.type = type; ac--; av++; if (add != 0 && ac > 0) { tentry_fill_value(oh, ptent, *av, type, vmask); ac--; av++; } if (update != 0) ptent->head.flags |= IPFW_TF_UPDATE; count++; ptent++; } error = table_do_modify_record(cmd, oh, tent_buf, count, atomic); /* * Compatibility stuff: do not yell on duplicate keys or * failed deletions. 
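* In quiet mode EEXIST on add and ENOENT on delete are ignored silently.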
*/ if (error == 0 || (error == EEXIST && add != 0) || (error == ENOENT && add == 0)) { if (quiet != 0) { if (tent_buf != &tent) free(tent_buf); return; } } /* Report results back */ ptent = tent_buf; for (i = 0; i < count; ptent++, i++) { ignored = 0; switch (ptent->result) { case IPFW_TR_ADDED: px = "added"; break; case IPFW_TR_DELETED: px = "deleted"; break; case IPFW_TR_UPDATED: px = "updated"; break; case IPFW_TR_LIMIT: px = "limit"; ignored = 1; break; case IPFW_TR_ERROR: px = "error"; ignored = 1; break; case IPFW_TR_NOTFOUND: px = "notfound"; ignored = 1; break; case IPFW_TR_EXISTS: px = "exists"; ignored = 1; break; case IPFW_TR_IGNORED: px = "ignored"; ignored = 1; break; default: px = "unknown"; ignored = 1; } if (error != 0 && atomic != 0 && ignored == 0) printf("%s(reverted): ", px); else printf("%s: ", px); table_show_entry(&xi, ptent); } if (tent_buf != &tent) free(tent_buf); if (error == 0) return; /* Get real OS error */ error = errno; /* Try to provide more human-readable error */ switch (error) { case EEXIST: etxt = "record already exists"; break; case EFBIG: etxt = "limit hit"; break; case ESRCH: etxt = "table not found"; break; case ENOENT: etxt = "record not found"; break; case EACCES: etxt = "table is locked"; break; default: etxt = strerror(error); } errx(EX_OSERR, "%s: %s", texterr, etxt); } static int table_do_lookup(ipfw_obj_header *oh, char *key, ipfw_xtable_info *xi, ipfw_obj_tentry *xtent) { char xbuf[sizeof(ipfw_obj_header) + sizeof(ipfw_obj_tentry)]; ipfw_obj_tentry *tent; uint8_t type; uint32_t vmask; size_t sz; memcpy(xbuf, oh, sizeof(*oh)); oh = (ipfw_obj_header *)xbuf; tent = (ipfw_obj_tentry *)(oh + 1); memset(tent, 0, sizeof(*tent)); tent->head.length = sizeof(*tent); tent->idx = 1; tentry_fill_key(oh, tent, key, 0, &type, &vmask, xi); oh->ntlv.type = type; sz = sizeof(xbuf); if (do_get3(IP_FW_TABLE_XFIND, &oh->opheader, &sz) != 0) return (errno); if (sz < sizeof(xbuf)) return (EINVAL); *xtent = *tent; return (0); } static void table_lookup(ipfw_obj_header *oh, int ac, char *av[]) { ipfw_obj_tentry xtent; ipfw_xtable_info xi; char key[64]; int error; if (ac == 0) errx(EX_USAGE, "address required"); strlcpy(key, *av, sizeof(key)); memset(&xi, 0, sizeof(xi)); error = table_do_lookup(oh, key, &xi, &xtent); switch (error) { case 0: break; case ESRCH: errx(EX_UNAVAILABLE, "Table %s not found", oh->ntlv.name); case ENOENT: errx(EX_UNAVAILABLE, "Entry %s not found", *av); case ENOTSUP: errx(EX_UNAVAILABLE, "Table %s algo does not support " "\"lookup\" method", oh->ntlv.name); default: err(EX_OSERR, "getsockopt(IP_FW_TABLE_XFIND)"); } table_show_entry(&xi, &xtent); } static void tentry_fill_key_type(char *arg, ipfw_obj_tentry *tentry, uint8_t type, uint8_t tflags) { char *p, *pp; int mask, af; struct in6_addr *paddr, tmp; struct tflow_entry *tfe; uint32_t key, *pkey; uint16_t port; struct protoent *pent; struct servent *sent; int masklen; masklen = 0; af = 0; paddr = (struct in6_addr *)&tentry->k; switch (type) { case IPFW_TABLE_ADDR: /* Remove / if exists */ if ((p = strchr(arg, '/')) != NULL) { *p = '\0'; mask = atoi(p + 1); } if (inet_pton(AF_INET, arg, paddr) == 1) { if (p != NULL && mask > 32) errx(EX_DATAERR, "bad IPv4 mask width: %s", p + 1); masklen = p ? mask : 32; af = AF_INET; } else if (inet_pton(AF_INET6, arg, paddr) == 1) { if (IN6_IS_ADDR_V4COMPAT(paddr)) errx(EX_DATAERR, "Use IPv4 instead of v4-compatible"); if (p != NULL && mask > 128) errx(EX_DATAERR, "bad IPv6 mask width: %s", p + 1); masklen = p ? 
mask : 128; af = AF_INET6; } else { /* Assume FQDN */ if (lookup_host(arg, (struct in_addr *)paddr) != 0) errx(EX_NOHOST, "hostname ``%s'' unknown", arg); masklen = 32; type = IPFW_TABLE_ADDR; af = AF_INET; } break; case IPFW_TABLE_INTERFACE: /* Assume interface name. Copy significant data only */ mask = MIN(strlen(arg), IF_NAMESIZE - 1); memcpy(paddr, arg, mask); /* Set mask to exact match */ masklen = 8 * IF_NAMESIZE; break; case IPFW_TABLE_NUMBER: /* Port or any other key */ key = strtol(arg, &p, 10); if (*p != '\0') errx(EX_DATAERR, "Invalid number: %s", arg); pkey = (uint32_t *)paddr; *pkey = key; masklen = 32; break; case IPFW_TABLE_FLOW: /* Assume [src-ip][,proto][,src-port][,dst-ip][,dst-port] */ tfe = &tentry->k.flow; af = 0; /* Handle */ if ((tflags & IPFW_TFFLAG_SRCIP) != 0) { if ((p = strchr(arg, ',')) != NULL) *p++ = '\0'; /* Determine family using temporary storage */ if (inet_pton(AF_INET, arg, &tmp) == 1) { if (af != 0 && af != AF_INET) errx(EX_DATAERR, "Inconsistent address family\n"); af = AF_INET; memcpy(&tfe->a.a4.sip, &tmp, 4); } else if (inet_pton(AF_INET6, arg, &tmp) == 1) { if (af != 0 && af != AF_INET6) errx(EX_DATAERR, "Inconsistent address family\n"); af = AF_INET6; memcpy(&tfe->a.a6.sip6, &tmp, 16); } arg = p; } /* Handle */ if ((tflags & IPFW_TFFLAG_PROTO) != 0) { if (arg == NULL) errx(EX_DATAERR, "invalid key: proto missing"); if ((p = strchr(arg, ',')) != NULL) *p++ = '\0'; key = strtol(arg, &pp, 10); if (*pp != '\0') { if ((pent = getprotobyname(arg)) == NULL) errx(EX_DATAERR, "Unknown proto: %s", arg); else key = pent->p_proto; } if (key > 255) errx(EX_DATAERR, "Bad protocol number: %u",key); tfe->proto = key; arg = p; } /* Handle */ if ((tflags & IPFW_TFFLAG_SRCPORT) != 0) { if (arg == NULL) errx(EX_DATAERR, "invalid key: src port missing"); if ((p = strchr(arg, ',')) != NULL) *p++ = '\0'; if ((port = htons(strtol(arg, NULL, 10))) == 0) { if ((sent = getservbyname(arg, NULL)) == NULL) errx(EX_DATAERR, "Unknown service: %s", arg); else key = sent->s_port; } tfe->sport = port; arg = p; } /* Handle */ if ((tflags & IPFW_TFFLAG_DSTIP) != 0) { if (arg == NULL) errx(EX_DATAERR, "invalid key: dst ip missing"); if ((p = strchr(arg, ',')) != NULL) *p++ = '\0'; /* Determine family using temporary storage */ if (inet_pton(AF_INET, arg, &tmp) == 1) { if (af != 0 && af != AF_INET) errx(EX_DATAERR, "Inconsistent address family"); af = AF_INET; memcpy(&tfe->a.a4.dip, &tmp, 4); } else if (inet_pton(AF_INET6, arg, &tmp) == 1) { if (af != 0 && af != AF_INET6) errx(EX_DATAERR, "Inconsistent address family"); af = AF_INET6; memcpy(&tfe->a.a6.dip6, &tmp, 16); } arg = p; } /* Handle */ if ((tflags & IPFW_TFFLAG_DSTPORT) != 0) { if (arg == NULL) errx(EX_DATAERR, "invalid key: dst port missing"); if ((p = strchr(arg, ',')) != NULL) *p++ = '\0'; if ((port = htons(strtol(arg, NULL, 10))) == 0) { if ((sent = getservbyname(arg, NULL)) == NULL) errx(EX_DATAERR, "Unknown service: %s", arg); else key = sent->s_port; } tfe->dport = port; arg = p; } tfe->af = af; break; default: errx(EX_DATAERR, "Unsupported table type: %d", type); } tentry->subtype = af; tentry->masklen = masklen; } /* * Tries to guess table key type. * This procedure is used in legacy table auto-create * code AND in `ipfw -n` ruleset checking. * * Imported from old table_fill_xentry() parse code. 
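* Returns 0 and stores the guessed type in *ptype; returns non-zero when the key cannot be classified.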
*/ static int guess_key_type(char *key, uint8_t *ptype) { char *p; struct in6_addr addr; uint32_t kv; if (ishexnumber(*key) != 0 || *key == ':') { /* Remove / if exists */ if ((p = strchr(key, '/')) != NULL) *p = '\0'; if ((inet_pton(AF_INET, key, &addr) == 1) || (inet_pton(AF_INET6, key, &addr) == 1)) { *ptype = IPFW_TABLE_CIDR; if (p != NULL) *p = '/'; return (0); } else { /* Port or any other key */ /* Skip non-base 10 entries like 'fa1' */ kv = strtol(key, &p, 10); if (*p == '\0') { *ptype = IPFW_TABLE_NUMBER; return (0); } else if ((p != key) && (*p == '.')) { /* * Warn on IPv4 address strings * which are "valid" for inet_aton() but not * in inet_pton(). * * Typical examples: '10.5' or '10.0.0.05' */ return (1); } } } if (strchr(key, '.') == NULL) { *ptype = IPFW_TABLE_INTERFACE; return (0); } if (lookup_host(key, (struct in_addr *)&addr) != 0) return (1); *ptype = IPFW_TABLE_CIDR; return (0); } static void tentry_fill_key(ipfw_obj_header *oh, ipfw_obj_tentry *tent, char *key, int add, uint8_t *ptype, uint32_t *pvmask, ipfw_xtable_info *xi) { uint8_t type, tflags; uint32_t vmask; int error; type = 0; tflags = 0; vmask = 0; if (xi->tablename[0] == '\0') error = table_get_info(oh, xi); else error = 0; if (error == 0) { if (co.test_only == 0) { /* Table found */ type = xi->type; tflags = xi->tflags; vmask = xi->vmask; } else { /* * We're running `ipfw -n` * Compatibility layer: try to guess key type * before failing. */ if (guess_key_type(key, &type) != 0) { /* Inknown key */ errx(EX_USAGE, "Cannot guess " "key '%s' type", key); } vmask = IPFW_VTYPE_LEGACY; } } else { if (error != ESRCH) errx(EX_OSERR, "Error requesting table %s info", oh->ntlv.name); if (add == 0) errx(EX_DATAERR, "Table %s does not exist", oh->ntlv.name); /* * Table does not exist * Compatibility layer: try to guess key type before failing. */ if (guess_key_type(key, &type) != 0) { /* Inknown key */ errx(EX_USAGE, "Table %s does not exist, cannot guess " "key '%s' type", oh->ntlv.name, key); } vmask = IPFW_VTYPE_LEGACY; } tentry_fill_key_type(key, tent, type, tflags); *ptype = type; *pvmask = vmask; } static void set_legacy_value(uint32_t val, ipfw_table_value *v) { v->tag = val; v->pipe = val; v->divert = val; v->skipto = val; v->netgraph = val; v->fib = val; v->nat = val; v->nh4 = val; v->dscp = (uint8_t)val; v->limit = val; } static void tentry_fill_value(ipfw_obj_header *oh, ipfw_obj_tentry *tent, char *arg, uint8_t type, uint32_t vmask) { struct addrinfo hints, *res; uint32_t a4, flag, val; ipfw_table_value *v; uint32_t i; int dval; char *comma, *e, *etype, *n, *p; v = &tent->v.value; /* Compat layer: keep old behavior for legacy value types */ if (vmask == IPFW_VTYPE_LEGACY) { /* Try to interpret as number first */ val = strtoul(arg, &p, 0); if (*p == '\0') { set_legacy_value(val, v); return; } if (inet_pton(AF_INET, arg, &val) == 1) { set_legacy_value(ntohl(val), v); return; } /* Try hostname */ if (lookup_host(arg, (struct in_addr *)&val) == 0) { set_legacy_value(val, v); return; } errx(EX_OSERR, "Unable to parse value %s", arg); } /* * Shorthands: handle single value if vmask consists * of numbers only. 
e.g.: * vmask = "fib,skipto" -> treat input "1" as "1,1" */ n = arg; etype = NULL; for (i = 1; i < (1 << 31); i *= 2) { if ((flag = (vmask & i)) == 0) continue; vmask &= ~flag; if ((comma = strchr(n, ',')) != NULL) *comma = '\0'; switch (flag) { case IPFW_VTYPE_TAG: v->tag = strtol(n, &e, 10); if (*e != '\0') etype = "tag"; break; case IPFW_VTYPE_PIPE: v->pipe = strtol(n, &e, 10); if (*e != '\0') etype = "pipe"; break; case IPFW_VTYPE_DIVERT: v->divert = strtol(n, &e, 10); if (*e != '\0') etype = "divert"; break; case IPFW_VTYPE_SKIPTO: v->skipto = strtol(n, &e, 10); if (*e != '\0') etype = "skipto"; break; case IPFW_VTYPE_NETGRAPH: v->netgraph = strtol(n, &e, 10); if (*e != '\0') etype = "netgraph"; break; case IPFW_VTYPE_FIB: v->fib = strtol(n, &e, 10); if (*e != '\0') etype = "fib"; break; case IPFW_VTYPE_NAT: v->nat = strtol(n, &e, 10); if (*e != '\0') etype = "nat"; break; case IPFW_VTYPE_LIMIT: v->limit = strtol(n, &e, 10); if (*e != '\0') etype = "limit"; break; case IPFW_VTYPE_NH4: if (strchr(n, '.') != NULL && inet_pton(AF_INET, n, &a4) == 1) { v->nh4 = ntohl(a4); break; } if (lookup_host(n, (struct in_addr *)&v->nh4) == 0) break; etype = "ipv4"; break; case IPFW_VTYPE_DSCP: if (isalpha(*n)) { if ((dval = match_token(f_ipdscp, n)) != -1) { v->dscp = dval; break; } else etype = "DSCP code"; } else { v->dscp = strtol(n, &e, 10); if (v->dscp > 63 || *e != '\0') etype = "DSCP value"; } break; case IPFW_VTYPE_NH6: if (strchr(n, ':') != NULL) { memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_INET6; hints.ai_flags = AI_NUMERICHOST; if (getaddrinfo(n, NULL, &hints, &res) == 0) { v->nh6 = ((struct sockaddr_in6 *) res->ai_addr)->sin6_addr; v->zoneid = ((struct sockaddr_in6 *) res->ai_addr)->sin6_scope_id; freeaddrinfo(res); break; } } etype = "ipv6"; break; } if (etype != NULL) errx(EX_USAGE, "Unable to parse %s as %s", n, etype); if (comma != NULL) *comma++ = ','; if ((n = comma) != NULL) continue; /* End of input. */ if (vmask != 0) errx(EX_USAGE, "Not enough fields inside value"); } } /* * Compare table names. * Honor number comparison. */ static int tablename_cmp(const void *a, const void *b) { ipfw_xtable_info *ia, *ib; ia = (ipfw_xtable_info *)a; ib = (ipfw_xtable_info *)b; return (stringnum_cmp(ia->tablename, ib->tablename)); } /* * Retrieves table list from kernel, * optionally sorts it and calls requested function for each table. * Returns 0 on success. */ static int tables_foreach(table_cb_t *f, void *arg, int sort) { ipfw_obj_lheader *olh; ipfw_xtable_info *info; size_t sz; int i, error; /* Start with reasonable default */ sz = sizeof(*olh) + 16 * sizeof(ipfw_xtable_info); for (;;) { if ((olh = calloc(1, sz)) == NULL) return (ENOMEM); olh->size = sz; if (do_get3(IP_FW_TABLES_XLIST, &olh->opheader, &sz) != 0) { sz = olh->size; free(olh); if (errno != ENOMEM) return (errno); continue; } if (sort != 0) qsort(olh + 1, olh->count, olh->objsize, tablename_cmp); info = (ipfw_xtable_info *)(olh + 1); for (i = 0; i < olh->count; i++) { error = f(info, arg); /* Ignore errors for now */ info = (ipfw_xtable_info *)((caddr_t)info + olh->objsize); } free(olh); break; } return (0); } /* * Retrieves all entries for given table @i in * eXtended format. Allocate buffer large enough * to store result. Called needs to free it later. * * Returns 0 on success. 
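 * The IP_FW_TABLE_XLIST request below is retried (up to 8 attempts)
 * for as long as the kernel reports ENOMEM.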
*/ static int table_do_get_list(ipfw_xtable_info *i, ipfw_obj_header **poh) { ipfw_obj_header *oh; size_t sz; int c; sz = 0; oh = NULL; for (c = 0; c < 8; c++) { if (sz < i->size) sz = i->size + 44; if (oh != NULL) free(oh); if ((oh = calloc(1, sz)) == NULL) continue; table_fill_objheader(oh, i); oh->opheader.version = 1; /* Current version */ if (do_get3(IP_FW_TABLE_XLIST, &oh->opheader, &sz) == 0) { *poh = oh; return (0); } if (errno != ENOMEM) break; } free(oh); return (errno); } /* * Shows all entries from @oh in human-readable format */ static void table_show_list(ipfw_obj_header *oh, int need_header) { ipfw_obj_tentry *tent; uint32_t count; ipfw_xtable_info *i; i = (ipfw_xtable_info *)(oh + 1); tent = (ipfw_obj_tentry *)(i + 1); if (need_header) printf("--- table(%s), set(%u) ---\n", i->tablename, i->set); count = i->count; while (count > 0) { table_show_entry(i, tent); tent = (ipfw_obj_tentry *)((caddr_t)tent + tent->head.length); count--; } } static void table_show_value(char *buf, size_t bufsize, ipfw_table_value *v, uint32_t vmask, int print_ip) { char abuf[INET6_ADDRSTRLEN + IF_NAMESIZE + 2]; struct sockaddr_in6 sa6; uint32_t flag, i, l; size_t sz; struct in_addr a4; sz = bufsize; /* * Some shorthands for printing values: * legacy assumes all values are equal, so keep the first one. */ if (vmask == IPFW_VTYPE_LEGACY) { if (print_ip != 0) { flag = htonl(v->tag); inet_ntop(AF_INET, &flag, buf, sz); } else snprintf(buf, sz, "%u", v->tag); return; } for (i = 1; i < (1 << 31); i *= 2) { if ((flag = (vmask & i)) == 0) continue; l = 0; switch (flag) { case IPFW_VTYPE_TAG: l = snprintf(buf, sz, "%u,", v->tag); break; case IPFW_VTYPE_PIPE: l = snprintf(buf, sz, "%u,", v->pipe); break; case IPFW_VTYPE_DIVERT: l = snprintf(buf, sz, "%d,", v->divert); break; case IPFW_VTYPE_SKIPTO: l = snprintf(buf, sz, "%d,", v->skipto); break; case IPFW_VTYPE_NETGRAPH: l = snprintf(buf, sz, "%u,", v->netgraph); break; case IPFW_VTYPE_FIB: l = snprintf(buf, sz, "%u,", v->fib); break; case IPFW_VTYPE_NAT: l = snprintf(buf, sz, "%u,", v->nat); break; case IPFW_VTYPE_LIMIT: l = snprintf(buf, sz, "%u,", v->limit); break; case IPFW_VTYPE_NH4: a4.s_addr = htonl(v->nh4); inet_ntop(AF_INET, &a4, abuf, sizeof(abuf)); l = snprintf(buf, sz, "%s,", abuf); break; case IPFW_VTYPE_DSCP: l = snprintf(buf, sz, "%d,", v->dscp); break; case IPFW_VTYPE_NH6: sa6.sin6_family = AF_INET6; sa6.sin6_len = sizeof(sa6); sa6.sin6_addr = v->nh6; sa6.sin6_port = 0; sa6.sin6_scope_id = v->zoneid; if (getnameinfo((const struct sockaddr *)&sa6, sa6.sin6_len, abuf, sizeof(abuf), NULL, 0, NI_NUMERICHOST) == 0) l = snprintf(buf, sz, "%s,", abuf); break; } buf += l; sz -= l; } if (sz != bufsize) *(buf - 1) = '\0'; } static void table_show_entry(ipfw_xtable_info *i, ipfw_obj_tentry *tent) { char *comma, tbuf[128], pval[128]; void *paddr; struct tflow_entry *tfe; table_show_value(pval, sizeof(pval), &tent->v.value, i->vmask, co.do_value_as_ip); switch (i->type) { case IPFW_TABLE_ADDR: /* IPv4 or IPv6 prefixes */ inet_ntop(tent->subtype, &tent->k, tbuf, sizeof(tbuf)); printf("%s/%u %s\n", tbuf, tent->masklen, pval); break; case IPFW_TABLE_INTERFACE: /* Interface names */ printf("%s %s\n", tent->k.iface, pval); break; case IPFW_TABLE_NUMBER: /* numbers */ printf("%u %s\n", tent->k.key, pval); break; case IPFW_TABLE_FLOW: /* flows */ tfe = &tent->k.flow; comma = ""; if ((i->tflags & IPFW_TFFLAG_SRCIP) != 0) { if (tfe->af == AF_INET) paddr = &tfe->a.a4.sip; else paddr = &tfe->a.a6.sip6; inet_ntop(tfe->af, paddr, tbuf, sizeof(tbuf)); printf("%s%s", 
comma, tbuf); comma = ","; } if ((i->tflags & IPFW_TFFLAG_PROTO) != 0) { printf("%s%d", comma, tfe->proto); comma = ","; } if ((i->tflags & IPFW_TFFLAG_SRCPORT) != 0) { printf("%s%d", comma, ntohs(tfe->sport)); comma = ","; } if ((i->tflags & IPFW_TFFLAG_DSTIP) != 0) { if (tfe->af == AF_INET) paddr = &tfe->a.a4.dip; else paddr = &tfe->a.a6.dip6; inet_ntop(tfe->af, paddr, tbuf, sizeof(tbuf)); printf("%s%s", comma, tbuf); comma = ","; } if ((i->tflags & IPFW_TFFLAG_DSTPORT) != 0) { printf("%s%d", comma, ntohs(tfe->dport)); comma = ","; } printf(" %s\n", pval); } } static int table_do_get_stdlist(uint16_t opcode, ipfw_obj_lheader **polh) { ipfw_obj_lheader req, *olh; size_t sz; memset(&req, 0, sizeof(req)); sz = sizeof(req); if (do_get3(opcode, &req.opheader, &sz) != 0) if (errno != ENOMEM) return (errno); sz = req.size; if ((olh = calloc(1, sz)) == NULL) return (ENOMEM); olh->size = sz; if (do_get3(opcode, &olh->opheader, &sz) != 0) { free(olh); return (errno); } *polh = olh; return (0); } static int table_do_get_algolist(ipfw_obj_lheader **polh) { return (table_do_get_stdlist(IP_FW_TABLES_ALIST, polh)); } static int table_do_get_vlist(ipfw_obj_lheader **polh) { return (table_do_get_stdlist(IP_FW_TABLE_VLIST, polh)); } void ipfw_list_ta(int ac, char *av[]) { ipfw_obj_lheader *olh; ipfw_ta_info *info; int error, i; const char *atype; error = table_do_get_algolist(&olh); if (error != 0) err(EX_OSERR, "Unable to request algorithm list"); info = (ipfw_ta_info *)(olh + 1); for (i = 0; i < olh->count; i++) { if ((atype = match_value(tabletypes, info->type)) == NULL) atype = "unknown"; printf("--- %s ---\n", info->algoname); printf(" type: %s\n refcount: %u\n", atype, info->refcnt); info = (ipfw_ta_info *)((caddr_t)info + olh->objsize); } free(olh); } /* Copy of current kernel table_value structure */ struct _table_value { uint32_t tag; /* O_TAG/O_TAGGED */ uint32_t pipe; /* O_PIPE/O_QUEUE */ uint16_t divert; /* O_DIVERT/O_TEE */ uint16_t skipto; /* skipto, CALLRET */ uint32_t netgraph; /* O_NETGRAPH/O_NGTEE */ uint32_t fib; /* O_SETFIB */ uint32_t nat; /* O_NAT */ uint32_t nh4; uint8_t dscp; uint8_t spare0; uint16_t spare1; /* -- 32 bytes -- */ struct in6_addr nh6; uint32_t limit; /* O_LIMIT */ uint32_t zoneid; uint64_t refcnt; /* Number of references */ }; int compare_values(const void *_a, const void *_b) { struct _table_value *a, *b; a = (struct _table_value *)_a; b = (struct _table_value *)_b; if (a->spare1 < b->spare1) return (-1); else if (a->spare1 > b->spare1) return (1); return (0); } void ipfw_list_values(int ac, char *av[]) { ipfw_obj_lheader *olh; struct _table_value *v; int error, i; uint32_t vmask; char buf[128]; error = table_do_get_vlist(&olh); if (error != 0) err(EX_OSERR, "Unable to request value list"); vmask = 0x7FFFFFFF; /* Similar to IPFW_VTYPE_LEGACY */ table_print_valheader(buf, sizeof(buf), vmask); printf("HEADER: %s\n", buf); v = (struct _table_value *)(olh + 1); qsort(v, olh->count, olh->objsize, compare_values); for (i = 0; i < olh->count; i++) { table_show_value(buf, sizeof(buf), (ipfw_table_value *)v, vmask, 0); printf("[%u] refs=%lu %s\n", v->spare1, (u_long)v->refcnt, buf); v = (struct _table_value *)((caddr_t)v + olh->objsize); } free(olh); } int table_check_name(const char *tablename) { if (ipfw_check_object_name(tablename) != 0) return (EINVAL); /* Restrict some 'special' names */ if (strcmp(tablename, "all") == 0) return (EINVAL); return (0); } Index: user/alc/PQ_LAUNDRY/sbin/resolvconf/Makefile =================================================================== 
--- user/alc/PQ_LAUNDRY/sbin/resolvconf/Makefile (revision 303641) +++ user/alc/PQ_LAUNDRY/sbin/resolvconf/Makefile (revision 303642) @@ -1,38 +1,42 @@ # $FreeBSD$ PACKAGE=runtime DIST= ${.CURDIR}/../../contrib/openresolv .PATH: ${DIST} SCRIPTS= resolvconf FILES= libc dnsmasq named pdnsd pdns_recursor unbound FILESDIR= /libexec/resolvconf MAN= resolvconf.conf.5 resolvconf.8 CLEANFILES= ${SCRIPTS} ${FILES} ${MAN} SYSCONFDIR= /etc RCDIR= ${SYSCONFDIR}/rc.d VARDIR= /var/run/resolvconf # We don't assume to restart the services in /sbin. So, though # our service(8) is in /usr/sbin, we can use it, here. -CMD1= \1 onestatus >/dev/null 2>\&1 -CMD2= \1 restart -RESTARTCMD= /usr/sbin/service ${CMD1} \&\& /usr/sbin/service ${CMD2} +CMD1_WITH_ARG= \1 onestatus >/dev/null 2>\&1 +CMD2_WITH_ARG= \1 restart +RESTARTCMD_WITH_ARG= /usr/sbin/service ${CMD1_WITH_ARG} \&\& /usr/sbin/service ${CMD2_WITH_ARG} +CMD1= \\$$1 onestatus >/dev/null 2>\&1 +CMD2= \\$$1 restart +RESTARTCMD= "/usr/sbin/service ${CMD1} \&\& /usr/sbin/service ${CMD2}" .for f in ${SCRIPTS} ${FILES} ${MAN} ${f}: ${f}.in sed -e 's:@PREFIX@::g' \ -e 's:@SYSCONFDIR@:${SYSCONFDIR}:g' \ -e 's:@LIBEXECDIR@:${FILESDIR}:g' \ -e 's:@VARDIR@:${VARDIR}:g' \ - -e 's:@RESTARTCMD \(.*\)@:${RESTARTCMD}:g' \ + -e 's:@RESTARTCMD \(.*\)@:${RESTARTCMD_WITH_ARG}:g' \ + -e 's:@RESTARTCMD@:${RESTARTCMD}:g' \ -e 's:@RCDIR@:${RCDIR}:g' \ -e 's: vpn : ng[0-9]*&:g' \ ${DIST}/$@.in > $@ .endfor .include Index: user/alc/PQ_LAUNDRY/share/man/man4/gpioled.4 =================================================================== --- user/alc/PQ_LAUNDRY/share/man/man4/gpioled.4 (revision 303641) +++ user/alc/PQ_LAUNDRY/share/man/man4/gpioled.4 (revision 303642) @@ -1,156 +1,158 @@ .\" Copyright (c) 2013, Luiz Otavio O Souza .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd May 14, 2014 +.Dd July 30, 2016 .Dt GPIOLED 4 .Os .Sh NAME .Nm gpioled .Nd GPIO LED generic device driver .Sh SYNOPSIS To compile this driver into the kernel, place the following lines in your kernel configuration file: .Bd -ragged -offset indent .Cd "device gpio" .Cd "device gpioled" .Ed .Sh DESCRIPTION The .Nm driver provides glue to attach a .Xr led 4 compatible device to a GPIO pin. 
Each LED in the system has a .Pa name which is used to export a device as .Pa /dev/led/ . The GPIO pin can then be controlled by writing to this device as described in .Xr led 4 . .Pp On a .Xr device.hints 5 based system, like .Li MIPS , these values are configurable for .Nm : .Bl -tag -width ".Va hint.gpioiic.%d.atXXX" .It Va hint.gpioled.%d.at The gpiobus you are attaching to. Normally assigned to gpiobus0. .It Va hint.gpioled.%d.name Arbitrary name of device in .Pa /dev/led/ to create for .Xr led 4 . .It Va hint.gpioled.%d.pins Which pin on the GPIO interface to map to this instance. Please note that this mask should only ever have one bit set (any other bits - i.e., pins - will be ignored). +.It Va hint.gpioled.%d.invert +If set to 1, the pin will be set to 0 to light the LED, and 1 to clear it. .El .Pp On a .Xr FDT 4 based system, like .Li ARM , the DTS part for a .Nm gpioled device usually looks like: .Bd -literal gpio: gpio { gpio-controller; ... led0 { compatible = "gpioled"; gpios = <&gpio 16 2 0>; /* GPIO pin 16. */ name = "ok"; }; led1 { compatible = "gpioled"; gpios = <&gpio 17 2 0>; /* GPIO pin 17. */ name = "user-led1"; }; }; .Ed .Pp Optionally, you can choose to combine all the LEDs under a single .Dq gpio-leds compatible node: .Bd -literal simplebus0 { ... leds { compatible = "gpio-leds"; led0 { gpios = <&gpio 16 2 0>; name = "ok" }; led1 { gpios = <&gpio 17 2 0>; name = "user-led1" }; }; }; .Ed .Pp Both methods are equally supported and it is possible to have the LEDs defined with any sort of mix between the methods. The only restriction is that a GPIO pin cannot be mapped by two different (gpio)leds. .Pp For more details about the .Va gpios property, please consult .Pa /usr/src/sys/boot/fdt/dts/bindings-gpio.txt . .Pp The property .Va name is the arbitrary name of the device in .Pa /dev/led/ to create for .Xr led 4 . .Sh SEE ALSO .Xr fdt 4 , .Xr gpio 4 , .Xr gpioiic 4 , .Xr led 4 .Sh HISTORY The .Nm manual page first appeared in .Fx 10.1 . .Sh AUTHORS This manual page was written by .An Luiz Otavio O Souza . Index: user/alc/PQ_LAUNDRY/share/man/man4/ng_checksum.4 =================================================================== --- user/alc/PQ_LAUNDRY/share/man/man4/ng_checksum.4 (nonexistent) +++ user/alc/PQ_LAUNDRY/share/man/man4/ng_checksum.4 (revision 303642) @@ -0,0 +1,141 @@ +.\" Copyright (c) 2015 Dmitry Vagin +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd October 29, 2015 +.Dt NG_CHECKSUM 4 +.Os +.Sh NAME +.Nm ng_checksum +.Nd IP checksum node type +.Sh SYNOPSIS +.In netgraph/ng_checksum.h +.Sh DESCRIPTION +The +.Nm checksum +node can calculate and prepare for calculation in hardware +IPv4 header, TCP, UDP checksum. +.Sh HOOKS +This node type has two hooks: +.Bl -tag -width ".Va out" +.It Va in +Packets received on this hook are processed according to settings specified +in config and then forwarded to +.Ar out +hook, if it exists and connected. Otherwise they are reflected back to the +.Ar in +hook. +.It Va out +Packets received on this hook are forwarded to +.Ar in +hook without any changes. +.El +.Sh CONTROL MESSAGES +This node type supports the generic control messages, plus the following: +.Bl -tag -width foo +.It Dv NGM_CHECKSUM_SETDLT Pq Ic setdlt +Sets data link type on the +.Va in +hook. Currently, supported types are +.Cm DLT_RAW +(raw IP datagrams) and +.Cm DLT_EN10MB +(Ethernet). DLT_ definitions can be found in +.In net/bpf.h +header. Currently used values are +.Cm DLT_EN10MB += 1 and +.Cm DLT_RAW += 12. +.It Dv NGM_CHECKSUM_GETDLT Pq Ic getdlt +This control message obtains data link type on the +.Va in +hook. +.It Dv NGM_CHECKSUM_SETCONFIG Pq Ic setconfig +Sets node configuration. The following +.Vt "struct ng_checksum_config" +must be supplied as an argument: +.Bd -literal -offset 4n +struct ng_checksum_config { + uint64_t csum_flags; + uint64_t csum_offload; +}; +.Ed +.Pp +The +.Va csum_flags +can be set to any combination of CSUM_IP, CSUM_TCP, CSUM_UDP, CSUM_TCP_IPV6 and CSUM_UDP_IPV6 +(other values are ignored) for instructing node need calculate the corresponding checksum. +.Pp +The +.Va csum_offload +can be set to any combination of CSUM_IP, CSUM_TCP, CSUM_UDP, CSUM_TCP_IPV6 and CSUM_UDP_IPV6 +(other values are ignored) for instructing node what checksum can calculate in hardware. +.Pp +Also processed any combination of CSUM_IP, CSUM_TCP, CSUM_UDP, CSUM_TCP_IPV6 and CSUM_UDP_IPV6 sets before on mbuf. +.It Dv NGM_CHECKSUM_GETCONFIG Pq Ic getconfig +This control message obtains current node configuration, +returned as +.Vt "struct ng_checksum_config" . +.It Dv NGM_CHECKSUM_GET_STATS Pq Ic getstats +Returns node statistics as a +.Vt "struct ng_checksum_stats" . +.It Dv NGM_CHECKSUM_CLR_STATS Pq Ic clrstats +Clear node statistics. +.It Dv NGM_CHECKSUM_GETCLR_STATS Pq Ic getclrstats +This command is identical to +.Dv NGM_CHECKSUM_GET_STATS , +except that the statistics are also atomically cleared. +.El +.Sh SHUTDOWN +This node shuts down upon receipt of a +.Dv NGM_SHUTDOWN +control message, or when all hooks have been disconnected. +.Sh EXAMPLES +.Xr ngctl 8 +script: +.Bd -literal -offset 4n +/usr/sbin/ngctl -f- <<-SEQ + msg checksum-1: "setdlt 1" + ngctl msg checksum-1: "setconfig { csum_flags=0 csum_offload=6 }" +.Ed +.Pp +Set data link type to +.Cm DLT_EN10MB +(Ethernet), not set additional checksum flags, set hardware +can calculate CSUM_IP_UDP|CSUM_IP_TCP. 
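The same configuration could also be applied programmatically; the following is a minimal sketch using libnetgraph(3), in which the node path "checksum-1:" and the NGM_CHECKSUM_COOKIE message cookie name are assumptions following the usual netgraph conventions, and the numeric csum_offload value mirrors the ngctl example above.

	#include <err.h>
	#include <netgraph.h>
	#include <netgraph/ng_checksum.h>

	int
	main(void)
	{
		struct ng_checksum_config cfg = {
			.csum_flags = 0,
			.csum_offload = 6,	/* CSUM_IP_TCP|CSUM_IP_UDP, as above */
		};
		int cs, ds;

		/* Create a temporary socket node to talk to the graph. */
		if (NgMkSockNode(NULL, &cs, &ds) < 0)
			err(1, "NgMkSockNode");
		/* Equivalent of: ngctl msg checksum-1: "setconfig { ... }" */
		if (NgSendMsg(cs, "checksum-1:", NGM_CHECKSUM_COOKIE,
		    NGM_CHECKSUM_SETCONFIG, &cfg, sizeof(cfg)) < 0)
			err(1, "NgSendMsg");
		return (0);
	}

Link against libnetgraph (-lnetgraph).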
+.Sh SEE ALSO +.Xr netgraph 4 , +.Xr ng_patch 4 , +.Xr ngctl 8 +.Sh HISTORY +The +.Nm +node type was implemented in +.Fx 10.2 +and first submitted in +.Fx 12.0 . +.Sh AUTHORS +.An "Dmitry Vagin" Aq daemon.hammer@ya.ru . Property changes on: user/alc/PQ_LAUNDRY/share/man/man4/ng_checksum.4 ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: user/alc/PQ_LAUNDRY/share/man/man9/bitset.9 =================================================================== --- user/alc/PQ_LAUNDRY/share/man/man9/bitset.9 (revision 303641) +++ user/alc/PQ_LAUNDRY/share/man/man9/bitset.9 (revision 303642) @@ -1,391 +1,391 @@ .\" Copyright (c) 2015 Conrad Meyer .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' .\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR .\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE .\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" -.Dd October 20, 2015 +.Dd July 29, 2016 .Dt BITSET 9 .Os .Sh NAME .Nm bitset(9) \(em .Nm BITSET_DEFINE , .Nm BITSET_T_INITIALIZER , .Nm BITSET_FSET , .Nm BIT_CLR , .Nm BIT_COPY , .Nm BIT_ISSET , .Nm BIT_SET , .Nm BIT_ZERO , .Nm BIT_FILL , .Nm BIT_SETOF , .Nm BIT_EMPTY , .Nm BIT_ISFULLSET , .Nm BIT_FFS , .Nm BIT_COUNT , .Nm BIT_SUBSET , .Nm BIT_OVERLAP , .Nm BIT_CMP , .Nm BIT_OR , .Nm BIT_AND , .Nm BIT_NAND , .Nm BIT_CLR_ATOMIC , .Nm BIT_SET_ATOMIC , .Nm BIT_SET_ATOMIC_ACQ , .Nm BIT_AND_ATOMIC , .Nm BIT_OR_ATOMIC , .Nm BIT_COPY_STORE_REL .Nd bitset manipulation macros .Sh SYNOPSIS .In sys/_bitset.h .In sys/bitset.h .\" .Fn BITSET_DEFINE "STRUCTNAME" "const SETSIZE" .Fn BITSET_T_INITIALIZER "ARRAY_CONTENTS" .Fn BITSET_FSET "N_WORDS" .\" .Fn BIT_CLR "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset" .Fn BIT_COPY "const SETSIZE" "struct STRUCTNAME *from" "struct STRUCTNAME *to" .Ft bool .Fn BIT_ISSET "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset" .Fn BIT_SET "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset" .Fn BIT_ZERO "const SETSIZE" "struct STRUCTNAME *bitset" .Fn BIT_FILL "const SETSIZE" "struct STRUCTNAME *bitset" .Fn BIT_SETOF "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset" .Ft bool .Fn BIT_EMPTY "const SETSIZE" "struct STRUCTNAME *bitset" .Ft bool .Fn BIT_ISFULLSET "const SETSIZE" "struct STRUCTNAME *bitset" -.Ft size_t +.Ft int .Fn BIT_FFS "const SETSIZE" "struct STRUCTNAME *bitset" -.Ft size_t +.Ft int .Fn BIT_COUNT "const SETSIZE" "struct STRUCTNAME *bitset" .\" .Ft bool .Fo BIT_SUBSET .Fa "const SETSIZE" "struct STRUCTNAME *haystack" "struct STRUCTNAME *needle" .Fc .Ft bool .Fo BIT_OVERLAP .Fa "const SETSIZE" "struct STRUCTNAME *bitset1" "struct STRUCTNAME *bitset2" .Fc .Ft bool .Fo BIT_CMP .Fa "const SETSIZE" "struct STRUCTNAME *bitset1" "struct STRUCTNAME *bitset2" .Fc .Fn BIT_OR "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src" .Fn BIT_AND "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src" .Fn BIT_NAND "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src" .\" .Fn BIT_CLR_ATOMIC "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset" .Fn BIT_SET_ATOMIC "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset" .Fn BIT_SET_ATOMIC_ACQ "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset" .\" .Fo BIT_AND_ATOMIC .Fa "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src" .Fc .Fo BIT_OR_ATOMIC .Fa "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src" .Fc .Fo BIT_COPY_STORE_REL .Fa "const SETSIZE" "struct STRUCTNAME *from" "struct STRUCTNAME *to" .Fc .Sh DESCRIPTION The .Nm family of macros provide a flexible and efficient bitset implementation if the maximum size of the set is known at compilation. Throughout this manual page, the name .Fa SETSIZE refers to the size of the bitset in bits. Individual bits in bitsets are referenced with indices zero through .Fa SETSIZE - 1 . One example use of .In sys/bitset.h is .In sys/cpuset.h . .Pp The .Fn BITSET_DEFINE macro defines a bitset struct .Fa STRUCTNAME with room to represent .Fa SETSIZE bits. .Pp The .Fn BITSET_T_INITIALIZER macro allows one to initialize a bitset struct with a compile time literal value. .Pp The .Fn BITSET_FSET macro generates a compile time literal, usable by .Fn BITSET_T_INITIALIZER , representing a full bitset (all bits set). For examples of .Fn BITSET_T_INITIALIZER and .Fn BITSET_FSET usage, see the .Sx BITSET_T_INITIALIZER EXAMPLE section. 
The .Fa N_WORDS parameter to .Fn BITSET_FSET should be: .Bd -literal -offset indent __bitset_words(SETSIZE) .Ed .Pp The .Fn BIT_CLR macro clears bit .Fa bit in the bitset pointed to by .Fa bitset . The .Fn BIT_CLR_ATOMIC macro is identical, but the bit is cleared atomically. .Pp The .Fn BIT_COPY macro copies the contents of the bitset .Fa from to the bitset .Fa to . .Fn BIT_COPY_STORE_REL is similar, but copies component machine words from .Fa from and writes them to .Fa to with atomic store with release semantics. (That is, if .Fa to is composed of multiple machine words, .Fn BIT_COPY_STORE_REL performs multiple individually atomic operations.) .Pp The .Fn BIT_SET macro sets bit .Fa bit in the bitset pointed to by .Fa bitset . The .Fn BIT_SET_ATOMIC macro is identical, but the bit is set atomically. The .Fn BIT_SET_ATOMIC_ACQ macro sets the bit with acquire semantics. .Pp The .Fn BIT_ZERO macro clears all bits in .Fa bitset . .Pp The .Fn BIT_FILL macro sets all bits in .Fa bitset . .Pp The .Fn BIT_SETOF macro clears all bits in .Fa bitset before setting only bit .Fa bit . .Pp The .Fn BIT_EMPTY macro returns .Dv true if .Fa bitset is empty. .Pp The .Fn BIT_ISFULLSET macro returns .Dv true if .Fa bitset is full (all bits set). .Pp The .Fn BIT_FFS macro returns the 1-index of the first (lowest) set bit in .Fa bitset , or zero if .Fa bitset is empty. Like with .Xr ffs 3 , to use the non-zero result of .Fn BIT_FFS as a .Fa bit index parameter to any other .Nm macro, you must subtract one from the result. .Pp The .Fn BIT_COUNT macro returns the total number of set bits in .Fa bitset . .Pp The .Fn BIT_SUBSET macro returns .Dv true if .Fa needle is a subset of .Fa haystack . .Pp The .Fn BIT_OVERLAP macro returns .Dv true if .Fa bitset1 and .Fa bitset2 have any common bits. (That is, if .Fa bitset1 AND .Fa bitset2 is not the empty set.) .Pp The .Fn BIT_CMP macro returns .Dv true if .Fa bitset1 is NOT equal to .Fa bitset2 . .Pp The .Fn BIT_OR macro sets bits present in .Fa src in .Fa dst . (It is the .Nm equivalent of the scalar: .Fa dst |= .Fa src . ) .Fn BIT_OR_ATOMIC is similar, but sets bits in the component machine words in .Fa dst atomically. (That is, if .Fa dst is composed of multiple machine words, .Fn BIT_OR_ATOMIC performs multiple individually atomic operations.) .Pp The .Fn BIT_AND macro clears bits absent from .Fa src from .Fa dst . (It is the .Nm equivalent of the scalar: .Fa dst &= .Fa src . ) .Fn BIT_AND_ATOMIC is similar, with the same atomic semantics as .Fn BIT_OR_ATOMIC . .Pp The .Fn BIT_NAND macro clears bits set in .Fa src from .Fa dst . (It is the .Nm equivalent of the scalar: .Fa dst &= .Fa ~ src . ) .Sh BITSET_T_INITIALIZER EXAMPLE .Bd -literal BITSET_DEFINE(_myset, MYSETSIZE); struct _myset myset; /* Initialize myset to filled (all bits set) */ myset = BITSET_T_INITIALIZER(BITSET_FSET(__bitset_words(MYSETSIZE))); /* Initialize myset to only the lowest bit set */ myset = BITSET_T_INITIALIZER(0x1); .Ed .Sh SEE ALSO .Xr bitstring 3 , .Xr cpuset 9 .Sh HISTORY The .Nm macros first appeared in .Fx 10.0 in January 2014. They were MFCed to .Fx 9.3 , released in July 2014. .Pp This manual page first appeared in .Fx 11.0 . .Sh AUTHORS .An -nosplit The .Nm macros were generalized and pulled out of .In sys/cpuset.h as .In sys/_bitset.h and .In sys/bitset.h by .An Attilio Rao Aq Mt attilio@FreeBSD.org . This manual page was written by .An Conrad Meyer Aq Mt cem@FreeBSD.org . .Sh CAVEATS The .Fa SETSIZE argument to all of these macros must match the value given to .Fn BITSET_DEFINE . 
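A minimal sketch (the set size and names are illustrative) showing this caveat together with the one-indexed BIT_FFS() convention described above: every macro call passes the same SETSIZE that was given to BITSET_DEFINE(), and the non-zero BIT_FFS() result is converted back to a zero-based bit index before it is reused.

	#include <sys/_bitset.h>
	#include <sys/bitset.h>

	#define MYSETSIZE	136

	BITSET_DEFINE(_myset, MYSETSIZE);

	static void
	example(void)
	{
		struct _myset set;
		int idx;

		BIT_ZERO(MYSETSIZE, &set);
		BIT_SET(MYSETSIZE, 42, &set);

		idx = BIT_FFS(MYSETSIZE, &set);		/* 43: one-indexed */
		if (idx != 0)
			BIT_CLR(MYSETSIZE, idx - 1, &set);	/* zero-indexed again */
	}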
.Pp Unlike every other reference to individual set members, which are zero-indexed, .Fn BIT_FFS returns a one-indexed result (or zero if the set is empty). Index: user/alc/PQ_LAUNDRY/share/man/man9/cpuset.9 =================================================================== --- user/alc/PQ_LAUNDRY/share/man/man9/cpuset.9 (revision 303641) +++ user/alc/PQ_LAUNDRY/share/man/man9/cpuset.9 (revision 303642) @@ -1,352 +1,352 @@ .\" Copyright (c) 2015 Conrad Meyer .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' .\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR .\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE .\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" -.Dd October 20, 2015 +.Dd July 29, 2016 .Dt CPUSET 9 .Os .Sh NAME .Nm cpuset(9) \(em .Nm CPUSET_T_INITIALIZER , .Nm CPUSET_FSET , .Nm CPU_CLR , .Nm CPU_COPY , .Nm CPU_ISSET , .Nm CPU_SET , .Nm CPU_ZERO , .Nm CPU_FILL , .Nm CPU_SETOF , .Nm CPU_EMPTY , .Nm CPU_ISFULLSET , .Nm CPU_FFS , .Nm CPU_COUNT , .Nm CPU_SUBSET , .Nm CPU_OVERLAP , .Nm CPU_CMP , .Nm CPU_OR , .Nm CPU_AND , .Nm CPU_NAND , .Nm CPU_CLR_ATOMIC , .Nm CPU_SET_ATOMIC , .Nm CPU_SET_ATOMIC_ACQ , .Nm CPU_AND_ATOMIC , .Nm CPU_OR_ATOMIC , .Nm CPU_COPY_STORE_REL .Nd cpuset manipulation macros .Sh SYNOPSIS .In sys/_cpuset.h .In sys/cpuset.h .\" .Fn CPUSET_T_INITIALIZER "ARRAY_CONTENTS" .Vt CPUSET_FSET .\" .Fn CPU_CLR "size_t cpu_idx" "cpuset_t *cpuset" .Fn CPU_COPY "cpuset_t *from" "cpuset_t *to" .Ft bool .Fn CPU_ISSET "size_t cpu_idx" "cpuset_t *cpuset" .Fn CPU_SET "size_t cpu_idx" "cpuset_t *cpuset" .Fn CPU_ZERO "cpuset_t *cpuset" .Fn CPU_FILL "cpuset_t *cpuset" .Fn CPU_SETOF "size_t cpu_idx" "cpuset_t *cpuset" .Ft bool .Fn CPU_EMPTY "cpuset_t *cpuset" .Ft bool .Fn CPU_ISFULLSET "cpuset_t *cpuset" -.Ft size_t +.Ft int .Fn CPU_FFS "cpuset_t *cpuset" -.Ft size_t +.Ft int .Fn CPU_COUNT "cpuset_t *cpuset" .\" .Ft bool .Fn CPU_SUBSET "cpuset_t *haystack" "cpuset_t *needle" .Ft bool .Fn CPU_OVERLAP "cpuset_t *cpuset1" "cpuset_t *cpuset2" .Ft bool .Fn CPU_CMP "cpuset_t *cpuset1" "cpuset_t *cpuset2" .Fn CPU_OR "cpuset_t *dst" "cpuset_t *src" .Fn CPU_AND "cpuset_t *dst" "cpuset_t *src" .Fn CPU_NAND "cpuset_t *dst" "cpuset_t *src" .\" .Fn CPU_CLR_ATOMIC "size_t cpu_idx" "cpuset_t *cpuset" .Fn CPU_SET_ATOMIC "size_t cpu_idx" "cpuset_t *cpuset" .Fn CPU_SET_ATOMIC_ACQ "size_t cpu_idx" "cpuset_t *cpuset" .\" .Fn CPU_AND_ATOMIC "cpuset_t *dst" "cpuset_t *src" .Fn CPU_OR_ATOMIC "cpuset_t *dst" "cpuset_t *src" .Fn CPU_COPY_STORE_REL "cpuset_t *from" "cpuset_t *to" .Sh DESCRIPTION The .Nm family of macros provide a flexible and efficient CPU set implementation, backed by the .Xr bitset 9 macros. Each CPU is represented by a single bit. The maximum number of CPUs representable by .Vt cpuset_t is .Va MAXCPU . Individual CPUs in cpusets are referenced with indices zero through .Fa MAXCPU - 1 . .Pp The .Fn CPUSET_T_INITIALIZER macro allows one to initialize a .Vt cpuset_t with a compile time literal value. .Pp The .Fn CPUSET_FSET macro defines a compile time literal, usable by .Fn CPUSET_T_INITIALIZER , representing a full cpuset (all CPUs present). For examples of .Fn CPUSET_T_INITIALIZER and .Fn CPUSET_FSET usage, see the .Sx CPUSET_T_INITIALIZER EXAMPLE section. .Pp The .Fn CPU_CLR macro removes CPU .Fa cpu_idx from the cpuset pointed to by .Fa cpuset . The .Fn CPU_CLR_ATOMIC macro is identical, but the bit representing the CPU is cleared with atomic machine instructions. .Pp The .Fn CPU_COPY macro copies the contents of the cpuset .Fa from to the cpuset .Fa to . .Fn CPU_COPY_STORE_REL is similar, but copies component machine words from .Fa from and writes them to .Fa to with atomic store with release semantics. (That is, if .Fa to is composed of multiple machine words, .Fn CPU_COPY_STORE_REL performs multiple individually atomic operations.) .Pp The .Fn CPU_SET macro adds CPU .Fa cpu_idx to the cpuset pointed to by .Fa cpuset , if it is not already present. The .Fn CPU_SET_ATOMIC macro is identical, but the bit representing the CPU is set with atomic machine instructions. The .Fn CPU_SET_ATOMIC_ACQ macro sets the bit representing the CPU with atomic acquire semantics. .Pp The .Fn CPU_ZERO macro removes all CPUs from .Fa cpuset . 
.Pp The .Fn CPU_FILL macro adds all CPUs to .Fa cpuset . .Pp The .Fn CPU_SETOF macro removes all CPUs in .Fa cpuset before adding only CPU .Fa cpu_idx . .Pp The .Fn CPU_EMPTY macro returns .Dv true if .Fa cpuset is empty. .Pp The .Fn CPU_ISFULLSET macro returns .Dv true if .Fa cpuset is full (the set of all CPUs). .Pp The .Fn CPU_FFS macro returns the 1-index of the first (lowest) CPU in .Fa cpuset , or zero if .Fa cpuset is empty. Like with .Xr ffs 3 , to use the non-zero result of .Fn CPU_FFS as a .Fa cpu_idx index parameter to any other .Nm macro, you must subtract one from the result. .Pp The .Fn CPU_COUNT macro returns the total number of CPUs in .Fa cpuset . .Pp The .Fn CPU_SUBSET macro returns .Dv true if .Fa needle is a subset of .Fa haystack . .Pp The .Fn CPU_OVERLAP macro returns .Dv true if .Fa cpuset1 and .Fa cpuset2 have any common CPUs. (That is, if .Fa cpuset1 AND .Fa cpuset2 is not the empty set.) .Pp The .Fn CPU_CMP macro returns .Dv true if .Fa cpuset1 is NOT equal to .Fa cpuset2 . .Pp The .Fn CPU_OR macro adds CPUs present in .Fa src to .Fa dst . (It is the .Nm equivalent of the scalar: .Fa dst |= .Fa src . ) .Fn CPU_OR_ATOMIC is similar, but sets the bits representing CPUs in the component machine words in .Fa dst with atomic machine instructions. (That is, if .Fa dst is composed of multiple machine words, .Fn CPU_OR_ATOMIC performs multiple individually atomic operations.) .Pp The .Fn CPU_AND macro removes CPUs absent from .Fa src from .Fa dst . (It is the .Nm equivalent of the scalar: .Fa dst &= .Fa src . ) .Fn CPU_AND_ATOMIC is similar, with the same atomic semantics as .Fn CPU_OR_ATOMIC . .Pp The .Fn CPU_NAND macro removes CPUs in .Fa src from .Fa dst . (It is the .Nm equivalent of the scalar: .Fa dst &= .Fa ~ src . ) .Sh CPUSET_T_INITIALIZER EXAMPLE .Bd -literal cpuset_t myset; /* Initialize myset to filled (all CPUs) */ myset = CPUSET_T_INITIALIZER(CPUSET_FSET); /* Initialize myset to only the lowest CPU */ myset = CPUSET_T_INITIALIZER(0x1); .Ed .Sh SEE ALSO .Xr cpuset 1 , .Xr cpuset 2 , .Xr bitset 9 .Sh HISTORY .In sys/cpuset.h first appeared in .Fx 7.1 , released in January 2009, and in .Fx 8.0 , released in November 2009. .Pp This manual page first appeared in .Fx 11.0 . .Sh AUTHORS .An -nosplit The .Nm macros were written by .An Jeff Roberson Aq Mt jeff@FreeBSD.org . This manual page was written by .An Conrad Meyer Aq Mt cem@FreeBSD.org . .Sh CAVEATS Unlike every other reference to individual set members, which are zero-indexed, .Fn CPU_FFS returns a one-indexed result (or zero if the cpuset is empty). Index: user/alc/PQ_LAUNDRY/share/mk/src.libnames.mk =================================================================== --- user/alc/PQ_LAUNDRY/share/mk/src.libnames.mk (revision 303641) +++ user/alc/PQ_LAUNDRY/share/mk/src.libnames.mk (revision 303642) @@ -1,576 +1,577 @@ # $FreeBSD$ # # The include file define library names suitable # for INTERNALLIB and PRIVATELIB definition .if !target(____) .error src.libnames.mk cannot be included directly. 
.endif .if !target(____) ____: .include _PRIVATELIBS= \ atf_c \ atf_cxx \ bsdstat \ devdctl \ event \ heimipcc \ heimipcs \ ldns \ sqlite3 \ ssh \ ucl \ unbound _INTERNALLIBS= \ amu \ bsnmptools \ cron \ elftc \ fifolog \ ipf \ lpr \ netbsd \ ntp \ ntpevent \ openbsd \ opts \ parse \ pe \ readline \ sl \ sm \ smdb \ smutil \ telnet \ vers _LIBRARIES= \ ${_PRIVATELIBS} \ ${_INTERNALLIBS} \ ${LOCAL_LIBRARIES} \ 80211 \ alias \ archive \ asn1 \ auditd \ avl \ begemot \ bluetooth \ bsdxml \ bsm \ bsnmp \ bz2 \ c \ c_pic \ calendar \ cam \ casper \ cap_dns \ cap_grp \ cap_pwd \ cap_random \ cap_sysctl \ com_err \ compiler_rt \ crypt \ crypto \ ctf \ cuse \ cxxrt \ devctl \ devdctl \ devinfo \ devstat \ dialog \ dpv \ dtrace \ dwarf \ edit \ elf \ execinfo \ fetch \ figpar \ geom \ gnuregex \ gpio \ gssapi \ gssapi_krb5 \ hdb \ heimbase \ heimntlm \ heimsqlite \ hx509 \ ipsec \ jail \ kadm5clnt \ kadm5srv \ kafs5 \ kdc \ kiconv \ krb5 \ kvm \ l \ lzma \ m \ magic \ md \ memstat \ mp \ mt \ nandfs \ ncurses \ ncursesw \ netgraph \ ngatm \ nv \ nvpair \ opie \ pam \ panel \ panelw \ pcap \ pcsclite \ pjdlog \ pmc \ proc \ procstat \ pthread \ radius \ readline \ roken \ rpcsec_gss \ rpcsvc \ rt \ rtld_db \ sbuf \ sdp \ sm \ smb \ ssl \ ssp_nonshared \ stdthreads \ supcplusplus \ sysdecode \ tacplus \ termcap \ termcapw \ ufs \ ugidfw \ ulog \ umem \ usb \ usbhid \ util \ uutil \ vmmapi \ wind \ wrap \ xo \ y \ ypclnt \ z \ zfs_core \ zfs \ zpool \ .if ${MK_BLACKLIST} != "no" _LIBRARIES+= \ blacklist \ .endif .if ${MK_OFED} != "no" _LIBRARIES+= \ cxgb4 \ ibcm \ ibcommon \ ibmad \ ibsdp \ ibumad \ ibverbs \ mlx4 \ mthca \ opensm \ osmcomp \ osmvendor \ rdmacm \ .endif # Each library's LIBADD needs to be duplicated here for static linkage of # 2nd+ order consumers. Auto-generating this would be better. 
_DP_80211= sbuf bsdxml _DP_archive= z bz2 lzma bsdxml .if ${MK_BLACKLIST} != "no" _DP_blacklist+= pthread .endif .if ${MK_OPENSSL} != "no" _DP_archive+= crypto .else _DP_archive+= md .endif _DP_sqlite3= pthread _DP_ssl= crypto _DP_ssh= crypto crypt z .if ${MK_LDNS} != "no" _DP_ssh+= ldns .endif _DP_edit= ncursesw .if ${MK_OPENSSL} != "no" _DP_bsnmp= crypto .endif _DP_geom= bsdxml sbuf _DP_cam= sbuf _DP_kvm= elf _DP_casper= nv _DP_cap_dns= nv _DP_cap_grp= nv _DP_cap_pwd= nv _DP_cap_random= nv _DP_cap_sysctl= nv _DP_pjdlog= util _DP_opie= md _DP_usb= pthread _DP_unbound= ssl crypto pthread _DP_rt= pthread .if ${MK_OPENSSL} == "no" _DP_radius= md .else _DP_radius= crypto .endif +_DP_rtld_db= elf procstat _DP_procstat= kvm util elf .if ${MK_CXX} == "yes" .if ${MK_LIBCPLUSPLUS} != "no" _DP_proc= cxxrt .else _DP_proc= supcplusplus .endif .endif .if ${MK_CDDL} != "no" _DP_proc+= ctf .endif -_DP_proc+= elf rtld_db util +_DP_proc+= elf procstat rtld_db util _DP_mp= crypto _DP_memstat= kvm _DP_magic= z _DP_mt= sbuf bsdxml _DP_ldns= crypto .if ${MK_OPENSSL} != "no" _DP_fetch= ssl crypto .else _DP_fetch= md .endif _DP_execinfo= elf _DP_dwarf= elf _DP_dpv= dialog figpar util ncursesw _DP_dialog= ncursesw m _DP_cuse= pthread _DP_atf_cxx= atf_c _DP_devstat= kvm _DP_pam= radius tacplus opie md util .if ${MK_KERBEROS} != "no" _DP_pam+= krb5 .endif .if ${MK_OPENSSH} != "no" _DP_pam+= ssh .endif .if ${MK_NIS} != "no" _DP_pam+= ypclnt .endif _DP_readline= ncursesw _DP_roken= crypt _DP_kadm5clnt= com_err krb5 roken _DP_kadm5srv= com_err hdb krb5 roken _DP_heimntlm= crypto com_err krb5 roken _DP_hx509= asn1 com_err crypto roken wind _DP_hdb= asn1 com_err krb5 roken sqlite3 _DP_asn1= com_err roken _DP_kdc= roken hdb hx509 krb5 heimntlm asn1 crypto _DP_wind= com_err roken _DP_heimbase= pthread _DP_heimipcc= heimbase roken pthread _DP_heimipcs= heimbase roken pthread _DP_kafs5= asn1 krb5 roken _DP_krb5+= asn1 com_err crypt crypto hx509 roken wind heimbase heimipcc _DP_gssapi_krb5+= gssapi krb5 crypto roken asn1 com_err _DP_lzma= pthread _DP_ucl= m _DP_vmmapi= util _DP_ctf= z _DP_dtrace= ctf elf proc pthread rtld_db _DP_xo= util # The libc dependencies are not strictly needed but are defined to make the # assert happy. _DP_c= compiler_rt .if ${MK_SSP} != "no" _DP_c+= ssp_nonshared .endif _DP_stdthreads= pthread _DP_tacplus= md _DP_panel= ncurses _DP_panelw= ncursesw _DP_rpcsec_gss= gssapi _DP_smb= kiconv _DP_ulog= md _DP_fifolog= z _DP_ipf= kvm _DP_zfs= md pthread umem util uutil m nvpair avl bsdxml geom nvpair z \ zfs_core _DP_zfs_core= nvpair _DP_zpool= md pthread z nvpair avl umem .if ${MK_OFED} != "no" _DP_cxgb4= ibverbs pthread _DP_ibcm= ibverbs _DP_ibmad= ibcommon ibumad _DP_ibumad= ibcommon _DP_mlx4= ibverbs pthread _DP_mthca= ibverbs pthread _DP_opensm= pthread _DP_osmcomp= pthread _DP_osmvendor= ibumad opensm osmcomp pthread _DP_rdmacm= ibverbs .endif # Define special cases LDADD_supcplusplus= -lsupc++ LIBATF_C= ${DESTDIR}${LIBDIR}/libprivateatf-c.a LIBATF_CXX= ${DESTDIR}${LIBDIR}/libprivateatf-c++.a LDADD_atf_c= -lprivateatf-c LDADD_atf_cxx= -lprivateatf-c++ .for _l in ${_PRIVATELIBS} LIB${_l:tu}?= ${DESTDIR}${LIBDIR}/libprivate${_l}.a .endfor .for _l in ${_LIBRARIES} .if ${_INTERNALLIBS:M${_l}} LDADD_${_l}_L+= -L${LIB${_l:tu}DIR} .endif DPADD_${_l}?= ${LIB${_l:tu}} .if ${_PRIVATELIBS:M${_l}} LDADD_${_l}?= -lprivate${_l} .else LDADD_${_l}?= ${LDADD_${_l}_L} -l${_l} .endif # Add in all dependencies for static linkage. 
.if defined(_DP_${_l}) && (${_INTERNALLIBS:M${_l}} || \ (defined(NO_SHARED) && (${NO_SHARED} != "no" && ${NO_SHARED} != "NO"))) .for _d in ${_DP_${_l}} DPADD_${_l}+= ${DPADD_${_d}} LDADD_${_l}+= ${LDADD_${_d}} .endfor .endif .endfor # These are special cases where the library is broken and anything that uses # it needs to add more dependencies. Broken usually means that it has a # cyclic dependency and cannot link its own dependencies. This is bad, please # fix the library instead. # Unless the library itself is broken then the proper place to define # dependencies is _DP_* above. # libatf-c++ exposes libatf-c abi hence we need to explicit link to atf_c for # atf_cxx DPADD_atf_cxx+= ${DPADD_atf_c} LDADD_atf_cxx+= ${LDADD_atf_c} # Detect LDADD/DPADD that should be LIBADD, before modifying LDADD here. _BADLDADD= .for _l in ${LDADD:M-l*:N-l*/*:C,^-l,,} .if ${_LIBRARIES:M${_l}} && !${_PRIVATELIBS:M${_l}} _BADLDADD+= ${_l} .endif .endfor .if !empty(_BADLDADD) .error ${.CURDIR}: These libraries should be LIBADD+=foo rather than DPADD/LDADD+=-lfoo: ${_BADLDADD} .endif .for _l in ${LIBADD} DPADD+= ${DPADD_${_l}} LDADD+= ${LDADD_${_l}} .endfor # INTERNALLIB definitions. LIBELFTCDIR= ${OBJTOP}/lib/libelftc LIBELFTC?= ${LIBELFTCDIR}/libelftc.a LIBPEDIR= ${OBJTOP}/lib/libpe LIBPE?= ${LIBPEDIR}/libpe.a LIBREADLINEDIR= ${OBJTOP}/gnu/lib/libreadline/readline LIBREADLINE?= ${LIBREADLINEDIR}/libreadline.a LIBOPENBSDDIR= ${OBJTOP}/lib/libopenbsd LIBOPENBSD?= ${LIBOPENBSDDIR}/libopenbsd.a LIBSMDIR= ${OBJTOP}/lib/libsm LIBSM?= ${LIBSMDIR}/libsm.a LIBSMDBDIR= ${OBJTOP}/lib/libsmdb LIBSMDB?= ${LIBSMDBDIR}/libsmdb.a LIBSMUTILDIR= ${OBJTOP}/lib/libsmutil LIBSMUTIL?= ${LIBSMDBDIR}/libsmutil.a LIBNETBSDDIR?= ${OBJTOP}/lib/libnetbsd LIBNETBSD?= ${LIBNETBSDDIR}/libnetbsd.a LIBVERSDIR?= ${OBJTOP}/kerberos5/lib/libvers LIBVERS?= ${LIBVERSDIR}/libvers.a LIBSLDIR= ${OBJTOP}/kerberos5/lib/libsl LIBSL?= ${LIBSLDIR}/libsl.a LIBIPFDIR= ${OBJTOP}/sbin/ipf/libipf LIBIPF?= ${LIBIPFDIR}/libipf.a LIBTELNETDIR= ${OBJTOP}/lib/libtelnet LIBTELNET?= ${LIBTELNETDIR}/libtelnet.a LIBCRONDIR= ${OBJTOP}/usr.sbin/cron/lib LIBCRON?= ${LIBCRONDIR}/libcron.a LIBNTPDIR= ${OBJTOP}/usr.sbin/ntp/libntp LIBNTP?= ${LIBNTPDIR}/libntp.a LIBNTPEVENTDIR= ${OBJTOP}/usr.sbin/ntp/libntpevent LIBNTPEVENT?= ${LIBNTPEVENTDIR}/libntpevent.a LIBOPTSDIR= ${OBJTOP}/usr.sbin/ntp/libopts LIBOPTS?= ${LIBOPTSDIR}/libopts.a LIBPARSEDIR= ${OBJTOP}/usr.sbin/ntp/libparse LIBPARSE?= ${LIBPARSEDIR}/libparse.a LIBLPRDIR= ${OBJTOP}/usr.sbin/lpr/common_source LIBLPR?= ${LIBOPTSDIR}/liblpr.a LIBFIFOLOGDIR= ${OBJTOP}/usr.sbin/fifolog/lib LIBFIFOLOG?= ${LIBOPTSDIR}/libfifolog.a LIBBSNMPTOOLSDIR= ${OBJTOP}/usr.sbin/bsnmpd/tools/libbsnmptools LIBBSNMPTOOLS?= ${LIBBSNMPTOOLSDIR}/libbsnmptools.a LIBAMUDIR= ${OBJTOP}/usr.sbin/amd/libamu LIBAMU?= ${LIBAMUDIR}/libamu/libamu.a # Define a directory for each library. This is useful for adding -L in when # not using a --sysroot or for meta mode bootstrapping when there is no # Makefile.depend. These are sorted by directory. 
LIBAVLDIR= ${OBJTOP}/cddl/lib/libavl LIBCTFDIR= ${OBJTOP}/cddl/lib/libctf LIBDTRACEDIR= ${OBJTOP}/cddl/lib/libdtrace LIBNVPAIRDIR= ${OBJTOP}/cddl/lib/libnvpair LIBUMEMDIR= ${OBJTOP}/cddl/lib/libumem LIBUUTILDIR= ${OBJTOP}/cddl/lib/libuutil LIBZFSDIR= ${OBJTOP}/cddl/lib/libzfs LIBZFS_COREDIR= ${OBJTOP}/cddl/lib/libzfs_core LIBZPOOLDIR= ${OBJTOP}/cddl/lib/libzpool LIBCXGB4DIR= ${OBJTOP}/contrib/ofed/usr.lib/libcxgb4 LIBIBCMDIR= ${OBJTOP}/contrib/ofed/usr.lib/libibcm LIBIBCOMMONDIR= ${OBJTOP}/contrib/ofed/usr.lib/libibcommon LIBIBMADDIR= ${OBJTOP}/contrib/ofed/usr.lib/libibmad LIBIBUMADDIR= ${OBJTOP}/contrib/ofed/usr.lib/libibumad LIBIBVERBSDIR= ${OBJTOP}/contrib/ofed/usr.lib/libibverbs LIBMLX4DIR= ${OBJTOP}/contrib/ofed/usr.lib/libmlx4 LIBMTHCADIR= ${OBJTOP}/contrib/ofed/usr.lib/libmthca LIBOPENSMDIR= ${OBJTOP}/contrib/ofed/usr.lib/libopensm LIBOSMCOMPDIR= ${OBJTOP}/contrib/ofed/usr.lib/libosmcomp LIBOSMVENDORDIR= ${OBJTOP}/contrib/ofed/usr.lib/libosmvendor LIBRDMACMDIR= ${OBJTOP}/contrib/ofed/usr.lib/librdmacm LIBIBSDPDIR= ${OBJTOP}/contrib/ofed/usr.lib/libsdp LIBDIALOGDIR= ${OBJTOP}/gnu/lib/libdialog LIBGCOVDIR= ${OBJTOP}/gnu/lib/libgcov LIBGOMPDIR= ${OBJTOP}/gnu/lib/libgomp LIBGNUREGEXDIR= ${OBJTOP}/gnu/lib/libregex LIBSSPDIR= ${OBJTOP}/gnu/lib/libssp LIBSSP_NONSHAREDDIR= ${OBJTOP}/gnu/lib/libssp/libssp_nonshared LIBSUPCPLUSPLUSDIR= ${OBJTOP}/gnu/lib/libsupc++ LIBASN1DIR= ${OBJTOP}/kerberos5/lib/libasn1 LIBGSSAPI_KRB5DIR= ${OBJTOP}/kerberos5/lib/libgssapi_krb5 LIBGSSAPI_NTLMDIR= ${OBJTOP}/kerberos5/lib/libgssapi_ntlm LIBGSSAPI_SPNEGODIR= ${OBJTOP}/kerberos5/lib/libgssapi_spnego LIBHDBDIR= ${OBJTOP}/kerberos5/lib/libhdb LIBHEIMBASEDIR= ${OBJTOP}/kerberos5/lib/libheimbase LIBHEIMIPCCDIR= ${OBJTOP}/kerberos5/lib/libheimipcc LIBHEIMIPCSDIR= ${OBJTOP}/kerberos5/lib/libheimipcs LIBHEIMNTLMDIR= ${OBJTOP}/kerberos5/lib/libheimntlm LIBHX509DIR= ${OBJTOP}/kerberos5/lib/libhx509 LIBKADM5CLNTDIR= ${OBJTOP}/kerberos5/lib/libkadm5clnt LIBKADM5SRVDIR= ${OBJTOP}/kerberos5/lib/libkadm5srv LIBKAFS5DIR= ${OBJTOP}/kerberos5/lib/libkafs5 LIBKDCDIR= ${OBJTOP}/kerberos5/lib/libkdc LIBKRB5DIR= ${OBJTOP}/kerberos5/lib/libkrb5 LIBROKENDIR= ${OBJTOP}/kerberos5/lib/libroken LIBWINDDIR= ${OBJTOP}/kerberos5/lib/libwind LIBATF_CDIR= ${OBJTOP}/lib/atf/libatf-c LIBATF_CXXDIR= ${OBJTOP}/lib/atf/libatf-c++ LIBALIASDIR= ${OBJTOP}/lib/libalias/libalias LIBBLACKLISTDIR= ${OBJTOP}/lib/libblacklist LIBBLOCKSRUNTIMEDIR= ${OBJTOP}/lib/libblocksruntime LIBBSNMPDIR= ${OBJTOP}/lib/libbsnmp/libbsnmp LIBCASPERDIR= ${OBJTOP}/lib/libcasper/libcasper LIBCAP_DNSDIR= ${OBJTOP}/lib/libcasper/services/cap_dns LIBCAP_GRPDIR= ${OBJTOP}/lib/libcasper/services/cap_grp LIBCAP_PWDDIR= ${OBJTOP}/lib/libcasper/services/cap_pwd LIBCAP_RANDOMDIR= ${OBJTOP}/lib/libcasper/services/cap_random LIBCAP_SYSCTLDIR= ${OBJTOP}/lib/libcasper/services/cap_sysctl LIBBSDXMLDIR= ${OBJTOP}/lib/libexpat LIBKVMDIR= ${OBJTOP}/lib/libkvm LIBPTHREADDIR= ${OBJTOP}/lib/libthr LIBMDIR= ${OBJTOP}/lib/msun LIBFORMDIR= ${OBJTOP}/lib/ncurses/form LIBFORMLIBWDIR= ${OBJTOP}/lib/ncurses/formw LIBMENUDIR= ${OBJTOP}/lib/ncurses/menu LIBMENULIBWDIR= ${OBJTOP}/lib/ncurses/menuw LIBNCURSESDIR= ${OBJTOP}/lib/ncurses/ncurses LIBNCURSESWDIR= ${OBJTOP}/lib/ncurses/ncursesw LIBPANELDIR= ${OBJTOP}/lib/ncurses/panel LIBPANELWDIR= ${OBJTOP}/lib/ncurses/panelw LIBCRYPTODIR= ${OBJTOP}/secure/lib/libcrypto LIBSSHDIR= ${OBJTOP}/secure/lib/libssh LIBSSLDIR= ${OBJTOP}/secure/lib/libssl LIBTEKENDIR= ${OBJTOP}/sys/teken/libteken LIBEGACYDIR= ${OBJTOP}/tools/build LIBLNDIR= ${OBJTOP}/usr.bin/lex/lib 
LIBTERMCAPDIR= ${LIBNCURSESDIR} LIBTERMCAPWDIR= ${LIBNCURSESWDIR} # Default other library directories to lib/libNAME. .for lib in ${_LIBRARIES} LIB${lib:tu}DIR?= ${OBJTOP}/lib/lib${lib} .endfor # Validate that listed LIBADD are valid. .for _l in ${LIBADD} .if empty(_LIBRARIES:M${_l}) _BADLIBADD+= ${_l} .endif .endfor .if !empty(_BADLIBADD) .error ${.CURDIR}: Invalid LIBADD used which may need to be added to ${_this:T}: ${_BADLIBADD} .endif # Sanity check that libraries are defined here properly when building them. .if defined(LIB) && ${_LIBRARIES:M${LIB}} != "" .if !empty(LIBADD) && \ (!defined(_DP_${LIB}) || ${LIBADD:O:u} != ${_DP_${LIB}:O:u}) .error ${.CURDIR}: Missing or incorrect _DP_${LIB} entry in ${_this:T}. Should match LIBADD for ${LIB} ('${LIBADD}' vs '${_DP_${LIB}}') .endif # Note that OBJTOP is not yet defined here but for the purpose of the check # it is fine as it resolves to the SRC directory. .if !defined(LIB${LIB:tu}DIR) || !exists(${SRCTOP}/${LIB${LIB:tu}DIR:S,^${OBJTOP}/,,}) .error ${.CURDIR}: Missing or incorrect value for LIB${LIB:tu}DIR in ${_this:T}: ${LIB${LIB:tu}DIR:S,^${OBJTOP}/,,} .endif .if ${_INTERNALLIBS:M${LIB}} != "" && !defined(LIB${LIB:tu}) .error ${.CURDIR}: Missing value for LIB${LIB:tu} in ${_this:T}. Likely should be: LIB${LIB:tu}?= $${LIB${LIB:tu}DIR}/lib${LIB}.a .endif .endif .endif # !target(____) Index: user/alc/PQ_LAUNDRY/sys/amd64/amd64/support.S =================================================================== --- user/alc/PQ_LAUNDRY/sys/amd64/amd64/support.S (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/amd64/amd64/support.S (revision 303642) @@ -1,796 +1,789 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include "opt_ddb.h" #include #include #include "assym.s" .text /* * bcopy family * void bzero(void *buf, u_int len) */ /* done */ ENTRY(bzero) PUSH_FRAME_POINTER movq %rsi,%rcx xorl %eax,%eax shrq $3,%rcx cld rep stosq movq %rsi,%rcx andq $7,%rcx rep stosb POP_FRAME_POINTER ret END(bzero) /* Address: %rdi */ ENTRY(pagezero) PUSH_FRAME_POINTER - movq $-PAGE_SIZE,%rdx - subq %rdx,%rdi + movq $PAGE_SIZE/8,%rcx xorl %eax,%eax -1: - movnti %rax,(%rdi,%rdx) - movnti %rax,8(%rdi,%rdx) - movnti %rax,16(%rdi,%rdx) - movnti %rax,24(%rdi,%rdx) - addq $32,%rdx - jne 1b - sfence + rep + stosq POP_FRAME_POINTER ret END(pagezero) ENTRY(bcmp) PUSH_FRAME_POINTER movq %rdx,%rcx shrq $3,%rcx cld /* compare forwards */ repe cmpsq jne 1f movq %rdx,%rcx andq $7,%rcx repe cmpsb 1: setne %al movsbl %al,%eax POP_FRAME_POINTER ret END(bcmp) /* * bcopy(src, dst, cnt) * rdi, rsi, rdx * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ ENTRY(bcopy) PUSH_FRAME_POINTER xchgq %rsi,%rdi movq %rdx,%rcx movq %rdi,%rax subq %rsi,%rax cmpq %rcx,%rax /* overlapping && src < dst? */ jb 1f shrq $3,%rcx /* copy by 64-bit words */ cld /* nope, copy forwards */ rep movsq movq %rdx,%rcx andq $7,%rcx /* any bytes left? */ rep movsb POP_FRAME_POINTER ret /* ALIGN_TEXT */ 1: addq %rcx,%rdi /* copy backwards */ addq %rcx,%rsi decq %rdi decq %rsi andq $7,%rcx /* any fractional bytes? */ std rep movsb movq %rdx,%rcx /* copy remainder by 32-bit words */ shrq $3,%rcx subq $7,%rsi subq $7,%rdi rep movsq cld POP_FRAME_POINTER ret END(bcopy) /* * Note: memcpy does not support overlapping copies */ ENTRY(memcpy) PUSH_FRAME_POINTER movq %rdi,%rax movq %rdx,%rcx shrq $3,%rcx /* copy by 64-bit words */ cld /* copy forwards */ rep movsq movq %rdx,%rcx andq $7,%rcx /* any bytes left? */ rep movsb POP_FRAME_POINTER ret END(memcpy) /* * pagecopy(%rdi=from, %rsi=to) */ ENTRY(pagecopy) PUSH_FRAME_POINTER movq $-PAGE_SIZE,%rax movq %rax,%rdx subq %rax,%rdi subq %rax,%rsi 1: prefetchnta (%rdi,%rax) addq $64,%rax jne 1b 2: movq (%rdi,%rdx),%rax movnti %rax,(%rsi,%rdx) movq 8(%rdi,%rdx),%rax movnti %rax,8(%rsi,%rdx) movq 16(%rdi,%rdx),%rax movnti %rax,16(%rsi,%rdx) movq 24(%rdi,%rdx),%rax movnti %rax,24(%rsi,%rdx) addq $32,%rdx jne 2b sfence POP_FRAME_POINTER ret END(pagecopy) /* fillw(pat, base, cnt) */ /* %rdi,%rsi, %rdx */ ENTRY(fillw) PUSH_FRAME_POINTER movq %rdi,%rax movq %rsi,%rdi movq %rdx,%rcx cld rep stosw POP_FRAME_POINTER ret END(fillw) /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ /* * Access user memory from inside the kernel. These routines should be * the only places that do this. * * These routines set curpcb->pcb_onfault for the time they execute. When a * protection violation occurs inside the functions, the trap handler * returns to *curpcb->pcb_onfault instead of the function. */ /* * copyout(from_kernel, to_user, len) - MP SAFE * %rdi, %rsi, %rdx */ ENTRY(copyout) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rax movq $copyout_fault,PCB_ONFAULT(%rax) testq %rdx,%rdx /* anything to do? */ jz done_copyout /* * Check explicitly for non-user addresses. If 486 write protection * is being used, this check is essential because we are in kernel * mode so the h/w does not provide any protection against writing * kernel addresses. */ /* * First, prevent address wrapping. */ movq %rsi,%rax addq %rdx,%rax jc copyout_fault /* * XXX STOP USING VM_MAXUSER_ADDRESS. 
* It is an end address, not a max, so every time it is used correctly it * looks like there is an off by one error, and of course it caused an off * by one error in several places. */ movq $VM_MAXUSER_ADDRESS,%rcx cmpq %rcx,%rax ja copyout_fault xchgq %rdi,%rsi /* bcopy(%rsi, %rdi, %rdx) */ movq %rdx,%rcx shrq $3,%rcx cld rep movsq movb %dl,%cl andb $7,%cl rep movsb done_copyout: xorl %eax,%eax movq PCPU(CURPCB),%rdx movq %rax,PCB_ONFAULT(%rdx) POP_FRAME_POINTER ret ALIGN_TEXT copyout_fault: movq PCPU(CURPCB),%rdx movq $0,PCB_ONFAULT(%rdx) movq $EFAULT,%rax POP_FRAME_POINTER ret END(copyout) /* * copyin(from_user, to_kernel, len) - MP SAFE * %rdi, %rsi, %rdx */ ENTRY(copyin) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rax movq $copyin_fault,PCB_ONFAULT(%rax) testq %rdx,%rdx /* anything to do? */ jz done_copyin /* * make sure address is valid */ movq %rdi,%rax addq %rdx,%rax jc copyin_fault movq $VM_MAXUSER_ADDRESS,%rcx cmpq %rcx,%rax ja copyin_fault xchgq %rdi,%rsi movq %rdx,%rcx movb %cl,%al shrq $3,%rcx /* copy longword-wise */ cld rep movsq movb %al,%cl andb $7,%cl /* copy remaining bytes */ rep movsb done_copyin: xorl %eax,%eax movq PCPU(CURPCB),%rdx movq %rax,PCB_ONFAULT(%rdx) POP_FRAME_POINTER ret ALIGN_TEXT copyin_fault: movq PCPU(CURPCB),%rdx movq $0,PCB_ONFAULT(%rdx) movq $EFAULT,%rax POP_FRAME_POINTER ret END(copyin) /* * casueword32. Compare and set user integer. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %esi, oldp = %rdx, new = %ecx */ ENTRY(casueword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movl %esi,%eax /* old */ #ifdef SMP lock #endif cmpxchgl %ecx,(%rdi) /* new = %ecx */ /* * The old value is in %eax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. Save %eax into %esi to prepare the return * value. */ movl %eax,%esi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) /* * Access the oldp after the pcb_onfault is cleared, to correctly * catch corrupted pointer. */ movl %esi,(%rdx) /* oldp = %rdx */ POP_FRAME_POINTER ret END(casueword32) /* * casueword. Compare and set user long. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx */ ENTRY(casueword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movq %rsi,%rax /* old */ #ifdef SMP lock #endif cmpxchgq %rcx,(%rdi) /* new = %rcx */ /* * The old value is in %rax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. */ movq %rax,%rsi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) movq %rsi,(%rdx) POP_FRAME_POINTER ret END(casueword) /* * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit * byte from user memory. 
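The casueword32/casueword routines above return -1 only when the user access faults; on success they return 0 and store the value observed at the target address in *oldp, leaving the caller to decide whether the compare actually matched. A minimal caller sketch (hypothetical helper and names, not part of this change; the declarations live in sys/systm.h):

	/* Hedged sketch: atomically OR a bit into a word in user memory. */
	static int
	set_user_bit(volatile uint32_t *uaddr, uint32_t bit)
	{
		uint32_t old, prev;

		if (fueword32(uaddr, &old) == -1)
			return (EFAULT);		/* initial read faulted */
		for (;;) {
			if (casueword32(uaddr, old, &prev, old | bit) == -1)
				return (EFAULT);	/* cmpxchg faulted */
			if (prev == old)
				return (0);		/* store took effect */
			old = prev;			/* raced; retry with observed value */
		}
	}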
* addr = %rdi, valp = %rsi */ ALTENTRY(fueword64) ENTRY(fueword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax movq (%rdi),%r11 movq %rax,PCB_ONFAULT(%rcx) movq %r11,(%rsi) POP_FRAME_POINTER ret END(fueword64) END(fueword) ENTRY(fueword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax movl (%rdi),%r11d movq %rax,PCB_ONFAULT(%rcx) movl %r11d,(%rsi) POP_FRAME_POINTER ret END(fueword32) /* * fuswintr() and suswintr() are specialized variants of fuword16() and * suword16(), respectively. They are called from the profiling code, * potentially at interrupt time. If they fail, that's okay; good things * will happen later. They always fail for now, until the trap code is * able to deal with this. */ ALTENTRY(suswintr) ENTRY(fuswintr) movq $-1,%rax ret END(suswintr) END(fuswintr) ENTRY(fuword16) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi ja fusufault movzwl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fuword16) ENTRY(fubyte) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi ja fusufault movzbl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fubyte) ALIGN_TEXT fusufault: movq PCPU(CURPCB),%rcx xorl %eax,%eax movq %rax,PCB_ONFAULT(%rcx) decq %rax POP_FRAME_POINTER ret /* * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to * user memory. All these functions are MPSAFE. * addr = %rdi, value = %rsi */ ALTENTRY(suword64) ENTRY(suword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movq %rsi,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword64) END(suword) ENTRY(suword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword32) ENTRY(suword16) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movw %si,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx /* restore trashed register */ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword16) ENTRY(subyte) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,%eax movb %al,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx /* restore trashed register */ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(subyte) /* * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE * %rdi, %rsi, %rdx, %rcx * * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. 
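copyinstr() below bounds the copy by the smaller of maxlen and the remaining user address space, returns EFAULT on a fault and ENAMETOOLONG when no NUL is found within the limit, and the length it reports includes the terminating NUL. A hedged usage sketch (user_path is a hypothetical user-space pointer):

	char path[MAXPATHLEN];
	size_t done;
	int error;

	error = copyinstr(user_path, path, sizeof(path), &done);
	if (error != 0)
		return (error);			/* EFAULT or ENAMETOOLONG */
	/* "done" counts the NUL, so strlen(path) == done - 1. */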
*/ ENTRY(copyinstr) PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ movq %rcx,%r9 /* %r9 = *len */ xchgq %rdi,%rsi /* %rdi = from, %rsi = to */ movq PCPU(CURPCB),%rcx movq $cpystrflt,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS,%rax /* make sure 'from' is within bounds */ subq %rsi,%rax jbe cpystrflt /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ cmpq %rdx,%rax jae 1f movq %rax,%rdx movq %rax,%r8 1: incq %rdx cld 2: decq %rdx jz 3f lodsb stosb orb %al,%al jnz 2b /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax jmp cpystrflt_x 3: /* rdx is zero - return ENAMETOOLONG or EFAULT */ movq $VM_MAXUSER_ADDRESS,%rax cmpq %rax,%rsi jae cpystrflt 4: movq $ENAMETOOLONG,%rax jmp cpystrflt_x cpystrflt: movq $EFAULT,%rax cpystrflt_x: /* set *lencopied and return %eax */ movq PCPU(CURPCB),%rcx movq $0,PCB_ONFAULT(%rcx) testq %r9,%r9 jz 1f subq %rdx,%r8 movq %r8,(%r9) 1: POP_FRAME_POINTER ret END(copyinstr) /* * copystr(from, to, maxlen, int *lencopied) - MP SAFE * %rdi, %rsi, %rdx, %rcx */ ENTRY(copystr) PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ xchgq %rdi,%rsi incq %rdx cld 1: decq %rdx jz 4f lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax jmp 6f 4: /* rdx is zero -- return ENAMETOOLONG */ movq $ENAMETOOLONG,%rax 6: testq %rcx,%rcx jz 7f /* set *lencopied and return %rax */ subq %rdx,%r8 movq %r8,(%rcx) 7: POP_FRAME_POINTER ret END(copystr) /* * Handling of special amd64 registers and descriptor tables etc * %rdi */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) /* reload the descriptor table */ lgdt (%rdi) /* flush the prefetch q */ jmp 1f nop 1: movl $KDSEL,%eax movl %eax,%ds movl %eax,%es movl %eax,%fs /* Beware, use wrmsr to set 64 bit base */ movl %eax,%gs movl %eax,%ss /* reload code selector by turning return into intersegmental return */ popq %rax pushq $KCSEL pushq %rax MEXITCOUNT lretq END(lgdt) /*****************************************************************************/ /* setjump, longjump */ /*****************************************************************************/ ENTRY(setjmp) movq %rbx,0(%rdi) /* save rbx */ movq %rsp,8(%rdi) /* save rsp */ movq %rbp,16(%rdi) /* save rbp */ movq %r12,24(%rdi) /* save r12 */ movq %r13,32(%rdi) /* save r13 */ movq %r14,40(%rdi) /* save r14 */ movq %r15,48(%rdi) /* save r15 */ movq 0(%rsp),%rdx /* get rta */ movq %rdx,56(%rdi) /* save rip */ xorl %eax,%eax /* return(0); */ ret END(setjmp) ENTRY(longjmp) movq 0(%rdi),%rbx /* restore rbx */ movq 8(%rdi),%rsp /* restore rsp */ movq 16(%rdi),%rbp /* restore rbp */ movq 24(%rdi),%r12 /* restore r12 */ movq 32(%rdi),%r13 /* restore r13 */ movq 40(%rdi),%r14 /* restore r14 */ movq 48(%rdi),%r15 /* restore r15 */ movq 56(%rdi),%rdx /* get rta */ movq %rdx,0(%rsp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret END(longjmp) /* * Support for reading MSRs in the safe manner. */ ENTRY(rdmsr_safe) /* int rdmsr_safe(u_int msr, uint64_t *data) */ PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $msr_onfault,PCB_ONFAULT(%r8) movl %edi,%ecx rdmsr /* Read MSR pointed by %ecx. Returns hi byte in edx, lo in %eax */ salq $32,%rdx /* sign-shift %rdx left */ movl %eax,%eax /* zero-extend %eax -> %rax */ orq %rdx,%rax movq %rax,(%rsi) xorq %rax,%rax movq %rax,PCB_ONFAULT(%r8) POP_FRAME_POINTER ret /* * Support for writing MSRs in the safe manner. 
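Both rdmsr_safe() above and wrmsr_safe() below reuse the pcb_onfault hook, so a #GP raised by a non-existent MSR unwinds to msr_onfault and surfaces as EFAULT instead of a panic. A hedged caller sketch (the MSR number is illustrative only):

	#define	MSR_EXAMPLE	0x17f		/* hypothetical MSR number */
	uint64_t val;

	if (rdmsr_safe(MSR_EXAMPLE, &val) != 0)
		val = 0;			/* MSR not implemented on this CPU */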
*/ ENTRY(wrmsr_safe) /* int wrmsr_safe(u_int msr, uint64_t data) */ PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $msr_onfault,PCB_ONFAULT(%r8) movl %edi,%ecx movl %esi,%eax sarq $32,%rsi movl %esi,%edx wrmsr /* Write MSR pointed by %ecx. Accepts hi byte in edx, lo in %eax. */ xorq %rax,%rax movq %rax,PCB_ONFAULT(%r8) POP_FRAME_POINTER ret /* * MSR operations fault handler */ ALIGN_TEXT msr_onfault: movq $0,PCB_ONFAULT(%r8) movl $EFAULT,%eax POP_FRAME_POINTER ret Index: user/alc/PQ_LAUNDRY/sys/arm/arm/gic.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm/arm/gic.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/arm/arm/gic.c (revision 303642) @@ -1,1806 +1,1546 @@ /*- * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * * Developed by Damjan Marion * * Based on OMAP4 GIC code by Ben Gray * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INTRNG #include #endif #include #include #include #include #include #include -#include -#include #include +#include + #ifdef INTRNG #include "pic_if.h" #include "msi_if.h" #endif -#define GIC_DEBUG_SPURIOUS - /* We are using GICv2 register naming */ /* Distributor Registers */ #define GICD_CTLR 0x000 /* v1 ICDDCR */ #define GICD_TYPER 0x004 /* v1 ICDICTR */ #define GICD_IIDR 0x008 /* v1 ICDIIDR */ #define GICD_IGROUPR(n) (0x0080 + ((n) * 4)) /* v1 ICDISER */ #define GICD_ISENABLER(n) (0x0100 + ((n) * 4)) /* v1 ICDISER */ #define GICD_ICENABLER(n) (0x0180 + ((n) * 4)) /* v1 ICDICER */ #define GICD_ISPENDR(n) (0x0200 + ((n) * 4)) /* v1 ICDISPR */ #define GICD_ICPENDR(n) (0x0280 + ((n) * 4)) /* v1 ICDICPR */ #define GICD_ICACTIVER(n) (0x0380 + ((n) * 4)) /* v1 ICDABR */ #define GICD_IPRIORITYR(n) (0x0400 + ((n) * 4)) /* v1 ICDIPR */ #define GICD_ITARGETSR(n) (0x0800 + ((n) * 4)) /* v1 ICDIPTR */ #define GICD_ICFGR(n) (0x0C00 + ((n) * 4)) /* v1 ICDICFR */ #define GICD_SGIR(n) (0x0F00 + ((n) * 4)) /* v1 ICDSGIR */ #define GICD_SGI_TARGET_SHIFT 16 /* CPU Registers */ #define GICC_CTLR 0x0000 /* v1 ICCICR */ #define GICC_PMR 0x0004 /* v1 ICCPMR */ #define GICC_BPR 0x0008 /* v1 ICCBPR */ #define GICC_IAR 0x000C /* v1 ICCIAR */ #define GICC_EOIR 0x0010 /* v1 ICCEOIR */ #define GICC_RPR 0x0014 /* v1 ICCRPR */ #define GICC_HPPIR 0x0018 /* v1 ICCHPIR */ #define GICC_ABPR 0x001C /* v1 ICCABPR */ #define GICC_IIDR 0x00FC /* v1 ICCIIDR*/ -#define GIC_FIRST_SGI 0 /* Irqs 0-15 are SGIs/IPIs. */ -#define GIC_LAST_SGI 15 -#define GIC_FIRST_PPI 16 /* Irqs 16-31 are private (per */ -#define GIC_LAST_PPI 31 /* core) peripheral interrupts. */ -#define GIC_FIRST_SPI 32 /* Irqs 32+ are shared peripherals. */ - /* TYPER Registers */ #define GICD_TYPER_SECURITYEXT 0x400 #define GIC_SUPPORT_SECEXT(_sc) \ ((_sc->typer & GICD_TYPER_SECURITYEXT) == GICD_TYPER_SECURITYEXT) /* First bit is a polarity bit (0 - low, 1 - high) */ #define GICD_ICFGR_POL_LOW (0 << 0) #define GICD_ICFGR_POL_HIGH (1 << 0) #define GICD_ICFGR_POL_MASK 0x1 /* Second bit is a trigger bit (0 - level, 1 - edge) */ #define GICD_ICFGR_TRIG_LVL (0 << 1) #define GICD_ICFGR_TRIG_EDGE (1 << 1) #define GICD_ICFGR_TRIG_MASK 0x2 #ifndef GIC_DEFAULT_ICFGR_INIT #define GIC_DEFAULT_ICFGR_INIT 0x00000000 #endif #ifdef INTRNG struct gic_irqsrc { struct intr_irqsrc gi_isrc; uint32_t gi_irq; enum intr_polarity gi_pol; enum intr_trigger gi_trig; #define GI_FLAG_EARLY_EOI (1 << 0) #define GI_FLAG_MSI (1 << 1) /* This interrupt source should only */ /* be used for MSI/MSI-X interrupts */ #define GI_FLAG_MSI_USED (1 << 2) /* This irq is already allocated */ /* for a MSI/MSI-X interrupt */ u_int gi_flags; }; static u_int gic_irq_cpu; -static int arm_gic_intr(void *); static int arm_gic_bind_intr(device_t dev, struct intr_irqsrc *isrc); #ifdef SMP static u_int sgi_to_ipi[GIC_LAST_SGI - GIC_FIRST_SGI + 1]; static u_int sgi_first_unused = GIC_FIRST_SGI; #endif -#endif -#ifdef INTRNG -struct arm_gic_range { - uint64_t bus; - uint64_t host; - uint64_t size; +#define GIC_INTR_ISRC(sc, irq) (&sc->gic_irqs[irq].gi_isrc) +#else /* !INTRNG */ +static struct ofw_compat_data compat_data[] = { + {"arm,gic", true}, /* Non-standard, used in FreeBSD dts. 
*/ + {"arm,gic-400", true}, + {"arm,cortex-a15-gic", true}, + {"arm,cortex-a9-gic", true}, + {"arm,cortex-a7-gic", true}, + {"arm,arm11mp-gic", true}, + {"brcm,brahma-b15-gic", true}, + {"qcom,msm-qgic2", true}, + {NULL, false} }; - -struct arm_gic_devinfo { - struct ofw_bus_devinfo obdinfo; - struct resource_list rl; -}; #endif -struct arm_gic_softc { - device_t gic_dev; -#ifdef INTRNG - void * gic_intrhand; - struct gic_irqsrc * gic_irqs; -#endif - struct resource * gic_res[3]; - bus_space_tag_t gic_c_bst; - bus_space_tag_t gic_d_bst; - bus_space_handle_t gic_c_bsh; - bus_space_handle_t gic_d_bsh; - uint8_t ver; - struct mtx mutex; - uint32_t nirqs; - uint32_t typer; -#ifdef GIC_DEBUG_SPURIOUS - uint32_t last_irq[MAXCPU]; -#endif - -#ifdef INTRNG - /* FDT child data */ - pcell_t addr_cells; - pcell_t size_cells; - int nranges; - struct arm_gic_range * ranges; -#endif -}; - -#ifdef INTRNG -#define GIC_INTR_ISRC(sc, irq) (&sc->gic_irqs[irq].gi_isrc) -#endif - static struct resource_spec arm_gic_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, /* Distributor registers */ { SYS_RES_MEMORY, 1, RF_ACTIVE }, /* CPU Interrupt Intf. registers */ #ifdef INTRNG { SYS_RES_IRQ, 0, RF_ACTIVE | RF_OPTIONAL }, /* Parent interrupt */ #endif { -1, 0 } }; static u_int arm_gic_map[MAXCPU]; static struct arm_gic_softc *gic_sc = NULL; #define gic_c_read_4(_sc, _reg) \ bus_space_read_4((_sc)->gic_c_bst, (_sc)->gic_c_bsh, (_reg)) #define gic_c_write_4(_sc, _reg, _val) \ bus_space_write_4((_sc)->gic_c_bst, (_sc)->gic_c_bsh, (_reg), (_val)) #define gic_d_read_4(_sc, _reg) \ bus_space_read_4((_sc)->gic_d_bst, (_sc)->gic_d_bsh, (_reg)) #define gic_d_write_1(_sc, _reg, _val) \ bus_space_write_1((_sc)->gic_d_bst, (_sc)->gic_d_bsh, (_reg), (_val)) #define gic_d_write_4(_sc, _reg, _val) \ bus_space_write_4((_sc)->gic_d_bst, (_sc)->gic_d_bsh, (_reg), (_val)) #ifndef INTRNG static int gic_config_irq(int irq, enum intr_trigger trig, enum intr_polarity pol); static void gic_post_filter(void *); #endif -static struct ofw_compat_data compat_data[] = { - {"arm,gic", true}, /* Non-standard, used in FreeBSD dts. 
*/ - {"arm,gic-400", true}, - {"arm,cortex-a15-gic", true}, - {"arm,cortex-a9-gic", true}, - {"arm,cortex-a7-gic", true}, - {"arm,arm11mp-gic", true}, - {"brcm,brahma-b15-gic", true}, - {"qcom,msm-qgic2", true}, - {NULL, false} -}; - -static int -arm_gic_probe(device_t dev) -{ - - if (!ofw_bus_status_okay(dev)) - return (ENXIO); - - if (!ofw_bus_search_compatible(dev, compat_data)->ocd_data) - return (ENXIO); - device_set_desc(dev, "ARM Generic Interrupt Controller"); - return (BUS_PROBE_DEFAULT); -} - #ifdef INTRNG static inline void gic_irq_unmask(struct arm_gic_softc *sc, u_int irq) { gic_d_write_4(sc, GICD_ISENABLER(irq >> 5), (1UL << (irq & 0x1F))); } static inline void gic_irq_mask(struct arm_gic_softc *sc, u_int irq) { gic_d_write_4(sc, GICD_ICENABLER(irq >> 5), (1UL << (irq & 0x1F))); } #endif static uint8_t gic_cpu_mask(struct arm_gic_softc *sc) { uint32_t mask; int i; /* Read the current cpuid mask by reading ITARGETSR{0..7} */ for (i = 0; i < 8; i++) { mask = gic_d_read_4(sc, GICD_ITARGETSR(i)); if (mask != 0) break; } /* No mask found, assume we are on CPU interface 0 */ if (mask == 0) return (1); /* Collect the mask in the lower byte */ mask |= mask >> 16; mask |= mask >> 8; return (mask); } #ifdef SMP #ifdef INTRNG static void arm_gic_init_secondary(device_t dev) { struct arm_gic_softc *sc = device_get_softc(dev); u_int irq, cpu; /* Set the mask so we can find this CPU to send it IPIs */ cpu = PCPU_GET(cpuid); arm_gic_map[cpu] = gic_cpu_mask(sc); for (irq = 0; irq < sc->nirqs; irq += 4) gic_d_write_4(sc, GICD_IPRIORITYR(irq >> 2), 0); /* Set all the interrupts to be in Group 0 (secure) */ for (irq = 0; GIC_SUPPORT_SECEXT(sc) && irq < sc->nirqs; irq += 32) { gic_d_write_4(sc, GICD_IGROUPR(irq >> 5), 0); } /* Enable CPU interface */ gic_c_write_4(sc, GICC_CTLR, 1); /* Set priority mask register. */ gic_c_write_4(sc, GICC_PMR, 0xff); /* Enable interrupt distribution */ gic_d_write_4(sc, GICD_CTLR, 0x01); /* Unmask attached SGI interrupts. */ for (irq = GIC_FIRST_SGI; irq <= GIC_LAST_SGI; irq++) if (intr_isrc_init_on_cpu(GIC_INTR_ISRC(sc, irq), cpu)) gic_irq_unmask(sc, irq); /* Unmask attached PPI interrupts. */ for (irq = GIC_FIRST_PPI; irq <= GIC_LAST_PPI; irq++) if (intr_isrc_init_on_cpu(GIC_INTR_ISRC(sc, irq), cpu)) gic_irq_unmask(sc, irq); } #else static void arm_gic_init_secondary(device_t dev) { struct arm_gic_softc *sc = device_get_softc(dev); int i; /* Set the mask so we can find this CPU to send it IPIs */ arm_gic_map[PCPU_GET(cpuid)] = gic_cpu_mask(sc); for (i = 0; i < sc->nirqs; i += 4) gic_d_write_4(sc, GICD_IPRIORITYR(i >> 2), 0); /* Set all the interrupts to be in Group 0 (secure) */ for (i = 0; GIC_SUPPORT_SECEXT(sc) && i < sc->nirqs; i += 32) { gic_d_write_4(sc, GICD_IGROUPR(i >> 5), 0); } /* Enable CPU interface */ gic_c_write_4(sc, GICC_CTLR, 1); /* Set priority mask register. */ gic_c_write_4(sc, GICC_PMR, 0xff); /* Enable interrupt distribution */ gic_d_write_4(sc, GICD_CTLR, 0x01); /* * Activate the timer interrupts: virtual, secure, and non-secure. 
*/ gic_d_write_4(sc, GICD_ISENABLER(27 >> 5), (1UL << (27 & 0x1F))); gic_d_write_4(sc, GICD_ISENABLER(29 >> 5), (1UL << (29 & 0x1F))); gic_d_write_4(sc, GICD_ISENABLER(30 >> 5), (1UL << (30 & 0x1F))); } #endif /* INTRNG */ #endif /* SMP */ #ifndef INTRNG int gic_decode_fdt(phandle_t iparent, pcell_t *intr, int *interrupt, int *trig, int *pol) { static u_int num_intr_cells; static phandle_t self; struct ofw_compat_data *ocd; if (self == 0) { for (ocd = compat_data; ocd->ocd_str != NULL; ocd++) { if (fdt_is_compatible(iparent, ocd->ocd_str)) { self = iparent; break; } } } if (self != iparent) return (ENXIO); if (num_intr_cells == 0) { if (OF_searchencprop(OF_node_from_xref(iparent), "#interrupt-cells", &num_intr_cells, sizeof(num_intr_cells)) == -1) { num_intr_cells = 1; } } if (num_intr_cells == 1) { *interrupt = fdt32_to_cpu(intr[0]); *trig = INTR_TRIGGER_CONFORM; *pol = INTR_POLARITY_CONFORM; } else { if (fdt32_to_cpu(intr[0]) == 0) *interrupt = fdt32_to_cpu(intr[1]) + GIC_FIRST_SPI; else *interrupt = fdt32_to_cpu(intr[1]) + GIC_FIRST_PPI; /* * In intr[2], bits[3:0] are trigger type and level flags. * 1 = low-to-high edge triggered * 2 = high-to-low edge triggered * 4 = active high level-sensitive * 8 = active low level-sensitive * The hardware only supports active-high-level or rising-edge * for SPIs */ if (*interrupt >= GIC_FIRST_SPI && fdt32_to_cpu(intr[2]) & 0x0a) { printf("unsupported trigger/polarity configuration " "0x%02x\n", fdt32_to_cpu(intr[2]) & 0x0f); } *pol = INTR_POLARITY_CONFORM; if (fdt32_to_cpu(intr[2]) & 0x03) *trig = INTR_TRIGGER_EDGE; else *trig = INTR_TRIGGER_LEVEL; } return (0); } #endif #ifdef INTRNG -static inline intptr_t -gic_xref(device_t dev) -{ -#ifdef FDT - return (OF_xref_from_node(ofw_bus_get_node(dev))); -#else - return (0); -#endif -} - static int arm_gic_register_isrcs(struct arm_gic_softc *sc, uint32_t num) { int error; uint32_t irq; struct gic_irqsrc *irqs; struct intr_irqsrc *isrc; const char *name; irqs = malloc(num * sizeof(struct gic_irqsrc), M_DEVBUF, M_WAITOK | M_ZERO); name = device_get_nameunit(sc->gic_dev); for (irq = 0; irq < num; irq++) { irqs[irq].gi_irq = irq; irqs[irq].gi_pol = INTR_POLARITY_CONFORM; irqs[irq].gi_trig = INTR_TRIGGER_CONFORM; isrc = &irqs[irq].gi_isrc; if (irq <= GIC_LAST_SGI) { error = intr_isrc_register(isrc, sc->gic_dev, INTR_ISRCF_IPI, "%s,i%u", name, irq - GIC_FIRST_SGI); } else if (irq <= GIC_LAST_PPI) { error = intr_isrc_register(isrc, sc->gic_dev, INTR_ISRCF_PPI, "%s,p%u", name, irq - GIC_FIRST_PPI); } else { error = intr_isrc_register(isrc, sc->gic_dev, 0, "%s,s%u", name, irq - GIC_FIRST_SPI); } if (error != 0) { /* XXX call intr_isrc_deregister() */ free(irqs, M_DEVBUF); return (error); } } sc->gic_irqs = irqs; sc->nirqs = num; return (0); } -static int -arm_gic_fill_ranges(phandle_t node, struct arm_gic_softc *sc) -{ - pcell_t host_cells; - cell_t *base_ranges; - ssize_t nbase_ranges; - int i, j, k; - - host_cells = 1; - OF_getencprop(OF_parent(node), "#address-cells", &host_cells, - sizeof(host_cells)); - sc->addr_cells = 2; - OF_getencprop(node, "#address-cells", &sc->addr_cells, - sizeof(sc->addr_cells)); - sc->size_cells = 2; - OF_getencprop(node, "#size-cells", &sc->size_cells, - sizeof(sc->size_cells)); - - nbase_ranges = OF_getproplen(node, "ranges"); - if (nbase_ranges < 0) - return (-1); - sc->nranges = nbase_ranges / sizeof(cell_t) / - (sc->addr_cells + host_cells + sc->size_cells); - if (sc->nranges == 0) - return (0); - - sc->ranges = malloc(sc->nranges * sizeof(sc->ranges[0]), - M_DEVBUF, 
M_WAITOK); - base_ranges = malloc(nbase_ranges, M_DEVBUF, M_WAITOK); - OF_getencprop(node, "ranges", base_ranges, nbase_ranges); - - for (i = 0, j = 0; i < sc->nranges; i++) { - sc->ranges[i].bus = 0; - for (k = 0; k < sc->addr_cells; k++) { - sc->ranges[i].bus <<= 32; - sc->ranges[i].bus |= base_ranges[j++]; - } - sc->ranges[i].host = 0; - for (k = 0; k < host_cells; k++) { - sc->ranges[i].host <<= 32; - sc->ranges[i].host |= base_ranges[j++]; - } - sc->ranges[i].size = 0; - for (k = 0; k < sc->size_cells; k++) { - sc->ranges[i].size <<= 32; - sc->ranges[i].size |= base_ranges[j++]; - } - } - - free(base_ranges, M_DEVBUF); - return (sc->nranges); -} - -static bool -arm_gic_add_children(device_t dev) -{ - struct arm_gic_softc *sc; - struct arm_gic_devinfo *dinfo; - phandle_t child, node; - device_t cdev; - - sc = device_get_softc(dev); - node = ofw_bus_get_node(dev); - - /* If we have no children don't probe for them */ - child = OF_child(node); - if (child == 0) - return (false); - - if (arm_gic_fill_ranges(node, sc) < 0) { - device_printf(dev, "Have a child, but no ranges\n"); - return (false); - } - - for (; child != 0; child = OF_peer(child)) { - dinfo = malloc(sizeof(*dinfo), M_DEVBUF, M_WAITOK | M_ZERO); - - if (ofw_bus_gen_setup_devinfo(&dinfo->obdinfo, child) != 0) { - free(dinfo, M_DEVBUF); - continue; - } - - resource_list_init(&dinfo->rl); - ofw_bus_reg_to_rl(dev, child, sc->addr_cells, - sc->size_cells, &dinfo->rl); - - cdev = device_add_child(dev, NULL, -1); - if (cdev == NULL) { - device_printf(dev, "<%s>: device_add_child failed\n", - dinfo->obdinfo.obd_name); - resource_list_free(&dinfo->rl); - ofw_bus_gen_destroy_devinfo(&dinfo->obdinfo); - free(dinfo, M_DEVBUF); - continue; - } - device_set_ivars(cdev, dinfo); - } - - return (true); -} - static void arm_gic_reserve_msi_range(device_t dev, u_int start, u_int count) { struct arm_gic_softc *sc; int i; sc = device_get_softc(dev); KASSERT((start + count) < sc->nirqs, ("%s: Trying to allocate too many MSI IRQs: %d + %d > %d", __func__, start, count, sc->nirqs)); for (i = 0; i < count; i++) { KASSERT(sc->gic_irqs[start + i].gi_isrc.isrc_handlers == 0, ("%s: MSI interrupt %d already has a handler", __func__, count + i)); KASSERT(sc->gic_irqs[start + i].gi_pol == INTR_POLARITY_CONFORM, ("%s: MSI interrupt %d already has a polarity", __func__, count + i)); KASSERT(sc->gic_irqs[start + i].gi_trig == INTR_TRIGGER_CONFORM, ("%s: MSI interrupt %d already has a trigger", __func__, count + i)); sc->gic_irqs[start + i].gi_pol = INTR_POLARITY_HIGH; sc->gic_irqs[start + i].gi_trig = INTR_TRIGGER_EDGE; sc->gic_irqs[start + i].gi_flags |= GI_FLAG_MSI; } } #endif -static int +int arm_gic_attach(device_t dev) { struct arm_gic_softc *sc; int i; uint32_t icciidr, mask, nirqs; -#ifdef INTRNG - phandle_t pxref; - intptr_t xref = gic_xref(dev); -#endif if (gic_sc) return (ENXIO); sc = device_get_softc(dev); if (bus_alloc_resources(dev, arm_gic_spec, sc->gic_res)) { device_printf(dev, "could not allocate resources\n"); return (ENXIO); } sc->gic_dev = dev; gic_sc = sc; /* Initialize mutex */ mtx_init(&sc->mutex, "GIC lock", "", MTX_SPIN); /* Distributor Interface */ sc->gic_d_bst = rman_get_bustag(sc->gic_res[0]); sc->gic_d_bsh = rman_get_bushandle(sc->gic_res[0]); /* CPU Interface */ sc->gic_c_bst = rman_get_bustag(sc->gic_res[1]); sc->gic_c_bsh = rman_get_bushandle(sc->gic_res[1]); /* Disable interrupt forwarding to the CPU interface */ gic_d_write_4(sc, GICD_CTLR, 0x00); /* Get the number of interrupts */ sc->typer = gic_d_read_4(sc, GICD_TYPER); 
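GICD_TYPER bits [4:0] (ITLinesNumber) encode the number of implemented interrupt IDs as 32 * (N + 1), which is what the computation on the next line evaluates. A hedged worked example:

	/* ITLinesNumber == 3 -> 32 * (3 + 1) == 128 interrupt IDs (0-127). */
	uint32_t typer_sample = 0x3;
	uint32_t nirqs_sample = 32 * ((typer_sample & 0x1f) + 1);	/* 128 */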
nirqs = 32 * ((sc->typer & 0x1f) + 1); #ifdef INTRNG if (arm_gic_register_isrcs(sc, nirqs)) { device_printf(dev, "could not register irqs\n"); goto cleanup; } #else sc->nirqs = nirqs; /* Set up function pointers */ arm_post_filter = gic_post_filter; arm_config_irq = gic_config_irq; #endif icciidr = gic_c_read_4(sc, GICC_IIDR); device_printf(dev,"pn 0x%x, arch 0x%x, rev 0x%x, implementer 0x%x irqs %u\n", icciidr>>20, (icciidr>>16) & 0xF, (icciidr>>12) & 0xf, (icciidr & 0xfff), sc->nirqs); /* Set all global interrupts to be level triggered, active low. */ for (i = 32; i < sc->nirqs; i += 16) { gic_d_write_4(sc, GICD_ICFGR(i >> 4), GIC_DEFAULT_ICFGR_INIT); } /* Disable all interrupts. */ for (i = 32; i < sc->nirqs; i += 32) { gic_d_write_4(sc, GICD_ICENABLER(i >> 5), 0xFFFFFFFF); } /* Find the current cpu mask */ mask = gic_cpu_mask(sc); /* Set the mask so we can find this CPU to send it IPIs */ arm_gic_map[PCPU_GET(cpuid)] = mask; /* Set all four targets to this cpu */ mask |= mask << 8; mask |= mask << 16; for (i = 0; i < sc->nirqs; i += 4) { gic_d_write_4(sc, GICD_IPRIORITYR(i >> 2), 0); if (i > 32) { gic_d_write_4(sc, GICD_ITARGETSR(i >> 2), mask); } } /* Set all the interrupts to be in Group 0 (secure) */ for (i = 0; GIC_SUPPORT_SECEXT(sc) && i < sc->nirqs; i += 32) { gic_d_write_4(sc, GICD_IGROUPR(i >> 5), 0); } /* Enable CPU interface */ gic_c_write_4(sc, GICC_CTLR, 1); /* Set priority mask register. */ gic_c_write_4(sc, GICC_PMR, 0xff); /* Enable interrupt distribution */ gic_d_write_4(sc, GICD_CTLR, 0x01); -#ifndef INTRNG return (0); -#else - /* - * Now, when everything is initialized, it's right time to - * register interrupt controller to interrupt framefork. - */ - if (intr_pic_register(dev, xref) == NULL) { - device_printf(dev, "could not register PIC\n"); - goto cleanup; - } - /* - * Controller is root if: - * - doesn't have interrupt parent - * - his interrupt parent is this controller - */ - pxref = ofw_bus_find_iparent(ofw_bus_get_node(dev)); - if (pxref == 0 || xref == pxref) { - if (intr_pic_claim_root(dev, xref, arm_gic_intr, sc, - GIC_LAST_SGI - GIC_FIRST_SGI + 1) != 0) { - device_printf(dev, "could not set PIC as a root\n"); - intr_pic_deregister(dev, xref); - goto cleanup; - } - } else { - if (sc->gic_res[2] == NULL) { - device_printf(dev, - "not root PIC must have defined interrupt\n"); - intr_pic_deregister(dev, xref); - goto cleanup; - } - if (bus_setup_intr(dev, sc->gic_res[2], INTR_TYPE_CLK, - arm_gic_intr, NULL, sc, &sc->gic_intrhand)) { - device_printf(dev, "could not setup irq handler\n"); - intr_pic_deregister(dev, xref); - goto cleanup; - } - } +#ifdef INTRNG +cleanup: + arm_gic_detach(dev); + return(ENXIO); +#endif +} - OF_device_register_xref(xref, dev); +int +arm_gic_detach(device_t dev) +{ +#ifdef INTRNG + struct arm_gic_softc *sc; - /* If we have children probe and attach them */ - if (arm_gic_add_children(dev)) { - bus_generic_probe(dev); - return (bus_generic_attach(dev)); - } + sc = device_get_softc(dev); - return (0); - -cleanup: - /* - * XXX - not implemented arm_gic_detach() should be called ! 
- */ if (sc->gic_irqs != NULL) free(sc->gic_irqs, M_DEVBUF); + bus_release_resources(dev, arm_gic_spec, sc->gic_res); - return(ENXIO); #endif + + return (0); } #ifdef INTRNG +static int +arm_gic_print_child(device_t bus, device_t child) +{ + struct resource_list *rl; + int rv; + + rv = bus_print_child_header(bus, child); + + rl = BUS_GET_RESOURCE_LIST(bus, child); + if (rl != NULL) { + rv += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, + "%#jx"); + rv += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd"); + } + + rv += bus_print_child_footer(bus, child); + + return (rv); +} + static struct resource * arm_gic_alloc_resource(device_t bus, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct arm_gic_softc *sc; - struct arm_gic_devinfo *di; struct resource_list_entry *rle; + struct resource_list *rl; int j; KASSERT(type == SYS_RES_MEMORY, ("Invalid resoure type %x", type)); sc = device_get_softc(bus); /* * Request for the default allocation with a given rid: use resource * list stored in the local device info. */ if (RMAN_IS_DEFAULT_RANGE(start, end)) { - if ((di = device_get_ivars(child)) == NULL) - return (NULL); + rl = BUS_GET_RESOURCE_LIST(bus, child); if (type == SYS_RES_IOPORT) type = SYS_RES_MEMORY; - rle = resource_list_find(&di->rl, type, *rid); + rle = resource_list_find(rl, type, *rid); if (rle == NULL) { if (bootverbose) device_printf(bus, "no default resources for " "rid = %d, type = %d\n", *rid, type); return (NULL); } start = rle->start; end = rle->end; count = rle->count; } /* Remap through ranges property */ for (j = 0; j < sc->nranges; j++) { if (start >= sc->ranges[j].bus && end < sc->ranges[j].bus + sc->ranges[j].size) { start -= sc->ranges[j].bus; start += sc->ranges[j].host; end -= sc->ranges[j].bus; end += sc->ranges[j].host; break; } } if (j == sc->nranges && sc->nranges != 0) { if (bootverbose) device_printf(bus, "Could not map resource " "%#jx-%#jx\n", (uintmax_t)start, (uintmax_t)end); return (NULL); } return (bus_generic_alloc_resource(bus, child, type, rid, start, end, count, flags)); } -static const struct ofw_bus_devinfo * -arm_gic_ofw_get_devinfo(device_t bus __unused, device_t child) -{ - struct arm_gic_devinfo *di; - - di = device_get_ivars(child); - - return (&di->obdinfo); -} - -static int +int arm_gic_intr(void *arg) { struct arm_gic_softc *sc = arg; struct gic_irqsrc *gi; uint32_t irq_active_reg, irq; struct trapframe *tf; irq_active_reg = gic_c_read_4(sc, GICC_IAR); irq = irq_active_reg & 0x3FF; /* * 1. We do EOI here because recent read value from active interrupt * register must be used for it. Another approach is to save this * value into associated interrupt source. * 2. EOI must be done on same CPU where interrupt has fired. Thus * we must ensure that interrupted thread does not migrate to * another CPU. * 3. EOI cannot be delayed by any preemption which could happen on * critical_exit() used in MI intr code, when interrupt thread is * scheduled. See next point. * 4. IPI_RENDEZVOUS assumes that no preemption is permitted during * an action and any use of critical_exit() could break this * assumption. See comments within smp_rendezvous_action(). * 5. We always return FILTER_HANDLED as this is an interrupt * controller dispatch function. Otherwise, in cascaded interrupt * case, the whole interrupt subtree would be masked. 
*/ if (irq >= sc->nirqs) { #ifdef GIC_DEBUG_SPURIOUS device_printf(sc->gic_dev, "Spurious interrupt detected: last irq: %d on CPU%d\n", sc->last_irq[PCPU_GET(cpuid)], PCPU_GET(cpuid)); #endif return (FILTER_HANDLED); } tf = curthread->td_intr_frame; dispatch_irq: gi = sc->gic_irqs + irq; /* * Note that GIC_FIRST_SGI is zero and is not used in 'if' statement * as compiler complains that comparing u_int >= 0 is always true. */ if (irq <= GIC_LAST_SGI) { #ifdef SMP /* Call EOI for all IPI before dispatch. */ gic_c_write_4(sc, GICC_EOIR, irq_active_reg); intr_ipi_dispatch(sgi_to_ipi[gi->gi_irq], tf); goto next_irq; #else device_printf(sc->gic_dev, "SGI %u on UP system detected\n", irq - GIC_FIRST_SGI); gic_c_write_4(sc, GICC_EOIR, irq_active_reg); goto next_irq; #endif } #ifdef GIC_DEBUG_SPURIOUS sc->last_irq[PCPU_GET(cpuid)] = irq; #endif if ((gi->gi_flags & GI_FLAG_EARLY_EOI) == GI_FLAG_EARLY_EOI) gic_c_write_4(sc, GICC_EOIR, irq_active_reg); if (intr_isrc_dispatch(&gi->gi_isrc, tf) != 0) { gic_irq_mask(sc, irq); if ((gi->gi_flags & GI_FLAG_EARLY_EOI) != GI_FLAG_EARLY_EOI) gic_c_write_4(sc, GICC_EOIR, irq_active_reg); device_printf(sc->gic_dev, "Stray irq %u disabled\n", irq); } next_irq: arm_irq_memory_barrier(irq); irq_active_reg = gic_c_read_4(sc, GICC_IAR); irq = irq_active_reg & 0x3FF; if (irq < sc->nirqs) goto dispatch_irq; return (FILTER_HANDLED); } static void gic_config(struct arm_gic_softc *sc, u_int irq, enum intr_trigger trig, enum intr_polarity pol) { uint32_t reg; uint32_t mask; if (irq < GIC_FIRST_SPI) return; mtx_lock_spin(&sc->mutex); reg = gic_d_read_4(sc, GICD_ICFGR(irq >> 4)); mask = (reg >> 2*(irq % 16)) & 0x3; if (pol == INTR_POLARITY_LOW) { mask &= ~GICD_ICFGR_POL_MASK; mask |= GICD_ICFGR_POL_LOW; } else if (pol == INTR_POLARITY_HIGH) { mask &= ~GICD_ICFGR_POL_MASK; mask |= GICD_ICFGR_POL_HIGH; } if (trig == INTR_TRIGGER_LEVEL) { mask &= ~GICD_ICFGR_TRIG_MASK; mask |= GICD_ICFGR_TRIG_LVL; } else if (trig == INTR_TRIGGER_EDGE) { mask &= ~GICD_ICFGR_TRIG_MASK; mask |= GICD_ICFGR_TRIG_EDGE; } /* Set mask */ reg = reg & ~(0x3 << 2*(irq % 16)); reg = reg | (mask << 2*(irq % 16)); gic_d_write_4(sc, GICD_ICFGR(irq >> 4), reg); mtx_unlock_spin(&sc->mutex); } static int gic_bind(struct arm_gic_softc *sc, u_int irq, cpuset_t *cpus) { uint32_t cpu, end, mask; end = min(mp_ncpus, 8); for (cpu = end; cpu < MAXCPU; cpu++) if (CPU_ISSET(cpu, cpus)) return (EINVAL); for (mask = 0, cpu = 0; cpu < end; cpu++) if (CPU_ISSET(cpu, cpus)) mask |= arm_gic_map[cpu]; gic_d_write_1(sc, GICD_ITARGETSR(0) + irq, mask); return (0); } #ifdef FDT static int gic_map_fdt(device_t dev, u_int ncells, pcell_t *cells, u_int *irqp, enum intr_polarity *polp, enum intr_trigger *trigp) { if (ncells == 1) { *irqp = cells[0]; *polp = INTR_POLARITY_CONFORM; *trigp = INTR_TRIGGER_CONFORM; return (0); } if (ncells == 3) { u_int irq, tripol; /* * The 1st cell is the interrupt type: * 0 = SPI * 1 = PPI * The 2nd cell contains the interrupt number: * [0 - 987] for SPI * [0 - 15] for PPI * The 3rd cell is the flags, encoded as follows: * bits[3:0] trigger type and level flags * 1 = low-to-high edge triggered * 2 = high-to-low edge triggered * 4 = active high level-sensitive * 8 = active low level-sensitive * bits[15:8] PPI interrupt cpu mask * Each bit corresponds to each of the 8 possible cpus * attached to the GIC. A bit set to '1' indicated * the interrupt is wired to that CPU. */ switch (cells[0]) { case 0: irq = GIC_FIRST_SPI + cells[1]; /* SPI irq is checked later. 
*/ break; case 1: irq = GIC_FIRST_PPI + cells[1]; if (irq > GIC_LAST_PPI) { device_printf(dev, "unsupported PPI interrupt " "number %u\n", cells[1]); return (EINVAL); } break; default: device_printf(dev, "unsupported interrupt type " "configuration %u\n", cells[0]); return (EINVAL); } tripol = cells[2] & 0xff; if (tripol & 0xf0 || (tripol & 0x0a && cells[0] == 0)) device_printf(dev, "unsupported trigger/polarity " "configuration 0x%02x\n", tripol); *irqp = irq; *polp = INTR_POLARITY_CONFORM; *trigp = tripol & 0x03 ? INTR_TRIGGER_EDGE : INTR_TRIGGER_LEVEL; return (0); } return (EINVAL); } #endif static int gic_map_intr(device_t dev, struct intr_map_data *data, u_int *irqp, enum intr_polarity *polp, enum intr_trigger *trigp) { u_int irq; enum intr_polarity pol; enum intr_trigger trig; struct arm_gic_softc *sc; #ifdef FDT struct intr_map_data_fdt *daf; #endif sc = device_get_softc(dev); switch (data->type) { #ifdef FDT case INTR_MAP_DATA_FDT: daf = (struct intr_map_data_fdt *)data; if (gic_map_fdt(dev, daf->ncells, daf->cells, &irq, &pol, &trig) != 0) return (EINVAL); KASSERT(irq >= sc->nirqs || (sc->gic_irqs[irq].gi_flags & GI_FLAG_MSI) == 0, ("%s: Attempting to map a MSI interrupt from FDT", __func__)); break; #endif default: return (ENOTSUP); } if (irq >= sc->nirqs) return (EINVAL); if (pol != INTR_POLARITY_CONFORM && pol != INTR_POLARITY_LOW && pol != INTR_POLARITY_HIGH) return (EINVAL); if (trig != INTR_TRIGGER_CONFORM && trig != INTR_TRIGGER_EDGE && trig != INTR_TRIGGER_LEVEL) return (EINVAL); *irqp = irq; if (polp != NULL) *polp = pol; if (trigp != NULL) *trigp = trig; return (0); } static int arm_gic_map_intr(device_t dev, struct intr_map_data *data, struct intr_irqsrc **isrcp) { int error; u_int irq; struct arm_gic_softc *sc; error = gic_map_intr(dev, data, &irq, NULL, NULL); if (error == 0) { sc = device_get_softc(dev); *isrcp = GIC_INTR_ISRC(sc, irq); } return (error); } static int arm_gic_setup_intr(device_t dev, struct intr_irqsrc *isrc, struct resource *res, struct intr_map_data *data) { struct arm_gic_softc *sc = device_get_softc(dev); struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; enum intr_trigger trig; enum intr_polarity pol; if ((gi->gi_flags & GI_FLAG_MSI) == GI_FLAG_MSI) { pol = gi->gi_pol; trig = gi->gi_trig; KASSERT(pol == INTR_POLARITY_HIGH, ("%s: MSI interrupts must be active-high", __func__)); KASSERT(trig == INTR_TRIGGER_EDGE, ("%s: MSI interrupts must be edge triggered", __func__)); } else if (data != NULL) { u_int irq; /* Get config for resource. */ if (gic_map_intr(dev, data, &irq, &pol, &trig) || gi->gi_irq != irq) return (EINVAL); } else { pol = INTR_POLARITY_CONFORM; trig = INTR_TRIGGER_CONFORM; } /* Compare config if this is not first setup. */ if (isrc->isrc_handlers != 0) { if ((pol != INTR_POLARITY_CONFORM && pol != gi->gi_pol) || (trig != INTR_TRIGGER_CONFORM && trig != gi->gi_trig)) return (EINVAL); else return (0); } /* For MSI/MSI-X we should have already configured these */ if ((gi->gi_flags & GI_FLAG_MSI) == 0) { if (pol == INTR_POLARITY_CONFORM) pol = INTR_POLARITY_LOW; /* just pick some */ if (trig == INTR_TRIGGER_CONFORM) trig = INTR_TRIGGER_EDGE; /* just pick some */ gi->gi_pol = pol; gi->gi_trig = trig; /* Edge triggered interrupts need an early EOI sent */ if (gi->gi_pol == INTR_TRIGGER_EDGE) gi->gi_flags |= GI_FLAG_EARLY_EOI; } /* * XXX - In case that per CPU interrupt is going to be enabled in time * when SMP is already started, we need some IPI call which * enables it on others CPUs. 
Further, it's more complicated as * pic_enable_source() and pic_disable_source() should act on * per CPU basis only. Thus, it should be solved here somehow. */ if (isrc->isrc_flags & INTR_ISRCF_PPI) CPU_SET(PCPU_GET(cpuid), &isrc->isrc_cpu); gic_config(sc, gi->gi_irq, gi->gi_trig, gi->gi_pol); arm_gic_bind_intr(dev, isrc); return (0); } static int arm_gic_teardown_intr(device_t dev, struct intr_irqsrc *isrc, struct resource *res, struct intr_map_data *data) { struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; if (isrc->isrc_handlers == 0 && (gi->gi_flags & GI_FLAG_MSI) == 0) { gi->gi_pol = INTR_POLARITY_CONFORM; gi->gi_trig = INTR_TRIGGER_CONFORM; } return (0); } static void arm_gic_enable_intr(device_t dev, struct intr_irqsrc *isrc) { struct arm_gic_softc *sc = device_get_softc(dev); struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; arm_irq_memory_barrier(gi->gi_irq); gic_irq_unmask(sc, gi->gi_irq); } static void arm_gic_disable_intr(device_t dev, struct intr_irqsrc *isrc) { struct arm_gic_softc *sc = device_get_softc(dev); struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; gic_irq_mask(sc, gi->gi_irq); } static void arm_gic_pre_ithread(device_t dev, struct intr_irqsrc *isrc) { struct arm_gic_softc *sc = device_get_softc(dev); struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; arm_gic_disable_intr(dev, isrc); gic_c_write_4(sc, GICC_EOIR, gi->gi_irq); } static void arm_gic_post_ithread(device_t dev, struct intr_irqsrc *isrc) { arm_irq_memory_barrier(0); arm_gic_enable_intr(dev, isrc); } static void arm_gic_post_filter(device_t dev, struct intr_irqsrc *isrc) { struct arm_gic_softc *sc = device_get_softc(dev); struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; /* EOI for edge-triggered done earlier. */ if ((gi->gi_flags & GI_FLAG_EARLY_EOI) == GI_FLAG_EARLY_EOI) return; arm_irq_memory_barrier(0); gic_c_write_4(sc, GICC_EOIR, gi->gi_irq); } static int arm_gic_bind_intr(device_t dev, struct intr_irqsrc *isrc) { struct arm_gic_softc *sc = device_get_softc(dev); struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; if (gi->gi_irq < GIC_FIRST_SPI) return (EINVAL); if (CPU_EMPTY(&isrc->isrc_cpu)) { gic_irq_cpu = intr_irq_next_cpu(gic_irq_cpu, &all_cpus); CPU_SETOF(gic_irq_cpu, &isrc->isrc_cpu); } return (gic_bind(sc, gi->gi_irq, &isrc->isrc_cpu)); } #ifdef SMP static void arm_gic_ipi_send(device_t dev, struct intr_irqsrc *isrc, cpuset_t cpus, u_int ipi) { struct arm_gic_softc *sc = device_get_softc(dev); struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; uint32_t val = 0, i; for (i = 0; i < MAXCPU; i++) if (CPU_ISSET(i, &cpus)) val |= arm_gic_map[i] << GICD_SGI_TARGET_SHIFT; gic_d_write_4(sc, GICD_SGIR(0), val | gi->gi_irq); } static int arm_gic_ipi_setup(device_t dev, u_int ipi, struct intr_irqsrc **isrcp) { struct intr_irqsrc *isrc; struct arm_gic_softc *sc = device_get_softc(dev); if (sgi_first_unused > GIC_LAST_SGI) return (ENOSPC); isrc = GIC_INTR_ISRC(sc, sgi_first_unused); sgi_to_ipi[sgi_first_unused++] = ipi; CPU_SET(PCPU_GET(cpuid), &isrc->isrc_cpu); *isrcp = isrc; return (0); } #endif #else static int arm_gic_next_irq(struct arm_gic_softc *sc, int last_irq) { uint32_t active_irq; active_irq = gic_c_read_4(sc, GICC_IAR); /* * Immediately EOIR the SGIs, because doing so requires the other * bits (ie CPU number), not just the IRQ number, and we do not * have this information later. 
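On GICv2 the IAR value read for an SGI carries the requesting CPU in bits [12:10] alongside the interrupt ID in bits [9:0], and the EOIR write must echo those bits back; that is why the code below completes SGIs immediately, while the full register value is still in hand. A hedged worked example:

	/* IAR == 0x0c05: SGI 5 signalled by CPU 3 (bits [12:10] == 3);
	 * the same value 0x0c05, not just 5, must go back to GICC_EOIR. */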
*/ if ((active_irq & 0x3ff) <= GIC_LAST_SGI) gic_c_write_4(sc, GICC_EOIR, active_irq); active_irq &= 0x3FF; if (active_irq == 0x3FF) { if (last_irq == -1) device_printf(sc->gic_dev, "Spurious interrupt detected\n"); return -1; } return active_irq; } static int arm_gic_config(device_t dev, int irq, enum intr_trigger trig, enum intr_polarity pol) { struct arm_gic_softc *sc = device_get_softc(dev); uint32_t reg; uint32_t mask; /* Function is public-accessible, so validate input arguments */ if ((irq < 0) || (irq >= sc->nirqs)) goto invalid_args; if ((trig != INTR_TRIGGER_EDGE) && (trig != INTR_TRIGGER_LEVEL) && (trig != INTR_TRIGGER_CONFORM)) goto invalid_args; if ((pol != INTR_POLARITY_HIGH) && (pol != INTR_POLARITY_LOW) && (pol != INTR_POLARITY_CONFORM)) goto invalid_args; mtx_lock_spin(&sc->mutex); reg = gic_d_read_4(sc, GICD_ICFGR(irq >> 4)); mask = (reg >> 2*(irq % 16)) & 0x3; if (pol == INTR_POLARITY_LOW) { mask &= ~GICD_ICFGR_POL_MASK; mask |= GICD_ICFGR_POL_LOW; } else if (pol == INTR_POLARITY_HIGH) { mask &= ~GICD_ICFGR_POL_MASK; mask |= GICD_ICFGR_POL_HIGH; } if (trig == INTR_TRIGGER_LEVEL) { mask &= ~GICD_ICFGR_TRIG_MASK; mask |= GICD_ICFGR_TRIG_LVL; } else if (trig == INTR_TRIGGER_EDGE) { mask &= ~GICD_ICFGR_TRIG_MASK; mask |= GICD_ICFGR_TRIG_EDGE; } /* Set mask */ reg = reg & ~(0x3 << 2*(irq % 16)); reg = reg | (mask << 2*(irq % 16)); gic_d_write_4(sc, GICD_ICFGR(irq >> 4), reg); mtx_unlock_spin(&sc->mutex); return (0); invalid_args: device_printf(dev, "gic_config_irg, invalid parameters\n"); return (EINVAL); } static void arm_gic_mask(device_t dev, int irq) { struct arm_gic_softc *sc = device_get_softc(dev); gic_d_write_4(sc, GICD_ICENABLER(irq >> 5), (1UL << (irq & 0x1F))); gic_c_write_4(sc, GICC_EOIR, irq); /* XXX - not allowed */ } static void arm_gic_unmask(device_t dev, int irq) { struct arm_gic_softc *sc = device_get_softc(dev); if (irq > GIC_LAST_SGI) arm_irq_memory_barrier(irq); gic_d_write_4(sc, GICD_ISENABLER(irq >> 5), (1UL << (irq & 0x1F))); } #ifdef SMP static void arm_gic_ipi_send(device_t dev, cpuset_t cpus, u_int ipi) { struct arm_gic_softc *sc = device_get_softc(dev); uint32_t val = 0, i; for (i = 0; i < MAXCPU; i++) if (CPU_ISSET(i, &cpus)) val |= arm_gic_map[i] << GICD_SGI_TARGET_SHIFT; gic_d_write_4(sc, GICD_SGIR(0), val | ipi); } static int arm_gic_ipi_read(device_t dev, int i) { if (i != -1) { /* * The intr code will automagically give the frame pointer * if the interrupt argument is 0. 
*/ if ((unsigned int)i > 16) return (0); return (i); } return (0x3ff); } static void arm_gic_ipi_clear(device_t dev, int ipi) { /* no-op */ } #endif static void gic_post_filter(void *arg) { struct arm_gic_softc *sc = gic_sc; uintptr_t irq = (uintptr_t) arg; if (irq > GIC_LAST_SGI) arm_irq_memory_barrier(irq); gic_c_write_4(sc, GICC_EOIR, irq); } static int gic_config_irq(int irq, enum intr_trigger trig, enum intr_polarity pol) { return (arm_gic_config(gic_sc->gic_dev, irq, trig, pol)); } void arm_mask_irq(uintptr_t nb) { arm_gic_mask(gic_sc->gic_dev, nb); } void arm_unmask_irq(uintptr_t nb) { arm_gic_unmask(gic_sc->gic_dev, nb); } int arm_get_next_irq(int last_irq) { return (arm_gic_next_irq(gic_sc, last_irq)); } #ifdef SMP void intr_pic_init_secondary(void) { arm_gic_init_secondary(gic_sc->gic_dev); } void pic_ipi_send(cpuset_t cpus, u_int ipi) { arm_gic_ipi_send(gic_sc->gic_dev, cpus, ipi); } int pic_ipi_read(int i) { return (arm_gic_ipi_read(gic_sc->gic_dev, i)); } void pic_ipi_clear(int ipi) { arm_gic_ipi_clear(gic_sc->gic_dev, ipi); } #endif #endif /* INTRNG */ static device_method_t arm_gic_methods[] = { - /* Device interface */ - DEVMETHOD(device_probe, arm_gic_probe), - DEVMETHOD(device_attach, arm_gic_attach), - #ifdef INTRNG /* Bus interface */ + DEVMETHOD(bus_print_child, arm_gic_print_child), DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_alloc_resource, arm_gic_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource,bus_generic_activate_resource), - /* ofw_bus interface */ - DEVMETHOD(ofw_bus_get_devinfo, arm_gic_ofw_get_devinfo), - DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat), - DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model), - DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name), - DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node), - DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type), - /* Interrupt controller interface */ DEVMETHOD(pic_disable_intr, arm_gic_disable_intr), DEVMETHOD(pic_enable_intr, arm_gic_enable_intr), DEVMETHOD(pic_map_intr, arm_gic_map_intr), DEVMETHOD(pic_setup_intr, arm_gic_setup_intr), DEVMETHOD(pic_teardown_intr, arm_gic_teardown_intr), DEVMETHOD(pic_post_filter, arm_gic_post_filter), DEVMETHOD(pic_post_ithread, arm_gic_post_ithread), DEVMETHOD(pic_pre_ithread, arm_gic_pre_ithread), #ifdef SMP DEVMETHOD(pic_bind_intr, arm_gic_bind_intr), DEVMETHOD(pic_init_secondary, arm_gic_init_secondary), DEVMETHOD(pic_ipi_send, arm_gic_ipi_send), DEVMETHOD(pic_ipi_setup, arm_gic_ipi_setup), #endif #endif { 0, 0 } }; -static driver_t arm_gic_driver = { - "gic", - arm_gic_methods, - sizeof(struct arm_gic_softc), -}; +DEFINE_CLASS_0(gic, arm_gic_driver, arm_gic_methods, + sizeof(struct arm_gic_softc)); -static devclass_t arm_gic_devclass; - -EARLY_DRIVER_MODULE(gic, simplebus, arm_gic_driver, arm_gic_devclass, 0, 0, - BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); -EARLY_DRIVER_MODULE(gic, ofwbus, arm_gic_driver, arm_gic_devclass, 0, 0, - BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); - #ifdef INTRNG /* * GICv2m support -- the GICv2 MSI/MSI-X controller. 
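A GICv2m frame turns MSI writes into ordinary SPIs: the device is programmed with the physical address of the frame's MSI_SETSPI_NS doorbell (offset 0x040) and a data value equal to the SPI number, exactly the pair arm_gicv2m_map_msi() below hands back. A hedged worked example:

	/* If MSI_TYPER reports SPI base 64 and count 32, a device given
	 * data == 70 raises SPI 70 in the distributor on every MSI write
	 * to <frame physical address> + 0x040. */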
*/ #define GICV2M_MSI_TYPER 0x008 #define MSI_TYPER_SPI_BASE(x) (((x) >> 16) & 0x3ff) #define MSI_TYPER_SPI_COUNT(x) (((x) >> 0) & 0x3ff) #define GICv2M_MSI_SETSPI_NS 0x040 #define GICV2M_MSI_IIDR 0xFCC -struct arm_gicv2m_softc { - struct resource *sc_mem; - struct mtx sc_mutex; - u_int sc_spi_start; - u_int sc_spi_end; - u_int sc_spi_count; -}; - -static struct ofw_compat_data gicv2m_compat_data[] = { - {"arm,gic-v2m-frame", true}, - {NULL, false} -}; - -static int -arm_gicv2m_probe(device_t dev) -{ - - if (!ofw_bus_status_okay(dev)) - return (ENXIO); - - if (!ofw_bus_search_compatible(dev, gicv2m_compat_data)->ocd_data) - return (ENXIO); - - device_set_desc(dev, "ARM Generic Interrupt Controller MSI/MSIX"); - return (BUS_PROBE_DEFAULT); -} - -static int +int arm_gicv2m_attach(device_t dev) { struct arm_gicv2m_softc *sc; struct arm_gic_softc *psc; uint32_t typer; int rid; psc = device_get_softc(device_get_parent(dev)); sc = device_get_softc(dev); rid = 0; sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->sc_mem == NULL) { device_printf(dev, "Unable to allocate resources\n"); return (ENXIO); } typer = bus_read_4(sc->sc_mem, GICV2M_MSI_TYPER); sc->sc_spi_start = MSI_TYPER_SPI_BASE(typer); sc->sc_spi_count = MSI_TYPER_SPI_COUNT(typer); sc->sc_spi_end = sc->sc_spi_start + sc->sc_spi_count; /* Reserve these interrupts for MSI/MSI-X use */ arm_gic_reserve_msi_range(device_get_parent(dev), sc->sc_spi_start, sc->sc_spi_count); mtx_init(&sc->sc_mutex, "GICv2m lock", "", MTX_DEF); - intr_msi_register(dev, gic_xref(dev)); + intr_msi_register(dev, sc->sc_xref); if (bootverbose) device_printf(dev, "using spi %u to %u\n", sc->sc_spi_start, sc->sc_spi_start + sc->sc_spi_count - 1); return (0); } static int arm_gicv2m_alloc_msi(device_t dev, device_t child, int count, int maxcount, device_t *pic, struct intr_irqsrc **srcs) { struct arm_gic_softc *psc; struct arm_gicv2m_softc *sc; int i, irq, end_irq; bool found; KASSERT(powerof2(count), ("%s: bad count", __func__)); KASSERT(powerof2(maxcount), ("%s: bad maxcount", __func__)); psc = device_get_softc(device_get_parent(dev)); sc = device_get_softc(dev); mtx_lock(&sc->sc_mutex); found = false; for (irq = sc->sc_spi_start; irq < sc->sc_spi_end && !found; irq++) { /* Start on an aligned interrupt */ if ((irq & (maxcount - 1)) != 0) continue; /* Assume we found a valid range until shown otherwise */ found = true; /* Check this range is valid */ for (end_irq = irq; end_irq != irq + count - 1; end_irq++) { /* No free interrupts */ if (end_irq == sc->sc_spi_end) { found = false; break; } KASSERT((psc->gic_irqs[irq].gi_flags & GI_FLAG_MSI)!= 0, ("%s: Non-MSI interrupt found", __func__)); /* This is already used */ if ((psc->gic_irqs[irq].gi_flags & GI_FLAG_MSI_USED) == GI_FLAG_MSI_USED) { found = false; break; } } } /* Not enough interrupts were found */ if (!found || irq == sc->sc_spi_end) { mtx_unlock(&sc->sc_mutex); return (ENXIO); } for (i = 0; i < count; i++) { /* Mark the interrupt as used */ psc->gic_irqs[irq + i].gi_flags |= GI_FLAG_MSI_USED; } mtx_unlock(&sc->sc_mutex); for (i = 0; i < count; i++) srcs[i] = (struct intr_irqsrc *)&psc->gic_irqs[irq + i]; *pic = device_get_parent(dev); return (0); } static int arm_gicv2m_release_msi(device_t dev, device_t child, int count, struct intr_irqsrc **isrc) { struct arm_gicv2m_softc *sc; struct gic_irqsrc *gi; int i; sc = device_get_softc(dev); mtx_lock(&sc->sc_mutex); for (i = 0; i < count; i++) { gi = (struct gic_irqsrc *)isrc; KASSERT((gi->gi_flags & GI_FLAG_MSI_USED) == 
GI_FLAG_MSI_USED, ("%s: Trying to release an unused MSI-X interrupt", __func__)); gi->gi_flags &= ~GI_FLAG_MSI_USED; mtx_unlock(&sc->sc_mutex); } return (0); } static int arm_gicv2m_alloc_msix(device_t dev, device_t child, device_t *pic, struct intr_irqsrc **isrcp) { struct arm_gicv2m_softc *sc; struct arm_gic_softc *psc; int irq; psc = device_get_softc(device_get_parent(dev)); sc = device_get_softc(dev); mtx_lock(&sc->sc_mutex); /* Find an unused interrupt */ for (irq = sc->sc_spi_start; irq < sc->sc_spi_end; irq++) { KASSERT((psc->gic_irqs[irq].gi_flags & GI_FLAG_MSI) != 0, ("%s: Non-MSI interrupt found", __func__)); if ((psc->gic_irqs[irq].gi_flags & GI_FLAG_MSI_USED) == 0) break; } /* No free interrupt was found */ if (irq == sc->sc_spi_end) { mtx_unlock(&sc->sc_mutex); return (ENXIO); } /* Mark the interrupt as used */ psc->gic_irqs[irq].gi_flags |= GI_FLAG_MSI_USED; mtx_unlock(&sc->sc_mutex); *isrcp = (struct intr_irqsrc *)&psc->gic_irqs[irq]; *pic = device_get_parent(dev); return (0); } static int arm_gicv2m_release_msix(device_t dev, device_t child, struct intr_irqsrc *isrc) { struct arm_gicv2m_softc *sc; struct gic_irqsrc *gi; sc = device_get_softc(dev); gi = (struct gic_irqsrc *)isrc; KASSERT((gi->gi_flags & GI_FLAG_MSI_USED) == GI_FLAG_MSI_USED, ("%s: Trying to release an unused MSI-X interrupt", __func__)); mtx_lock(&sc->sc_mutex); gi->gi_flags &= ~GI_FLAG_MSI_USED; mtx_unlock(&sc->sc_mutex); return (0); } static int arm_gicv2m_map_msi(device_t dev, device_t child, struct intr_irqsrc *isrc, uint64_t *addr, uint32_t *data) { struct arm_gicv2m_softc *sc = device_get_softc(dev); struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; *addr = vtophys(rman_get_virtual(sc->sc_mem)) + GICv2M_MSI_SETSPI_NS; *data = gi->gi_irq; return (0); } static device_method_t arm_gicv2m_methods[] = { /* Device interface */ - DEVMETHOD(device_probe, arm_gicv2m_probe), DEVMETHOD(device_attach, arm_gicv2m_attach), /* MSI/MSI-X */ DEVMETHOD(msi_alloc_msi, arm_gicv2m_alloc_msi), DEVMETHOD(msi_release_msi, arm_gicv2m_release_msi), DEVMETHOD(msi_alloc_msix, arm_gicv2m_alloc_msix), DEVMETHOD(msi_release_msix, arm_gicv2m_release_msix), DEVMETHOD(msi_map_msi, arm_gicv2m_map_msi), /* End */ DEVMETHOD_END }; DEFINE_CLASS_0(gicv2m, arm_gicv2m_driver, arm_gicv2m_methods, sizeof(struct arm_gicv2m_softc)); - -static devclass_t arm_gicv2m_devclass; - -EARLY_DRIVER_MODULE(gicv2m, gic, arm_gicv2m_driver, - arm_gicv2m_devclass, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); #endif Index: user/alc/PQ_LAUNDRY/sys/arm/arm/gic.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm/arm/gic.h (nonexistent) +++ user/alc/PQ_LAUNDRY/sys/arm/arm/gic.h (revision 303642) @@ -0,0 +1,106 @@ +/*- + * Copyright (c) 2011,2016 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Andrew Turner under + * sponsorship from the FreeBSD Foundation. + * + * Developed by Damjan Marion + * + * Based on OMAP4 GIC code by Ben Gray + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
The name of the company nor the name of the author may be used to + * endorse or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _ARM_GIC_H_ +#define _ARM_GIC_H_ + +#define GIC_DEBUG_SPURIOUS + +#define GIC_FIRST_SGI 0 /* Irqs 0-15 are SGIs/IPIs. */ +#define GIC_LAST_SGI 15 +#define GIC_FIRST_PPI 16 /* Irqs 16-31 are private (per */ +#define GIC_LAST_PPI 31 /* core) peripheral interrupts. */ +#define GIC_FIRST_SPI 32 /* Irqs 32+ are shared peripherals. */ + +#ifdef INTRNG +struct arm_gic_range { + uint64_t bus; + uint64_t host; + uint64_t size; +}; +#endif + +struct arm_gic_softc { + device_t gic_dev; +#ifdef INTRNG + void * gic_intrhand; + struct gic_irqsrc * gic_irqs; +#endif + struct resource * gic_res[3]; + bus_space_tag_t gic_c_bst; + bus_space_tag_t gic_d_bst; + bus_space_handle_t gic_c_bsh; + bus_space_handle_t gic_d_bsh; + uint8_t ver; + struct mtx mutex; + uint32_t nirqs; + uint32_t typer; +#ifdef GIC_DEBUG_SPURIOUS + uint32_t last_irq[MAXCPU]; +#endif + +#ifdef INTRNG + /* FDT child data */ + pcell_t addr_cells; + pcell_t size_cells; + int nranges; + struct arm_gic_range * ranges; +#endif +}; + +DECLARE_CLASS(arm_gic_driver); + +#ifdef INTRNG +struct arm_gicv2m_softc { + struct resource *sc_mem; + struct mtx sc_mutex; + uintptr_t sc_xref; + u_int sc_spi_start; + u_int sc_spi_end; + u_int sc_spi_count; +}; + +DECLARE_CLASS(arm_gicv2m_driver); +#endif + +int arm_gic_attach(device_t); +int arm_gic_detach(device_t); +int arm_gicv2m_attach(device_t); +int arm_gic_intr(void *); + +#endif /* _ARM_GIC_H_ */ Property changes on: user/alc/PQ_LAUNDRY/sys/arm/arm/gic.h ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: user/alc/PQ_LAUNDRY/sys/arm/arm/gic_fdt.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm/arm/gic_fdt.c (nonexistent) +++ user/alc/PQ_LAUNDRY/sys/arm/arm/gic_fdt.c (revision 303642) @@ -0,0 +1,369 @@ +/*- + * Copyright (c) 2011,2016 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Andrew Turner under + * sponsorship from the FreeBSD Foundation. + * + * Developed by Damjan Marion + * + * Based on OMAP4 GIC code by Ben Gray + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company nor the name of the author may be used to + * endorse or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "opt_platform.h" + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + +#ifdef INTRNG +struct arm_gic_devinfo { + struct ofw_bus_devinfo obdinfo; + struct resource_list rl; +}; +#endif + +static device_probe_t gic_fdt_probe; +static device_attach_t gic_fdt_attach; +static ofw_bus_get_devinfo_t gic_ofw_get_devinfo; +#ifdef INTRNG +static bus_get_resource_list_t gic_fdt_get_resource_list; +static bool arm_gic_add_children(device_t); +#endif + +static struct ofw_compat_data compat_data[] = { + {"arm,gic", true}, /* Non-standard, used in FreeBSD dts. 
*/ + {"arm,gic-400", true}, + {"arm,cortex-a15-gic", true}, + {"arm,cortex-a9-gic", true}, + {"arm,cortex-a7-gic", true}, + {"arm,arm11mp-gic", true}, + {"brcm,brahma-b15-gic", true}, + {"qcom,msm-qgic2", true}, + {NULL, false} +}; + +static device_method_t gic_fdt_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, gic_fdt_probe), + DEVMETHOD(device_attach, gic_fdt_attach), + +#ifdef INTRNG + /* Bus interface */ + DEVMETHOD(bus_get_resource_list,gic_fdt_get_resource_list), + + /* ofw_bus interface */ + DEVMETHOD(ofw_bus_get_devinfo, gic_ofw_get_devinfo), + DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat), + DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model), + DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name), + DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node), + DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type), +#endif + + DEVMETHOD_END, +}; + +DEFINE_CLASS_1(gic, gic_fdt_driver, gic_fdt_methods, + sizeof(struct arm_gic_softc), arm_gic_driver); + +static devclass_t gic_fdt_devclass; + +EARLY_DRIVER_MODULE(gic, simplebus, gic_fdt_driver, gic_fdt_devclass, 0, 0, + BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); +EARLY_DRIVER_MODULE(gic, ofwbus, gic_fdt_driver, gic_fdt_devclass, 0, 0, + BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); + +static int +gic_fdt_probe(device_t dev) +{ + + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + + if (!ofw_bus_search_compatible(dev, compat_data)->ocd_data) + return (ENXIO); + device_set_desc(dev, "ARM Generic Interrupt Controller"); + return (BUS_PROBE_DEFAULT); +} + +static int +gic_fdt_attach(device_t dev) +{ +#ifdef INTRNG + struct arm_gic_softc *sc = device_get_softc(dev); + phandle_t pxref; + intptr_t xref; +#endif + int err; + + err = arm_gic_attach(dev); + if (err != 0) + return (err); + +#ifdef INTRNG + xref = OF_xref_from_node(ofw_bus_get_node(dev)); + + /* + * Now, when everything is initialized, it's right time to + * register interrupt controller to interrupt framefork. 
+ */ + if (intr_pic_register(dev, xref) == NULL) { + device_printf(dev, "could not register PIC\n"); + goto cleanup; + } + + /* + * Controller is root if: + * - doesn't have interrupt parent + * - his interrupt parent is this controller + */ + pxref = ofw_bus_find_iparent(ofw_bus_get_node(dev)); + if (pxref == 0 || xref == pxref) { + if (intr_pic_claim_root(dev, xref, arm_gic_intr, sc, + GIC_LAST_SGI - GIC_FIRST_SGI + 1) != 0) { + device_printf(dev, "could not set PIC as a root\n"); + intr_pic_deregister(dev, xref); + goto cleanup; + } + } else { + if (sc->gic_res[2] == NULL) { + device_printf(dev, + "not root PIC must have defined interrupt\n"); + intr_pic_deregister(dev, xref); + goto cleanup; + } + if (bus_setup_intr(dev, sc->gic_res[2], INTR_TYPE_CLK, + arm_gic_intr, NULL, sc, &sc->gic_intrhand)) { + device_printf(dev, "could not setup irq handler\n"); + intr_pic_deregister(dev, xref); + goto cleanup; + } + } + + OF_device_register_xref(xref, dev); + + /* If we have children probe and attach them */ + if (arm_gic_add_children(dev)) { + bus_generic_probe(dev); + return (bus_generic_attach(dev)); + } +#endif + + return (0); + +#ifdef INTRNG +cleanup: + arm_gic_detach(dev); + return(ENXIO); +#endif +} + +#ifdef INTRNG +static struct resource_list * +gic_fdt_get_resource_list(device_t bus, device_t child) +{ + struct arm_gic_devinfo *di; + + di = device_get_ivars(child); + KASSERT(di != NULL, ("gic_fdt_get_resource_list: No devinfo")); + + return (&di->rl); +} + +static int +arm_gic_fill_ranges(phandle_t node, struct arm_gic_softc *sc) +{ + pcell_t host_cells; + cell_t *base_ranges; + ssize_t nbase_ranges; + int i, j, k; + + host_cells = 1; + OF_getencprop(OF_parent(node), "#address-cells", &host_cells, + sizeof(host_cells)); + sc->addr_cells = 2; + OF_getencprop(node, "#address-cells", &sc->addr_cells, + sizeof(sc->addr_cells)); + sc->size_cells = 2; + OF_getencprop(node, "#size-cells", &sc->size_cells, + sizeof(sc->size_cells)); + + nbase_ranges = OF_getproplen(node, "ranges"); + if (nbase_ranges < 0) + return (-1); + sc->nranges = nbase_ranges / sizeof(cell_t) / + (sc->addr_cells + host_cells + sc->size_cells); + if (sc->nranges == 0) + return (0); + + sc->ranges = malloc(sc->nranges * sizeof(sc->ranges[0]), + M_DEVBUF, M_WAITOK); + base_ranges = malloc(nbase_ranges, M_DEVBUF, M_WAITOK); + OF_getencprop(node, "ranges", base_ranges, nbase_ranges); + + for (i = 0, j = 0; i < sc->nranges; i++) { + sc->ranges[i].bus = 0; + for (k = 0; k < sc->addr_cells; k++) { + sc->ranges[i].bus <<= 32; + sc->ranges[i].bus |= base_ranges[j++]; + } + sc->ranges[i].host = 0; + for (k = 0; k < host_cells; k++) { + sc->ranges[i].host <<= 32; + sc->ranges[i].host |= base_ranges[j++]; + } + sc->ranges[i].size = 0; + for (k = 0; k < sc->size_cells; k++) { + sc->ranges[i].size <<= 32; + sc->ranges[i].size |= base_ranges[j++]; + } + } + + free(base_ranges, M_DEVBUF); + return (sc->nranges); +} + +static bool +arm_gic_add_children(device_t dev) +{ + struct arm_gic_softc *sc; + struct arm_gic_devinfo *dinfo; + phandle_t child, node; + device_t cdev; + + sc = device_get_softc(dev); + node = ofw_bus_get_node(dev); + + /* If we have no children don't probe for them */ + child = OF_child(node); + if (child == 0) + return (false); + + if (arm_gic_fill_ranges(node, sc) < 0) { + device_printf(dev, "Have a child, but no ranges\n"); + return (false); + } + + for (; child != 0; child = OF_peer(child)) { + dinfo = malloc(sizeof(*dinfo), M_DEVBUF, M_WAITOK | M_ZERO); + + if (ofw_bus_gen_setup_devinfo(&dinfo->obdinfo, child) != 
0) { + free(dinfo, M_DEVBUF); + continue; + } + + resource_list_init(&dinfo->rl); + ofw_bus_reg_to_rl(dev, child, sc->addr_cells, + sc->size_cells, &dinfo->rl); + + cdev = device_add_child(dev, NULL, -1); + if (cdev == NULL) { + device_printf(dev, "<%s>: device_add_child failed\n", + dinfo->obdinfo.obd_name); + resource_list_free(&dinfo->rl); + ofw_bus_gen_destroy_devinfo(&dinfo->obdinfo); + free(dinfo, M_DEVBUF); + continue; + } + device_set_ivars(cdev, dinfo); + } + + return (true); +} + +static const struct ofw_bus_devinfo * +gic_ofw_get_devinfo(device_t bus __unused, device_t child) +{ + struct arm_gic_devinfo *di; + + di = device_get_ivars(child); + + return (&di->obdinfo); +} + +static struct ofw_compat_data gicv2m_compat_data[] = { + {"arm,gic-v2m-frame", true}, + {NULL, false} +}; + +static int +arm_gicv2m_fdt_probe(device_t dev) +{ + + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + + if (!ofw_bus_search_compatible(dev, gicv2m_compat_data)->ocd_data) + return (ENXIO); + + device_set_desc(dev, "ARM Generic Interrupt Controller MSI/MSIX"); + return (BUS_PROBE_DEFAULT); +} + +static int +arm_gicv2m_fdt_attach(device_t dev) +{ + struct arm_gicv2m_softc *sc; + + sc = device_get_softc(dev); + sc->sc_xref = OF_xref_from_node(ofw_bus_get_node(dev)); + + return (arm_gicv2m_attach(dev)); +} + +static device_method_t arm_gicv2m_fdt_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, arm_gicv2m_fdt_probe), + DEVMETHOD(device_attach, arm_gicv2m_fdt_attach), + + /* End */ + DEVMETHOD_END +}; + +DEFINE_CLASS_1(gicv2m, arm_gicv2m_fdt_driver, arm_gicv2m_fdt_methods, + sizeof(struct arm_gicv2m_softc), arm_gicv2m_driver); + +static devclass_t arm_gicv2m_fdt_devclass; + +EARLY_DRIVER_MODULE(gicv2m, gic, arm_gicv2m_fdt_driver, + arm_gicv2m_fdt_devclass, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); +#endif Property changes on: user/alc/PQ_LAUNDRY/sys/arm/arm/gic_fdt.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: user/alc/PQ_LAUNDRY/sys/arm64/arm64/machdep.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm64/arm64/machdep.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/arm64/arm64/machdep.c (revision 303642) @@ -1,1036 +1,1107 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
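The GICv2m support is now split between the bus-neutral back end earlier in this change (arm_gicv2m_attach() and the MSI methods) and this FDT front end, which only resolves the OFW xref, stores it in sc_xref and then calls the common attach. The sketch below is illustrative and not part of the diff: it shows how a frame's MSI_TYPER value and base address translate into the doorbell a device would be programmed with, mirroring the GICV2M_MSI_TYPER and GICv2M_MSI_SETSPI_NS definitions above.

    /*
     * Worked example: decode an MSI_TYPER value and compute the doorbell
     * address/data pair.  A TYPER of 0x00400020 describes SPIs 64..95.
     */
    static void
    gicv2m_doorbell_example(uint32_t typer, uint64_t frame_pa)
    {
    	u_int spi_base, spi_count;
    	uint64_t doorbell;

    	spi_base = (typer >> 16) & 0x3ff;	/* MSI_TYPER_SPI_BASE() */
    	spi_count = typer & 0x3ff;		/* MSI_TYPER_SPI_COUNT() */
    	doorbell = frame_pa + 0x040;		/* GICv2M_MSI_SETSPI_NS */

    	printf("SPIs %u-%u, MSI address %#jx, MSI data = SPI number\n",
    	    spi_base, spi_base + spi_count - 1, (uintmax_t)doorbell);
    }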
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ +#include "opt_acpi.h" #include "opt_platform.h" #include "opt_ddb.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif +#ifdef DEV_ACPI +#include +#include +#endif + #ifdef FDT #include #include #endif + +enum arm64_bus arm64_bus_method = ARM64_BUS_NONE; + struct pcpu __pcpu[MAXCPU]; static struct trapframe proc0_tf; vm_paddr_t phys_avail[PHYS_AVAIL_SIZE + 2]; vm_paddr_t dump_avail[PHYS_AVAIL_SIZE + 2]; int early_boot = 1; int cold = 1; long realmem = 0; long Maxmem = 0; #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t physmap[PHYSMAP_SIZE]; u_int physmap_idx; struct kva_md_info kmi; int64_t dcache_line_size; /* The minimum D cache line size */ int64_t icache_line_size; /* The minimum I cache line size */ int64_t idcache_line_size; /* The minimum cache line size */ int64_t dczva_line_size; /* The size of cache line the dc zva zeroes */ /* pagezero_* implementations are provided in support.S */ void pagezero_simple(void *); void pagezero_cache(void *); /* pagezero_simple is default pagezero */ void (*pagezero)(void *p) = pagezero_simple; static void cpu_startup(void *dummy) { identify_cpu(); vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); int cpu_idle_wakeup(int cpu) { return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; regs->sp = frame->tf_sp; regs->lr = frame->tf_lr; regs->elr = frame->tf_elr; regs->spsr = frame->tf_spsr; memcpy(regs->x, frame->tf_x, sizeof(regs->x)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; frame->tf_sp = regs->sp; frame->tf_lr = regs->lr; frame->tf_elr = regs->elr; frame->tf_spsr = regs->spsr; memcpy(frame->tf_x, regs->x, sizeof(frame->tf_x)); return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { #ifdef VFP struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. 
*/ vfp_save_state(td, pcb); memcpy(regs->fp_q, pcb->pcb_vfp, sizeof(regs->fp_q)); regs->fp_cr = pcb->pcb_fpcr; regs->fp_sr = pcb->pcb_fpsr; } else #endif memset(regs->fp_q, 0, sizeof(regs->fp_q)); return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { #ifdef VFP struct pcb *pcb; pcb = td->td_pcb; memcpy(pcb->pcb_vfp, regs->fp_q, sizeof(regs->fp_q)); pcb->pcb_fpcr = regs->fp_cr; pcb->pcb_fpsr = regs->fp_sr; #endif return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { panic("ARM64TODO: fill_dbregs"); } int set_dbregs(struct thread *td, struct dbreg *regs) { panic("ARM64TODO: set_dbregs"); } int ptrace_set_pc(struct thread *td, u_long addr) { panic("ARM64TODO: ptrace_set_pc"); return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_spsr |= PSR_SS; td->td_pcb->pcb_flags |= PCB_SINGLE_STEP; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_spsr &= ~PSR_SS; td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP; return (0); } void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf = td->td_frame; memset(tf, 0, sizeof(struct trapframe)); /* * We need to set x0 for init as it doesn't call * cpu_set_syscall_retval to copy the value. We also * need to set td_retval for the cases where we do. */ tf->tf_x[0] = td->td_retval[0] = stack; tf->tf_sp = STACKALIGN(stack); tf->tf_lr = imgp->entry_addr; tf->tf_elr = imgp->entry_addr; } /* Sanity check these are the same size, they will be memcpy'd to and fro */ CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == sizeof((struct gpregs *)0)->gp_x); CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == sizeof((struct reg *)0)->x); int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; if (clear_ret & GET_MC_CLEAR_RET) { mcp->mc_gpregs.gp_x[0] = 0; mcp->mc_gpregs.gp_spsr = tf->tf_spsr & ~PSR_C; } else { mcp->mc_gpregs.gp_x[0] = tf->tf_x[0]; mcp->mc_gpregs.gp_spsr = tf->tf_spsr; } memcpy(&mcp->mc_gpregs.gp_x[1], &tf->tf_x[1], sizeof(mcp->mc_gpregs.gp_x[1]) * (nitems(mcp->mc_gpregs.gp_x) - 1)); mcp->mc_gpregs.gp_sp = tf->tf_sp; mcp->mc_gpregs.gp_lr = tf->tf_lr; mcp->mc_gpregs.gp_elr = tf->tf_elr; return (0); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf = td->td_frame; memcpy(tf->tf_x, mcp->mc_gpregs.gp_x, sizeof(tf->tf_x)); tf->tf_sp = mcp->mc_gpregs.gp_sp; tf->tf_lr = mcp->mc_gpregs.gp_lr; tf->tf_elr = mcp->mc_gpregs.gp_elr; tf->tf_spsr = mcp->mc_gpregs.gp_spsr; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef VFP struct pcb *curpcb; critical_enter(); curpcb = curthread->td_pcb; if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. */ vfp_save_state(td, curpcb); memcpy(mcp->mc_fpregs.fp_q, curpcb->pcb_vfp, sizeof(mcp->mc_fpregs)); mcp->mc_fpregs.fp_cr = curpcb->pcb_fpcr; mcp->mc_fpregs.fp_sr = curpcb->pcb_fpsr; mcp->mc_fpregs.fp_flags = curpcb->pcb_fpflags; mcp->mc_flags |= _MC_FP_VALID; } critical_exit(); #endif } static void set_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef VFP struct pcb *curpcb; critical_enter(); if ((mcp->mc_flags & _MC_FP_VALID) != 0) { curpcb = curthread->td_pcb; /* * Discard any vfp state for the current thread, we * are about to override it. 
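fill_fpregs(), get_fpcontext() and set_fpcontext() all rely on the same lazy-VFP convention: the in-register FP state is only spilled to the PCB when PCB_FP_STARTED says the thread has actually used the FPU. A condensed sketch of that pattern, assuming the usual machine/vfp.h declarations and not part of the diff itself:

    /*
     * Illustrative only: copy a thread's FP state out, flushing the live
     * registers first when the thread has executed FP instructions.
     */
    static void
    copy_fpstate_sketch(struct thread *td, struct fpreg *regs)
    {
    	struct pcb *pcb = td->td_pcb;

    	if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) {
    		vfp_save_state(td, pcb);	/* spill live VFP registers */
    		memcpy(regs->fp_q, pcb->pcb_vfp, sizeof(regs->fp_q));
    		regs->fp_cr = pcb->pcb_fpcr;
    		regs->fp_sr = pcb->pcb_fpsr;
    	} else {
    		/* Never used the FPU: report clean state. */
    		memset(regs, 0, sizeof(*regs));
    	}
    }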
*/ vfp_discard(td); memcpy(curpcb->pcb_vfp, mcp->mc_fpregs.fp_q, sizeof(mcp->mc_fpregs)); curpcb->pcb_fpcr = mcp->mc_fpregs.fp_cr; curpcb->pcb_fpsr = mcp->mc_fpregs.fp_sr; curpcb->pcb_fpflags = mcp->mc_fpregs.fp_flags; } critical_exit(); #endif } void cpu_idle(int busy) { spinlock_enter(); if (!busy) cpu_idleclock(); if (!sched_runnable()) __asm __volatile( "dsb sy \n" "wfi \n"); if (!busy) cpu_activeclock(); spinlock_exit(); } void cpu_halt(void) { /* We should have shutdown by now, if not enter a low power sleep */ intr_disable(); while (1) { __asm __volatile("wfi"); } } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* ARM64TODO TBD */ } /* Get current clock frequency for the given CPU ID. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { panic("ARM64TODO: cpu_est_clockrate"); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } void spinlock_enter(void) { struct thread *td; register_t daif; td = curthread; if (td->td_md.md_spinlock_count == 0) { daif = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_daif = daif; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t daif; td = curthread; critical_exit(); daif = td->td_md.md_saved_daif; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(daif); } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { ucontext_t uc; uint32_t spsr; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); spsr = uc.uc_mcontext.mc_gpregs.gp_spsr; if ((spsr & PSR_M_MASK) != PSR_M_EL0t || (spsr & (PSR_F | PSR_I | PSR_A | PSR_D)) != 0) return (EINVAL); set_mcontext(td, &uc.uc_mcontext); set_fpcontext(td, &uc.uc_mcontext); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { int i; for (i = 0; i < PCB_LR; i++) pcb->pcb_x[i] = tf->tf_x[i]; pcb->pcb_x[PCB_LR] = tf->tf_lr; pcb->pcb_pc = tf->tf_elr; pcb->pcb_sp = tf->tf_sp; } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td; struct proc *p; struct trapframe *tf; struct sigframe *fp, frame; struct sigacts *psp; struct sysentvec *sysent; int code, onstack, sig; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. 
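spinlock_enter() and spinlock_exit() above implement the usual nested-spinlock discipline: only the outermost enter masks interrupts (saving DAIF), and only the matching outermost exit restores them. Reduced to its essentials (the real code keeps the count in the per-thread MD area and also pairs with critical_enter()/critical_exit()); the struct and function names here are illustrative:

    struct spin_nest {
    	int		count;		/* nesting depth */
    	register_t	saved_daif;	/* DAIF at the outermost enter */
    };

    static void
    spin_enter_sketch(struct spin_nest *sn)
    {
    	if (sn->count++ == 0)
    		sn->saved_daif = intr_disable();
    }

    static void
    spin_exit_sketch(struct spin_nest *sn)
    {
    	if (--sn->count == 0)
    		intr_restore(sn->saved_daif);
    }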
*/ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else { fp = (struct sigframe *)td->td_frame->tf_sp; } /* Make room, keeping the stack aligned */ fp--; fp = (struct sigframe *)STACKALIGN(fp); /* Fill in the frame to copy out */ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); get_fpcontext(td, &frame.sf_uc.uc_mcontext); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack = td->td_sigstk; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } tf->tf_x[0]= sig; tf->tf_x[1] = (register_t)&fp->sf_si; tf->tf_x[2] = (register_t)&fp->sf_uc; tf->tf_elr = (register_t)catcher; tf->tf_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_lr = (register_t)sysent->sv_sigcode_base; else tf->tf_lr = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_elr, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } static void init_proc0(vm_offset_t kstack) { struct pcpu *pcpup = &__pcpu[0]; proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_pcb = (struct pcb *)(thread0.td_kstack) - 1; thread0.td_pcb->pcb_fpflags = 0; thread0.td_pcb->pcb_vfpcpu = UINT_MAX; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } typedef struct { uint32_t type; uint64_t phys_start; uint64_t virt_start; uint64_t num_pages; uint64_t attr; } EFI_MEMORY_DESCRIPTOR; static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, u_int *physmap_idxp) { u_int i, insert_idx, _physmap_idx; _physmap_idx = *physmap_idxp; if (length == 0) return (1); /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = _physmap_idx; for (i = 0; i <= _physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= _physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } _physmap_idx += 2; *physmap_idxp = _physmap_idx; if (_physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = _physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. 
*/ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } #ifdef FDT static void add_fdt_mem_regions(struct mem_region *mr, int mrcnt, vm_paddr_t *physmap, u_int *physmap_idxp) { for (int i = 0; i < mrcnt; i++) { if (!add_physmap_entry(mr[i].mr_start, mr[i].mr_size, physmap, physmap_idxp)) break; } } #endif #define efi_next_descriptor(ptr, size) \ ((struct efi_md *)(((uint8_t *) ptr) + size)) static void add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, u_int *physmap_idxp) { struct efi_md *map, *p; const char *type; size_t efisz; int ndesc, i; static const char *types[] = { "Reserved", "LoaderCode", "LoaderData", "BootServicesCode", "BootServicesData", "RuntimeServicesCode", "RuntimeServicesData", "ConventionalMemory", "UnusableMemory", "ACPIReclaimMemory", "ACPIMemoryNVS", "MemoryMappedIO", "MemoryMappedIOPortSpace", "PalCode", "PersistentMemory" }; /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. */ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return; ndesc = efihdr->memory_size / efihdr->descriptor_size; if (boothowto & RB_VERBOSE) printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { if (boothowto & RB_VERBOSE) { if (p->md_type < nitems(types)) type = types[p->md_type]; else type = ""; printf("%23s %012lx %12p %08lx ", type, p->md_phys, p->md_virt, p->md_pages); if (p->md_attr & EFI_MD_ATTR_UC) printf("UC "); if (p->md_attr & EFI_MD_ATTR_WC) printf("WC "); if (p->md_attr & EFI_MD_ATTR_WT) printf("WT "); if (p->md_attr & EFI_MD_ATTR_WB) printf("WB "); if (p->md_attr & EFI_MD_ATTR_UCE) printf("UCE "); if (p->md_attr & EFI_MD_ATTR_WP) printf("WP "); if (p->md_attr & EFI_MD_ATTR_RP) printf("RP "); if (p->md_attr & EFI_MD_ATTR_XP) printf("XP "); if (p->md_attr & EFI_MD_ATTR_NV) printf("NV "); if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE) printf("MORE_RELIABLE "); if (p->md_attr & EFI_MD_ATTR_RO) printf("RO "); if (p->md_attr & EFI_MD_ATTR_RT) printf("RUNTIME"); printf("\n"); } switch (p->md_type) { case EFI_MD_TYPE_CODE: case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. 
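add_physmap_entry() maintains physmap[] as a sorted list of (start, end) pairs and folds abutting regions into existing entries instead of growing the array. A usage sketch with made-up addresses (the demo array and values are illustrative; the second call extends the first entry rather than creating a new one):

    static vm_paddr_t physmap_demo[PHYSMAP_SIZE];
    static u_int physmap_demo_idx;

    static void
    physmap_demo_fill(void)
    {
    	/* 0x80000000-0x80100000 becomes physmap_demo[0]/[1]. */
    	add_physmap_entry(0x80000000, 0x100000, physmap_demo,
    	    &physmap_demo_idx);
    	/* Abuts the previous entry, so its end is simply extended. */
    	add_physmap_entry(0x80100000, 0x100000, physmap_demo,
    	    &physmap_demo_idx);
    }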
*/ break; default: continue; } if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE), physmap, physmap_idxp)) break; } } #ifdef FDT static void try_load_dtb(caddr_t kmdp) { vm_offset_t dtbp; dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); if (dtbp == (vm_offset_t)NULL) { printf("ERROR loading DTB\n"); return; } if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); } #endif +static bool +bus_probe(void) +{ + bool has_acpi, has_fdt; + char *order, *env; + + has_acpi = has_fdt = false; + +#ifdef FDT + has_fdt = (OF_peer(0) != 0); +#endif +#ifdef DEV_ACPI + has_acpi = (acpi_find_table(ACPI_SIG_SPCR) != 0); +#endif + + env = kern_getenv("kern.cfg.order"); + if (env != NULL) { + order = env; + while (order != NULL) { + if (has_acpi && + strncmp(order, "acpi", 4) == 0 && + (order[4] == ',' || order[4] == '\0')) { + arm64_bus_method = ARM64_BUS_ACPI; + break; + } + if (has_fdt && + strncmp(order, "fdt", 3) == 0 && + (order[3] == ',' || order[3] == '\0')) { + arm64_bus_method = ARM64_BUS_FDT; + break; + } + order = strchr(order, ','); + } + freeenv(env); + + /* If we set the bus method it is valid */ + if (arm64_bus_method != ARM64_BUS_NONE) + return (true); + } + /* If no order or an invalid order was set use the default */ + if (arm64_bus_method == ARM64_BUS_NONE) { + if (has_fdt) + arm64_bus_method = ARM64_BUS_FDT; + else if (has_acpi) + arm64_bus_method = ARM64_BUS_ACPI; + } + + /* + * If no option was set the default is valid, otherwise we are + * setting one to get cninit() working, then calling panic to tell + * the user about the invalid bus setup. + */ + return (env == NULL); +} + static void cache_setup(void) { int dcache_line_shift, icache_line_shift, dczva_line_shift; uint32_t ctr_el0; uint32_t dczid_el0; ctr_el0 = READ_SPECIALREG(ctr_el0); /* Read the log2 words in each D cache line */ dcache_line_shift = CTR_DLINE_SIZE(ctr_el0); /* Get the D cache line size */ dcache_line_size = sizeof(int) << dcache_line_shift; /* And the same for the I cache */ icache_line_shift = CTR_ILINE_SIZE(ctr_el0); icache_line_size = sizeof(int) << icache_line_shift; idcache_line_size = MIN(dcache_line_size, icache_line_size); dczid_el0 = READ_SPECIALREG(dczid_el0); /* Check if dc zva is not prohibited */ if (dczid_el0 & DCZID_DZP) dczva_line_size = 0; else { /* Same as with above calculations */ dczva_line_shift = DCZID_BS_SIZE(dczid_el0); dczva_line_size = sizeof(int) << dczva_line_shift; /* Change pagezero function */ pagezero = pagezero_cache; } } void initarm(struct arm64_bootparams *abp) { struct efi_map_header *efihdr; struct pcpu *pcpup; #ifdef FDT struct mem_region mem_regions[FDT_MEM_REGIONS]; int mem_regions_sz; #endif vm_offset_t lastaddr; caddr_t kmdp; vm_paddr_t mem_len; + bool valid; int i; /* Set the module data location */ preload_metadata = (caddr_t)(uintptr_t)(abp->modulep); /* Find the kernel address */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); init_static_kenv(MD_FETCH(kmdp, MODINFOMD_ENVP, char *), 0); #ifdef FDT try_load_dtb(kmdp); #endif /* Find the address to start allocating from */ lastaddr = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); /* Load the physical memory ranges */ physmap_idx = 0; efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); if (efihdr != NULL) add_efi_map_entries(efihdr, physmap, &physmap_idx); 
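bus_probe() above decides between FDT and ACPI enumeration: it honours a kern.cfg.order preference when one is set and the corresponding support is present, otherwise it prefers FDT and falls back to ACPI. A standalone sketch of the list walk follows; pick_bus_method() and its parameters are illustrative, and for clarity the sketch explicitly steps past each comma separator:

    #include <stdbool.h>
    #include <string.h>

    /* Return the first supported entry of a list such as "acpi,fdt". */
    static const char *
    pick_bus_method(const char *order, bool has_acpi, bool has_fdt)
    {
    	while (order != NULL && *order != '\0') {
    		if (has_acpi && strncmp(order, "acpi", 4) == 0 &&
    		    (order[4] == ',' || order[4] == '\0'))
    			return ("acpi");
    		if (has_fdt && strncmp(order, "fdt", 3) == 0 &&
    		    (order[3] == ',' || order[3] == '\0'))
    			return ("fdt");
    		order = strchr(order, ',');
    		if (order != NULL)
    			order++;	/* step past the separator */
    	}
    	return (NULL);			/* caller applies the default */
    }

In practice the preference would be supplied as a loader tunable, e.g. kern.cfg.order="acpi,fdt" in loader.conf.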
#ifdef FDT else { /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, NULL) != 0) panic("Cannot get physical memory regions"); add_fdt_mem_regions(mem_regions, mem_regions_sz, physmap, &physmap_idx); } #endif /* Print the memory map */ mem_len = 0; for (i = 0; i < physmap_idx; i += 2) { dump_avail[i] = physmap[i]; dump_avail[i + 1] = physmap[i + 1]; mem_len += physmap[i + 1] - physmap[i]; } dump_avail[i] = 0; dump_avail[i + 1] = 0; /* Set the pcpu data, this is needed by pmap_bootstrap */ pcpup = &__pcpu[0]; pcpu_init(pcpup, 0, sizeof(struct pcpu)); /* * Set the pcpu pointer with a backup in tpidr_el1 to be * loaded when entering the kernel from userland. */ __asm __volatile( "mov x18, %0 \n" "msr tpidr_el1, %0" :: "r"(pcpup)); PCPU_SET(curthread, &thread0); /* Do basic tuning, hz etc */ init_param1(); cache_setup(); /* Bootstrap enough of pmap to enter the kernel proper */ pmap_bootstrap(abp->kern_l0pt, abp->kern_l1pt, KERNBASE - abp->kern_delta, lastaddr - KERNBASE); devmap_bootstrap(0, NULL); + valid = bus_probe(); + cninit(); + + if (!valid) + panic("Invalid bus configuration: %s", + kern_getenv("kern.cfg.order")); init_proc0(abp->kern_stack); msgbufinit(msgbufp, msgbufsize); mutex_init(); init_param2(physmem); dbg_monitor_init(); kdb_init(); early_boot = 0; } uint32_t (*arm_cpu_fill_vdso_timehands)(struct vdso_timehands *, struct timecounter *); uint32_t cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc) { return (arm_cpu_fill_vdso_timehands != NULL ? arm_cpu_fill_vdso_timehands(vdso_th, tc) : 0); } #ifdef DDB #include DB_SHOW_COMMAND(specialregs, db_show_spregs) { #define PRINT_REG(reg) \ db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg)) PRINT_REG(actlr_el1); PRINT_REG(afsr0_el1); PRINT_REG(afsr1_el1); PRINT_REG(aidr_el1); PRINT_REG(amair_el1); PRINT_REG(ccsidr_el1); PRINT_REG(clidr_el1); PRINT_REG(contextidr_el1); PRINT_REG(cpacr_el1); PRINT_REG(csselr_el1); PRINT_REG(ctr_el0); PRINT_REG(currentel); PRINT_REG(daif); PRINT_REG(dczid_el0); PRINT_REG(elr_el1); PRINT_REG(esr_el1); PRINT_REG(far_el1); #if 0 /* ARM64TODO: Enable VFP before reading floating-point registers */ PRINT_REG(fpcr); PRINT_REG(fpsr); #endif PRINT_REG(id_aa64afr0_el1); PRINT_REG(id_aa64afr1_el1); PRINT_REG(id_aa64dfr0_el1); PRINT_REG(id_aa64dfr1_el1); PRINT_REG(id_aa64isar0_el1); PRINT_REG(id_aa64isar1_el1); PRINT_REG(id_aa64pfr0_el1); PRINT_REG(id_aa64pfr1_el1); PRINT_REG(id_afr0_el1); PRINT_REG(id_dfr0_el1); PRINT_REG(id_isar0_el1); PRINT_REG(id_isar1_el1); PRINT_REG(id_isar2_el1); PRINT_REG(id_isar3_el1); PRINT_REG(id_isar4_el1); PRINT_REG(id_isar5_el1); PRINT_REG(id_mmfr0_el1); PRINT_REG(id_mmfr1_el1); PRINT_REG(id_mmfr2_el1); PRINT_REG(id_mmfr3_el1); #if 0 /* Missing from llvm */ PRINT_REG(id_mmfr4_el1); #endif PRINT_REG(id_pfr0_el1); PRINT_REG(id_pfr1_el1); PRINT_REG(isr_el1); PRINT_REG(mair_el1); PRINT_REG(midr_el1); PRINT_REG(mpidr_el1); PRINT_REG(mvfr0_el1); PRINT_REG(mvfr1_el1); PRINT_REG(mvfr2_el1); PRINT_REG(revidr_el1); PRINT_REG(sctlr_el1); PRINT_REG(sp_el0); PRINT_REG(spsel); PRINT_REG(spsr_el1); PRINT_REG(tcr_el1); PRINT_REG(tpidr_el0); PRINT_REG(tpidr_el1); PRINT_REG(tpidrro_el0); PRINT_REG(ttbr0_el1); PRINT_REG(ttbr1_el1); PRINT_REG(vbar_el1); #undef PRINT_REG } DB_SHOW_COMMAND(vtop, db_show_vtop) { uint64_t phys; if (have_addr) { phys = arm64_address_translate_s1e1r(addr); db_printf("Physical address reg: 0x%016lx\n", phys); } else db_printf("show vtop \n"); } #endif Index: 
user/alc/PQ_LAUNDRY/sys/arm64/arm64/mp_machdep.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm64/arm64/mp_machdep.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/arm64/arm64/mp_machdep.c (revision 303642) @@ -1,707 +1,703 @@ /*- * Copyright (c) 2015-2016 The FreeBSD Foundation * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include "opt_kstack_pages.h" #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #ifdef VFP #include #endif #ifdef FDT #include #include #endif #include #include "pic_if.h" typedef void intr_ipi_send_t(void *, cpuset_t, u_int); typedef void intr_ipi_handler_t(void *); #define INTR_IPI_NAMELEN (MAXCOMLEN + 1) struct intr_ipi { intr_ipi_handler_t * ii_handler; void * ii_handler_arg; intr_ipi_send_t * ii_send; void * ii_send_arg; char ii_name[INTR_IPI_NAMELEN]; u_long * ii_count; }; static struct intr_ipi ipi_sources[INTR_IPI_COUNT]; static struct intr_ipi *intr_ipi_lookup(u_int); static void intr_pic_ipi_setup(u_int, const char *, intr_ipi_handler_t *, void *); boolean_t ofw_cpu_reg(phandle_t node, u_int, cell_t *); extern struct pcpu __pcpu[]; -static enum { - CPUS_UNKNOWN, -#ifdef FDT - CPUS_FDT, -#endif -} cpu_enum_method; - static device_identify_t arm64_cpu_identify; static device_probe_t arm64_cpu_probe; static device_attach_t arm64_cpu_attach; static void ipi_ast(void *); static void ipi_hardclock(void *); static void ipi_preempt(void *); static void ipi_rendezvous(void *); static void ipi_stop(void *); static int ipi_handler(void *arg); struct mtx ap_boot_mtx; struct pcb stoppcbs[MAXCPU]; static device_t cpu_list[MAXCPU]; /* * Not all systems boot from the first CPU in the device tree. To work around * this we need to find which CPU we have booted from so when we later * enable the secondary CPUs we skip this one. 
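The ipi_sources[] table above is the IPI registration mechanism in miniature: intr_pic_ipi_setup() fills in a slot per IPI number and intr_ipi_send()/intr_ipi_dispatch() index back into it. Stripped of the interrupt-framework plumbing and per-CPU counters, the idea is just the following (slot names are illustrative):

    struct ipi_slot {
    	void	(*handler)(void *);	/* e.g. ipi_preempt */
    	void	*arg;
    };

    static struct ipi_slot ipi_slots[INTR_IPI_COUNT];

    static void
    ipi_slot_setup(u_int ipi, void (*hand)(void *), void *arg)
    {
    	ipi_slots[ipi].handler = hand;
    	ipi_slots[ipi].arg = arg;
    }

    static void
    ipi_slot_dispatch(u_int ipi)
    {
    	if (ipi_slots[ipi].handler != NULL)
    		ipi_slots[ipi].handler(ipi_slots[ipi].arg);
    }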
*/ static int cpu0 = -1; void mpentry(unsigned long cpuid); void init_secondary(uint64_t); uint8_t secondary_stacks[MAXCPU - 1][PAGE_SIZE * KSTACK_PAGES] __aligned(16); /* Set to 1 once we're ready to let the APs out of the pen. */ volatile int aps_ready = 0; /* Temporary variables for init_secondary() */ void *dpcpu[MAXCPU - 1]; static device_method_t arm64_cpu_methods[] = { /* Device interface */ DEVMETHOD(device_identify, arm64_cpu_identify), DEVMETHOD(device_probe, arm64_cpu_probe), DEVMETHOD(device_attach, arm64_cpu_attach), DEVMETHOD_END }; static devclass_t arm64_cpu_devclass; static driver_t arm64_cpu_driver = { "arm64_cpu", arm64_cpu_methods, 0 }; DRIVER_MODULE(arm64_cpu, cpu, arm64_cpu_driver, arm64_cpu_devclass, 0, 0); static void arm64_cpu_identify(driver_t *driver, device_t parent) { if (device_find_child(parent, "arm64_cpu", -1) != NULL) return; if (BUS_ADD_CHILD(parent, 0, "arm64_cpu", -1) == NULL) device_printf(parent, "add child failed\n"); } static int arm64_cpu_probe(device_t dev) { u_int cpuid; cpuid = device_get_unit(dev); if (cpuid >= MAXCPU || cpuid > mp_maxid) return (EINVAL); device_quiet(dev); return (0); } static int arm64_cpu_attach(device_t dev) { const uint32_t *reg; size_t reg_size; u_int cpuid; int i; cpuid = device_get_unit(dev); if (cpuid >= MAXCPU || cpuid > mp_maxid) return (EINVAL); KASSERT(cpu_list[cpuid] == NULL, ("Already have cpu %u", cpuid)); reg = cpu_get_cpuid(dev, ®_size); if (reg == NULL) return (EINVAL); if (bootverbose) { device_printf(dev, "register <"); for (i = 0; i < reg_size; i++) printf("%s%x", (i == 0) ? "" : " ", reg[i]); printf(">\n"); } /* Set the device to start it later */ cpu_list[cpuid] = dev; return (0); } static void release_aps(void *dummy __unused) { int cpu, i; intr_pic_ipi_setup(IPI_AST, "ast", ipi_ast, NULL); intr_pic_ipi_setup(IPI_PREEMPT, "preempt", ipi_preempt, NULL); intr_pic_ipi_setup(IPI_RENDEZVOUS, "rendezvous", ipi_rendezvous, NULL); intr_pic_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL); intr_pic_ipi_setup(IPI_STOP_HARD, "stop hard", ipi_stop, NULL); intr_pic_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL); atomic_store_rel_int(&aps_ready, 1); /* Wake up the other CPUs */ __asm __volatile("sev"); printf("Release APs\n"); for (i = 0; i < 2000; i++) { if (smp_started) { for (cpu = 0; cpu <= mp_maxid; cpu++) { if (CPU_ABSENT(cpu)) continue; print_cpu_features(cpu); } return; } DELAY(1000); } printf("APs not started\n"); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); void init_secondary(uint64_t cpu) { struct pcpu *pcpup; pcpup = &__pcpu[cpu]; /* * Set the pcpu pointer with a backup in tpidr_el1 to be * loaded when entering the kernel from userland. */ __asm __volatile( "mov x18, %0 \n" "msr tpidr_el1, %0" :: "r"(pcpup)); /* Spin until the BSP releases the APs */ while (!aps_ready) __asm __volatile("wfe"); /* Initialize curthread */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pcpup->pc_curthread = pcpup->pc_idlethread; pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb; /* * Identify current CPU. This is necessary to setup * affinity registers and to provide support for * runtime chip identification. */ identify_cpu(); intr_pic_init_secondary(); /* Start per-CPU event timers. 
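release_aps() and init_secondary() above pair up as a simple release/acquire handshake: the BSP installs the IPI handlers, publishes aps_ready with a release store and executes SEV, while every AP parks in WFE until it observes the flag. The shape of it, in isolation (the demo flag is illustrative; the real code also waits for smp_started and prints the CPU features):

    static volatile u_int aps_ready_demo;

    /* Boot CPU: let the application processors out of the pen. */
    static void
    bsp_release_sketch(void)
    {
    	atomic_store_rel_int(&aps_ready_demo, 1);
    	__asm __volatile("sev");
    }

    /* Application processor: wait until the BSP releases us. */
    static void
    ap_wait_sketch(void)
    {
    	while (!aps_ready_demo)
    		__asm __volatile("wfe");
    }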
*/ cpu_initclocks_ap(); #ifdef VFP vfp_init(); #endif dbg_monitor_init(); /* Enable interrupts */ intr_enable(); mtx_lock_spin(&ap_boot_mtx); atomic_add_rel_32(&smp_cpus, 1); if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); } mtx_unlock_spin(&ap_boot_mtx); /* Enter the scheduler */ sched_throw(NULL); panic("scheduler returned us to init_secondary"); /* NOTREACHED */ } /* * Send IPI thru interrupt controller. */ static void pic_ipi_send(void *arg, cpuset_t cpus, u_int ipi) { KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); PIC_IPI_SEND(intr_irq_root_dev, arg, cpus, ipi); } /* * Setup IPI handler on interrupt controller. * * Not SMP coherent. */ static void intr_pic_ipi_setup(u_int ipi, const char *name, intr_ipi_handler_t *hand, void *arg) { struct intr_irqsrc *isrc; struct intr_ipi *ii; int error; KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); KASSERT(hand != NULL, ("%s: ipi %u no handler", __func__, ipi)); error = PIC_IPI_SETUP(intr_irq_root_dev, ipi, &isrc); if (error != 0) return; isrc->isrc_handlers++; ii = intr_ipi_lookup(ipi); KASSERT(ii->ii_count == NULL, ("%s: ipi %u reused", __func__, ipi)); ii->ii_handler = hand; ii->ii_handler_arg = arg; ii->ii_send = pic_ipi_send; ii->ii_send_arg = isrc; strlcpy(ii->ii_name, name, INTR_IPI_NAMELEN); ii->ii_count = intr_ipi_setup_counters(name); } static void intr_ipi_send(cpuset_t cpus, u_int ipi) { struct intr_ipi *ii; ii = intr_ipi_lookup(ipi); if (ii->ii_count == NULL) panic("%s: not setup IPI %u", __func__, ipi); ii->ii_send(ii->ii_send_arg, cpus, ipi); } static void ipi_ast(void *dummy __unused) { CTR0(KTR_SMP, "IPI_AST"); } static void ipi_hardclock(void *dummy __unused) { CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__); hardclockintr(); } static void ipi_preempt(void *dummy __unused) { CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__); sched_preempt(curthread); } static void ipi_rendezvous(void *dummy __unused) { CTR0(KTR_SMP, "IPI_RENDEZVOUS"); smp_rendezvous_action(); } static void ipi_stop(void *dummy __unused) { u_int cpu; CTR0(KTR_SMP, "IPI_STOP"); cpu = PCPU_GET(cpuid); savectx(&stoppcbs[cpu]); /* Indicate we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) cpu_spinwait(); CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); CTR0(KTR_SMP, "IPI_STOP (restart)"); } struct cpu_group * cpu_topo(void) { return (smp_topo_none()); } /* Determine if we running MP machine */ int cpu_mp_probe(void) { /* ARM64TODO: Read the u bit of mpidr_el1 to determine this */ return (1); } #ifdef FDT static boolean_t cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) { uint64_t target_cpu; struct pcpu *pcpup; vm_paddr_t pa; u_int cpuid; int err; /* Check we are able to start this cpu */ if (id > mp_maxid) return (0); KASSERT(id < MAXCPU, ("Too many CPUs")); /* We are already running on cpu 0 */ if (id == cpu0) return (1); /* * Rotate the CPU IDs to put the boot CPU as CPU 0. We keep the other * CPUs ordered as the are likely grouped into clusters so it can be * useful to keep that property, e.g. for the GICv3 driver to send * an IPI to all CPUs in the cluster. 
*/ cpuid = id; if (cpuid < cpu0) cpuid += mp_maxid + 1; cpuid -= cpu0; pcpup = &__pcpu[cpuid]; pcpu_init(pcpup, cpuid, sizeof(struct pcpu)); dpcpu[cpuid - 1] = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); dpcpu_init(dpcpu[cpuid - 1], cpuid); target_cpu = reg[0]; if (addr_size == 2) { target_cpu <<= 32; target_cpu |= reg[1]; } printf("Starting CPU %u (%lx)\n", cpuid, target_cpu); pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry); err = psci_cpu_on(target_cpu, pa, cpuid); if (err != PSCI_RETVAL_SUCCESS) { /* Panic here if INVARIANTS are enabled */ KASSERT(0, ("Failed to start CPU %u (%lx)\n", id, target_cpu)); pcpu_destroy(pcpup); kmem_free(kernel_arena, (vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE); dpcpu[cpuid - 1] = NULL; /* Notify the user that the CPU failed to start */ printf("Failed to start CPU %u (%lx)\n", id, target_cpu); } else CPU_SET(cpuid, &all_cpus); return (1); } #endif /* Initialize and fire up non-boot processors */ void cpu_mp_start(void) { mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); CPU_SET(0, &all_cpus); - switch(cpu_enum_method) { + switch(arm64_bus_method) { #ifdef FDT - case CPUS_FDT: + case ARM64_BUS_FDT: KASSERT(cpu0 >= 0, ("Current CPU was not found")); ofw_cpu_early_foreach(cpu_init_fdt, true); break; #endif - case CPUS_UNKNOWN: + default: break; } } /* Introduce rest of cores to the world */ void cpu_mp_announce(void) { } static boolean_t cpu_find_cpu0_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) { uint64_t mpidr_fdt, mpidr_reg; if (cpu0 < 0) { mpidr_fdt = reg[0]; if (addr_size == 2) { mpidr_fdt <<= 32; mpidr_fdt |= reg[1]; } mpidr_reg = READ_SPECIALREG(mpidr_el1); if ((mpidr_reg & 0xff00fffffful) == mpidr_fdt) cpu0 = id; } return (TRUE); } void cpu_mp_setmaxid(void) { #ifdef FDT int cores; - cores = ofw_cpu_early_foreach(cpu_find_cpu0_fdt, false); - if (cores > 0) { - cores = MIN(cores, MAXCPU); - if (bootverbose) - printf("Found %d CPUs in the device tree\n", cores); - mp_ncpus = cores; - mp_maxid = cores - 1; - cpu_enum_method = CPUS_FDT; - return; + if (arm64_bus_method == ARM64_BUS_FDT) { + cores = ofw_cpu_early_foreach(cpu_find_cpu0_fdt, false); + if (cores > 0) { + cores = MIN(cores, MAXCPU); + if (bootverbose) + printf("Found %d CPUs in the device tree\n", + cores); + mp_ncpus = cores; + mp_maxid = cores - 1; + return; + } } #endif if (bootverbose) printf("No CPU data, limiting to 1 core\n"); mp_ncpus = 1; mp_maxid = 0; } /* * Lookup IPI source. */ static struct intr_ipi * intr_ipi_lookup(u_int ipi) { if (ipi >= INTR_IPI_COUNT) panic("%s: no such IPI %u", __func__, ipi); return (&ipi_sources[ipi]); } /* * interrupt controller dispatch function for IPIs. It should * be called straight from the interrupt controller, when associated * interrupt source is learned. Or from anybody who has an interrupt * source mapped. */ void intr_ipi_dispatch(u_int ipi, struct trapframe *tf) { void *arg; struct intr_ipi *ii; ii = intr_ipi_lookup(ipi); if (ii->ii_count == NULL) panic("%s: not setup IPI %u", __func__, ipi); intr_ipi_increment_count(ii->ii_count, PCPU_GET(cpuid)); /* * Supply ipi filter with trapframe argument * if none is registered. */ arg = ii->ii_handler_arg != NULL ? ii->ii_handler_arg : tf; ii->ii_handler(arg); } #ifdef notyet /* * Map IPI into interrupt controller. * * Not SMP coherent. 
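The renumbering in cpu_init_fdt() above ("rotate the CPU IDs") keeps the CPUs in their original relative order while forcing the booting CPU to become logical CPU 0. A worked example of the same arithmetic; rotate_cpu_id() is an illustrative helper, not part of the change:

    static u_int
    rotate_cpu_id(u_int id, u_int boot_id, u_int maxid)
    {
    	u_int cpuid = id;

    	if (cpuid < boot_id)
    		cpuid += maxid + 1;	/* wrap IDs below the boot CPU */
    	return (cpuid - boot_id);
    }
    /*
     * With boot_id = 2 and maxid = 3 (four CPUs):
     *   2 -> 0, 3 -> 1, 0 -> 2, 1 -> 3
     */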
*/ static int ipi_map(struct intr_irqsrc *isrc, u_int ipi) { boolean_t is_percpu; int error; if (ipi >= INTR_IPI_COUNT) panic("%s: no such IPI %u", __func__, ipi); KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); isrc->isrc_type = INTR_ISRCT_NAMESPACE; isrc->isrc_nspc_type = INTR_IRQ_NSPC_IPI; isrc->isrc_nspc_num = ipi_next_num; error = PIC_REGISTER(intr_irq_root_dev, isrc, &is_percpu); if (error == 0) { isrc->isrc_dev = intr_irq_root_dev; ipi_next_num++; } return (error); } /* * Setup IPI handler to interrupt source. * * Note that there could be more ways how to send and receive IPIs * on a platform like fast interrupts for example. In that case, * one can call this function with ASIF_NOALLOC flag set and then * call intr_ipi_dispatch() when appropriate. * * Not SMP coherent. */ int intr_ipi_set_handler(u_int ipi, const char *name, intr_ipi_filter_t *filter, void *arg, u_int flags) { struct intr_irqsrc *isrc; int error; if (filter == NULL) return(EINVAL); isrc = intr_ipi_lookup(ipi); if (isrc->isrc_ipifilter != NULL) return (EEXIST); if ((flags & AISHF_NOALLOC) == 0) { error = ipi_map(isrc, ipi); if (error != 0) return (error); } isrc->isrc_ipifilter = filter; isrc->isrc_arg = arg; isrc->isrc_handlers = 1; isrc->isrc_count = intr_ipi_setup_counters(name); isrc->isrc_index = 0; /* it should not be used in IPI case */ if (isrc->isrc_dev != NULL) { PIC_ENABLE_INTR(isrc->isrc_dev, isrc); PIC_ENABLE_SOURCE(isrc->isrc_dev, isrc); } return (0); } #endif /* Sending IPI */ void ipi_all_but_self(u_int ipi) { cpuset_t cpus; cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); intr_ipi_send(cpus, ipi); } void ipi_cpu(int cpu, u_int ipi) { cpuset_t cpus; CPU_ZERO(&cpus); CPU_SET(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x", __func__, cpu, ipi); intr_ipi_send(cpus, ipi); } void ipi_selected(cpuset_t cpus, u_int ipi) { CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); intr_ipi_send(cpus, ipi); } Index: user/alc/PQ_LAUNDRY/sys/arm64/arm64/nexus.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm64/arm64/nexus.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/arm64/arm64/nexus.c (revision 303642) @@ -1,472 +1,473 @@ /*- * Copyright 1998 Massachusetts Institute of Technology * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * This code implements a `root nexus' for Arm Architecture * machines. The function of the root nexus is to serve as an * attachment point for both processors and buses, and to manage * resources which are common to all of them. In particular, * this code implements the core resource managers for interrupt * requests, DMA requests (which rightfully should be a part of the * ISA code but it's easier to do it here for now), I/O port addresses, * and I/O memory address space. */ #include "opt_acpi.h" #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #ifdef FDT #include #include "ofw_bus_if.h" #endif #ifdef DEV_ACPI #include #include #endif extern struct bus_space memmap_bus; static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device"); struct nexus_device { struct resource_list nx_resources; }; #define DEVTONX(dev) ((struct nexus_device *)device_get_ivars(dev)) static struct rman mem_rman; static struct rman irq_rman; static int nexus_attach(device_t); #ifdef FDT static device_probe_t nexus_fdt_probe; static device_attach_t nexus_fdt_attach; #endif #ifdef DEV_ACPI static device_probe_t nexus_acpi_probe; static device_attach_t nexus_acpi_attach; #endif static int nexus_print_child(device_t, device_t); static device_t nexus_add_child(device_t, u_int, const char *, int); static struct resource *nexus_alloc_resource(device_t, device_t, int, int *, rman_res_t, rman_res_t, rman_res_t, u_int); static int nexus_activate_resource(device_t, device_t, int, int, struct resource *); static int nexus_config_intr(device_t dev, int irq, enum intr_trigger trig, enum intr_polarity pol); static struct resource_list *nexus_get_reslist(device_t, device_t); static int nexus_set_resource(device_t, device_t, int, int, rman_res_t, rman_res_t); static int nexus_deactivate_resource(device_t, device_t, int, int, struct resource *); static int nexus_setup_intr(device_t dev, device_t child, struct resource *res, int flags, driver_filter_t *filt, driver_intr_t *intr, void *arg, void **cookiep); static int nexus_teardown_intr(device_t, device_t, struct resource *, void *); static bus_space_tag_t nexus_get_bus_tag(device_t, device_t); #ifdef SMP static int nexus_bind_intr(device_t, device_t, struct resource *, int); #endif #ifdef FDT static int nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent, int icells, pcell_t *intr); #endif static device_method_t nexus_methods[] = { /* Bus interface */ DEVMETHOD(bus_print_child, nexus_print_child), DEVMETHOD(bus_add_child, nexus_add_child), DEVMETHOD(bus_alloc_resource, nexus_alloc_resource), DEVMETHOD(bus_activate_resource, nexus_activate_resource), DEVMETHOD(bus_config_intr, nexus_config_intr), DEVMETHOD(bus_get_resource_list, nexus_get_reslist), DEVMETHOD(bus_set_resource, nexus_set_resource), DEVMETHOD(bus_deactivate_resource, nexus_deactivate_resource), DEVMETHOD(bus_setup_intr, nexus_setup_intr), DEVMETHOD(bus_teardown_intr, 
nexus_teardown_intr), DEVMETHOD(bus_get_bus_tag, nexus_get_bus_tag), #ifdef SMP DEVMETHOD(bus_bind_intr, nexus_bind_intr), #endif { 0, 0 } }; static driver_t nexus_driver = { "nexus", nexus_methods, 1 /* no softc */ }; static int nexus_attach(device_t dev) { mem_rman.rm_start = 0; mem_rman.rm_end = BUS_SPACE_MAXADDR; mem_rman.rm_type = RMAN_ARRAY; mem_rman.rm_descr = "I/O memory addresses"; if (rman_init(&mem_rman) || rman_manage_region(&mem_rman, 0, BUS_SPACE_MAXADDR)) panic("nexus_attach mem_rman"); irq_rman.rm_start = 0; irq_rman.rm_end = ~0; irq_rman.rm_type = RMAN_ARRAY; irq_rman.rm_descr = "Interrupts"; if (rman_init(&irq_rman) || rman_manage_region(&irq_rman, 0, ~0)) panic("nexus_attach irq_rman"); bus_generic_probe(dev); bus_generic_attach(dev); return (0); } static int nexus_print_child(device_t bus, device_t child) { int retval = 0; retval += bus_print_child_header(bus, child); retval += printf("\n"); return (retval); } static device_t nexus_add_child(device_t bus, u_int order, const char *name, int unit) { device_t child; struct nexus_device *ndev; ndev = malloc(sizeof(struct nexus_device), M_NEXUSDEV, M_NOWAIT|M_ZERO); if (!ndev) return (0); resource_list_init(&ndev->nx_resources); child = device_add_child_ordered(bus, order, name, unit); /* should we free this in nexus_child_detached? */ device_set_ivars(child, ndev); return (child); } /* * Allocate a resource on behalf of child. NB: child is usually going to be a * child of one of our descendants, not a direct child of nexus0. * (Exceptions include footbridge.) */ static struct resource * nexus_alloc_resource(device_t bus, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct nexus_device *ndev = DEVTONX(child); struct resource *rv; struct resource_list_entry *rle; struct rman *rm; int needactivate = flags & RF_ACTIVE; /* * If this is an allocation of the "default" range for a given * RID, and we know what the resources for this device are * (ie. they aren't maintained by a child bus), then work out * the start/end values. */ if (RMAN_IS_DEFAULT_RANGE(start, end) && (count == 1)) { if (device_get_parent(child) != bus || ndev == NULL) return(NULL); rle = resource_list_find(&ndev->nx_resources, type, *rid); if (rle == NULL) return(NULL); start = rle->start; end = rle->end; count = rle->count; } switch (type) { case SYS_RES_IRQ: rm = &irq_rman; break; case SYS_RES_MEMORY: case SYS_RES_IOPORT: rm = &mem_rman; break; default: return (NULL); } rv = rman_reserve_resource(rm, start, end, count, flags, child); if (rv == NULL) return (NULL); rman_set_rid(rv, *rid); rman_set_bushandle(rv, rman_get_start(rv)); if (needactivate) { if (bus_activate_resource(child, type, *rid, rv)) { rman_release_resource(rv); return (NULL); } } return (rv); } static int nexus_config_intr(device_t dev, int irq, enum intr_trigger trig, enum intr_polarity pol) { /* TODO: This is wrong, it's needed for ACPI */ device_printf(dev, "bus_config_intr is obsolete and not supported!\n"); return (EOPNOTSUPP); } static int nexus_setup_intr(device_t dev, device_t child, struct resource *res, int flags, driver_filter_t *filt, driver_intr_t *intr, void *arg, void **cookiep) { int error; if ((rman_get_flags(res) & RF_SHAREABLE) == 0) flags |= INTR_EXCL; /* We depend here on rman_activate_resource() being idempotent. 
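nexus_alloc_resource() serves both explicit requests and "default range" requests, where the start/end come from the child's resource list; memory and IRQ ranges are then carved out of mem_rman and irq_rman. From a child driver's point of view the machinery is driven by the ordinary resource API, roughly as in this hypothetical attach routine (not part of the diff):

    static int
    example_child_attach(device_t dev)
    {
    	struct resource *mem, *irq;
    	int mem_rid = 0, irq_rid = 0;

    	/* Resolved from mem_rman and mapped by nexus_activate_resource(). */
    	mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &mem_rid,
    	    RF_ACTIVE);
    	if (mem == NULL)
    		return (ENXIO);

    	/* Resolved from irq_rman; nexus_setup_intr() wires the handler. */
    	irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq_rid,
    	    RF_ACTIVE | RF_SHAREABLE);
    	if (irq == NULL) {
    		bus_release_resource(dev, SYS_RES_MEMORY, mem_rid, mem);
    		return (ENXIO);
    	}
    	/* ... bus_setup_intr(dev, irq, ...) and device setup follow ... */
    	return (0);
    }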
*/ error = rman_activate_resource(res); if (error) return (error); error = intr_setup_irq(child, res, filt, intr, arg, flags, cookiep); return (error); } static int nexus_teardown_intr(device_t dev, device_t child, struct resource *r, void *ih) { return (intr_teardown_irq(child, r, ih)); } #ifdef SMP static int nexus_bind_intr(device_t dev, device_t child, struct resource *irq, int cpu) { return (intr_bind_irq(child, irq, cpu)); } #endif static bus_space_tag_t nexus_get_bus_tag(device_t bus __unused, device_t child __unused) { return(&memmap_bus); } static int nexus_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { int err; bus_addr_t paddr; bus_size_t psize; bus_space_handle_t vaddr; if ((err = rman_activate_resource(r)) != 0) return (err); /* * If this is a memory resource, map it into the kernel. */ if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) { paddr = (bus_addr_t)rman_get_start(r); psize = (bus_size_t)rman_get_size(r); err = bus_space_map(&memmap_bus, paddr, psize, 0, &vaddr); if (err != 0) { rman_deactivate_resource(r); return (err); } rman_set_bustag(r, &memmap_bus); rman_set_virtual(r, (void *)vaddr); rman_set_bushandle(r, vaddr); } return (0); } static struct resource_list * nexus_get_reslist(device_t dev, device_t child) { struct nexus_device *ndev = DEVTONX(child); return (&ndev->nx_resources); } static int nexus_set_resource(device_t dev, device_t child, int type, int rid, rman_res_t start, rman_res_t count) { struct nexus_device *ndev = DEVTONX(child); struct resource_list *rl = &ndev->nx_resources; /* XXX this should return a success/failure indicator */ resource_list_add(rl, type, rid, start, start + count - 1, count); return(0); } static int nexus_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { bus_size_t psize; bus_space_handle_t vaddr; psize = (bus_size_t)rman_get_size(r); vaddr = rman_get_bushandle(r); if (vaddr != 0) { bus_space_unmap(&memmap_bus, vaddr, psize); rman_set_virtual(r, NULL); rman_set_bushandle(r, 0); } return (rman_deactivate_resource(r)); } #ifdef FDT static device_method_t nexus_fdt_methods[] = { /* Device interface */ DEVMETHOD(device_probe, nexus_fdt_probe), DEVMETHOD(device_attach, nexus_fdt_attach), /* OFW interface */ DEVMETHOD(ofw_bus_map_intr, nexus_ofw_map_intr), }; #define nexus_baseclasses nexus_fdt_baseclasses DEFINE_CLASS_1(nexus, nexus_fdt_driver, nexus_fdt_methods, 1, nexus_driver); #undef nexus_baseclasses static devclass_t nexus_fdt_devclass; EARLY_DRIVER_MODULE(nexus_fdt, root, nexus_fdt_driver, nexus_fdt_devclass, 0, 0, BUS_PASS_BUS + BUS_PASS_ORDER_FIRST); static int nexus_fdt_probe(device_t dev) { - if (OF_peer(0) == 0) + if (arm64_bus_method != ARM64_BUS_FDT) return (ENXIO); device_quiet(dev); return (BUS_PROBE_DEFAULT); } static int nexus_fdt_attach(device_t dev) { nexus_add_child(dev, 10, "ofwbus", 0); return (nexus_attach(dev)); } static int nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent, int icells, pcell_t *intr) { return (INTR_IRQ_INVALID); } #endif #ifdef DEV_ACPI static device_method_t nexus_acpi_methods[] = { /* Device interface */ DEVMETHOD(device_probe, nexus_acpi_probe), DEVMETHOD(device_attach, nexus_acpi_attach), }; #define nexus_baseclasses nexus_acpi_baseclasses DEFINE_CLASS_1(nexus, nexus_acpi_driver, nexus_acpi_methods, 1, nexus_driver); #undef nexus_baseclasses static devclass_t nexus_acpi_devclass; EARLY_DRIVER_MODULE(nexus_acpi, root, nexus_acpi_driver, nexus_acpi_devclass, 0, 0, BUS_PASS_BUS + 
BUS_PASS_ORDER_FIRST); static int nexus_acpi_probe(device_t dev) { - if (acpi_identify() != 0) + if (arm64_bus_method != ARM64_BUS_ACPI || acpi_identify() != 0) return (ENXIO); device_quiet(dev); return (BUS_PROBE_LOW_PRIORITY); } static int nexus_acpi_attach(device_t dev) { nexus_add_child(dev, 10, "acpi", 0); return (nexus_attach(dev)); } #endif Index: user/alc/PQ_LAUNDRY/sys/arm64/arm64/pmap.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm64/arm64/pmap.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/arm64/arm64/pmap.c (revision 303642) @@ -1,3590 +1,3595 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * Copyright (c) 2014 Andrew Turner * All rights reserved. * Copyright (c) 2014-2016 The FreeBSD Foundation * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * This software was developed by Andrew Turner under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NL0PG (PAGE_SIZE/(sizeof (pd_entry_t))) #define NL1PG (PAGE_SIZE/(sizeof (pd_entry_t))) #define NL2PG (PAGE_SIZE/(sizeof (pd_entry_t))) #define NL3PG (PAGE_SIZE/(sizeof (pt_entry_t))) #define NUL0E L0_ENTRIES #define NUL1E (NUL0E * NL1PG) #define NUL2E (NUL1E * NL2PG) #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif /* * These are configured by the mair_el1 register. 
This is set up in locore.S */ #define DEVICE_MEMORY 0 #define UNCACHED_MEMORY 1 #define CACHED_MEMORY 2 #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) #define NPV_LIST_LOCKS MAXCPU #define PHYS_TO_PV_LIST_LOCK(pa) \ (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ struct rwlock **_lockp = (lockp); \ struct rwlock *_new_lock; \ \ _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ if (_new_lock != *_lockp) { \ if (*_lockp != NULL) \ rw_wunlock(*_lockp); \ *_lockp = _new_lock; \ rw_wlock(*_lockp); \ } \ } while (0) #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) #define RELEASE_PV_LIST_LOCK(lockp) do { \ struct rwlock **_lockp = (lockp); \ \ if (*_lockp != NULL) { \ rw_wunlock(*_lockp); \ *_lockp = NULL; \ } \ } while (0) #define VM_PAGE_TO_PV_LIST_LOCK(m) \ PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) struct pmap kernel_pmap_store; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ vm_offset_t kernel_vm_end = 0; struct msgbuf *msgbufp = NULL; static struct rwlock_padalign pvh_global_lock; vm_paddr_t dmap_phys_base; /* The start of the dmap region */ vm_paddr_t dmap_phys_max; /* The limit of the dmap region */ vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */ /* This code assumes all L1 DMAP entries will be used */ CTASSERT((DMAP_MIN_ADDRESS & ~L0_OFFSET) == DMAP_MIN_ADDRESS); CTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET) == DMAP_MAX_ADDRESS); #define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT) extern pt_entry_t pagetable_dmap[]; /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static struct mtx pv_chunks_mutex; static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp); static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free); static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); /* * These load the old table data and store the new value. * They need to be atomic as the System MMU may write to the table at * the same time as the CPU. 
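 *
 * Illustrative use (mirroring pmap_remove_l3() below): swapping the entry
 * out atomically returns the old bits, so anything set concurrently (e.g.
 * by the System MMU) is observed rather than lost:
 *
 *	old_l3 = pmap_load_clear(l3);
 *	if (old_l3 & ATTR_AF)
 *		vm_page_aflag_set(m, PGA_REFERENCED);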
*/ #define pmap_load_store(table, entry) atomic_swap_64(table, entry) #define pmap_set(table, mask) atomic_set_64(table, mask) #define pmap_load_clear(table) atomic_swap_64(table, 0) #define pmap_load(table) (*table) /********************/ /* Inline functions */ /********************/ static __inline void pagecopy(void *s, void *d) { memcpy(d, s, PAGE_SIZE); } #define pmap_l0_index(va) (((va) >> L0_SHIFT) & L0_ADDR_MASK) #define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) #define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) #define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) static __inline pd_entry_t * pmap_l0(pmap_t pmap, vm_offset_t va) { return (&pmap->pm_l0[pmap_l0_index(va)]); } static __inline pd_entry_t * pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va) { pd_entry_t *l1; l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); return (&l1[pmap_l1_index(va)]); } static __inline pd_entry_t * pmap_l1(pmap_t pmap, vm_offset_t va) { pd_entry_t *l0; l0 = pmap_l0(pmap, va); if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE) return (NULL); return (pmap_l0_to_l1(l0, va)); } static __inline pd_entry_t * pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va) { pd_entry_t *l2; l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); return (&l2[pmap_l2_index(va)]); } static __inline pd_entry_t * pmap_l2(pmap_t pmap, vm_offset_t va) { pd_entry_t *l1; l1 = pmap_l1(pmap, va); if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE) return (NULL); return (pmap_l1_to_l2(l1, va)); } static __inline pt_entry_t * pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va) { pt_entry_t *l3; l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK); return (&l3[pmap_l3_index(va)]); } /* * Returns the lowest valid pde for a given virtual address. * The next level may or may not point to a valid page or block. */ static __inline pd_entry_t * pmap_pde(pmap_t pmap, vm_offset_t va, int *level) { pd_entry_t *l0, *l1, *l2, desc; l0 = pmap_l0(pmap, va); desc = pmap_load(l0) & ATTR_DESCR_MASK; if (desc != L0_TABLE) { *level = -1; return (NULL); } l1 = pmap_l0_to_l1(l0, va); desc = pmap_load(l1) & ATTR_DESCR_MASK; if (desc != L1_TABLE) { *level = 0; return (l0); } l2 = pmap_l1_to_l2(l1, va); desc = pmap_load(l2) & ATTR_DESCR_MASK; if (desc != L2_TABLE) { *level = 1; return (l1); } *level = 2; return (l2); } /* * Returns the lowest valid pte block or table entry for a given virtual * address. If there are no valid entries return NULL and set the level to * the first invalid level. 
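 *
 * Callers use the returned level to pick the matching offset mask; an
 * illustrative sketch (mirroring pmap_extract() below) for a level 2 block:
 *
 *	pte = pmap_pte(pmap, va, &lvl);
 *	if (pte != NULL && lvl == 2)
 *		pa = (pmap_load(pte) & ~ATTR_MASK) | (va & L2_OFFSET);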
*/ static __inline pt_entry_t * pmap_pte(pmap_t pmap, vm_offset_t va, int *level) { pd_entry_t *l1, *l2, desc; pt_entry_t *l3; l1 = pmap_l1(pmap, va); if (l1 == NULL) { *level = 0; return (NULL); } desc = pmap_load(l1) & ATTR_DESCR_MASK; if (desc == L1_BLOCK) { *level = 1; return (l1); } if (desc != L1_TABLE) { *level = 1; return (NULL); } l2 = pmap_l1_to_l2(l1, va); desc = pmap_load(l2) & ATTR_DESCR_MASK; if (desc == L2_BLOCK) { *level = 2; return (l2); } if (desc != L2_TABLE) { *level = 2; return (NULL); } *level = 3; l3 = pmap_l2_to_l3(l2, va); if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE) return (NULL); return (l3); } bool pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1, pd_entry_t **l2, pt_entry_t **l3) { pd_entry_t *l0p, *l1p, *l2p; if (pmap->pm_l0 == NULL) return (false); l0p = pmap_l0(pmap, va); *l0 = l0p; if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE) return (false); l1p = pmap_l0_to_l1(l0p, va); *l1 = l1p; if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) { *l2 = NULL; *l3 = NULL; return (true); } if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE) return (false); l2p = pmap_l1_to_l2(l1p, va); *l2 = l2p; if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) { *l3 = NULL; return (true); } *l3 = pmap_l2_to_l3(l2p, va); return (true); } static __inline int pmap_is_current(pmap_t pmap) { return ((pmap == pmap_kernel()) || (pmap == curthread->td_proc->p_vmspace->vm_map.pmap)); } static __inline int pmap_l3_valid(pt_entry_t l3) { return ((l3 & ATTR_DESCR_MASK) == L3_PAGE); } static __inline int pmap_l3_valid_cacheable(pt_entry_t l3) { return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) && ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY))); } #define PTE_SYNC(pte) cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte)) /* * Checks if the page is dirty. We currently lack proper tracking of this on * arm64 so for now assume is a page mapped as rw was accessed it is. 
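 *
 * Illustrative use (as in pmap_remove_l3() below): a mapping that tests
 * dirty has its modification reflected back to the vm_page before the
 * mapping is destroyed:
 *
 *	if (pmap_page_dirty(old_l3))
 *		vm_page_dirty(m);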
*/ static inline int pmap_page_dirty(pt_entry_t pte) { return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) == (ATTR_AF | ATTR_AP(ATTR_AP_RW))); } static __inline void pmap_resident_count_inc(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); pmap->pm_stats.resident_count += count; } static __inline void pmap_resident_count_dec(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(pmap->pm_stats.resident_count >= count, ("pmap %p resident count underflow %ld %d", pmap, pmap->pm_stats.resident_count, count)); pmap->pm_stats.resident_count -= count; } static pt_entry_t * pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot, u_int *l2_slot) { pt_entry_t *l2; pd_entry_t *l1; l1 = (pd_entry_t *)l1pt; *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK; /* Check locore has used a table L1 map */ KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE, ("Invalid bootstrap L1 table")); /* Find the address of the L2 table */ l2 = (pt_entry_t *)init_pt_va; *l2_slot = pmap_l2_index(va); return (l2); } static vm_paddr_t pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) { u_int l1_slot, l2_slot; pt_entry_t *l2; l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET)); } static void pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa) { vm_offset_t va; vm_paddr_t pa; u_int l1_slot; pa = dmap_phys_base = min_pa & ~L1_OFFSET; va = DMAP_MIN_ADDRESS; for (; va < DMAP_MAX_ADDRESS && pa < max_pa; pa += L1_SIZE, va += L1_SIZE, l1_slot++) { l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT); pmap_load_store(&pagetable_dmap[l1_slot], (pa & ~L1_OFFSET) | ATTR_DEFAULT | ATTR_IDX(CACHED_MEMORY) | L1_BLOCK); } /* Set the upper limit of the DMAP region */ dmap_phys_max = pa; dmap_max_addr = va; cpu_dcache_wb_range((vm_offset_t)pagetable_dmap, PAGE_SIZE * DMAP_TABLES); cpu_tlb_flushID(); } static vm_offset_t pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start) { vm_offset_t l2pt; vm_paddr_t pa; pd_entry_t *l1; u_int l1_slot; KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address")); l1 = (pd_entry_t *)l1pt; l1_slot = pmap_l1_index(va); l2pt = l2_start; for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) { KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); pa = pmap_early_vtophys(l1pt, l2pt); pmap_load_store(&l1[l1_slot], (pa & ~Ln_TABLE_MASK) | L1_TABLE); l2pt += PAGE_SIZE; } /* Clean the L2 page table */ memset((void *)l2_start, 0, l2pt - l2_start); cpu_dcache_wb_range(l2_start, l2pt - l2_start); /* Flush the l1 table to ram */ cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); return l2pt; } static vm_offset_t pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start) { vm_offset_t l2pt, l3pt; vm_paddr_t pa; pd_entry_t *l2; u_int l2_slot; KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address")); l2 = pmap_l2(kernel_pmap, va); l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE); l2pt = (vm_offset_t)l2; l2_slot = pmap_l2_index(va); l3pt = l3_start; for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) { KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index")); pa = pmap_early_vtophys(l1pt, l3pt); pmap_load_store(&l2[l2_slot], (pa & ~Ln_TABLE_MASK) | L2_TABLE); l3pt += PAGE_SIZE; } /* Clean the L2 page table */ memset((void *)l3_start, 0, l3pt - l3_start); cpu_dcache_wb_range(l3_start, l3pt - l3_start); cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE); return l3pt; } /* * Bootstrap the system enough to run with virtual memory. 
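 * The rough order below: build the direct map first (pmap_bootstrap_dmap(),
 * so PHYS_TO_DMAP() works for the rest of bootstrap), then the kernel L2/L3
 * tables up to VM_MAX_KERNEL_ADDRESS, then the early dpcpu and msgbuf
 * allocations, and finally the remainder of phys_avail[].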
*/ void pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) { u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot; uint64_t kern_delta; pt_entry_t *l2; vm_offset_t va, freemempos; vm_offset_t dpcpu, msgbufpv; vm_paddr_t pa, max_pa, min_pa; int i; kern_delta = KERNBASE - kernstart; physmem = 0; printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen); printf("%lx\n", l1pt); printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK); /* Set this early so we can use the pagetable walking functions */ kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt; PMAP_LOCK_INIT(kernel_pmap); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); /* Assume the address we were loaded to is a valid physical address */ min_pa = max_pa = KERNBASE - kern_delta; /* * Find the minimum physical address. physmap is sorted, * but may contain empty ranges. */ for (i = 0; i < (physmap_idx * 2); i += 2) { if (physmap[i] == physmap[i + 1]) continue; if (physmap[i] <= min_pa) min_pa = physmap[i]; if (physmap[i + 1] > max_pa) max_pa = physmap[i + 1]; } /* Create a direct map region early so we can use it for pa -> va */ pmap_bootstrap_dmap(l1pt, min_pa, max_pa); va = KERNBASE; pa = KERNBASE - kern_delta; /* * Start to initialise phys_avail by copying from physmap * up to the physical address KERNBASE points at. */ map_slot = avail_slot = 0; for (; map_slot < (physmap_idx * 2) && avail_slot < (PHYS_AVAIL_SIZE - 2); map_slot += 2) { if (physmap[map_slot] == physmap[map_slot + 1]) continue; if (physmap[map_slot] <= pa && physmap[map_slot + 1] > pa) break; phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } /* Add the memory before the kernel */ if (physmap[avail_slot] < pa && avail_slot < (PHYS_AVAIL_SIZE - 2)) { phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = pa; physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } used_map_slot = map_slot; /* * Read the page table to find out what is already mapped. * This assumes we have mapped a block of memory from KERNBASE * using a single L1 entry. */ l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot); /* Sanity check the index, KERNBASE should be the first VA */ KASSERT(l2_slot == 0, ("The L2 index is non-zero")); /* Find how many pages we have mapped */ for (; l2_slot < Ln_ENTRIES; l2_slot++) { if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0) break; /* Check locore used L2 blocks */ KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK, ("Invalid bootstrap L2 table")); KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa, ("Incorrect PA in L2 table")); va += L2_SIZE; pa += L2_SIZE; } va = roundup2(va, L1_SIZE); freemempos = KERNBASE + kernlen; freemempos = roundup2(freemempos, PAGE_SIZE); /* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */ freemempos = pmap_bootstrap_l2(l1pt, va, freemempos); /* And the l3 tables for the early devmap */ freemempos = pmap_bootstrap_l3(l1pt, VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos); cpu_tlb_flushID(); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); /* Allocate dynamic per-cpu area. */ alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu, 0); /* Allocate memory for the msgbuf, e.g. 
for /sbin/dmesg */ alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); msgbufp = (void *)msgbufpv; virtual_avail = roundup2(freemempos, L1_SIZE); virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE; kernel_vm_end = virtual_avail; pa = pmap_early_vtophys(l1pt, freemempos); /* Finish initialising physmap */ map_slot = used_map_slot; for (; avail_slot < (PHYS_AVAIL_SIZE - 2) && map_slot < (physmap_idx * 2); map_slot += 2) { if (physmap[map_slot] == physmap[map_slot + 1]) continue; /* Have we used the current range? */ if (physmap[map_slot + 1] <= pa) continue; /* Do we need to split the entry? */ if (physmap[map_slot] < pa) { phys_avail[avail_slot] = pa; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; } else { phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; } physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } phys_avail[avail_slot] = 0; phys_avail[avail_slot + 1] = 0; /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". */ Maxmem = atop(phys_avail[avail_slot - 1]); cpu_tlb_flushID(); } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { int i; /* * Initialize the pv chunk list mutex. */ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); /* * Initialize the pool of pv list locks. */ for (i = 0; i < NPV_LIST_LOCKS; i++) rw_init(&pv_list_locks[i], "pmap pv list"); } /* - * Normal, non-SMP, invalidation functions. - * We inline these within pmap.c for speed. + * Invalidate a single TLB entry. */ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { sched_pin(); __asm __volatile( - "dsb sy \n" + "dsb ishst \n" "tlbi vaae1is, %0 \n" - "dsb sy \n" + "dsb ish \n" "isb \n" : : "r"(va >> PAGE_SHIFT)); sched_unpin(); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; sched_pin(); - __asm __volatile("dsb sy"); + dsb(ishst); for (addr = sva; addr < eva; addr += PAGE_SIZE) { __asm __volatile( "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT)); } __asm __volatile( - "dsb sy \n" + "dsb ish \n" "isb \n"); sched_unpin(); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { sched_pin(); __asm __volatile( - "dsb sy \n" + "dsb ishst \n" "tlbi vmalle1is \n" - "dsb sy \n" + "dsb ish \n" "isb \n"); sched_unpin(); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { pt_entry_t *pte, tpte; vm_paddr_t pa; int lvl; pa = 0; PMAP_LOCK(pmap); /* * Find the block or page map for this virtual address. pmap_pte * will return either a valid block/page entry, or NULL. 
*/ pte = pmap_pte(pmap, va, &lvl); if (pte != NULL) { tpte = pmap_load(pte); pa = tpte & ~ATTR_MASK; switch(lvl) { case 1: KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, ("pmap_extract: Invalid L1 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L1_OFFSET); break; case 2: KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, ("pmap_extract: Invalid L2 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L2_OFFSET); break; case 3: KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, ("pmap_extract: Invalid L3 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L3_OFFSET); break; } } PMAP_UNLOCK(pmap); return (pa); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t *pte, tpte; vm_paddr_t pa; vm_page_t m; int lvl; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: pte = pmap_pte(pmap, va, &lvl); if (pte != NULL) { tpte = pmap_load(pte); KASSERT(lvl > 0 && lvl <= 3, ("pmap_extract_and_hold: Invalid level %d", lvl)); CTASSERT(L1_BLOCK == L2_BLOCK); KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) || (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK), ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl, tpte & ATTR_DESCR_MASK)); if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) || ((prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, tpte & ~ATTR_MASK, &pa)) goto retry; m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK); vm_page_hold(m); } } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } vm_paddr_t pmap_kextract(vm_offset_t va) { pt_entry_t *pte, tpte; vm_paddr_t pa; int lvl; if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { pa = DMAP_TO_PHYS(va); } else { pa = 0; pte = pmap_pte(kernel_pmap, va, &lvl); if (pte != NULL) { tpte = pmap_load(pte); pa = tpte & ~ATTR_MASK; switch(lvl) { case 1: KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, ("pmap_kextract: Invalid L1 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L1_OFFSET); break; case 2: KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, ("pmap_kextract: Invalid L2 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L2_OFFSET); break; case 3: KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, ("pmap_kextract: Invalid L3 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L3_OFFSET); break; } } } return (pa); } /*************************************************** * Low level mapping routines..... 
***************************************************/ -void -pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) +static void +pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode) { pd_entry_t *pde; pt_entry_t *pte; vm_offset_t va; int lvl; KASSERT((pa & L3_OFFSET) == 0, - ("pmap_kenter_device: Invalid physical address")); + ("pmap_kenter: Invalid physical address")); KASSERT((sva & L3_OFFSET) == 0, - ("pmap_kenter_device: Invalid virtual address")); + ("pmap_kenter: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, - ("pmap_kenter_device: Mapping is not page-sized")); + ("pmap_kenter: Mapping is not page-sized")); va = sva; while (size != 0) { pde = pmap_pde(kernel_pmap, va, &lvl); KASSERT(pde != NULL, - ("pmap_kenter_device: Invalid page entry, va: 0x%lx", va)); - KASSERT(lvl == 2, - ("pmap_kenter_device: Invalid level %d", lvl)); + ("pmap_kenter: Invalid page entry, va: 0x%lx", va)); + KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl)); pte = pmap_l2_to_l3(pde, va); pmap_load_store(pte, (pa & ~L3_OFFSET) | ATTR_DEFAULT | - ATTR_IDX(DEVICE_MEMORY) | L3_PAGE); + ATTR_IDX(mode) | L3_PAGE); PTE_SYNC(pte); va += PAGE_SIZE; pa += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); +} + +void +pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) +{ + + pmap_kenter(sva, size, pa, DEVICE_MEMORY); } /* * Remove a page from the kernel pagetables. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; int lvl; pte = pmap_pte(kernel_pmap, va, &lvl); KASSERT(pte != NULL, ("pmap_kremove: Invalid address")); KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl)); if (pmap_l3_valid_cacheable(pmap_load(pte))) cpu_dcache_wb_range(va, L3_SIZE); pmap_load_clear(pte); PTE_SYNC(pte); pmap_invalidate_page(kernel_pmap, va); } void pmap_kremove_device(vm_offset_t sva, vm_size_t size) { pt_entry_t *pte; vm_offset_t va; int lvl; KASSERT((sva & L3_OFFSET) == 0, ("pmap_kremove_device: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kremove_device: Mapping is not page-sized")); va = sva; while (size != 0) { pte = pmap_pte(kernel_pmap, va, &lvl); KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va)); KASSERT(lvl == 3, ("Invalid device pagetable level: %d != 3", lvl)); pmap_load_clear(pte); PTE_SYNC(pte); va += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { return PHYS_TO_DMAP(start); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. 
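 *
 * Illustrative sketch (not part of this change), assuming the pages in
 * "ma" are already wired:
 *
 *	va = kva_alloc(ptoa(npages));
 *	pmap_qenter(va, ma, npages);
 *	... use the temporary mapping ...
 *	pmap_qremove(va, npages);
 *	kva_free(va, ptoa(npages));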
*/ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pd_entry_t *pde; pt_entry_t *pte, pa; vm_offset_t va; vm_page_t m; int i, lvl; va = sva; for (i = 0; i < count; i++) { pde = pmap_pde(kernel_pmap, va, &lvl); KASSERT(pde != NULL, ("pmap_qenter: Invalid page entry, va: 0x%lx", va)); KASSERT(lvl == 2, ("pmap_qenter: Invalid level %d", lvl)); m = ma[i]; pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) | ATTR_IDX(m->md.pv_memattr) | L3_PAGE; pte = pmap_l2_to_l3(pde, va); pmap_load_store(pte, pa); PTE_SYNC(pte); va += L3_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. */ void pmap_qremove(vm_offset_t sva, int count) { pt_entry_t *pte; vm_offset_t va; int lvl; KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); va = sva; while (count-- > 0) { pte = pmap_pte(kernel_pmap, va, &lvl); KASSERT(lvl == 3, ("Invalid device pagetable level: %d != 3", lvl)); if (pte != NULL) { if (pmap_l3_valid_cacheable(pmap_load(pte))) cpu_dcache_wb_range(va, L3_SIZE); pmap_load_clear(pte); PTE_SYNC(pte); } va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /*************************************************** * Page table page management routines..... ***************************************************/ static __inline void pmap_free_zero_pages(struct spglist *free) { vm_page_t m; while ((m = SLIST_FIRST(free)) != NULL) { SLIST_REMOVE_HEAD(free, plinks.s.ss); /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } } /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Decrements a page table page's wire count, which is used to record the * number of valid page table entries within the page. If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. 
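 *
 * Illustrative use (as in pmap_unuse_l3() below): once an L3 entry has been
 * removed, the page table page named by the parent entry gives up one wire
 * count and is freed when the last one goes:
 *
 *	mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK);
 *	return (pmap_unwire_l3(pmap, va, mpte, free));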
*/ static inline boolean_t pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { _pmap_unwire_l3(pmap, va, m, free); return (TRUE); } else return (FALSE); } static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * unmap the page table page */ if (m->pindex >= (NUL2E + NUL1E)) { /* l1 page */ pd_entry_t *l0; l0 = pmap_l0(pmap, va); pmap_load_clear(l0); PTE_SYNC(l0); } else if (m->pindex >= NUL2E) { /* l2 page */ pd_entry_t *l1; l1 = pmap_l1(pmap, va); pmap_load_clear(l1); PTE_SYNC(l1); } else { /* l3 page */ pd_entry_t *l2; l2 = pmap_l2(pmap, va); pmap_load_clear(l2); PTE_SYNC(l2); } pmap_resident_count_dec(pmap, 1); if (m->pindex < NUL2E) { /* We just released an l3, unhold the matching l2 */ pd_entry_t *l1, tl1; vm_page_t l2pg; l1 = pmap_l1(pmap, va); tl1 = pmap_load(l1); l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); pmap_unwire_l3(pmap, va, l2pg, free); } else if (m->pindex < (NUL2E + NUL1E)) { /* We just released an l2, unhold the matching l1 */ pd_entry_t *l0, tl0; vm_page_t l1pg; l0 = pmap_l0(pmap, va); tl0 = pmap_load(l0); l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); pmap_unwire_l3(pmap, va, l1pg, free); } pmap_invalidate_page(pmap, va); /* * This is a release store so that the ordinary store unmapping * the page table page is globally performed before TLB shoot- * down is begun. */ atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); } /* * After removing an l3 entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, struct spglist *free) { vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK); return (pmap_unwire_l3(pmap, va, mpte, free)); } void pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_l0 = kernel_pmap->pm_l0; } int pmap_pinit(pmap_t pmap) { vm_paddr_t l0phys; vm_page_t l0pt; /* * allocate the l0 page */ while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) VM_WAIT; l0phys = VM_PAGE_TO_PHYS(l0pt); pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys); if ((l0pt->flags & PG_ZERO) == 0) pagezero(pmap->pm_l0); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); return (1); } /* * This routine is called if the desired page table page does not exist. * * If page table page allocation fails, this routine may sleep before * returning NULL. It sleeps only if a lock pointer was given. * * Note: If a page allocation fails at page table level two or three, * one or two pages may be held during the wait, only to be released * afterwards. This conservative approach is easily argued to avoid * race conditions. */ static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) { vm_page_t m, l1pg, l2pg; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if (lockp != NULL) { RELEASE_PV_LIST_LOCK(lockp); PMAP_UNLOCK(pmap); rw_runlock(&pvh_global_lock); VM_WAIT; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); } /* * Indicate the need to retry. 
While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ if (ptepindex >= (NUL2E + NUL1E)) { pd_entry_t *l0; vm_pindex_t l0index; l0index = ptepindex - (NUL2E + NUL1E); l0 = &pmap->pm_l0[l0index]; pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE); PTE_SYNC(l0); } else if (ptepindex >= NUL2E) { vm_pindex_t l0index, l1index; pd_entry_t *l0, *l1; pd_entry_t tl0; l1index = ptepindex - NUL2E; l0index = l1index >> L0_ENTRIES_SHIFT; l0 = &pmap->pm_l0[l0index]; tl0 = pmap_load(l0); if (tl0 == 0) { /* recurse for allocating page dir */ if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index, lockp) == NULL) { --m->wire_count; /* XXX: release mem barrier? */ atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } } else { l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); l1pg->wire_count++; } l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); l1 = &l1[ptepindex & Ln_ADDR_MASK]; pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE); PTE_SYNC(l1); } else { vm_pindex_t l0index, l1index; pd_entry_t *l0, *l1, *l2; pd_entry_t tl0, tl1; l1index = ptepindex >> Ln_ENTRIES_SHIFT; l0index = l1index >> L0_ENTRIES_SHIFT; l0 = &pmap->pm_l0[l0index]; tl0 = pmap_load(l0); if (tl0 == 0) { /* recurse for allocating page dir */ if (_pmap_alloc_l3(pmap, NUL2E + l1index, lockp) == NULL) { --m->wire_count; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } tl0 = pmap_load(l0); l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); l1 = &l1[l1index & Ln_ADDR_MASK]; } else { l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); l1 = &l1[l1index & Ln_ADDR_MASK]; tl1 = pmap_load(l1); if (tl1 == 0) { /* recurse for allocating page dir */ if (_pmap_alloc_l3(pmap, NUL2E + l1index, lockp) == NULL) { --m->wire_count; /* XXX: release mem barrier? */ atomic_subtract_int( &vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } } else { l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); l2pg->wire_count++; } } l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); l2 = &l2[ptepindex & Ln_ADDR_MASK]; pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE); PTE_SYNC(l2); } pmap_resident_count_inc(pmap, 1); return (m); } static vm_page_t pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t ptepindex; pd_entry_t *pde, tpde; vm_page_t m; int lvl; /* * Calculate pagetable page index */ ptepindex = pmap_l2_pindex(va); retry: /* * Get the page directory entry */ pde = pmap_pde(pmap, va, &lvl); /* * If the page table page is mapped, we just increment the hold count, * and activate it. If we get a level 2 pde it will point to a level 3 * table. */ if (lvl == 2) { tpde = pmap_load(pde); if (tpde != 0) { m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK); m->wire_count++; return (m); } } /* * Here if the pte page isn't mapped, or if it has been deallocated. */ m = _pmap_alloc_l3(pmap, ptepindex, lockp); if (m == NULL && lockp != NULL) goto retry; return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
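 *
 * A hypothetical teardown sketch, assuming every mapping has already been
 * removed (the function below asserts a zero resident count):
 *
 *	pmap_remove(pmap, 0, VM_MAXUSER_ADDRESS);
 *	pmap_release(pmap);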
*/ void pmap_release(pmap_t pmap) { vm_page_t m; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0)); m->wire_count--; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); } #if 0 static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "LU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "LU", "Amount of KVM free"); #endif /* 0 */ /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t paddr; vm_page_t nkpg; pd_entry_t *l0, *l1, *l2; mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, L2_SIZE); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { l0 = pmap_l0(kernel_pmap, kernel_vm_end); KASSERT(pmap_load(l0) != 0, ("pmap_growkernel: No level 0 kernel entry")); l1 = pmap_l0_to_l1(l0, kernel_vm_end); if (pmap_load(l1) == 0) { /* We need a new PDP entry */ nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); pmap_load_store(l1, paddr | L1_TABLE); PTE_SYNC(l1); continue; /* try again */ } l2 = pmap_l1_to_l2(l1, kernel_vm_end); if ((pmap_load(l2) & ATTR_AF) != 0) { kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } continue; } nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); pmap_load_store(l2, paddr | L2_TABLE); PTE_SYNC(l2); pmap_invalidate_page(kernel_pmap, kernel_vm_end); kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } /*************************************************** * page management routines. 
***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 3); CTASSERT(_NPCPV == 168); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0 0xfffffffffffffffful #define PC_FREE1 0xfffffffffffffffful #define PC_FREE2 0x000000fffffffffful static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; #if 0 #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs, pv_entry_count; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif #endif /* 0 */ /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. * * Returns NULL if PV entries were reclaimed from the specified pmap. * * We do not, however, unmap 2mpages because subsequent accesses will * allocate per-page pv entries until repromotion occurs, thereby * exacerbating the shortage of free pv entries. */ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) { panic("ARM64TODO: reclaim_pv_chunk"); } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int idx, field, bit; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_frees, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, 1)); PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 64; bit = idx % 64; pc->pc_map[field] |= 1ul << bit; if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || pc->pc_map[2] != PC_FREE2) { /* 98% of the time, pc is already at the head of the list. 
*/ if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; mtx_lock(&pv_chunks_mutex); TAILQ_REMOVE(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); vm_page_unwire(m, PQ_NONE); vm_page_free(m); } /* * Returns a new PV entry, allocating a new PV chunk from the system when * needed. If this PV chunk allocation fails and a PV list lock pointer was * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is * returned. * * The given PV list lock may be released. */ static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp) { int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = ffsl(pc->pc_map[field]) - 1; break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 64 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); return (pv); } } /* No free items, allocate another chunk */ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { if (lockp == NULL) { PV_STAT(pc_chunk_tryfail++); return (NULL); } m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; } PV_STAT(atomic_add_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); dump_add_page(m->phys_addr); pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; mtx_lock(&pv_chunks_mutex); TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); return (pv); } /* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. Returns the pv entry if found and NULL * otherwise. This operation can be performed on pv lists for either 4KB or * 2MB page mappings. */ static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; break; } } return (pv); } /* * First find and then destroy the pv entry for the specified pmap and virtual * address. This operation can be performed on pv lists for either 4KB or 2MB * page mappings. 
*/ static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } /* * Conditionally create the PV entry for a 4KB page mapping if the required * memory can be allocated without resorting to reclamation. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, NULL)) != NULL) { pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; return (TRUE); } else return (FALSE); } /* * pmap_remove_l3: do the things to unmap a page in a process */ static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) { pt_entry_t old_l3; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(va, L3_SIZE); old_l3 = pmap_load_clear(l3); PTE_SYNC(l3); pmap_invalidate_page(pmap, va); if (old_l3 & ATTR_SW_WIRED) pmap->pm_stats.wired_count -= 1; pmap_resident_count_dec(pmap, 1); if (old_l3 & ATTR_SW_MANAGED) { m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK); if (pmap_page_dirty(old_l3)) vm_page_dirty(m); if (old_l3 & ATTR_AF) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); pmap_pvh_free(&m->md, pmap, va); } return (pmap_unuse_l3(pmap, va, l2e, free)); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct rwlock *lock; vm_offset_t va, va_next; pd_entry_t *l0, *l1, *l2; pt_entry_t l3_paddr, *l3; struct spglist free; int anyvalid; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); lock = NULL; for (; sva < eva; sva = va_next) { if (pmap->pm_stats.resident_count == 0) break; l0 = pmap_l0(pmap, sva); if (pmap_load(l0) == 0) { va_next = (sva + L0_SIZE) & ~L0_OFFSET; if (va_next < sva) va_next = eva; continue; } l1 = pmap_l0_to_l1(l0, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } /* * Calculate index for next page table. */ va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL) continue; l3_paddr = pmap_load(l2); /* * Weed out invalid mappings. */ if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE) continue; /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. 
*/ if (va_next > eva) va_next = eva; va = va_next; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if (l3 == NULL) panic("l3 == NULL"); if (pmap_load(l3) == 0) { if (va != va_next) { pmap_invalidate_range(pmap, va, sva); va = va_next; } continue; } if (va == va_next) va = sva; if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free, &lock)) { sva += L3_SIZE; break; } } if (va != va_next) pmap_invalidate_range(pmap, va, sva); } if (lock != NULL) rw_wunlock(lock); if (anyvalid) pmap_invalidate_all(pmap); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { pv_entry_t pv; pmap_t pmap; pd_entry_t *pde, tpde; pt_entry_t *pte, tpte; struct spglist free; int lvl; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); rw_wlock(&pvh_global_lock); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap_resident_count_dec(pmap, 1); pde = pmap_pde(pmap, pv->pv_va, &lvl); KASSERT(pde != NULL, ("pmap_remove_all: no page directory entry found")); KASSERT(lvl == 2, ("pmap_remove_all: invalid pde level %d", lvl)); tpde = pmap_load(pde); pte = pmap_l2_to_l3(pde, pv->pv_va); tpte = pmap_load(pte); if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(tpte)) cpu_dcache_wb_range(pv->pv_va, L3_SIZE); pmap_load_clear(pte); PTE_SYNC(pte); pmap_invalidate_page(pmap, pv->pv_va); if (tpte & ATTR_SW_WIRED) pmap->pm_stats.wired_count--; if ((tpte & ATTR_AF) != 0) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if (pmap_page_dirty(tpte)) vm_page_dirty(m); pmap_unuse_l3(pmap, pv->pv_va, tpde, &free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); pmap_free_zero_pages(&free); } /* * Set the physical protection on the * specified range of this map as requested. 
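 *
 * Illustrative call (not part of this change): write-protecting a range
 * leaves it readable, while requesting no access at all falls through to
 * pmap_remove() instead:
 *
 *	pmap_protect(pmap, sva, eva, VM_PROT_READ);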
*/ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t va, va_next; pd_entry_t *l0, *l1, *l2; pt_entry_t *l3p, l3; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE) return; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l0 = pmap_l0(pmap, sva); if (pmap_load(l0) == 0) { va_next = (sva + L0_SIZE) & ~L0_OFFSET; if (va_next < sva) va_next = eva; continue; } l1 = pmap_l0_to_l1(l0, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE) continue; if (va_next > eva) va_next = eva; va = va_next; for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, sva += L3_SIZE) { l3 = pmap_load(l3p); if (pmap_l3_valid(l3)) { pmap_set(l3p, ATTR_AP(ATTR_AP_RO)); PTE_SYNC(l3p); /* XXX: Use pmap_invalidate_range */ pmap_invalidate_page(pmap, va); } } } PMAP_UNLOCK(pmap); /* TODO: Only invalidate entries we are touching */ pmap_invalidate_all(pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind __unused) { struct rwlock *lock; pd_entry_t *pde; pt_entry_t new_l3, orig_l3; pt_entry_t *l3; pv_entry_t pv; vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa; vm_page_t mpte, om, l1_m, l2_m, l3_m; boolean_t nosleep; int lvl; va = trunc_page(va); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); pa = VM_PAGE_TO_PHYS(m); new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | L3_PAGE); if ((prot & VM_PROT_WRITE) == 0) new_l3 |= ATTR_AP(ATTR_AP_RO); if ((flags & PMAP_ENTER_WIRED) != 0) new_l3 |= ATTR_SW_WIRED; if ((va >> 63) == 0) new_l3 |= ATTR_AP(ATTR_AP_USER); CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); mpte = NULL; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); if (va < VM_MAXUSER_ADDRESS) { nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = pmap_alloc_l3(pmap, va, nosleep ? 
NULL : &lock); if (mpte == NULL && nosleep) { CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_RESOURCE_SHORTAGE); } pde = pmap_pde(pmap, va, &lvl); KASSERT(pde != NULL, ("pmap_enter: Invalid page entry, va: 0x%lx", va)); KASSERT(lvl == 2, ("pmap_enter: Invalid level %d", lvl)); l3 = pmap_l2_to_l3(pde, va); } else { pde = pmap_pde(pmap, va, &lvl); /* * If we get a level 2 pde it must point to a level 3 entry * otherwise we will need to create the intermediate tables */ if (lvl < 2) { switch(lvl) { default: case -1: /* Get the l0 pde to update */ pde = pmap_l0(pmap, va); KASSERT(pde != NULL, ("...")); l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l1_m == NULL) panic("pmap_enter: l1 pte_m == NULL"); if ((l1_m->flags & PG_ZERO) == 0) pmap_zero_page(l1_m); l1_pa = VM_PAGE_TO_PHYS(l1_m); pmap_load_store(pde, l1_pa | L0_TABLE); PTE_SYNC(pde); /* FALLTHROUGH */ case 0: /* Get the l1 pde to update */ pde = pmap_l1_to_l2(pde, va); KASSERT(pde != NULL, ("...")); l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l2_m == NULL) panic("pmap_enter: l2 pte_m == NULL"); if ((l2_m->flags & PG_ZERO) == 0) pmap_zero_page(l2_m); l2_pa = VM_PAGE_TO_PHYS(l2_m); pmap_load_store(pde, l2_pa | L1_TABLE); PTE_SYNC(pde); /* FALLTHROUGH */ case 1: /* Get the l2 pde to update */ pde = pmap_l1_to_l2(pde, va); l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l3_m == NULL) panic("pmap_enter: l3 pte_m == NULL"); if ((l3_m->flags & PG_ZERO) == 0) pmap_zero_page(l3_m); l3_pa = VM_PAGE_TO_PHYS(l3_m); pmap_load_store(pde, l3_pa | L2_TABLE); PTE_SYNC(pde); break; } } l3 = pmap_l2_to_l3(pde, va); pmap_invalidate_page(pmap, va); } om = NULL; orig_l3 = pmap_load(l3); opa = orig_l3 & ~ATTR_MASK; /* * Is the specified virtual address already mapped? */ if (pmap_l3_valid(orig_l3)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if ((flags & PMAP_ENTER_WIRED) != 0 && (orig_l3 & ATTR_SW_WIRED) == 0) pmap->pm_stats.wired_count++; else if ((flags & PMAP_ENTER_WIRED) == 0 && (orig_l3 & ATTR_SW_WIRED) != 0) pmap->pm_stats.wired_count--; /* * Remove the extra PT page reference. */ if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%lx", va)); } /* * Has the physical page changed? */ if (opa == pa) { /* * No, might be a protection or wiring change. */ if ((orig_l3 & ATTR_SW_MANAGED) != 0) { new_l3 |= ATTR_SW_MANAGED; if ((new_l3 & ATTR_AP(ATTR_AP_RW)) == ATTR_AP(ATTR_AP_RW)) { vm_page_aflag_set(m, PGA_WRITEABLE); } } goto validate; } /* Flush the cache, there might be uncommitted data in it */ if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3)) cpu_dcache_wb_range(va, L3_SIZE); } else { /* * Increment the counters. */ if ((new_l3 & ATTR_SW_WIRED) != 0) pmap->pm_stats.wired_count++; pmap_resident_count_inc(pmap, 1); } /* * Enter on the PV list if part of our managed memory. 
*/ if ((m->oflags & VPO_UNMANAGED) == 0) { new_l3 |= ATTR_SW_MANAGED; pv = get_pv_entry(pmap, &lock); pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) vm_page_aflag_set(m, PGA_WRITEABLE); } /* * Update the L3 entry. */ if (orig_l3 != 0) { validate: orig_l3 = pmap_load_store(l3, new_l3); PTE_SYNC(l3); opa = orig_l3 & ~ATTR_MASK; if (opa != pa) { if ((orig_l3 & ATTR_SW_MANAGED) != 0) { om = PHYS_TO_VM_PAGE(opa); if (pmap_page_dirty(orig_l3)) vm_page_dirty(om); if ((orig_l3 & ATTR_AF) != 0) vm_page_aflag_set(om, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); pmap_pvh_free(&om->md, pmap, va); } } else if (pmap_page_dirty(orig_l3)) { if ((orig_l3 & ATTR_SW_MANAGED) != 0) vm_page_dirty(m); } } else { pmap_load_store(l3, new_l3); PTE_SYNC(l3); } pmap_invalidate_page(pmap, va); if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) cpu_icache_sync_range(va, PAGE_SIZE); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { struct rwlock *lock; vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpte = NULL; m = m_start; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); m = TAILQ_NEXT(m, listq); } if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { struct rwlock *lock; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) { struct spglist free; pd_entry_t *pde; pt_entry_t *l3; vm_paddr_t pa; int lvl; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); /* * In the case that a page table page is not * resident, we are creating it here. 
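 *
 * (This on-demand allocation only applies to user addresses; for kernel
 * addresses the code below expects the page table page to exist already.)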
*/ if (va < VM_MAXUSER_ADDRESS) { vm_pindex_t l2pindex; /* * Calculate pagetable page index */ l2pindex = pmap_l2_pindex(va); if (mpte && (mpte->pindex == l2pindex)) { mpte->wire_count++; } else { /* * Get the l2 entry */ pde = pmap_pde(pmap, va, &lvl); /* * If the page table page is mapped, we just increment * the hold count, and activate it. Otherwise, we * attempt to allocate a page table page. If this * attempt fails, we don't retry. Instead, we give up. */ if (lvl == 2 && pmap_load(pde) != 0) { mpte = PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK); mpte->wire_count++; } else { /* * Pass NULL instead of the PV list lock * pointer, because we don't intend to sleep. */ mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); if (mpte == NULL) return (mpte); } } l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); l3 = &l3[pmap_l3_index(va)]; } else { mpte = NULL; pde = pmap_pde(kernel_pmap, va, &lvl); KASSERT(pde != NULL, ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx", va)); KASSERT(lvl == 2, ("pmap_enter_quick_locked: Invalid level %d", lvl)); l3 = pmap_l2_to_l3(pde, va); } if (pmap_load(l3) != 0) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_l3(pmap, va, mpte, &free)) { pmap_invalidate_page(pmap, va); pmap_free_zero_pages(&free); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap_resident_count_inc(pmap, 1); pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | ATTR_AP(ATTR_AP_RW) | L3_PAGE; /* * Now validate mapping with RO protection */ if ((m->oflags & VPO_UNMANAGED) == 0) pa |= ATTR_SW_MANAGED; pmap_load_store(l3, pa); PTE_SYNC(l3); pmap_invalidate_page(pmap, va); return (mpte); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, * so there is no need to invalidate any TLB entries. 
*/ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va_next; pd_entry_t *l0, *l1, *l2; pt_entry_t *l3; boolean_t pv_lists_locked; pv_lists_locked = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l0 = pmap_l0(pmap, sva); if (pmap_load(l0) == 0) { va_next = (sva + L0_SIZE) & ~L0_OFFSET; if (va_next < sva) va_next = eva; continue; } l1 = pmap_l0_to_l1(l0, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (pmap_load(l2) == 0) continue; if (va_next > eva) va_next = eva; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if (pmap_load(l3) == 0) continue; if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) panic("pmap_unwire: l3 %#jx is missing " "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3)); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. */ atomic_clear_long(l3, ATTR_SW_WIRED); pmap->pm_stats.wired_count--; } } if (pv_lists_locked) rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); if (off == 0 && size == PAGE_SIZE) pagezero((void *)va); else bzero((char *)va + off, size); } /* * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. 
*/ void pmap_copy_page(vm_page_t msrc, vm_page_t mdst) { vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); pagecopy((void *)src, (void *)dst); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_page_t m_a, m_b; vm_paddr_t p_a, p_b; vm_offset_t a_pg_offset, b_pg_offset; int cnt; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; m_a = ma[a_offset >> PAGE_SHIFT]; p_a = m_a->phys_addr; b_pg_offset = b_offset & PAGE_MASK; m_b = mb[b_offset >> PAGE_SHIFT]; p_b = m_b->phys_addr; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); cnt = min(cnt, PAGE_SIZE - b_pg_offset); if (__predict_false(!PHYS_IN_DMAP(p_a))) { panic("!DMAP a %lx", p_a); } else { a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; } if (__predict_false(!PHYS_IN_DMAP(p_b))) { panic("!DMAP b %lx", p_b); } else { b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; } bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } vm_offset_t pmap_quick_enter_page(vm_page_t m) { return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); } void pmap_quick_remove_page(vm_offset_t addr) { } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct rwlock *lock; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { struct rwlock *lock; pmap_t pmap; pt_entry_t *pte; pv_entry_t pv; int count, lvl, md_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: count = 0; TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pte(pmap, pv->pv_va, &lvl); if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0) count++; PMAP_UNLOCK(pmap); } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (count); } /* * Destroy all managed, non-wired mappings in the given user-space * pmap. This pmap cannot be active on any processor besides the * caller. * * This function cannot be applied to the kernel pmap. Moreover, it * is not intended for general use. It is only to be used during * process termination. Consequently, it can be implemented in ways * that make it faster than pmap_remove(). First, it can more quickly * destroy mappings by iterating over the pmap's collection of PV * entries, rather than searching the page table. Second, it doesn't * have to test and clear the page table entries atomically, because * no processor is currently accessing the user address space. 
In * particular, a page table entry's dirty bit won't change state once * this function starts. */ void pmap_remove_pages(pmap_t pmap) { pd_entry_t *pde; pt_entry_t *pte, tpte; struct spglist free; vm_page_t m; pv_entry_t pv; struct pv_chunk *pc, *npc; struct rwlock *lock; int64_t bit; uint64_t inuse, bitmask; int allfree, field, freed, idx, lvl; vm_paddr_t pa; lock = NULL; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; freed = 0; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = ffsl(inuse) - 1; bitmask = 1UL << bit; idx = field * 64 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; pde = pmap_pde(pmap, pv->pv_va, &lvl); KASSERT(pde != NULL, ("Attempting to remove an unmapped page")); KASSERT(lvl == 2, ("Invalid page directory level: %d", lvl)); pte = pmap_l2_to_l3(pde, pv->pv_va); KASSERT(pte != NULL, ("Attempting to remove an unmapped page")); tpte = pmap_load(pte); /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & ATTR_SW_WIRED) { allfree = 0; continue; } pa = tpte & ~ATTR_MASK; m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == pa, ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad pte %#jx", (uintmax_t)tpte)); /* XXX: assumes tpte is level 3 */ if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(tpte)) cpu_dcache_wb_range(pv->pv_va, L3_SIZE); pmap_load_clear(pte); PTE_SYNC(pte); pmap_invalidate_page(pmap, pv->pv_va); /* * Update the vm_page_t clean/reference bits. */ if ((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) vm_page_dirty(m); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); /* Mark free */ pc->pc_map[field] |= bitmask; pmap_resident_count_dec(pmap, 1); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; pmap_unuse_l3(pmap, pv->pv_va, pmap_load(pde), &free); freed++; } } PV_STAT(atomic_add_long(&pv_entry_frees, freed)); PV_STAT(atomic_add_int(&pv_entry_spare, freed)); PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } pmap_invalidate_all(pmap); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * This is used to check if a page has been accessed or modified. As we * don't have a bit to see if it has been modified we have to assume it * has been if the page is read/write. 
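 *
 * Concretely, a mapping is reported as modified when its access
 * permissions are read/write (ATTR_AP_RW), and as accessed when ATTR_AF
 * is set on a valid L3 page entry; these are the mask/value pairs built
 * below.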
*/ static boolean_t pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) { struct rwlock *lock; pv_entry_t pv; pt_entry_t *pte, mask, value; pmap_t pmap; int lvl, md_gen; boolean_t rv; rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pte(pmap, pv->pv_va, &lvl); KASSERT(lvl == 3, ("pmap_page_test_mappings: Invalid level %d", lvl)); mask = 0; value = 0; if (modified) { mask |= ATTR_AP_RW_BIT; value |= ATTR_AP(ATTR_AP_RW); } if (accessed) { mask |= ATTR_AF | ATTR_DESCR_MASK; value |= ATTR_AF | L3_PAGE; } rv = (pmap_load(pte) & mask) == value; PMAP_UNLOCK(pmap); if (rv) goto out; } out: rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (pmap_page_test_mappings(m, FALSE, TRUE)); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pt_entry_t *pte; boolean_t rv; int lvl; rv = FALSE; PMAP_LOCK(pmap); pte = pmap_pte(pmap, addr, &lvl); if (pte != NULL && pmap_load(pte) != 0) { rv = TRUE; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); return (pmap_page_test_mappings(m, TRUE, FALSE)); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { pmap_t pmap; struct rwlock *lock; pv_entry_t pv; pt_entry_t oldpte, *pte; int lvl, md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. 
*/ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); retry_pv_loop: rw_wlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } pte = pmap_pte(pmap, pv->pv_va, &lvl); retry: oldpte = pmap_load(pte); if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) { if (!atomic_cmpset_long(pte, oldpte, oldpte | ATTR_AP(ATTR_AP_RO))) goto retry; if ((oldpte & ATTR_AF) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); vm_page_aflag_clear(m, PGA_WRITEABLE); rw_runlock(&pvh_global_lock); } static __inline boolean_t safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) { return (FALSE); } #define PMAP_TS_REFERENCED_MAX 5 /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int pmap_ts_referenced(vm_page_t m) { pv_entry_t pv, pvf; pmap_t pmap; struct rwlock *lock; pd_entry_t *pde, tpde; pt_entry_t *pte, tpte; vm_paddr_t pa; int cleared, md_gen, not_cleared, lvl; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); SLIST_INIT(&free); cleared = 0; pa = VM_PAGE_TO_PHYS(m); lock = PHYS_TO_PV_LIST_LOCK(pa); rw_rlock(&pvh_global_lock); rw_wlock(lock); retry: not_cleared = 0; if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { if (pvf == NULL) pvf = pv; pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } pde = pmap_pde(pmap, pv->pv_va, &lvl); KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found")); KASSERT(lvl == 2, ("pmap_ts_referenced: invalid pde level %d", lvl)); tpde = pmap_load(pde); KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE, ("pmap_ts_referenced: found an invalid l2 table")); pte = pmap_l2_to_l3(pde, pv->pv_va); tpte = pmap_load(pte); if ((tpte & ATTR_AF) != 0) { if (safe_to_clear_referenced(pmap, tpte)) { /* * TODO: We don't handle the access flag * at all. We need to be able to set it in * the exception handler. */ panic("ARM64TODO: safe_to_clear_referenced\n"); } else if ((tpte & ATTR_SW_WIRED) == 0) { /* * Wired pages cannot be paged out so * doing accessed bit emulation for * them is wasted effort. We do the * hard work for unwired pages only. */ pmap_remove_l3(pmap, pte, pv->pv_va, tpde, &free, &lock); pmap_invalidate_page(pmap, pv->pv_va); cleared++; if (pvf == pv) pvf = NULL; pv = NULL; KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); } else not_cleared++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. 
*/ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + not_cleared < PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); rw_runlock(&pvh_global_lock); pmap_free_zero_pages(&free); return (cleared + not_cleared); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. */ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; /* ARM64TODO: We lack support for tracking if a page is modified */ } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return ((void *)PHYS_TO_DMAP(pa)); } void pmap_unmapbios(vm_paddr_t pa, vm_size_t size) { } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { m->md.pv_memattr = ma; /* * ARM64TODO: Implement the below (from the amd64 pmap) * If "m" is a normal page, update its direct mapping. This update * can be relied upon to perform any cache operations that are * required for data coherence. 
*/ if ((m->flags & PG_FICTITIOUS) == 0 && PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m))) panic("ARM64TODO: pmap_page_set_memattr"); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pd_entry_t *l1p, l1; pd_entry_t *l2p, l2; pt_entry_t *l3p, l3; vm_paddr_t pa; bool managed; int val; PMAP_LOCK(pmap); retry: pa = 0; val = 0; managed = false; l1p = pmap_l1(pmap, addr); if (l1p == NULL) /* No l1 */ goto done; l1 = pmap_load(l1p); if ((l1 & ATTR_DESCR_MASK) == L1_INVAL) goto done; if ((l1 & ATTR_DESCR_MASK) == L1_BLOCK) { pa = (l1 & ~ATTR_MASK) | (addr & L1_OFFSET); managed = (l1 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; val = MINCORE_SUPER | MINCORE_INCORE; if (pmap_page_dirty(l1)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((l1 & ATTR_AF) == ATTR_AF) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; goto done; } l2p = pmap_l1_to_l2(l1p, addr); if (l2p == NULL) /* No l2 */ goto done; l2 = pmap_load(l2p); if ((l2 & ATTR_DESCR_MASK) == L2_INVAL) goto done; if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) { pa = (l2 & ~ATTR_MASK) | (addr & L2_OFFSET); managed = (l2 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; val = MINCORE_SUPER | MINCORE_INCORE; if (pmap_page_dirty(l2)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((l2 & ATTR_AF) == ATTR_AF) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; goto done; } l3p = pmap_l2_to_l3(l2p, addr); if (l3p == NULL) /* No l3 */ goto done; l3 = pmap_load(l3p); if ((l3 & ATTR_DESCR_MASK) == L3_INVAL) goto done; if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) { pa = (l3 & ~ATTR_MASK) | (addr & L3_OFFSET); managed = (l3 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; val = MINCORE_INCORE; if (pmap_page_dirty(l3)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((l3 & ATTR_AF) == ATTR_AF) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } done: if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } void pmap_activate(struct thread *td) { pmap_t pmap; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); td->td_pcb->pcb_l0addr = vtophys(pmap->pm_l0); __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l0addr)); pmap_invalidate_all(pmap); critical_exit(); } void pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) { if (va >= VM_MIN_KERNEL_ADDRESS) { cpu_icache_sync_range(va, sz); } else { u_int len, offset; vm_paddr_t pa; /* Find the length of data in this page to flush */ offset = va & PAGE_MASK; len = imin(PAGE_SIZE - offset, sz); while (sz != 0) { /* Extract the physical address & find it in the DMAP */ pa = pmap_extract(pmap, va); if (pa != 0) cpu_icache_sync_range(PHYS_TO_DMAP(pa), len); /* Move to the next page */ sz -= len; va += len; /* Set the length for the next iteration */ len = imin(PAGE_SIZE, sz); } } } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { } /** * Get the kernel virtual address of a set of physical pages. If there are * physical addresses not covered by the DMAP perform a transient mapping * that will be removed when calling pmap_unmap_io_transient.
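 *
 * A sketch of the expected calling pattern (the local names below are
 * purely illustrative):
 *
 *	vm_offset_t va[1];
 *	boolean_t mapped;
 *
 *	mapped = pmap_map_io_transient(&m, va, 1, FALSE);
 *	... access the page through va[0] ...
 *	if (mapped)
 *		pmap_unmap_io_transient(&m, va, 1, FALSE);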
* * \param page The pages the caller wishes to obtain the virtual * address on the kernel memory map. * \param vaddr On return contains the kernel virtual memory address * of the pages passed in the page parameter. * \param count Number of pages passed in. * \param can_fault TRUE if the thread using the mapped pages can take * page faults, FALSE otherwise. * * \returns TRUE if the caller must call pmap_unmap_io_transient when * finished or FALSE otherwise. * */ boolean_t pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; boolean_t needs_mapping; int error, i; /* * Allocate any KVA space that we need, this is done in a separate * loop to prevent calling vmem_alloc while pinned. */ needs_mapping = FALSE; for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (__predict_false(!PHYS_IN_DMAP(paddr))) { error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, &vaddr[i]); KASSERT(error == 0, ("vmem_alloc failed: %d", error)); needs_mapping = TRUE; } else { vaddr[i] = PHYS_TO_DMAP(paddr); } } /* Exit early if everything is covered by the DMAP */ if (!needs_mapping) return (FALSE); if (!can_fault) sched_pin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (!PHYS_IN_DMAP(paddr)) { panic( "pmap_map_io_transient: TODO: Map out of DMAP data"); } } return (needs_mapping); } void pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; int i; if (!can_fault) sched_unpin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (!PHYS_IN_DMAP(paddr)) { panic("ARM64TODO: pmap_unmap_io_transient: Unmap data"); } } } Index: user/alc/PQ_LAUNDRY/sys/arm64/include/armreg.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm64/include/armreg.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/arm64/include/armreg.h (revision 303642) @@ -1,411 +1,458 @@ /*- * Copyright (c) 2013, 2014 Andrew Turner * Copyright (c) 2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _MACHINE_ARMREG_H_ #define _MACHINE_ARMREG_H_ #define INSN_SIZE 4 #define READ_SPECIALREG(reg) \ ({ uint64_t val; \ __asm __volatile("mrs %0, " __STRING(reg) : "=&r" (val)); \ val; \ }) #define WRITE_SPECIALREG(reg, val) \ __asm __volatile("msr " __STRING(reg) ", %0" : : "r"((uint64_t)val)) /* CNTHCTL_EL2 - Counter-timer Hypervisor Control register */ #define CNTHCTL_EVNTI_MASK (0xf << 4) /* Bit to trigger event stream */ #define CNTHCTL_EVNTDIR (1 << 3) /* Control transition trigger bit */ #define CNTHCTL_EVNTEN (1 << 2) /* Enable event stream */ #define CNTHCTL_EL1PCEN (1 << 1) /* Allow EL0/1 physical timer access */ #define CNTHCTL_EL1PCTEN (1 << 0) /*Allow EL0/1 physical counter access*/ /* CPACR_EL1 */ #define CPACR_FPEN_MASK (0x3 << 20) #define CPACR_FPEN_TRAP_ALL1 (0x0 << 20) /* Traps from EL0 and EL1 */ #define CPACR_FPEN_TRAP_EL0 (0x1 << 20) /* Traps from EL0 */ #define CPACR_FPEN_TRAP_ALL2 (0x2 << 20) /* Traps from EL0 and EL1 */ #define CPACR_FPEN_TRAP_NONE (0x3 << 20) /* No traps */ #define CPACR_TTA (0x1 << 28) /* CTR_EL0 - Cache Type Register */ #define CTR_DLINE_SHIFT 16 #define CTR_DLINE_MASK (0xf << CTR_DLINE_SHIFT) #define CTR_DLINE_SIZE(reg) (((reg) & CTR_DLINE_MASK) >> CTR_DLINE_SHIFT) #define CTR_ILINE_SHIFT 0 #define CTR_ILINE_MASK (0xf << CTR_ILINE_SHIFT) #define CTR_ILINE_SIZE(reg) (((reg) & CTR_ILINE_MASK) >> CTR_ILINE_SHIFT) /* DCZID_EL0 - Data Cache Zero ID register */ #define DCZID_DZP (1 << 4) /* DC ZVA prohibited if non-0 */ #define DCZID_BS_SHIFT 0 #define DCZID_BS_MASK (0xf << DCZID_BS_SHIFT) #define DCZID_BS_SIZE(reg) (((reg) & DCZID_BS_MASK) >> DCZID_BS_SHIFT) /* ESR_ELx */ #define ESR_ELx_ISS_MASK 0x00ffffff #define ISS_INSN_FnV (0x01 << 10) #define ISS_INSN_EA (0x01 << 9) #define ISS_INSN_S1PTW (0x01 << 7) #define ISS_INSN_IFSC_MASK (0x1f << 0) #define ISS_DATA_ISV (0x01 << 24) #define ISS_DATA_SAS_MASK (0x03 << 22) #define ISS_DATA_SSE (0x01 << 21) #define ISS_DATA_SRT_MASK (0x1f << 16) #define ISS_DATA_SF (0x01 << 15) #define ISS_DATA_AR (0x01 << 14) #define ISS_DATA_FnV (0x01 << 10) #define ISS_DATa_EA (0x01 << 9) #define ISS_DATa_CM (0x01 << 8) #define ISS_INSN_S1PTW (0x01 << 7) #define ISS_DATa_WnR (0x01 << 6) #define ISS_DATA_DFSC_MASK (0x1f << 0) +#define ISS_DATA_DFSC_ASF_L0 (0x00 << 0) +#define ISS_DATA_DFSC_ASF_L1 (0x01 << 0) +#define ISS_DATA_DFSC_ASF_L2 (0x02 << 0) +#define ISS_DATA_DFSC_ASF_L3 (0x03 << 0) +#define ISS_DATA_DFSC_TF_L0 (0x04 << 0) +#define ISS_DATA_DFSC_TF_L1 (0x05 << 0) +#define ISS_DATA_DFSC_TF_L2 (0x06 << 0) +#define ISS_DATA_DFSC_TF_L3 (0x07 << 0) +#define ISS_DATA_DFSC_AFF_L1 (0x09 << 0) +#define ISS_DATA_DFSC_AFF_L2 (0x0a << 0) +#define ISS_DATA_DFSC_AFF_L3 (0x0b << 0) +#define ISS_DATA_DFSC_PF_L1 (0x0d << 0) +#define ISS_DATA_DFSC_PF_L2 (0x0e << 0) +#define ISS_DATA_DFSC_PF_L3 (0x0f << 0) +#define ISS_DATA_DFSC_EXT (0x10 << 0) +#define ISS_DATA_DFSC_EXT_L0 (0x14 << 0) +#define ISS_DATA_DFSC_EXT_L1 (0x15 << 0) +#define ISS_DATA_DFSC_EXT_L2 (0x16 << 0) +#define ISS_DATA_DFSC_EXT_L3 (0x17 << 0) +#define ISS_DATA_DFSC_ECC (0x18 << 0) +#define ISS_DATA_DFSC_ECC_L0 (0x1c << 0) +#define ISS_DATA_DFSC_ECC_L1 (0x1d << 0) +#define ISS_DATA_DFSC_ECC_L2 (0x1e << 0) +#define ISS_DATA_DFSC_ECC_L3 (0x1f << 0) +#define ISS_DATA_DFSC_ALIGN (0x21 << 0) +#define ISS_DATA_DFSC_TLB_CONFLICT (0x28 << 0) #define ESR_ELx_IL (0x01 << 25) #define ESR_ELx_EC_SHIFT 26 #define ESR_ELx_EC_MASK (0x3f << 26) #define ESR_ELx_EXCEPTION(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) #define EXCP_UNKNOWN 0x00 /* Unkwn 
exception */ #define EXCP_FP_SIMD 0x07 /* VFP/SIMD trap */ #define EXCP_ILL_STATE 0x0e /* Illegal execution state */ #define EXCP_SVC 0x15 /* SVC trap */ #define EXCP_MSR 0x18 /* MSR/MRS trap */ #define EXCP_INSN_ABORT_L 0x20 /* Instruction abort, from lower EL */ #define EXCP_INSN_ABORT 0x21 /* Instruction abort, from same EL */ #define EXCP_PC_ALIGN 0x22 /* PC alignment fault */ #define EXCP_DATA_ABORT_L 0x24 /* Data abort, from lower EL */ #define EXCP_DATA_ABORT 0x25 /* Data abort, from same EL */ #define EXCP_SP_ALIGN 0x26 /* SP slignment fault */ #define EXCP_TRAP_FP 0x2c /* Trapped FP exception */ #define EXCP_SERROR 0x2f /* SError interrupt */ #define EXCP_SOFTSTP_EL0 0x32 /* Software Step, from lower EL */ #define EXCP_SOFTSTP_EL1 0x33 /* Software Step, from same EL */ #define EXCP_WATCHPT_EL1 0x35 /* Watchpoint, from same EL */ #define EXCP_BRK 0x3c /* Breakpoint */ /* ICC_CTLR_EL1 */ #define ICC_CTLR_EL1_EOIMODE (1U << 1) /* ICC_IAR1_EL1 */ #define ICC_IAR1_EL1_SPUR (0x03ff) /* ICC_IGRPEN0_EL1 */ #define ICC_IGRPEN0_EL1_EN (1U << 0) /* ICC_PMR_EL1 */ #define ICC_PMR_EL1_PRIO_MASK (0xFFUL) /* ICC_SGI1R_EL1 */ #define ICC_SGI1R_EL1_TL_MASK 0xffffUL #define ICC_SGI1R_EL1_AFF1_SHIFT 16 #define ICC_SGI1R_EL1_SGIID_SHIFT 24 #define ICC_SGI1R_EL1_AFF2_SHIFT 32 #define ICC_SGI1R_EL1_AFF3_SHIFT 48 #define ICC_SGI1R_EL1_SGIID_MASK 0xfUL #define ICC_SGI1R_EL1_IRM (0x1UL << 40) /* ICC_SRE_EL1 */ #define ICC_SRE_EL1_SRE (1U << 0) /* ICC_SRE_EL2 */ #define ICC_SRE_EL2_SRE (1U << 0) #define ICC_SRE_EL2_EN (1U << 3) /* ID_AA64DFR0_EL1 */ #define ID_AA64DFR0_MASK 0xf0f0ffff #define ID_AA64DFR0_DEBUG_VER_SHIFT 0 #define ID_AA64DFR0_DEBUG_VER_MASK (0xf << ID_AA64DFR0_DEBUG_VER_SHIFT) #define ID_AA64DFR0_DEBUG_VER(x) ((x) & ID_AA64DFR0_DEBUG_VER_MASK) #define ID_AA64DFR0_DEBUG_VER_8 (0x6 << ID_AA64DFR0_DEBUG_VER_SHIFT) #define ID_AA64DFR0_TRACE_VER_SHIFT 4 #define ID_AA64DFR0_TRACE_VER_MASK (0xf << ID_AA64DFR0_TRACE_VER_SHIFT) #define ID_AA64DFR0_TRACE_VER(x) ((x) & ID_AA64DFR0_TRACE_VER_MASK) #define ID_AA64DFR0_TRACE_VER_NONE (0x0 << ID_AA64DFR0_TRACE_VER_SHIFT) #define ID_AA64DFR0_TRACE_VER_IMPL (0x1 << ID_AA64DFR0_TRACE_VER_SHIFT) #define ID_AA64DFR0_PMU_VER_SHIFT 8 #define ID_AA64DFR0_PMU_VER_MASK (0xf << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_PMU_VER(x) ((x) & ID_AA64DFR0_PMU_VER_MASK) #define ID_AA64DFR0_PMU_VER_NONE (0x0 << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_PMU_VER_3 (0x1 << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_PMU_VER_IMPL (0xf << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_BRPS_SHIFT 12 #define ID_AA64DFR0_BRPS_MASK (0xf << ID_AA64DFR0_BRPS_SHIFT) #define ID_AA64DFR0_BRPS(x) \ ((((x) >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) + 1) #define ID_AA64DFR0_WRPS_SHIFT 20 #define ID_AA64DFR0_WRPS_MASK (0xf << ID_AA64DFR0_WRPS_SHIFT) #define ID_AA64DFR0_WRPS(x) \ ((((x) >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) + 1) #define ID_AA64DFR0_CTX_CMPS_SHIFT 28 #define ID_AA64DFR0_CTX_CMPS_MASK (0xf << ID_AA64DFR0_CTX_CMPS_SHIFT) #define ID_AA64DFR0_CTX_CMPS(x) \ ((((x) >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) + 1) /* ID_AA64ISAR0_EL1 */ #define ID_AA64ISAR0_MASK 0x000ffff0 #define ID_AA64ISAR0_AES_SHIFT 4 #define ID_AA64ISAR0_AES_MASK (0xf << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_AES(x) ((x) & ID_AA64ISAR0_AES_MASK) #define ID_AA64ISAR0_AES_NONE (0x0 << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_AES_BASE (0x1 << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_AES_PMULL (0x2 << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_SHA1_SHIFT 8 #define ID_AA64ISAR0_SHA1_MASK (0xf << 
ID_AA64ISAR0_SHA1_SHIFT) #define ID_AA64ISAR0_SHA1(x) ((x) & ID_AA64ISAR0_SHA1_MASK) #define ID_AA64ISAR0_SHA1_NONE (0x0 << ID_AA64ISAR0_SHA1_SHIFT) #define ID_AA64ISAR0_SHA1_BASE (0x1 << ID_AA64ISAR0_SHA1_SHIFT) #define ID_AA64ISAR0_SHA2_SHIFT 12 #define ID_AA64ISAR0_SHA2_MASK (0xf << ID_AA64ISAR0_SHA2_SHIFT) #define ID_AA64ISAR0_SHA2(x) ((x) & ID_AA64ISAR0_SHA2_MASK) #define ID_AA64ISAR0_SHA2_NONE (0x0 << ID_AA64ISAR0_SHA2_SHIFT) #define ID_AA64ISAR0_SHA2_BASE (0x1 << ID_AA64ISAR0_SHA2_SHIFT) #define ID_AA64ISAR0_CRC32_SHIFT 16 #define ID_AA64ISAR0_CRC32_MASK (0xf << ID_AA64ISAR0_CRC32_SHIFT) #define ID_AA64ISAR0_CRC32(x) ((x) & ID_AA64ISAR0_CRC32_MASK) #define ID_AA64ISAR0_CRC32_NONE (0x0 << ID_AA64ISAR0_CRC32_SHIFT) #define ID_AA64ISAR0_CRC32_BASE (0x1 << ID_AA64ISAR0_CRC32_SHIFT) /* ID_AA64MMFR0_EL1 */ #define ID_AA64MMFR0_MASK 0xffffffff #define ID_AA64MMFR0_PA_RANGE_SHIFT 0 #define ID_AA64MMFR0_PA_RANGE_MASK (0xf << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE(x) ((x) & ID_AA64MMFR0_PA_RANGE_MASK) #define ID_AA64MMFR0_PA_RANGE_4G (0x0 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_64G (0x1 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_1T (0x2 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_4T (0x3 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_16T (0x4 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_256T (0x5 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_ASID_BITS_SHIFT 4 #define ID_AA64MMFR0_ASID_BITS_MASK (0xf << ID_AA64MMFR0_ASID_BITS_SHIFT) #define ID_AA64MMFR0_ASID_BITS(x) ((x) & ID_AA64MMFR0_ASID_BITS_MASK) #define ID_AA64MMFR0_ASID_BITS_8 (0x0 << ID_AA64MMFR0_ASID_BITS_SHIFT) #define ID_AA64MMFR0_ASID_BITS_16 (0x2 << ID_AA64MMFR0_ASID_BITS_SHIFT) #define ID_AA64MMFR0_BIGEND_SHIFT 8 #define ID_AA64MMFR0_BIGEND_MASK (0xf << ID_AA64MMFR0_BIGEND_SHIFT) #define ID_AA64MMFR0_BIGEND(x) ((x) & ID_AA64MMFR0_BIGEND_MASK) #define ID_AA64MMFR0_BIGEND_FIXED (0x0 << ID_AA64MMFR0_BIGEND_SHIFT) #define ID_AA64MMFR0_BIGEND_MIXED (0x1 << ID_AA64MMFR0_BIGEND_SHIFT) #define ID_AA64MMFR0_S_NS_MEM_SHIFT 12 #define ID_AA64MMFR0_S_NS_MEM_MASK (0xf << ID_AA64MMFR0_S_NS_MEM_SHIFT) #define ID_AA64MMFR0_S_NS_MEM(x) ((x) & ID_AA64MMFR0_S_NS_MEM_MASK) #define ID_AA64MMFR0_S_NS_MEM_NONE (0x0 << ID_AA64MMFR0_S_NS_MEM_SHIFT) #define ID_AA64MMFR0_S_NS_MEM_DISTINCT (0x1 << ID_AA64MMFR0_S_NS_MEM_SHIFT) #define ID_AA64MMFR0_BIGEND_EL0_SHIFT 16 #define ID_AA64MMFR0_BIGEND_EL0_MASK (0xf << ID_AA64MMFR0_BIGEND_EL0_SHIFT) #define ID_AA64MMFR0_BIGEND_EL0(x) ((x) & ID_AA64MMFR0_BIGEND_EL0_MASK) #define ID_AA64MMFR0_BIGEND_EL0_FIXED (0x0 << ID_AA64MMFR0_BIGEND_EL0_SHIFT) #define ID_AA64MMFR0_BIGEND_EL0_MIXED (0x1 << ID_AA64MMFR0_BIGEND_EL0_SHIFT) #define ID_AA64MMFR0_TGRAN16_SHIFT 20 #define ID_AA64MMFR0_TGRAN16_MASK (0xf << ID_AA64MMFR0_TGRAN16_SHIFT) #define ID_AA64MMFR0_TGRAN16(x) ((x) & ID_AA64MMFR0_TGRAN16_MASK) #define ID_AA64MMFR0_TGRAN16_NONE (0x0 << ID_AA64MMFR0_TGRAN16_SHIFT) #define ID_AA64MMFR0_TGRAN16_IMPL (0x1 << ID_AA64MMFR0_TGRAN16_SHIFT) #define ID_AA64MMFR0_TGRAN64_SHIFT 24 #define ID_AA64MMFR0_TGRAN64_MASK (0xf << ID_AA64MMFR0_TGRAN64_SHIFT) #define ID_AA64MMFR0_TGRAN64(x) ((x) & ID_AA64MMFR0_TGRAN64_MASK) #define ID_AA64MMFR0_TGRAN64_IMPL (0x0 << ID_AA64MMFR0_TGRAN64_SHIFT) #define ID_AA64MMFR0_TGRAN64_NONE (0xf << ID_AA64MMFR0_TGRAN64_SHIFT) #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN4_MASK (0xf << ID_AA64MMFR0_TGRAN4_SHIFT) #define ID_AA64MMFR0_TGRAN4(x) ((x) & 
ID_AA64MMFR0_TGRAN4_MASK) #define ID_AA64MMFR0_TGRAN4_IMPL (0x0 << ID_AA64MMFR0_TGRAN4_SHIFT) #define ID_AA64MMFR0_TGRAN4_NONE (0xf << ID_AA64MMFR0_TGRAN4_SHIFT) /* ID_AA64PFR0_EL1 */ #define ID_AA64PFR0_MASK 0x0fffffff #define ID_AA64PFR0_EL0_SHIFT 0 #define ID_AA64PFR0_EL0_MASK (0xf << ID_AA64PFR0_EL0_SHIFT) #define ID_AA64PFR0_EL0(x) ((x) & ID_AA64PFR0_EL0_MASK) #define ID_AA64PFR0_EL0_64 (1 << ID_AA64PFR0_EL0_SHIFT) #define ID_AA64PFR0_EL0_64_32 (2 << ID_AA64PFR0_EL0_SHIFT) #define ID_AA64PFR0_EL1_SHIFT 4 #define ID_AA64PFR0_EL1_MASK (0xf << ID_AA64PFR0_EL1_SHIFT) #define ID_AA64PFR0_EL1(x) ((x) & ID_AA64PFR0_EL1_MASK) #define ID_AA64PFR0_EL1_64 (1 << ID_AA64PFR0_EL1_SHIFT) #define ID_AA64PFR0_EL1_64_32 (2 << ID_AA64PFR0_EL1_SHIFT) #define ID_AA64PFR0_EL2_SHIFT 8 #define ID_AA64PFR0_EL2_MASK (0xf << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL2(x) ((x) & ID_AA64PFR0_EL2_MASK) #define ID_AA64PFR0_EL2_NONE (0 << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL2_64 (1 << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL2_64_32 (2 << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL3_SHIFT 12 #define ID_AA64PFR0_EL3_MASK (0xf << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_EL3(x) ((x) & ID_AA64PFR0_EL3_MASK) #define ID_AA64PFR0_EL3_NONE (0 << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_EL3_64 (1 << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_EL3_64_32 (2 << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_FP_SHIFT 16 #define ID_AA64PFR0_FP_MASK (0xf << ID_AA64PFR0_FP_SHIFT) #define ID_AA64PFR0_FP(x) ((x) & ID_AA64PFR0_FP_MASK) #define ID_AA64PFR0_FP_IMPL (0x0 << ID_AA64PFR0_FP_SHIFT) #define ID_AA64PFR0_FP_NONE (0xf << ID_AA64PFR0_FP_SHIFT) #define ID_AA64PFR0_ADV_SIMD_SHIFT 20 #define ID_AA64PFR0_ADV_SIMD_MASK (0xf << ID_AA64PFR0_ADV_SIMD_SHIFT) #define ID_AA64PFR0_ADV_SIMD(x) ((x) & ID_AA64PFR0_ADV_SIMD_MASK) #define ID_AA64PFR0_ADV_SIMD_IMPL (0x0 << ID_AA64PFR0_ADV_SIMD_SHIFT) #define ID_AA64PFR0_ADV_SIMD_NONE (0xf << ID_AA64PFR0_ADV_SIMD_SHIFT) #define ID_AA64PFR0_GIC_BITS 0x4 /* Number of bits in GIC field */ #define ID_AA64PFR0_GIC_SHIFT 24 #define ID_AA64PFR0_GIC_MASK (0xf << ID_AA64PFR0_GIC_SHIFT) #define ID_AA64PFR0_GIC(x) ((x) & ID_AA64PFR0_GIC_MASK) #define ID_AA64PFR0_GIC_CPUIF_NONE (0x0 << ID_AA64PFR0_GIC_SHIFT) #define ID_AA64PFR0_GIC_CPUIF_EN (0x1 << ID_AA64PFR0_GIC_SHIFT) /* MAIR_EL1 - Memory Attribute Indirection Register */ #define MAIR_ATTR_MASK(idx) (0xff << ((idx) * 8)) #define MAIR_ATTR(attr, idx) ((attr) << ((idx) * 8)) + +/* PAR_EL1 - Physical Address Register */ +#define PAR_F_SHIFT 0 +#define PAR_F (0x1 << PAR_F_SHIFT) +#define PAR_SUCCESS(x) (((x) & PAR_F) == 0) +/* When PAR_F == 0 (success) */ +#define PAR_SH_SHIFT 7 +#define PAR_SH_MASK (0x3 << PAR_SH_SHIFT) +#define PAR_NS_SHIFT 9 +#define PAR_NS_MASK (0x3 << PAR_NS_SHIFT) +#define PAR_PA_SHIFT 12 +#define PAR_PA_MASK 0x0000fffffffff000 +#define PAR_ATTR_SHIFT 56 +#define PAR_ATTR_MASK (0xffUL << PAR_ATTR_SHIFT) +/* When PAR_F == 1 (aborted) */ +#define PAR_FST_SHIFT 1 +#define PAR_FST_MASK (0x3f << PAR_FST_SHIFT) +#define PAR_PTW_SHIFT 8 +#define PAR_PTW_MASK (0x1 << PAR_PTW_SHIFT) +#define PAR_S_SHIFT 9 +#define PAR_S_MASK (0x1 << PAR_S_SHIFT) /* SCTLR_EL1 - System Control Register */ #define SCTLR_RES0 0xc8222400 /* Reserved, write 0 */ #define SCTLR_RES1 0x30d00800 /* Reserved, write 1 */ #define SCTLR_M 0x00000001 #define SCTLR_A 0x00000002 #define SCTLR_C 0x00000004 #define SCTLR_SA 0x00000008 #define SCTLR_SA0 0x00000010 #define SCTLR_CP15BEN 0x00000020 #define SCTLR_THEE 0x00000040 #define SCTLR_ITD 0x00000080 #define
SCTLR_SED 0x00000100 #define SCTLR_UMA 0x00000200 #define SCTLR_I 0x00001000 #define SCTLR_DZE 0x00004000 #define SCTLR_UCT 0x00008000 #define SCTLR_nTWI 0x00010000 #define SCTLR_nTWE 0x00040000 #define SCTLR_WXN 0x00080000 #define SCTLR_EOE 0x01000000 #define SCTLR_EE 0x02000000 #define SCTLR_UCI 0x04000000 /* SPSR_EL1 */ /* * When the exception is taken in AArch64: * M[4] is 0 for AArch64 mode * M[3:2] is the exception level * M[1] is unused * M[0] is the SP select: * 0: always SP0 * 1: current ELs SP */ #define PSR_M_EL0t 0x00000000 #define PSR_M_EL1t 0x00000004 #define PSR_M_EL1h 0x00000005 #define PSR_M_EL2t 0x00000008 #define PSR_M_EL2h 0x00000009 #define PSR_M_MASK 0x0000001f #define PSR_F 0x00000040 #define PSR_I 0x00000080 #define PSR_A 0x00000100 #define PSR_D 0x00000200 #define PSR_IL 0x00100000 #define PSR_SS 0x00200000 #define PSR_V 0x10000000 #define PSR_C 0x20000000 #define PSR_Z 0x40000000 #define PSR_N 0x80000000 /* TCR_EL1 - Translation Control Register */ #define TCR_ASID_16 (1 << 36) #define TCR_IPS_SHIFT 32 #define TCR_IPS_32BIT (0 << TCR_IPS_SHIFT) #define TCR_IPS_36BIT (1 << TCR_IPS_SHIFT) #define TCR_IPS_40BIT (2 << TCR_IPS_SHIFT) #define TCR_IPS_42BIT (3 << TCR_IPS_SHIFT) #define TCR_IPS_44BIT (4 << TCR_IPS_SHIFT) #define TCR_IPS_48BIT (5 << TCR_IPS_SHIFT) #define TCR_TG1_SHIFT 30 #define TCR_TG1_16K (1 << TCR_TG1_SHIFT) #define TCR_TG1_4K (2 << TCR_TG1_SHIFT) #define TCR_TG1_64K (3 << TCR_TG1_SHIFT) #define TCR_SH1_SHIFT 28 #define TCR_SH1_IS (0x3UL << TCR_SH1_SHIFT) #define TCR_ORGN1_SHIFT 26 #define TCR_ORGN1_WBWA (0x1UL << TCR_ORGN1_SHIFT) #define TCR_IRGN1_SHIFT 24 #define TCR_IRGN1_WBWA (0x1UL << TCR_IRGN1_SHIFT) #define TCR_SH0_SHIFT 12 #define TCR_SH0_IS (0x3UL << TCR_SH0_SHIFT) #define TCR_ORGN0_SHIFT 10 #define TCR_ORGN0_WBWA (0x1UL << TCR_ORGN0_SHIFT) #define TCR_IRGN0_SHIFT 8 #define TCR_IRGN0_WBWA (0x1UL << TCR_IRGN0_SHIFT) #define TCR_CACHE_ATTRS ((TCR_IRGN0_WBWA | TCR_IRGN1_WBWA) |\ (TCR_ORGN0_WBWA | TCR_ORGN1_WBWA)) #ifdef SMP #define TCR_SMP_ATTRS (TCR_SH0_IS | TCR_SH1_IS) #else #define TCR_SMP_ATTRS 0 #endif #define TCR_T1SZ_SHIFT 16 #define TCR_T0SZ_SHIFT 0 #define TCR_T1SZ(x) ((x) << TCR_T1SZ_SHIFT) #define TCR_T0SZ(x) ((x) << TCR_T0SZ_SHIFT) #define TCR_TxSZ(x) (TCR_T1SZ(x) | TCR_T0SZ(x)) /* Saved Program Status Register */ #define DBG_SPSR_SS (0x1 << 21) /* Monitor Debug System Control Register */ #define DBG_MDSCR_SS (0x1 << 0) #define DBG_MDSCR_KDE (0x1 << 13) #define DBG_MDSCR_MDE (0x1 << 15) /* Perfomance Monitoring Counters */ #define PMCR_E (1 << 0) /* Enable all counters */ #define PMCR_P (1 << 1) /* Reset all counters */ #define PMCR_C (1 << 2) /* Clock counter reset */ #define PMCR_D (1 << 3) /* CNTR counts every 64 clk cycles */ #define PMCR_X (1 << 4) /* Export to ext. 
monitoring (ETM) */ #define PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ #define PMCR_LC (1 << 6) /* Long cycle count enable */ #define PMCR_IMP_SHIFT 24 /* Implementer code */ #define PMCR_IMP_MASK (0xff << PMCR_IMP_SHIFT) #define PMCR_IDCODE_SHIFT 16 /* Identification code */ #define PMCR_IDCODE_MASK (0xff << PMCR_IDCODE_SHIFT) #define PMCR_IDCODE_CORTEX_A57 0x01 #define PMCR_IDCODE_CORTEX_A72 0x02 #define PMCR_IDCODE_CORTEX_A53 0x03 #define PMCR_N_SHIFT 11 /* Number of counters implemented */ #define PMCR_N_MASK (0x1f << PMCR_N_SHIFT) #endif /* !_MACHINE_ARMREG_H_ */ Index: user/alc/PQ_LAUNDRY/sys/arm64/include/machdep.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm64/include/machdep.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/arm64/include/machdep.h (revision 303642) @@ -1,46 +1,54 @@ /*- * Copyright (c) 2013 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_MACHDEP_H_ #define _MACHINE_MACHDEP_H_ struct arm64_bootparams { vm_offset_t modulep; vm_offset_t kern_l1pt; /* L1 page table for the kernel */ uint64_t kern_delta; vm_offset_t kern_stack; vm_offset_t kern_l0pt; /* L1 page table for the kernel */ }; +enum arm64_bus { + ARM64_BUS_NONE, + ARM64_BUS_FDT, + ARM64_BUS_ACPI, +}; + +extern enum arm64_bus arm64_bus_method; + extern vm_paddr_t physmap[]; extern u_int physmap_idx; void initarm(struct arm64_bootparams *); extern void (*pagezero)(void *); #endif /* _MACHINE_MACHDEP_H_ */ Index: user/alc/PQ_LAUNDRY/sys/boot/common/bcache.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/boot/common/bcache.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/boot/common/bcache.c (revision 303642) @@ -1,468 +1,470 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright 2015 Toomas Soome * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include __FBSDID("$FreeBSD$"); /* * Simple hashed block cache */ #include #include #include #include #include "bootstrap.h" /* #define BCACHE_DEBUG */ #ifdef BCACHE_DEBUG # define DEBUG(fmt, args...) printf("%s: " fmt "\n" , __func__ , ## args) #else # define DEBUG(fmt, args...) #endif struct bcachectl { daddr_t bc_blkno; int bc_count; }; /* * bcache per device node. cache is allocated on device first open and freed * on last close, to save memory. The issue there is the size; biosdisk * supports up to 31 (0x1f) devices. Classic setup would use single disk * to boot from, but this has changed with zfs. */ struct bcache { struct bcachectl *bcache_ctl; caddr_t bcache_data; u_int bcache_nblks; size_t ra; }; static u_int bcache_total_nblks; /* set by bcache_init */ static u_int bcache_blksize; /* set by bcache_init */ static u_int bcache_numdev; /* set by bcache_add_dev */ /* statistics */ static u_int bcache_units; /* number of devices with cache */ static u_int bcache_unit_nblks; /* nblocks per unit */ static u_int bcache_hits; static u_int bcache_misses; static u_int bcache_ops; static u_int bcache_bypasses; static u_int bcache_bcount; static u_int bcache_rablks; #define BHASH(bc, blkno) ((blkno) & ((bc)->bcache_nblks - 1)) #define BCACHE_LOOKUP(bc, blkno) \ ((bc)->bcache_ctl[BHASH((bc), (blkno))].bc_blkno != (blkno)) #define BCACHE_READAHEAD 256 #define BCACHE_MINREADAHEAD 32 static void bcache_invalidate(struct bcache *bc, daddr_t blkno); static void bcache_insert(struct bcache *bc, daddr_t blkno); static void bcache_free_instance(struct bcache *bc); /* * Initialise the cache for (nblks) of (bsize). */ void bcache_init(u_int nblks, size_t bsize) { /* set up control data */ bcache_total_nblks = nblks; bcache_blksize = bsize; } /* * add number of devices to bcache. we have to divide cache space * between the devices, so bcache_add_dev() can be used to set up the * number. The issue is, we need to get the number before actual allocations. * bcache_add_dev() is supposed to be called from device init() call, so the * assumption is, devsw dv_init is called for plain devices first, and * for zfs, last. 
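 *
 * As a worked example (numbers are illustrative only): with
 * bcache_total_nblks = 512 and three devices registered here,
 * bcache_allocate() rounds the device count up to the next power of
 * two (4), so each per-device cache receives 512 >> 2 = 128 blocks.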
*/ void bcache_add_dev(int devices) { bcache_numdev += devices; } void * bcache_allocate(void) { u_int i; struct bcache *bc = malloc(sizeof (struct bcache)); int disks = bcache_numdev; if (disks == 0) disks = 1; /* safe guard */ if (bc == NULL) { errno = ENOMEM; return (bc); } /* * the bcache block count must be power of 2 for hash function */ i = fls(disks) - 1; /* highbit - 1 */ if (disks > (1 << i)) /* next power of 2 */ i++; bc->bcache_nblks = bcache_total_nblks >> i; bcache_unit_nblks = bc->bcache_nblks; bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize); if (bc->bcache_data == NULL) { /* dont error out yet. fall back to 32 blocks and try again */ bc->bcache_nblks = 32; bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize); } bc->bcache_ctl = malloc(bc->bcache_nblks * sizeof(struct bcachectl)); if ((bc->bcache_data == NULL) || (bc->bcache_ctl == NULL)) { bcache_free_instance(bc); errno = ENOMEM; return(NULL); } /* Flush the cache */ for (i = 0; i < bc->bcache_nblks; i++) { bc->bcache_ctl[i].bc_count = -1; bc->bcache_ctl[i].bc_blkno = -1; } bcache_units++; bc->ra = BCACHE_READAHEAD; /* optimistic read ahead */ return (bc); } void bcache_free(void *cache) { struct bcache *bc = cache; if (bc == NULL) return; bcache_free_instance(bc); bcache_units--; } /* * Handle a write request; write directly to the disk, and populate the * cache with the new values. */ static int write_strategy(void *devdata, int rw, daddr_t blk, size_t offset, size_t size, char *buf, size_t *rsize) { struct bcache_devdata *dd = (struct bcache_devdata *)devdata; struct bcache *bc = dd->dv_cache; daddr_t i, nblk; nblk = size / bcache_blksize; /* Invalidate the blocks being written */ for (i = 0; i < nblk; i++) { bcache_invalidate(bc, blk + i); } /* Write the blocks */ return (dd->dv_strategy(dd->dv_devdata, rw, blk, offset, size, buf, rsize)); } /* * Handle a read request; fill in parts of the request that can * be satisfied by the cache, use the supplied strategy routine to do * device I/O and then use the I/O results to populate the cache. */ static int read_strategy(void *devdata, int rw, daddr_t blk, size_t offset, size_t size, char *buf, size_t *rsize) { struct bcache_devdata *dd = (struct bcache_devdata *)devdata; struct bcache *bc = dd->dv_cache; size_t i, nblk, p_size, r_size, complete, ra; int result; daddr_t p_blk; caddr_t p_buf; if (bc == NULL) { errno = ENODEV; return (-1); } if (rsize != NULL) *rsize = 0; nblk = size / bcache_blksize; if ((nblk == 0 && size != 0) || offset != 0) nblk++; result = 0; complete = 1; /* Satisfy any cache hits up front, break on first miss */ for (i = 0; i < nblk; i++) { if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) { bcache_misses += (nblk - i); complete = 0; if (nblk - i > BCACHE_MINREADAHEAD && bc->ra > BCACHE_MINREADAHEAD) bc->ra >>= 1; /* reduce read ahead */ break; } else { bcache_hits++; } } if (complete) { /* whole set was in cache, return it */ if (bc->ra < BCACHE_READAHEAD) bc->ra <<= 1; /* increase read ahead */ bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset, buf, size); goto done; } /* * Fill in any misses. From check we have i pointing to first missing * block, read in all remaining blocks + readahead. * We have space at least for nblk - i before bcache wraps. 
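/*
 * Sketch of the sizing rule in bcache_allocate() above: the global
 * block budget from bcache_init() is divided by the next power of two
 * that covers the device count, so each unit's cache stays a power of
 * two and BHASH() can remain a simple mask.  Standalone example with
 * assumed numbers:
 */
#include <stdio.h>

static unsigned
next_pow2_shift(unsigned n)		/* smallest i with (1 << i) >= n */
{
	unsigned i = 0;

	while ((1u << i) < n)
		i++;
	return (i);
}

int
main(void)
{
	unsigned total_nblks = 512;	/* assumed bcache_init() budget */
	unsigned devs[] = { 1, 2, 3, 5, 8 };
	unsigned k;

	for (k = 0; k < sizeof(devs) / sizeof(devs[0]); k++)
		printf("%u device(s) -> %u blocks per unit\n",
		    devs[k], total_nblks >> next_pow2_shift(devs[k]));
	return (0);
}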
*/ p_blk = blk + i; p_buf = bc->bcache_data + (bcache_blksize * BHASH(bc, p_blk)); r_size = bc->bcache_nblks - BHASH(bc, p_blk); /* remaining blocks */ p_size = MIN(r_size, nblk - i); /* read at least those blocks */ ra = bc->bcache_nblks - BHASH(bc, p_blk + p_size); if (ra != bc->bcache_nblks) { /* do we have RA space? */ ra = MIN(bc->ra, ra); p_size += ra; } /* invalidate bcache */ for (i = 0; i < p_size; i++) { bcache_invalidate(bc, p_blk + i); } r_size = 0; /* * with read-ahead, it may happen we are attempting to read past * disk end, as bcache has no information about disk size. * in such case we should get partial read if some blocks can be * read or error, if no blocks can be read. * in either case we should return the data in bcache and only * return error if there is no data. */ result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, 0, p_size * bcache_blksize, p_buf, &r_size); r_size /= bcache_blksize; for (i = 0; i < r_size; i++) bcache_insert(bc, p_blk + i); /* update ra statistics */ if (r_size != 0) { if (r_size < p_size) bcache_rablks += (p_size - r_size); else bcache_rablks += ra; } /* check how much data can we copy */ for (i = 0; i < nblk; i++) { if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) break; } - size = i * bcache_blksize; + if (size > i * bcache_blksize) + size = i * bcache_blksize; + if (size != 0) { bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset, buf, size); result = 0; } done: if ((result == 0) && (rsize != NULL)) *rsize = size; return(result); } /* * Requests larger than 1/2 cache size will be bypassed and go * directly to the disk. XXX tune this. */ int bcache_strategy(void *devdata, int rw, daddr_t blk, size_t offset, size_t size, char *buf, size_t *rsize) { struct bcache_devdata *dd = (struct bcache_devdata *)devdata; struct bcache *bc = dd->dv_cache; u_int bcache_nblks = 0; int nblk, cblk, ret; size_t csize, isize, total; bcache_ops++; if (bc != NULL) bcache_nblks = bc->bcache_nblks; /* bypass large requests, or when the cache is inactive */ if (bc == NULL || (offset == 0 && ((size * 2 / bcache_blksize) > bcache_nblks))) { DEBUG("bypass %d from %d", size / bcache_blksize, blk); bcache_bypasses++; return (dd->dv_strategy(dd->dv_devdata, rw, blk, offset, size, buf, rsize)); } /* normalize offset */ while (offset >= bcache_blksize) { blk++; offset -= bcache_blksize; } switch (rw) { case F_READ: nblk = size / bcache_blksize; if (offset || (size != 0 && nblk == 0)) nblk++; /* read at least one block */ ret = 0; total = 0; while(size) { cblk = bcache_nblks - BHASH(bc, blk); /* # of blocks left */ cblk = MIN(cblk, nblk); if (size <= bcache_blksize) csize = size; else { csize = cblk * bcache_blksize; if (offset) csize -= (bcache_blksize - offset); } ret = read_strategy(devdata, rw, blk, offset, csize, buf+total, &isize); /* * we may have error from read ahead, if we have read some data * return partial read. */ if (ret != 0 || isize == 0) { if (total != 0) ret = 0; break; } blk += (offset+isize) / bcache_blksize; offset = 0; total += isize; size -= isize; nblk = size / bcache_blksize; } if (rsize) *rsize = total; return (ret); case F_WRITE: return write_strategy(devdata, rw, blk, offset, size, buf, rsize); } return -1; } /* * Free allocated bcache instance */ static void bcache_free_instance(struct bcache *bc) { if (bc != NULL) { if (bc->bcache_ctl) free(bc->bcache_ctl); if (bc->bcache_data) free(bc->bcache_data); free(bc); } } /* * Insert a block into the cache. 
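/*
 * Sketch of the clamp this revision adds in read_strategy(): after the
 * cache is refilled, i counts how many blocks starting at blk are now
 * resident.  The old unconditional "size = i * bcache_blksize" could
 * report (and copy) more bytes than the caller asked for when the
 * request was not block-aligned; the new code only ever shrinks size.
 */
#include <stddef.h>

static size_t
clamp_copy_size(size_t requested, size_t cached_blocks, size_t blksize)
{
	size_t avail = cached_blocks * blksize;

	return (requested > avail ? avail : requested);
}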
*/ static void bcache_insert(struct bcache *bc, daddr_t blkno) { u_int cand; cand = BHASH(bc, blkno); DEBUG("insert blk %llu -> %u # %d", blkno, cand, bcache_bcount); bc->bcache_ctl[cand].bc_blkno = blkno; bc->bcache_ctl[cand].bc_count = bcache_bcount++; } /* * Invalidate a block from the cache. */ static void bcache_invalidate(struct bcache *bc, daddr_t blkno) { u_int i; i = BHASH(bc, blkno); if (bc->bcache_ctl[i].bc_blkno == blkno) { bc->bcache_ctl[i].bc_count = -1; bc->bcache_ctl[i].bc_blkno = -1; DEBUG("invalidate blk %llu", blkno); } } #ifndef BOOT2 COMMAND_SET(bcachestat, "bcachestat", "get disk block cache stats", command_bcache); static int command_bcache(int argc, char *argv[]) { if (argc != 1) { command_errmsg = "wrong number of arguments"; return(CMD_ERROR); } printf("\ncache blocks: %d\n", bcache_total_nblks); printf("cache blocksz: %d\n", bcache_blksize); printf("cache readahead: %d\n", bcache_rablks); printf("unit cache blocks: %d\n", bcache_unit_nblks); printf("cached units: %d\n", bcache_units); printf("%d ops %d bypasses %d hits %d misses\n", bcache_ops, bcache_bypasses, bcache_hits, bcache_misses); return(CMD_OK); } #endif Index: user/alc/PQ_LAUNDRY/sys/boot/common/interp_parse.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/boot/common/interp_parse.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/boot/common/interp_parse.c (revision 303642) @@ -1,204 +1,222 @@ /*- * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Jordan K. Hubbard * 29 August 1998 * * The meat of the simple parser. */ #include __FBSDID("$FreeBSD$"); #include #include #include "bootstrap.h" static void clean(void); static int insert(int *argcp, char *buf); static char *variable_lookup(char *name); #define PARSE_BUFSIZE 1024 /* maximum size of one element */ #define MAXARGS 20 /* maximum number of elements */ static char *args[MAXARGS]; /* * parse: accept a string of input and "parse" it for backslash * substitutions and environment variable expansions (${var}), * returning an argc/argv style vector of whitespace separated * arguments. Returns 0 on success, 1 on failure (ok, ok, so I * wimped-out on the error codes! :). * * Note that the argv array returned must be freed by the caller, but * we own the space allocated for arguments and will free that on next * invocation. This allows argv consumers to modify the array if * required. * * NB: environment variables that expand to more than one whitespace * separated token will be returned as a single argv[] element, not * split in turn. Expanded text is also immune to further backslash * elimination or expansion since this is a one-pass, non-recursive * parser. You didn't specify more than this so if you want more, ask * me. 
- jkh */ #define PARSE_FAIL(expr) \ if (expr) { \ printf("fail at line %d\n", __LINE__); \ clean(); \ free(copy); \ free(buf); \ return 1; \ } /* Accept the usual delimiters for a variable, returning counterpart */ static char isdelim(int ch) { if (ch == '{') return '}'; else if (ch == '(') return ')'; return '\0'; } static int isquote(int ch) { - return (ch == '\'' || ch == '"'); + return (ch == '\''); } +static int +isdquote(int ch) +{ + return (ch == '"'); +} + int parse(int *argc, char ***argv, char *str) { int ac; char *val, *p, *q, *copy = NULL; size_t i = 0; - char token, tmp, quote, *buf; + char token, tmp, quote, dquote, *buf; enum { STR, VAR, WHITE } state; ac = *argc = 0; - quote = 0; + dquote = quote = 0; if (!str || (p = copy = backslash(str)) == NULL) return 1; /* Initialize vector and state */ clean(); state = STR; buf = (char *)malloc(PARSE_BUFSIZE); token = 0; /* And awaaaaaaaaay we go! */ while (*p) { switch (state) { case STR: if ((*p == '\\') && p[1]) { p++; PARSE_FAIL(i == (PARSE_BUFSIZE - 1)); buf[i++] = *p++; } else if (isquote(*p)) { quote = quote ? 0 : *p; - ++p; - } - else if (isspace(*p) && !quote) { + if (dquote) { /* keep quote */ + PARSE_FAIL(i == (PARSE_BUFSIZE - 1)); + buf[i++] = *p++; + } else + ++p; + } else if (isdquote(*p)) { + dquote = dquote ? 0 : *p; + if (quote) { /* keep dquote */ + PARSE_FAIL(i == (PARSE_BUFSIZE - 1)); + buf[i++] = *p++; + } else + ++p; + } else if (isspace(*p) && !quote && !dquote) { state = WHITE; if (i) { buf[i] = '\0'; PARSE_FAIL(insert(&ac, buf)); i = 0; } ++p; - } else if (*p == '$') { + } else if (*p == '$' && !quote) { token = isdelim(*(p + 1)); if (token) p += 2; else ++p; state = VAR; } else { PARSE_FAIL(i == (PARSE_BUFSIZE - 1)); buf[i++] = *p++; } break; case WHITE: if (isspace(*p)) ++p; else state = STR; break; case VAR: if (token) { PARSE_FAIL((q = strchr(p, token)) == NULL); } else { q = p; while (*q && !isspace(*q)) ++q; } tmp = *q; *q = '\0'; if ((val = variable_lookup(p)) != NULL) { size_t len = strlen(val); strncpy(buf + i, val, PARSE_BUFSIZE - (i + 1)); i += min(len, PARSE_BUFSIZE - 1); } *q = tmp; /* restore value */ p = q + (token ? 1 : 0); state = STR; break; } } + /* missing terminating ' or " */ + PARSE_FAIL(quote || dquote); /* If at end of token, add it */ if (i && state == STR) { buf[i] = '\0'; PARSE_FAIL(insert(&ac, buf)); } args[ac] = NULL; *argc = ac; *argv = (char **)malloc((sizeof(char *) * ac + 1)); bcopy(args, *argv, sizeof(char *) * ac + 1); free(buf); free(copy); return 0; } #define MAXARGS 20 /* Clean vector space */ static void clean(void) { int i; for (i = 0; i < MAXARGS; i++) { if (args[i] != NULL) { free(args[i]); args[i] = NULL; } } } static int insert(int *argcp, char *buf) { if (*argcp >= MAXARGS) return 1; args[(*argcp)++] = strdup(buf); return 0; } static char * variable_lookup(char *name) { /* XXX search "special variable" space first? */ return (char *)getenv(name); } Index: user/alc/PQ_LAUNDRY/sys/boot/zfs/libzfs.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/boot/zfs/libzfs.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/boot/zfs/libzfs.h (revision 303642) @@ -1,74 +1,74 @@ /*- * Copyright (c) 2012 Andriy Gapon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
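/*
 * Sketch of the quoting rules interp_parse.c gains in this revision:
 * single and double quotes are tracked separately, each is literal
 * inside a span opened by the other, $-expansion is suppressed only
 * inside single quotes, and a quote left open is now a parse error.
 * The standalone reduction below keeps just the quote state (no
 * whitespace splitting or ${} expansion).
 */
#include <stdio.h>

static int
strip_quotes(const char *p, char *out, unsigned outlen)
{
	char quote = 0, dquote = 0;
	unsigned i = 0;

	for (; *p != '\0' && i + 1 < outlen; p++) {
		if (*p == '\'' && !dquote)
			quote = !quote;		/* toggle, drop the quote */
		else if (*p == '"' && !quote)
			dquote = !dquote;
		else
			out[i++] = *p;		/* literal, incl. nested quote */
	}
	out[i] = '\0';
	return ((quote || dquote) ? -1 : 0);	/* missing terminator */
}

int
main(void)
{
	char buf[64];

	if (strip_quotes("set x='a \"b\" $c'", buf, sizeof(buf)) == 0)
		printf("%s\n", buf);		/* set x=a "b" $c */
	return (0);
}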
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _BOOT_LIBZFS_H_ #define _BOOT_LIBZFS_H_ #define ZFS_MAXNAMELEN 256 /* * ZFS fully-qualified device descriptor. * Note, this must match the 'struct devdesc' declaration in bootstrap.h. * Arch-specific device descriptors should be binary compatible with this * structure if they are to support ZFS. */ struct zfs_devdesc { struct devsw *d_dev; int d_type; int d_unit; void *d_opendata; uint64_t pool_guid; uint64_t root_guid; }; struct zfs_boot_args { uint32_t size; uint32_t reserved; uint64_t pool; uint64_t root; uint64_t primary_pool; uint64_t primary_vdev; char gelipw[256]; }; int zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path); char *zfs_fmtdev(void *vdev); int zfs_probe_dev(const char *devname, uint64_t *pool_guid); int zfs_list(const char *name); void init_zfs_bootenv(char *currdev); int zfs_bootenv(const char *name); -int zfs_belist_add(const char *name); +int zfs_belist_add(const char *name, uint64_t __unused); int zfs_set_env(void); extern struct devsw zfs_dev; extern struct fs_ops zfs_fsops; #endif /*_BOOT_LIBZFS_H_*/ Index: user/alc/PQ_LAUNDRY/sys/boot/zfs/zfs.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/boot/zfs/zfs.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/boot/zfs/zfs.c (revision 303642) @@ -1,897 +1,897 @@ /*- * Copyright (c) 2007 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); /* * Stand-alone file reading package. */ #include #include #include #include #include #include #include #include #include #include #include "libzfs.h" #include "zfsimpl.c" /* Define the range of indexes to be populated with ZFS Boot Environments */ #define ZFS_BE_FIRST 4 #define ZFS_BE_LAST 8 static int zfs_open(const char *path, struct open_file *f); static int zfs_write(struct open_file *f, void *buf, size_t size, size_t *resid); static int zfs_close(struct open_file *f); static int zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid); static off_t zfs_seek(struct open_file *f, off_t offset, int where); static int zfs_stat(struct open_file *f, struct stat *sb); static int zfs_readdir(struct open_file *f, struct dirent *d); struct devsw zfs_dev; struct fs_ops zfs_fsops = { "zfs", zfs_open, zfs_close, zfs_read, zfs_write, zfs_seek, zfs_stat, zfs_readdir }; /* * In-core open file. */ struct file { off_t f_seekp; /* seek pointer */ dnode_phys_t f_dnode; uint64_t f_zap_type; /* zap type for readdir */ uint64_t f_num_leafs; /* number of fzap leaf blocks */ zap_leaf_phys_t *f_zap_leaf; /* zap leaf buffer */ }; static int zfs_env_index; static int zfs_env_count; SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head); struct zfs_be_list *zfs_be_headp; struct zfs_be_entry { const char *name; SLIST_ENTRY(zfs_be_entry) entries; } *zfs_be, *zfs_be_tmp; /* * Open a file. */ static int zfs_open(const char *upath, struct open_file *f) { struct zfsmount *mount = (struct zfsmount *)f->f_devdata; struct file *fp; int rc; if (f->f_dev != &zfs_dev) return (EINVAL); /* allocate file system specific data structure */ fp = malloc(sizeof(struct file)); bzero(fp, sizeof(struct file)); f->f_fsdata = (void *)fp; rc = zfs_lookup(mount, upath, &fp->f_dnode); fp->f_seekp = 0; if (rc) { f->f_fsdata = NULL; free(fp); } return (rc); } static int zfs_close(struct open_file *f) { struct file *fp = (struct file *)f->f_fsdata; dnode_cache_obj = 0; f->f_fsdata = (void *)0; if (fp == (struct file *)0) return (0); free(fp); return (0); } /* * Copy a portion of a file into kernel memory. * Cross block boundaries when necessary. */ static int zfs_read(struct open_file *f, void *start, size_t size, size_t *resid /* out */) { const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; struct file *fp = (struct file *)f->f_fsdata; struct stat sb; size_t n; int rc; rc = zfs_stat(f, &sb); if (rc) return (rc); n = size; if (fp->f_seekp + n > sb.st_size) n = sb.st_size - fp->f_seekp; rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n); if (rc) return (rc); if (0) { int i; for (i = 0; i < n; i++) putchar(((char*) start)[i]); } fp->f_seekp += n; if (resid) *resid = size - n; return (0); } /* * Don't be silly - the bootstrap has no business writing anything. 
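/*
 * Sketch of the end-of-file clamp in zfs_read() above: the request is
 * trimmed so it never runs past st_size, and the shortfall is reported
 * back through resid.  (The loader assumes f_seekp never exceeds the
 * file size; the extra guard here is only for the standalone sketch.)
 */
#include <stddef.h>
#include <stdint.h>

static size_t
clamp_read(uint64_t seekp, uint64_t st_size, size_t want, size_t *resid)
{
	size_t n = want;

	if (seekp >= st_size)
		n = 0;
	else if (seekp + n > st_size)
		n = (size_t)(st_size - seekp);
	if (resid != NULL)
		*resid = want - n;
	return (n);
}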
*/ static int zfs_write(struct open_file *f, void *start, size_t size, size_t *resid /* out */) { return (EROFS); } static off_t zfs_seek(struct open_file *f, off_t offset, int where) { struct file *fp = (struct file *)f->f_fsdata; switch (where) { case SEEK_SET: fp->f_seekp = offset; break; case SEEK_CUR: fp->f_seekp += offset; break; case SEEK_END: { struct stat sb; int error; error = zfs_stat(f, &sb); if (error != 0) { errno = error; return (-1); } fp->f_seekp = sb.st_size - offset; break; } default: errno = EINVAL; return (-1); } return (fp->f_seekp); } static int zfs_stat(struct open_file *f, struct stat *sb) { const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; struct file *fp = (struct file *)f->f_fsdata; return (zfs_dnode_stat(spa, &fp->f_dnode, sb)); } static int zfs_readdir(struct open_file *f, struct dirent *d) { const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; struct file *fp = (struct file *)f->f_fsdata; mzap_ent_phys_t mze; struct stat sb; size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT; int rc; rc = zfs_stat(f, &sb); if (rc) return (rc); if (!S_ISDIR(sb.st_mode)) return (ENOTDIR); /* * If this is the first read, get the zap type. */ if (fp->f_seekp == 0) { rc = dnode_read(spa, &fp->f_dnode, 0, &fp->f_zap_type, sizeof(fp->f_zap_type)); if (rc) return (rc); if (fp->f_zap_type == ZBT_MICRO) { fp->f_seekp = offsetof(mzap_phys_t, mz_chunk); } else { rc = dnode_read(spa, &fp->f_dnode, offsetof(zap_phys_t, zap_num_leafs), &fp->f_num_leafs, sizeof(fp->f_num_leafs)); if (rc) return (rc); fp->f_seekp = bsize; fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize); rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, fp->f_zap_leaf, bsize); if (rc) return (rc); } } if (fp->f_zap_type == ZBT_MICRO) { mzap_next: if (fp->f_seekp >= bsize) return (ENOENT); rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, &mze, sizeof(mze)); if (rc) return (rc); fp->f_seekp += sizeof(mze); if (!mze.mze_name[0]) goto mzap_next; d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value); d->d_type = ZFS_DIRENT_TYPE(mze.mze_value); strcpy(d->d_name, mze.mze_name); d->d_namlen = strlen(d->d_name); return (0); } else { zap_leaf_t zl; zap_leaf_chunk_t *zc, *nc; int chunk; size_t namelen; char *p; uint64_t value; /* * Initialise this so we can use the ZAP size * calculating macros. */ zl.l_bs = ilog2(bsize); zl.l_phys = fp->f_zap_leaf; /* * Figure out which chunk we are currently looking at * and consider seeking to the next leaf. We use the * low bits of f_seekp as a simple chunk index. */ fzap_next: chunk = fp->f_seekp & (bsize - 1); if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) { fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize; chunk = 0; /* * Check for EOF and read the new leaf. */ if (fp->f_seekp >= bsize * fp->f_num_leafs) return (ENOENT); rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, fp->f_zap_leaf, bsize); if (rc) return (rc); } zc = &ZAP_LEAF_CHUNK(&zl, chunk); fp->f_seekp++; if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY) goto fzap_next; namelen = zc->l_entry.le_name_numints; if (namelen > sizeof(d->d_name)) namelen = sizeof(d->d_name); /* * Paste the name back together. */ nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk); p = d->d_name; while (namelen > 0) { int len; len = namelen; if (len > ZAP_LEAF_ARRAY_BYTES) len = ZAP_LEAF_ARRAY_BYTES; memcpy(p, nc->l_array.la_array, len); p += len; namelen -= len; nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next); } d->d_name[sizeof(d->d_name) - 1] = 0; /* * Assume the first eight bytes of the value are * a uint64_t. 
*/ value = fzap_leaf_value(&zl, zc); d->d_fileno = ZFS_DIRENT_OBJ(value); d->d_type = ZFS_DIRENT_TYPE(value); d->d_namlen = strlen(d->d_name); return (0); } } static int vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t size) { int fd; fd = (uintptr_t) priv; lseek(fd, offset, SEEK_SET); if (read(fd, buf, size) == size) { return 0; } else { return (EIO); } } static int zfs_dev_init(void) { spa_t *spa; spa_t *next; spa_t *prev; zfs_init(); if (archsw.arch_zfs_probe == NULL) return (ENXIO); archsw.arch_zfs_probe(); prev = NULL; spa = STAILQ_FIRST(&zfs_pools); while (spa != NULL) { next = STAILQ_NEXT(spa, spa_link); if (zfs_spa_init(spa)) { if (prev == NULL) STAILQ_REMOVE_HEAD(&zfs_pools, spa_link); else STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link); } else prev = spa; spa = next; } return (0); } struct zfs_probe_args { int fd; const char *devname; uint64_t *pool_guid; u_int secsz; }; static int zfs_diskread(void *arg, void *buf, size_t blocks, off_t offset) { struct zfs_probe_args *ppa; ppa = (struct zfs_probe_args *)arg; return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd, offset * ppa->secsz, buf, blocks * ppa->secsz)); } static int zfs_probe(int fd, uint64_t *pool_guid) { spa_t *spa; int ret; ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa); if (ret == 0 && pool_guid != NULL) *pool_guid = spa->spa_guid; return (ret); } static int zfs_probe_partition(void *arg, const char *partname, const struct ptable_entry *part) { struct zfs_probe_args *ppa, pa; struct ptable *table; char devname[32]; int ret; /* Probe only freebsd-zfs and freebsd partitions */ if (part->type != PART_FREEBSD && part->type != PART_FREEBSD_ZFS) return (0); ppa = (struct zfs_probe_args *)arg; strncpy(devname, ppa->devname, strlen(ppa->devname) - 1); devname[strlen(ppa->devname) - 1] = '\0'; sprintf(devname, "%s%s:", devname, partname); pa.fd = open(devname, O_RDONLY); if (pa.fd == -1) return (0); ret = zfs_probe(pa.fd, ppa->pool_guid); if (ret == 0) return (0); /* Do we have BSD label here? */ if (part->type == PART_FREEBSD) { pa.devname = devname; pa.pool_guid = ppa->pool_guid; pa.secsz = ppa->secsz; table = ptable_open(&pa, part->end - part->start + 1, ppa->secsz, zfs_diskread); if (table != NULL) { ptable_iterate(table, &pa, zfs_probe_partition); ptable_close(table); } } close(pa.fd); return (0); } int zfs_probe_dev(const char *devname, uint64_t *pool_guid) { struct ptable *table; struct zfs_probe_args pa; off_t mediasz; int ret; pa.fd = open(devname, O_RDONLY); if (pa.fd == -1) return (ENXIO); /* Probe the whole disk */ ret = zfs_probe(pa.fd, pool_guid); if (ret == 0) return (0); /* Probe each partition */ ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz); if (ret == 0) ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz); if (ret == 0) { pa.devname = devname; pa.pool_guid = pool_guid; table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz, zfs_diskread); if (table != NULL) { ptable_iterate(table, &pa, zfs_probe_partition); ptable_close(table); } } close(pa.fd); return (ret); } /* * Print information about ZFS pools */ static void zfs_dev_print(int verbose) { spa_t *spa; char line[80]; if (verbose) { spa_all_status(); return; } STAILQ_FOREACH(spa, &zfs_pools, spa_link) { sprintf(line, " zfs:%s\n", spa->spa_name); pager_output(line); } } /* * Attempt to open the pool described by (dev) for use by (f). */ static int zfs_dev_open(struct open_file *f, ...) 
{ va_list args; struct zfs_devdesc *dev; struct zfsmount *mount; spa_t *spa; int rv; va_start(args, f); dev = va_arg(args, struct zfs_devdesc *); va_end(args); if (dev->pool_guid == 0) spa = STAILQ_FIRST(&zfs_pools); else spa = spa_find_by_guid(dev->pool_guid); if (!spa) return (ENXIO); mount = malloc(sizeof(*mount)); rv = zfs_mount(spa, dev->root_guid, mount); if (rv != 0) { free(mount); return (rv); } if (mount->objset.os_type != DMU_OST_ZFS) { printf("Unexpected object set type %ju\n", (uintmax_t)mount->objset.os_type); free(mount); return (EIO); } f->f_devdata = mount; free(dev); return (0); } static int zfs_dev_close(struct open_file *f) { free(f->f_devdata); f->f_devdata = NULL; return (0); } static int zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize) { return (ENOSYS); } struct devsw zfs_dev = { .dv_name = "zfs", .dv_type = DEVT_ZFS, .dv_init = zfs_dev_init, .dv_strategy = zfs_dev_strategy, .dv_open = zfs_dev_open, .dv_close = zfs_dev_close, .dv_ioctl = noioctl, .dv_print = zfs_dev_print, .dv_cleanup = NULL }; int zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path) { static char rootname[ZFS_MAXNAMELEN]; static char poolname[ZFS_MAXNAMELEN]; spa_t *spa; const char *end; const char *np; const char *sep; int rv; np = devspec; if (*np != ':') return (EINVAL); np++; end = strchr(np, ':'); if (end == NULL) return (EINVAL); sep = strchr(np, '/'); if (sep == NULL || sep >= end) sep = end; memcpy(poolname, np, sep - np); poolname[sep - np] = '\0'; if (sep < end) { sep++; memcpy(rootname, sep, end - sep); rootname[end - sep] = '\0'; } else rootname[0] = '\0'; spa = spa_find_by_name(poolname); if (!spa) return (ENXIO); dev->pool_guid = spa->spa_guid; rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid); if (rv != 0) return (rv); if (path != NULL) *path = (*end == '\0') ? 
end : end + 1; dev->d_dev = &zfs_dev; dev->d_type = zfs_dev.dv_type; return (0); } char * zfs_fmtdev(void *vdev) { static char rootname[ZFS_MAXNAMELEN]; static char buf[2 * ZFS_MAXNAMELEN + 8]; struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; spa_t *spa; buf[0] = '\0'; if (dev->d_type != DEVT_ZFS) return (buf); if (dev->pool_guid == 0) { spa = STAILQ_FIRST(&zfs_pools); dev->pool_guid = spa->spa_guid; } else spa = spa_find_by_guid(dev->pool_guid); if (spa == NULL) { printf("ZFS: can't find pool by guid\n"); return (buf); } if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) { printf("ZFS: can't find root filesystem\n"); return (buf); } if (zfs_rlookup(spa, dev->root_guid, rootname)) { printf("ZFS: can't find filesystem by guid\n"); return (buf); } if (rootname[0] == '\0') sprintf(buf, "%s:%s:", dev->d_dev->dv_name, spa->spa_name); else sprintf(buf, "%s:%s/%s:", dev->d_dev->dv_name, spa->spa_name, rootname); return (buf); } int zfs_list(const char *name) { static char poolname[ZFS_MAXNAMELEN]; uint64_t objid; spa_t *spa; const char *dsname; int len; int rv; len = strlen(name); dsname = strchr(name, '/'); if (dsname != NULL) { len = dsname - name; dsname++; } else dsname = ""; memcpy(poolname, name, len); poolname[len] = '\0'; spa = spa_find_by_name(poolname); if (!spa) return (ENXIO); rv = zfs_lookup_dataset(spa, dsname, &objid); if (rv != 0) return (rv); return (zfs_list_dataset(spa, objid)); } void init_zfs_bootenv(char *currdev) { char *beroot; if (strlen(currdev) == 0) return; if(strncmp(currdev, "zfs:", 4) != 0) return; /* Remove the trailing : */ currdev[strlen(currdev) - 1] = '\0'; setenv("zfs_be_active", currdev, 1); setenv("zfs_be_currpage", "1", 1); /* Forward past zfs: */ currdev = strchr(currdev, ':'); currdev++; /* Remove the last element (current bootenv) */ beroot = strrchr(currdev, '/'); if (beroot != NULL) beroot[0] = '\0'; beroot = currdev; setenv("zfs_be_root", beroot, 1); } int zfs_bootenv(const char *name) { static char poolname[ZFS_MAXNAMELEN], *dsname, *root; char becount[4]; uint64_t objid; spa_t *spa; int len, rv, pages, perpage, currpage; if (name == NULL) return (EINVAL); if ((root = getenv("zfs_be_root")) == NULL) return (EINVAL); if (strcmp(name, root) != 0) { if (setenv("zfs_be_root", name, 1) != 0) return (ENOMEM); } SLIST_INIT(&zfs_be_head); zfs_env_count = 0; len = strlen(name); dsname = strchr(name, '/'); if (dsname != NULL) { len = dsname - name; dsname++; } else dsname = ""; memcpy(poolname, name, len); poolname[len] = '\0'; spa = spa_find_by_name(poolname); if (!spa) return (ENXIO); rv = zfs_lookup_dataset(spa, dsname, &objid); if (rv != 0) return (rv); rv = zfs_callback_dataset(spa, objid, zfs_belist_add); /* Calculate and store the number of pages of BEs */ perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1); pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 
1 : 0); snprintf(becount, 4, "%d", pages); if (setenv("zfs_be_pages", becount, 1) != 0) return (ENOMEM); /* Roll over the page counter if it has exceeded the maximum */ currpage = strtol(getenv("zfs_be_currpage"), NULL, 10); if (currpage > pages) { if (setenv("zfs_be_currpage", "1", 1) != 0) return (ENOMEM); } /* Populate the menu environment variables */ zfs_set_env(); /* Clean up the SLIST of ZFS BEs */ while (!SLIST_EMPTY(&zfs_be_head)) { zfs_be = SLIST_FIRST(&zfs_be_head); SLIST_REMOVE_HEAD(&zfs_be_head, entries); free(zfs_be); } return (rv); } int -zfs_belist_add(const char *name) +zfs_belist_add(const char *name, uint64_t value __unused) { /* Skip special datasets that start with a $ character */ if (strncmp(name, "$", 1) == 0) { return (0); } /* Add the boot environment to the head of the SLIST */ zfs_be = malloc(sizeof(struct zfs_be_entry)); if (zfs_be == NULL) { return (ENOMEM); } zfs_be->name = name; SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries); zfs_env_count++; return (0); } int zfs_set_env(void) { char envname[32], envval[256]; char *beroot, *pagenum; int rv, page, ctr; beroot = getenv("zfs_be_root"); if (beroot == NULL) { return (1); } pagenum = getenv("zfs_be_currpage"); if (pagenum != NULL) { page = strtol(pagenum, NULL, 10); } else { page = 1; } ctr = 1; rv = 0; zfs_env_index = ZFS_BE_FIRST; SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) { /* Skip to the requested page number */ if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) { ctr++; continue; } snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index); snprintf(envval, sizeof(envval), "%s", zfs_be->name); rv = setenv(envname, envval, 1); if (rv != 0) { break; } snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index); rv = setenv(envname, envval, 1); if (rv != 0){ break; } snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index); rv = setenv(envname, "set_bootenv", 1); if (rv != 0){ break; } snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index); snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name); rv = setenv(envname, envval, 1); if (rv != 0){ break; } zfs_env_index++; if (zfs_env_index > ZFS_BE_LAST) { break; } } for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) { snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index); (void)unsetenv(envname); snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index); (void)unsetenv(envname); snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index); (void)unsetenv(envname); snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index); (void)unsetenv(envname); } return (rv); } Index: user/alc/PQ_LAUNDRY/sys/boot/zfs/zfsimpl.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/boot/zfs/zfsimpl.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/boot/zfs/zfsimpl.c (revision 303642) @@ -1,2199 +1,2267 @@ /*- * Copyright (c) 2007 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
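/*
 * Sketch of why zfs_belist_add() (and its prototype in libzfs.h) gains
 * a second, unused parameter in this revision: the dataset iterator
 * hands every callback a (name, value) pair, so even callbacks that
 * only care about the name must accept both.  The callback type and
 * the iterator below are illustrative stand-ins, not the loader's
 * actual zfs_callback_dataset() declaration.
 */
#include <stdint.h>
#include <stdio.h>

typedef int (*dataset_cb_t)(const char *name, uint64_t value);

static int
iterate_datasets(dataset_cb_t cb)
{
	static const char *names[] = { "default", "11.0-RELEASE", "$MOS" };
	unsigned i;
	int rc;

	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
		if ((rc = cb(names[i], (uint64_t)i)) != 0)
			return (rc);
	return (0);
}

static int
belist_add(const char *name, uint64_t value)
{
	(void)value;			/* unused, like __unused upstream */
	if (name[0] == '$')		/* skip special datasets */
		return (0);
	printf("BE candidate: %s\n", name);
	return (0);
}

int
main(void)
{
	return (iterate_datasets(belist_add));
}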
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Stand-alone ZFS file reader. */ #include #include #include "zfsimpl.h" #include "zfssubr.c" struct zfsmount { const spa_t *spa; objset_phys_t objset; uint64_t rootobj; }; /* * List of all vdevs, chained through v_alllink. */ static vdev_list_t zfs_vdevs; /* * List of ZFS features supported for read */ static const char *features_for_read[] = { "org.illumos:lz4_compress", "com.delphix:hole_birth", "com.delphix:extensible_dataset", "com.delphix:embedded_data", "org.open-zfs:large_blocks", NULL }; /* * List of all pools, chained through spa_link. */ static spa_list_t zfs_pools; static uint64_t zfs_crc64_table[256]; static const dnode_phys_t *dnode_cache_obj = 0; static uint64_t dnode_cache_bn; static char *dnode_cache_buf; static char *zap_scratch; static char *zfs_temp_buf, *zfs_temp_end, *zfs_temp_ptr; #define TEMP_SIZE (1024 * 1024) static int zio_read(const spa_t *spa, const blkptr_t *bp, void *buf); static int zfs_get_root(const spa_t *spa, uint64_t *objid); static int zfs_rlookup(const spa_t *spa, uint64_t objnum, char *result); static void zfs_init(void) { STAILQ_INIT(&zfs_vdevs); STAILQ_INIT(&zfs_pools); zfs_temp_buf = malloc(TEMP_SIZE); zfs_temp_end = zfs_temp_buf + TEMP_SIZE; zfs_temp_ptr = zfs_temp_buf; dnode_cache_buf = malloc(SPA_MAXBLOCKSIZE); zap_scratch = malloc(SPA_MAXBLOCKSIZE); zfs_init_crc(); } static void * zfs_alloc(size_t size) { char *ptr; if (zfs_temp_ptr + size > zfs_temp_end) { printf("ZFS: out of temporary buffer space\n"); for (;;) ; } ptr = zfs_temp_ptr; zfs_temp_ptr += size; return (ptr); } static void zfs_free(void *ptr, size_t size) { zfs_temp_ptr -= size; if (zfs_temp_ptr != ptr) { printf("ZFS: zfs_alloc()/zfs_free() mismatch\n"); for (;;) ; } } static int xdr_int(const unsigned char **xdr, int *ip) { *ip = ((*xdr)[0] << 24) | ((*xdr)[1] << 16) | ((*xdr)[2] << 8) | ((*xdr)[3] << 0); (*xdr) += 4; return (0); } static int xdr_u_int(const unsigned char **xdr, u_int *ip) { *ip = ((*xdr)[0] << 24) | ((*xdr)[1] << 16) | ((*xdr)[2] << 8) | ((*xdr)[3] << 0); (*xdr) += 4; return (0); } static int xdr_uint64_t(const unsigned char **xdr, uint64_t *lp) { u_int hi, lo; xdr_u_int(xdr, &hi); xdr_u_int(xdr, &lo); *lp = (((uint64_t) hi) << 32) | lo; return (0); } static int nvlist_find(const unsigned char *nvlist, const char *name, int type, int* elementsp, void *valuep) { const unsigned char *p, *pair; int junk; int encoded_size, decoded_size; p = nvlist; xdr_int(&p, &junk); xdr_int(&p, &junk); pair = p; xdr_int(&p, &encoded_size); xdr_int(&p, &decoded_size); while (encoded_size && decoded_size) { int namelen, pairtype, elements; const char *pairname; xdr_int(&p, &namelen); pairname = (const char*) p; p += roundup(namelen, 4); xdr_int(&p, &pairtype); if (!memcmp(name, pairname, namelen) && 
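/*
 * Sketch of the XDR decoding used on the label nvlist by xdr_int() and
 * friends above: integers are stored as big-endian 32-bit words, and
 * 64-bit values as a high word followed by a low word.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t
xdr_get_u32(const unsigned char **p)
{
	uint32_t v;

	v = ((uint32_t)(*p)[0] << 24) | ((uint32_t)(*p)[1] << 16) |
	    ((uint32_t)(*p)[2] << 8) | (*p)[3];
	*p += 4;
	return (v);
}

static uint64_t
xdr_get_u64(const unsigned char **p)
{
	uint64_t hi = xdr_get_u32(p);

	return ((hi << 32) | xdr_get_u32(p));
}

int
main(void)
{
	const unsigned char buf[8] = { 0, 0, 0, 1, 0, 0, 0, 2 };
	const unsigned char *p = buf;

	printf("%llu\n", (unsigned long long)xdr_get_u64(&p)); /* 4294967298 */
	return (0);
}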
type == pairtype) { xdr_int(&p, &elements); if (elementsp) *elementsp = elements; if (type == DATA_TYPE_UINT64) { xdr_uint64_t(&p, (uint64_t *) valuep); return (0); } else if (type == DATA_TYPE_STRING) { int len; xdr_int(&p, &len); (*(const char**) valuep) = (const char*) p; return (0); } else if (type == DATA_TYPE_NVLIST || type == DATA_TYPE_NVLIST_ARRAY) { (*(const unsigned char**) valuep) = (const unsigned char*) p; return (0); } else { return (EIO); } } else { /* * Not the pair we are looking for, skip to the next one. */ p = pair + encoded_size; } pair = p; xdr_int(&p, &encoded_size); xdr_int(&p, &decoded_size); } return (EIO); } static int nvlist_check_features_for_read(const unsigned char *nvlist) { const unsigned char *p, *pair; int junk; int encoded_size, decoded_size; int rc; rc = 0; p = nvlist; xdr_int(&p, &junk); xdr_int(&p, &junk); pair = p; xdr_int(&p, &encoded_size); xdr_int(&p, &decoded_size); while (encoded_size && decoded_size) { int namelen, pairtype; const char *pairname; int i, found; found = 0; xdr_int(&p, &namelen); pairname = (const char*) p; p += roundup(namelen, 4); xdr_int(&p, &pairtype); for (i = 0; features_for_read[i] != NULL; i++) { if (!memcmp(pairname, features_for_read[i], namelen)) { found = 1; break; } } if (!found) { printf("ZFS: unsupported feature: %s\n", pairname); rc = EIO; } p = pair + encoded_size; pair = p; xdr_int(&p, &encoded_size); xdr_int(&p, &decoded_size); } return (rc); } /* * Return the next nvlist in an nvlist array. */ static const unsigned char * nvlist_next(const unsigned char *nvlist) { const unsigned char *p, *pair; int junk; int encoded_size, decoded_size; p = nvlist; xdr_int(&p, &junk); xdr_int(&p, &junk); pair = p; xdr_int(&p, &encoded_size); xdr_int(&p, &decoded_size); while (encoded_size && decoded_size) { p = pair + encoded_size; pair = p; xdr_int(&p, &encoded_size); xdr_int(&p, &decoded_size); } return p; } #ifdef TEST static const unsigned char * nvlist_print(const unsigned char *nvlist, unsigned int indent) { static const char* typenames[] = { "DATA_TYPE_UNKNOWN", "DATA_TYPE_BOOLEAN", "DATA_TYPE_BYTE", "DATA_TYPE_INT16", "DATA_TYPE_UINT16", "DATA_TYPE_INT32", "DATA_TYPE_UINT32", "DATA_TYPE_INT64", "DATA_TYPE_UINT64", "DATA_TYPE_STRING", "DATA_TYPE_BYTE_ARRAY", "DATA_TYPE_INT16_ARRAY", "DATA_TYPE_UINT16_ARRAY", "DATA_TYPE_INT32_ARRAY", "DATA_TYPE_UINT32_ARRAY", "DATA_TYPE_INT64_ARRAY", "DATA_TYPE_UINT64_ARRAY", "DATA_TYPE_STRING_ARRAY", "DATA_TYPE_HRTIME", "DATA_TYPE_NVLIST", "DATA_TYPE_NVLIST_ARRAY", "DATA_TYPE_BOOLEAN_VALUE", "DATA_TYPE_INT8", "DATA_TYPE_UINT8", "DATA_TYPE_BOOLEAN_ARRAY", "DATA_TYPE_INT8_ARRAY", "DATA_TYPE_UINT8_ARRAY" }; unsigned int i, j; const unsigned char *p, *pair; int junk; int encoded_size, decoded_size; p = nvlist; xdr_int(&p, &junk); xdr_int(&p, &junk); pair = p; xdr_int(&p, &encoded_size); xdr_int(&p, &decoded_size); while (encoded_size && decoded_size) { int namelen, pairtype, elements; const char *pairname; xdr_int(&p, &namelen); pairname = (const char*) p; p += roundup(namelen, 4); xdr_int(&p, &pairtype); for (i = 0; i < indent; i++) printf(" "); printf("%s %s", typenames[pairtype], pairname); xdr_int(&p, &elements); switch (pairtype) { case DATA_TYPE_UINT64: { uint64_t val; xdr_uint64_t(&p, &val); printf(" = 0x%jx\n", (uintmax_t)val); break; } case DATA_TYPE_STRING: { int len; xdr_int(&p, &len); printf(" = \"%s\"\n", p); break; } case DATA_TYPE_NVLIST: printf("\n"); nvlist_print(p, indent + 1); break; case DATA_TYPE_NVLIST_ARRAY: for (j = 0; j < elements; j++) { printf("[%d]\n", j); p = 
nvlist_print(p, indent + 1); if (j != elements - 1) { for (i = 0; i < indent; i++) printf(" "); printf("%s %s", typenames[pairtype], pairname); } } break; default: printf("\n"); } p = pair + encoded_size; pair = p; xdr_int(&p, &encoded_size); xdr_int(&p, &decoded_size); } return p; } #endif static int vdev_read_phys(vdev_t *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t size) { size_t psize; int rc; if (!vdev->v_phys_read) return (EIO); if (bp) { psize = BP_GET_PSIZE(bp); } else { psize = size; } /*printf("ZFS: reading %d bytes at 0x%jx to %p\n", psize, (uintmax_t)offset, buf);*/ rc = vdev->v_phys_read(vdev, vdev->v_read_priv, offset, buf, psize); if (rc) return (rc); if (bp && zio_checksum_verify(bp, buf)) return (EIO); return (0); } static int vdev_disk_read(vdev_t *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t bytes) { return (vdev_read_phys(vdev, bp, buf, offset + VDEV_LABEL_START_SIZE, bytes)); } static int vdev_mirror_read(vdev_t *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t bytes) { vdev_t *kid; int rc; rc = EIO; STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { if (kid->v_state != VDEV_STATE_HEALTHY) continue; rc = kid->v_read(kid, bp, buf, offset, bytes); if (!rc) return (0); } return (rc); } static int vdev_replacing_read(vdev_t *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t bytes) { vdev_t *kid; /* * Here we should have two kids: * First one which is the one we are replacing and we can trust * only this one to have valid data, but it might not be present. * Second one is that one we are replacing with. It is most likely * healthy, but we can't trust it has needed data, so we won't use it. */ kid = STAILQ_FIRST(&vdev->v_children); if (kid == NULL) return (EIO); if (kid->v_state != VDEV_STATE_HEALTHY) return (EIO); return (kid->v_read(kid, bp, buf, offset, bytes)); } static vdev_t * vdev_find(uint64_t guid) { vdev_t *vdev; STAILQ_FOREACH(vdev, &zfs_vdevs, v_alllink) if (vdev->v_guid == guid) return (vdev); return (0); } static vdev_t * vdev_create(uint64_t guid, vdev_read_t *read) { vdev_t *vdev; vdev = malloc(sizeof(vdev_t)); memset(vdev, 0, sizeof(vdev_t)); STAILQ_INIT(&vdev->v_children); vdev->v_guid = guid; vdev->v_state = VDEV_STATE_OFFLINE; vdev->v_read = read; vdev->v_phys_read = 0; vdev->v_read_priv = 0; STAILQ_INSERT_TAIL(&zfs_vdevs, vdev, v_alllink); return (vdev); } static int vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t *pvdev, vdev_t **vdevp, int is_newer) { int rc; uint64_t guid, id, ashift, nparity; const char *type; const char *path; vdev_t *vdev, *kid; const unsigned char *kids; int nkids, i, is_new; uint64_t is_offline, is_faulted, is_degraded, is_removed, isnt_present; if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, 0, &guid) || nvlist_find(nvlist, ZPOOL_CONFIG_ID, DATA_TYPE_UINT64, 0, &id) || nvlist_find(nvlist, ZPOOL_CONFIG_TYPE, DATA_TYPE_STRING, 0, &type)) { printf("ZFS: can't find vdev details\n"); return (ENOENT); } if (strcmp(type, VDEV_TYPE_MIRROR) && strcmp(type, VDEV_TYPE_DISK) #ifdef ZFS_TEST && strcmp(type, VDEV_TYPE_FILE) #endif && strcmp(type, VDEV_TYPE_RAIDZ) && strcmp(type, VDEV_TYPE_REPLACING)) { printf("ZFS: can only boot from disk, mirror, raidz1, raidz2 and raidz3 vdevs\n"); return (EIO); } is_offline = is_removed = is_faulted = is_degraded = isnt_present = 0; nvlist_find(nvlist, ZPOOL_CONFIG_OFFLINE, DATA_TYPE_UINT64, 0, &is_offline); nvlist_find(nvlist, ZPOOL_CONFIG_REMOVED, DATA_TYPE_UINT64, 0, &is_removed); nvlist_find(nvlist, ZPOOL_CONFIG_FAULTED, 
DATA_TYPE_UINT64, 0, &is_faulted); nvlist_find(nvlist, ZPOOL_CONFIG_DEGRADED, DATA_TYPE_UINT64, 0, &is_degraded); nvlist_find(nvlist, ZPOOL_CONFIG_NOT_PRESENT, DATA_TYPE_UINT64, 0, &isnt_present); vdev = vdev_find(guid); if (!vdev) { is_new = 1; if (!strcmp(type, VDEV_TYPE_MIRROR)) vdev = vdev_create(guid, vdev_mirror_read); else if (!strcmp(type, VDEV_TYPE_RAIDZ)) vdev = vdev_create(guid, vdev_raidz_read); else if (!strcmp(type, VDEV_TYPE_REPLACING)) vdev = vdev_create(guid, vdev_replacing_read); else vdev = vdev_create(guid, vdev_disk_read); vdev->v_id = id; vdev->v_top = pvdev != NULL ? pvdev : vdev; if (nvlist_find(nvlist, ZPOOL_CONFIG_ASHIFT, DATA_TYPE_UINT64, 0, &ashift) == 0) vdev->v_ashift = ashift; else vdev->v_ashift = 0; if (nvlist_find(nvlist, ZPOOL_CONFIG_NPARITY, DATA_TYPE_UINT64, 0, &nparity) == 0) vdev->v_nparity = nparity; else vdev->v_nparity = 0; if (nvlist_find(nvlist, ZPOOL_CONFIG_PATH, DATA_TYPE_STRING, 0, &path) == 0) { if (strncmp(path, "/dev/", 5) == 0) path += 5; vdev->v_name = strdup(path); } else { if (!strcmp(type, "raidz")) { if (vdev->v_nparity == 1) vdev->v_name = "raidz1"; else if (vdev->v_nparity == 2) vdev->v_name = "raidz2"; else if (vdev->v_nparity == 3) vdev->v_name = "raidz3"; else { printf("ZFS: can only boot from disk, mirror, raidz1, raidz2 and raidz3 vdevs\n"); return (EIO); } } else { vdev->v_name = strdup(type); } } } else { is_new = 0; } if (is_new || is_newer) { /* * This is either new vdev or we've already seen this vdev, * but from an older vdev label, so let's refresh its state * from the newer label. */ if (is_offline) vdev->v_state = VDEV_STATE_OFFLINE; else if (is_removed) vdev->v_state = VDEV_STATE_REMOVED; else if (is_faulted) vdev->v_state = VDEV_STATE_FAULTED; else if (is_degraded) vdev->v_state = VDEV_STATE_DEGRADED; else if (isnt_present) vdev->v_state = VDEV_STATE_CANT_OPEN; } rc = nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY, &nkids, &kids); /* * Its ok if we don't have any kids. */ if (rc == 0) { vdev->v_nchildren = nkids; for (i = 0; i < nkids; i++) { rc = vdev_init_from_nvlist(kids, vdev, &kid, is_newer); if (rc) return (rc); if (is_new) STAILQ_INSERT_TAIL(&vdev->v_children, kid, v_childlink); kids = nvlist_next(kids); } } else { vdev->v_nchildren = 0; } if (vdevp) *vdevp = vdev; return (0); } static void vdev_set_state(vdev_t *vdev) { vdev_t *kid; int good_kids; int bad_kids; /* * A mirror or raidz is healthy if all its kids are healthy. A * mirror is degraded if any of its kids is healthy; a raidz * is degraded if at most nparity kids are offline. 
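/*
 * Sketch of the child-state aggregation vdev_set_state() performs: a
 * mirror stays usable while any child is healthy, a raidz while no
 * more than nparity children are bad; with no bad children either one
 * is healthy.
 */
enum vstate { V_OFFLINE, V_DEGRADED, V_HEALTHY };

static enum vstate
mirror_state(int good_kids, int bad_kids)
{
	if (bad_kids == 0)
		return (V_HEALTHY);
	return (good_kids > 0 ? V_DEGRADED : V_OFFLINE);
}

static enum vstate
raidz_state(int bad_kids, int nparity)
{
	if (bad_kids == 0)
		return (V_HEALTHY);
	return (bad_kids <= nparity ? V_DEGRADED : V_OFFLINE);
}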
*/ if (STAILQ_FIRST(&vdev->v_children)) { good_kids = 0; bad_kids = 0; STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { if (kid->v_state == VDEV_STATE_HEALTHY) good_kids++; else bad_kids++; } if (bad_kids == 0) { vdev->v_state = VDEV_STATE_HEALTHY; } else { if (vdev->v_read == vdev_mirror_read) { if (good_kids) { vdev->v_state = VDEV_STATE_DEGRADED; } else { vdev->v_state = VDEV_STATE_OFFLINE; } } else if (vdev->v_read == vdev_raidz_read) { if (bad_kids > vdev->v_nparity) { vdev->v_state = VDEV_STATE_OFFLINE; } else { vdev->v_state = VDEV_STATE_DEGRADED; } } } } } static spa_t * spa_find_by_guid(uint64_t guid) { spa_t *spa; STAILQ_FOREACH(spa, &zfs_pools, spa_link) if (spa->spa_guid == guid) return (spa); return (0); } static spa_t * spa_find_by_name(const char *name) { spa_t *spa; STAILQ_FOREACH(spa, &zfs_pools, spa_link) if (!strcmp(spa->spa_name, name)) return (spa); return (0); } #ifdef BOOT2 static spa_t * spa_get_primary(void) { return (STAILQ_FIRST(&zfs_pools)); } static vdev_t * spa_get_primary_vdev(const spa_t *spa) { vdev_t *vdev; vdev_t *kid; if (spa == NULL) spa = spa_get_primary(); if (spa == NULL) return (NULL); vdev = STAILQ_FIRST(&spa->spa_vdevs); if (vdev == NULL) return (NULL); for (kid = STAILQ_FIRST(&vdev->v_children); kid != NULL; kid = STAILQ_FIRST(&vdev->v_children)) vdev = kid; return (vdev); } #endif static spa_t * spa_create(uint64_t guid) { spa_t *spa; spa = malloc(sizeof(spa_t)); memset(spa, 0, sizeof(spa_t)); STAILQ_INIT(&spa->spa_vdevs); spa->spa_guid = guid; STAILQ_INSERT_TAIL(&zfs_pools, spa, spa_link); return (spa); } static const char * state_name(vdev_state_t state) { static const char* names[] = { "UNKNOWN", "CLOSED", "OFFLINE", "REMOVED", "CANT_OPEN", "FAULTED", "DEGRADED", "ONLINE" }; return names[state]; } #ifdef BOOT2 #define pager_printf printf #else static void pager_printf(const char *fmt, ...) 
{ char line[80]; va_list args; va_start(args, fmt); vsprintf(line, fmt, args); va_end(args); pager_output(line); } #endif #define STATUS_FORMAT " %s %s\n" static void print_state(int indent, const char *name, vdev_state_t state) { int i; char buf[512]; buf[0] = 0; for (i = 0; i < indent; i++) strcat(buf, " "); strcat(buf, name); pager_printf(STATUS_FORMAT, buf, state_name(state)); } static void vdev_status(vdev_t *vdev, int indent) { vdev_t *kid; print_state(indent, vdev->v_name, vdev->v_state); STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { vdev_status(kid, indent + 1); } } static void spa_status(spa_t *spa) { static char bootfs[ZFS_MAXNAMELEN]; uint64_t rootid; vdev_t *vdev; int good_kids, bad_kids, degraded_kids; vdev_state_t state; pager_printf(" pool: %s\n", spa->spa_name); if (zfs_get_root(spa, &rootid) == 0 && zfs_rlookup(spa, rootid, bootfs) == 0) { if (bootfs[0] == '\0') pager_printf("bootfs: %s\n", spa->spa_name); else pager_printf("bootfs: %s/%s\n", spa->spa_name, bootfs); } pager_printf("config:\n\n"); pager_printf(STATUS_FORMAT, "NAME", "STATE"); good_kids = 0; degraded_kids = 0; bad_kids = 0; STAILQ_FOREACH(vdev, &spa->spa_vdevs, v_childlink) { if (vdev->v_state == VDEV_STATE_HEALTHY) good_kids++; else if (vdev->v_state == VDEV_STATE_DEGRADED) degraded_kids++; else bad_kids++; } state = VDEV_STATE_CLOSED; if (good_kids > 0 && (degraded_kids + bad_kids) == 0) state = VDEV_STATE_HEALTHY; else if ((good_kids + degraded_kids) > 0) state = VDEV_STATE_DEGRADED; print_state(0, spa->spa_name, state); STAILQ_FOREACH(vdev, &spa->spa_vdevs, v_childlink) { vdev_status(vdev, 1); } } static void spa_all_status(void) { spa_t *spa; int first = 1; STAILQ_FOREACH(spa, &zfs_pools, spa_link) { if (!first) pager_printf("\n"); first = 0; spa_status(spa); } } static int vdev_probe(vdev_phys_read_t *read, void *read_priv, spa_t **spap) { vdev_t vtmp; vdev_phys_t *vdev_label = (vdev_phys_t *) zap_scratch; spa_t *spa; vdev_t *vdev, *top_vdev, *pool_vdev; off_t off; blkptr_t bp; const unsigned char *nvlist; uint64_t val; uint64_t guid; uint64_t pool_txg, pool_guid; uint64_t is_log; const char *pool_name; const unsigned char *vdevs; const unsigned char *features; int i, rc, is_newer; char *upbuf; const struct uberblock *up; /* * Load the vdev label and figure out which * uberblock is most current. */ memset(&vtmp, 0, sizeof(vtmp)); vtmp.v_phys_read = read; vtmp.v_read_priv = read_priv; off = offsetof(vdev_label_t, vl_vdev_phys); BP_ZERO(&bp); BP_SET_LSIZE(&bp, sizeof(vdev_phys_t)); BP_SET_PSIZE(&bp, sizeof(vdev_phys_t)); BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); DVA_SET_OFFSET(BP_IDENTITY(&bp), off); ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0); if (vdev_read_phys(&vtmp, &bp, vdev_label, off, 0)) return (EIO); if (vdev_label->vp_nvlist[0] != NV_ENCODE_XDR) { return (EIO); } nvlist = (const unsigned char *) vdev_label->vp_nvlist + 4; if (nvlist_find(nvlist, ZPOOL_CONFIG_VERSION, DATA_TYPE_UINT64, 0, &val)) { return (EIO); } if (!SPA_VERSION_IS_SUPPORTED(val)) { printf("ZFS: unsupported ZFS version %u (should be %u)\n", (unsigned) val, (unsigned) SPA_VERSION); return (EIO); } /* Check ZFS features for read */ if (nvlist_find(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ, DATA_TYPE_NVLIST, 0, &features) == 0 && nvlist_check_features_for_read(features) != 0) return (EIO); if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_STATE, DATA_TYPE_UINT64, 0, &val)) { return (EIO); } if (val == POOL_STATE_DESTROYED) { /* We don't boot only from destroyed pools. 
*/ return (EIO); } if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64, 0, &pool_txg) || nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, 0, &pool_guid) || nvlist_find(nvlist, ZPOOL_CONFIG_POOL_NAME, DATA_TYPE_STRING, 0, &pool_name)) { /* * Cache and spare devices end up here - just ignore * them. */ /*printf("ZFS: can't find pool details\n");*/ return (EIO); } is_log = 0; (void) nvlist_find(nvlist, ZPOOL_CONFIG_IS_LOG, DATA_TYPE_UINT64, 0, &is_log); if (is_log) return (EIO); /* * Create the pool if this is the first time we've seen it. */ spa = spa_find_by_guid(pool_guid); if (!spa) { spa = spa_create(pool_guid); spa->spa_name = strdup(pool_name); } if (pool_txg > spa->spa_txg) { spa->spa_txg = pool_txg; is_newer = 1; } else is_newer = 0; /* * Get the vdev tree and create our in-core copy of it. * If we already have a vdev with this guid, this must * be some kind of alias (overlapping slices, dangerously dedicated * disks etc). */ if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, 0, &guid)) { return (EIO); } vdev = vdev_find(guid); if (vdev && vdev->v_phys_read) /* Has this vdev already been inited? */ return (EIO); if (nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST, 0, &vdevs)) { return (EIO); } rc = vdev_init_from_nvlist(vdevs, NULL, &top_vdev, is_newer); if (rc) return (rc); /* * Add the toplevel vdev to the pool if its not already there. */ STAILQ_FOREACH(pool_vdev, &spa->spa_vdevs, v_childlink) if (top_vdev == pool_vdev) break; if (!pool_vdev && top_vdev) STAILQ_INSERT_TAIL(&spa->spa_vdevs, top_vdev, v_childlink); /* * We should already have created an incomplete vdev for this * vdev. Find it and initialise it with our read proc. */ vdev = vdev_find(guid); if (vdev) { vdev->v_phys_read = read; vdev->v_read_priv = read_priv; vdev->v_state = VDEV_STATE_HEALTHY; } else { printf("ZFS: inconsistent nvlist contents\n"); return (EIO); } /* * Re-evaluate top-level vdev state. */ vdev_set_state(top_vdev); /* * Ok, we are happy with the pool so far. Lets find * the best uberblock and then we can actually access * the contents of the pool. */ upbuf = zfs_alloc(VDEV_UBERBLOCK_SIZE(vdev)); up = (const struct uberblock *)upbuf; for (i = 0; i < VDEV_UBERBLOCK_COUNT(vdev); i++) { off = VDEV_UBERBLOCK_OFFSET(vdev, i); BP_ZERO(&bp); DVA_SET_OFFSET(&bp.blk_dva[0], off); BP_SET_LSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev)); BP_SET_PSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev)); BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0); if (vdev_read_phys(vdev, &bp, upbuf, off, 0)) continue; if (up->ub_magic != UBERBLOCK_MAGIC) continue; if (up->ub_txg < spa->spa_txg) continue; if (up->ub_txg > spa->spa_uberblock.ub_txg) { spa->spa_uberblock = *up; } else if (up->ub_txg == spa->spa_uberblock.ub_txg) { if (up->ub_timestamp > spa->spa_uberblock.ub_timestamp) spa->spa_uberblock = *up; } } zfs_free(upbuf, VDEV_UBERBLOCK_SIZE(vdev)); if (spap) *spap = spa; return (0); } static int ilog2(int n) { int v; for (v = 0; v < 32; v++) if (n == (1 << v)) return v; return -1; } static int zio_read_gang(const spa_t *spa, const blkptr_t *bp, void *buf) { blkptr_t gbh_bp; zio_gbh_phys_t zio_gb; char *pbuf; int i; /* Artificial BP for gang block header. 
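/*
 * Sketch of the "best uberblock" rule in vdev_probe() above: among the
 * candidates whose txg is at least the label's pool_txg, prefer the
 * highest txg and break ties with the newer timestamp.
 */
#include <stdint.h>

struct ub_sketch { uint64_t txg, timestamp; };

static int
ub_better(const struct ub_sketch *cand, const struct ub_sketch *best)
{
	if (cand->txg != best->txg)
		return (cand->txg > best->txg);
	return (cand->timestamp > best->timestamp);
}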
*/ gbh_bp = *bp; BP_SET_PSIZE(&gbh_bp, SPA_GANGBLOCKSIZE); BP_SET_LSIZE(&gbh_bp, SPA_GANGBLOCKSIZE); BP_SET_CHECKSUM(&gbh_bp, ZIO_CHECKSUM_GANG_HEADER); BP_SET_COMPRESS(&gbh_bp, ZIO_COMPRESS_OFF); for (i = 0; i < SPA_DVAS_PER_BP; i++) DVA_SET_GANG(&gbh_bp.blk_dva[i], 0); /* Read gang header block using the artificial BP. */ if (zio_read(spa, &gbh_bp, &zio_gb)) return (EIO); pbuf = buf; for (i = 0; i < SPA_GBH_NBLKPTRS; i++) { blkptr_t *gbp = &zio_gb.zg_blkptr[i]; if (BP_IS_HOLE(gbp)) continue; if (zio_read(spa, gbp, pbuf)) return (EIO); pbuf += BP_GET_PSIZE(gbp); } if (zio_checksum_verify(bp, buf)) return (EIO); return (0); } static int zio_read(const spa_t *spa, const blkptr_t *bp, void *buf) { int cpfunc = BP_GET_COMPRESS(bp); uint64_t align, size; void *pbuf; int i, error; /* * Process data embedded in block pointer */ if (BP_IS_EMBEDDED(bp)) { ASSERT(BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA); size = BPE_GET_PSIZE(bp); ASSERT(size <= BPE_PAYLOAD_SIZE); if (cpfunc != ZIO_COMPRESS_OFF) pbuf = zfs_alloc(size); else pbuf = buf; decode_embedded_bp_compressed(bp, pbuf); error = 0; if (cpfunc != ZIO_COMPRESS_OFF) { error = zio_decompress_data(cpfunc, pbuf, size, buf, BP_GET_LSIZE(bp)); zfs_free(pbuf, size); } if (error != 0) printf("ZFS: i/o error - unable to decompress block pointer data, error %d\n", error); return (error); } error = EIO; for (i = 0; i < SPA_DVAS_PER_BP; i++) { const dva_t *dva = &bp->blk_dva[i]; vdev_t *vdev; int vdevid; off_t offset; if (!dva->dva_word[0] && !dva->dva_word[1]) continue; vdevid = DVA_GET_VDEV(dva); offset = DVA_GET_OFFSET(dva); STAILQ_FOREACH(vdev, &spa->spa_vdevs, v_childlink) { if (vdev->v_id == vdevid) break; } if (!vdev || !vdev->v_read) continue; size = BP_GET_PSIZE(bp); if (vdev->v_read == vdev_raidz_read) { align = 1ULL << vdev->v_top->v_ashift; if (P2PHASE(size, align) != 0) size = P2ROUNDUP(size, align); } if (size != BP_GET_PSIZE(bp) || cpfunc != ZIO_COMPRESS_OFF) pbuf = zfs_alloc(size); else pbuf = buf; if (DVA_GET_GANG(dva)) error = zio_read_gang(spa, bp, pbuf); else error = vdev->v_read(vdev, bp, pbuf, offset, size); if (error == 0) { if (cpfunc != ZIO_COMPRESS_OFF) error = zio_decompress_data(cpfunc, pbuf, BP_GET_PSIZE(bp), buf, BP_GET_LSIZE(bp)); else if (size != BP_GET_PSIZE(bp)) bcopy(pbuf, buf, BP_GET_PSIZE(bp)); } if (buf != pbuf) zfs_free(pbuf, size); if (error == 0) break; } if (error != 0) printf("ZFS: i/o error - all block copies unavailable\n"); return (error); } static int dnode_read(const spa_t *spa, const dnode_phys_t *dnode, off_t offset, void *buf, size_t buflen) { int ibshift = dnode->dn_indblkshift - SPA_BLKPTRSHIFT; int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; int nlevels = dnode->dn_nlevels; int i, rc; if (bsize > SPA_MAXBLOCKSIZE) { printf("ZFS: I/O error - blocks larger than 128K are not supported\n"); return (EIO); } /* * Note: bsize may not be a power of two here so we need to do an * actual divide rather than a bitshift. */ while (buflen > 0) { uint64_t bn = offset / bsize; int boff = offset % bsize; int ibn; const blkptr_t *indbp; blkptr_t bp; if (bn > dnode->dn_maxblkid) return (EIO); if (dnode == dnode_cache_obj && bn == dnode_cache_bn) goto cached; indbp = dnode->dn_blkptr; for (i = 0; i < nlevels; i++) { /* * Copy the bp from the indirect array so that * we can re-use the scratch buffer for multi-level * objects. 
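 */

/*
 * Illustrative sketch (not part of the original source): the per-level index
 * arithmetic used just below, isolated as a helper. "level" counts down from
 * nlevels - 1 (top indirect level) to 0 (the data level), and ibshift is
 * dn_indblkshift - SPA_BLKPTRSHIFT, i.e. log2 of the number of block
 * pointers per indirect block. The example_* name is invented.
 */
#include <stdint.h>

static int
example_indirect_index(uint64_t bn, int level, int ibshift)
{
        /* Select this level's ibshift-bit slice of the block number. */
        return ((int)(bn >> (level * ibshift)) & ((1 << ibshift) - 1));
}

/*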
*/ ibn = bn >> ((nlevels - i - 1) * ibshift); ibn &= ((1 << ibshift) - 1); bp = indbp[ibn]; if (BP_IS_HOLE(&bp)) { memset(dnode_cache_buf, 0, bsize); break; } rc = zio_read(spa, &bp, dnode_cache_buf); if (rc) return (rc); indbp = (const blkptr_t *) dnode_cache_buf; } dnode_cache_obj = dnode; dnode_cache_bn = bn; cached: /* * The buffer contains our data block. Copy what we * need from it and loop. */ i = bsize - boff; if (i > buflen) i = buflen; memcpy(buf, &dnode_cache_buf[boff], i); buf = ((char*) buf) + i; offset += i; buflen -= i; } return (0); } /* * Lookup a value in a microzap directory. Assumes that the zap * scratch buffer contains the directory contents. */ static int mzap_lookup(const dnode_phys_t *dnode, const char *name, uint64_t *value) { const mzap_phys_t *mz; const mzap_ent_phys_t *mze; size_t size; int chunks, i; /* * Microzap objects use exactly one block. Read the whole * thing. */ size = dnode->dn_datablkszsec * 512; mz = (const mzap_phys_t *) zap_scratch; chunks = size / MZAP_ENT_LEN - 1; for (i = 0; i < chunks; i++) { mze = &mz->mz_chunk[i]; if (!strcmp(mze->mze_name, name)) { *value = mze->mze_value; return (0); } } return (ENOENT); } /* * Compare a name with a zap leaf entry. Return non-zero if the name * matches. */ static int fzap_name_equal(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc, const char *name) { size_t namelen; const zap_leaf_chunk_t *nc; const char *p; namelen = zc->l_entry.le_name_numints; nc = &ZAP_LEAF_CHUNK(zl, zc->l_entry.le_name_chunk); p = name; while (namelen > 0) { size_t len; len = namelen; if (len > ZAP_LEAF_ARRAY_BYTES) len = ZAP_LEAF_ARRAY_BYTES; if (memcmp(p, nc->l_array.la_array, len)) return (0); p += len; namelen -= len; nc = &ZAP_LEAF_CHUNK(zl, nc->l_array.la_next); } return 1; } /* * Extract a uint64_t value from a zap leaf entry. */ static uint64_t fzap_leaf_value(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc) { const zap_leaf_chunk_t *vc; int i; uint64_t value; const uint8_t *p; vc = &ZAP_LEAF_CHUNK(zl, zc->l_entry.le_value_chunk); for (i = 0, value = 0, p = vc->l_array.la_array; i < 8; i++) { value = (value << 8) | p[i]; } return value; } /* * Lookup a value in a fatzap directory. Assumes that the zap scratch * buffer contains the directory header. */ static int fzap_lookup(const spa_t *spa, const dnode_phys_t *dnode, const char *name, uint64_t *value) { int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; zap_phys_t zh = *(zap_phys_t *) zap_scratch; fat_zap_t z; uint64_t *ptrtbl; uint64_t hash; int rc; if (zh.zap_magic != ZAP_MAGIC) return (EIO); z.zap_block_shift = ilog2(bsize); z.zap_phys = (zap_phys_t *) zap_scratch; /* * Figure out where the pointer table is and read it in if necessary. */ if (zh.zap_ptrtbl.zt_blk) { rc = dnode_read(spa, dnode, zh.zap_ptrtbl.zt_blk * bsize, zap_scratch, bsize); if (rc) return (rc); ptrtbl = (uint64_t *) zap_scratch; } else { ptrtbl = &ZAP_EMBEDDED_PTRTBL_ENT(&z, 0); } hash = zap_hash(zh.zap_salt, name); zap_leaf_t zl; zl.l_bs = z.zap_block_shift; off_t off = ptrtbl[hash >> (64 - zh.zap_ptrtbl.zt_shift)] << zl.l_bs; zap_leaf_chunk_t *zc; rc = dnode_read(spa, dnode, off, zap_scratch, bsize); if (rc) return (rc); zl.l_phys = (zap_leaf_phys_t *) zap_scratch; /* * Make sure this chunk matches our hash. */ if (zl.l_phys->l_hdr.lh_prefix_len > 0 && zl.l_phys->l_hdr.lh_prefix != hash >> (64 - zl.l_phys->l_hdr.lh_prefix_len)) return (ENOENT); /* * Hash within the chunk to find our entry. 
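 */

/*
 * Illustrative sketch (not from the original source): the leaf hash-table
 * index computed just below, as a stand-alone helper. The 64-bit ZAP hash is
 * consumed from the top: the first prefix_len bits routed us to this leaf,
 * and the next hash_shift bits select a bucket inside it. The example_* name
 * is invented.
 */
#include <stdint.h>

static int
example_leaf_bucket(uint64_t hash, int prefix_len, int hash_shift)
{
        int shift = 64 - hash_shift - prefix_len;

        return ((int)(hash >> shift) & ((1 << hash_shift) - 1));
}

/*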
*/ int shift = (64 - ZAP_LEAF_HASH_SHIFT(&zl) - zl.l_phys->l_hdr.lh_prefix_len); int h = (hash >> shift) & ((1 << ZAP_LEAF_HASH_SHIFT(&zl)) - 1); h = zl.l_phys->l_hash[h]; if (h == 0xffff) return (ENOENT); zc = &ZAP_LEAF_CHUNK(&zl, h); while (zc->l_entry.le_hash != hash) { if (zc->l_entry.le_next == 0xffff) { zc = 0; break; } zc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_next); } if (fzap_name_equal(&zl, zc, name)) { if (zc->l_entry.le_value_intlen * zc->l_entry.le_value_numints > 8) return (E2BIG); *value = fzap_leaf_value(&zl, zc); return (0); } return (ENOENT); } /* * Lookup a name in a zap object and return its value as a uint64_t. */ static int zap_lookup(const spa_t *spa, const dnode_phys_t *dnode, const char *name, uint64_t *value) { int rc; uint64_t zap_type; size_t size = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; rc = dnode_read(spa, dnode, 0, zap_scratch, size); if (rc) return (rc); zap_type = *(uint64_t *) zap_scratch; if (zap_type == ZBT_MICRO) return mzap_lookup(dnode, name, value); else if (zap_type == ZBT_HEADER) return fzap_lookup(spa, dnode, name, value); printf("ZFS: invalid zap_type=%d\n", (int)zap_type); return (EIO); } /* * List a microzap directory. Assumes that the zap scratch buffer contains * the directory contents. */ static int -mzap_list(const dnode_phys_t *dnode, int (*callback)(const char *)) +mzap_list(const dnode_phys_t *dnode, int (*callback)(const char *, uint64_t)) { const mzap_phys_t *mz; const mzap_ent_phys_t *mze; size_t size; - int chunks, i; + int chunks, i, rc; /* * Microzap objects use exactly one block. Read the whole * thing. */ size = dnode->dn_datablkszsec * 512; mz = (const mzap_phys_t *) zap_scratch; chunks = size / MZAP_ENT_LEN - 1; for (i = 0; i < chunks; i++) { mze = &mz->mz_chunk[i]; - if (mze->mze_name[0]) - //printf("%-32s 0x%jx\n", mze->mze_name, (uintmax_t)mze->mze_value); - callback(mze->mze_name); + if (mze->mze_name[0]) { + rc = callback(mze->mze_name, mze->mze_value); + if (rc != 0) + return (rc); + } } return (0); } /* * List a fatzap directory. Assumes that the zap scratch buffer contains * the directory header. */ static int -fzap_list(const spa_t *spa, const dnode_phys_t *dnode, int (*callback)(const char *)) +fzap_list(const spa_t *spa, const dnode_phys_t *dnode, int (*callback)(const char *, uint64_t)) { int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; zap_phys_t zh = *(zap_phys_t *) zap_scratch; fat_zap_t z; - int i, j; + int i, j, rc; if (zh.zap_magic != ZAP_MAGIC) return (EIO); z.zap_block_shift = ilog2(bsize); z.zap_phys = (zap_phys_t *) zap_scratch; /* * This assumes that the leaf blocks start at block 1. The * documentation isn't exactly clear on this. */ zap_leaf_t zl; zl.l_bs = z.zap_block_shift; for (i = 0; i < zh.zap_num_leafs; i++) { off_t off = (i + 1) << zl.l_bs; char name[256], *p; uint64_t value; if (dnode_read(spa, dnode, off, zap_scratch, bsize)) return (EIO); zl.l_phys = (zap_leaf_phys_t *) zap_scratch; for (j = 0; j < ZAP_LEAF_NUMCHUNKS(&zl); j++) { zap_leaf_chunk_t *zc, *nc; int namelen; zc = &ZAP_LEAF_CHUNK(&zl, j); if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY) continue; namelen = zc->l_entry.le_name_numints; if (namelen > sizeof(name)) namelen = sizeof(name); /* * Paste the name back together. 
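 */

/*
 * Illustrative sketch (not part of the original source): the chunk-chaining
 * idea used below to paste a name back together. Leaf array chunks each hold
 * a small fixed number of bytes and are linked by a "next" index; the struct
 * and constants here are simplified stand-ins, not the on-disk layout.
 */
#include <stddef.h>
#include <string.h>

#define EXAMPLE_ARRAY_BYTES     21      /* per-chunk payload, like ZAP_LEAF_ARRAY_BYTES */
#define EXAMPLE_CHAIN_END       0xffff

struct example_chunk {
        unsigned char   bytes[EXAMPLE_ARRAY_BYTES];
        unsigned short  next;           /* next chunk index, or EXAMPLE_CHAIN_END */
};

static void
example_copy_name(const struct example_chunk *chunks, unsigned short first,
    size_t namelen, char *out)
{
        unsigned short idx = first;

        while (namelen > 0) {
                size_t len = namelen > EXAMPLE_ARRAY_BYTES ?
                    EXAMPLE_ARRAY_BYTES : namelen;

                memcpy(out, chunks[idx].bytes, len);
                out += len;
                namelen -= len;
                idx = chunks[idx].next;
        }
        *out = '\0';
}

/*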
*/ nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk); p = name; while (namelen > 0) { int len; len = namelen; if (len > ZAP_LEAF_ARRAY_BYTES) len = ZAP_LEAF_ARRAY_BYTES; memcpy(p, nc->l_array.la_array, len); p += len; namelen -= len; nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next); } /* * Assume the first eight bytes of the value are * a uint64_t. */ value = fzap_leaf_value(&zl, zc); //printf("%s 0x%jx\n", name, (uintmax_t)value); - callback((const char *)name); + rc = callback((const char *)name, value); + if (rc != 0) + return (rc); } } return (0); } -static int zfs_printf(const char *name) +static int zfs_printf(const char *name, uint64_t value __unused) { printf("%s\n", name); return (0); } /* * List a zap directory. */ static int zap_list(const spa_t *spa, const dnode_phys_t *dnode) { uint64_t zap_type; size_t size = dnode->dn_datablkszsec * 512; if (dnode_read(spa, dnode, 0, zap_scratch, size)) return (EIO); zap_type = *(uint64_t *) zap_scratch; if (zap_type == ZBT_MICRO) return mzap_list(dnode, zfs_printf); else return fzap_list(spa, dnode, zfs_printf); } static int objset_get_dnode(const spa_t *spa, const objset_phys_t *os, uint64_t objnum, dnode_phys_t *dnode) { off_t offset; offset = objnum * sizeof(dnode_phys_t); return dnode_read(spa, &os->os_meta_dnode, offset, dnode, sizeof(dnode_phys_t)); } static int mzap_rlookup(const spa_t *spa, const dnode_phys_t *dnode, char *name, uint64_t value) { const mzap_phys_t *mz; const mzap_ent_phys_t *mze; size_t size; int chunks, i; /* * Microzap objects use exactly one block. Read the whole * thing. */ size = dnode->dn_datablkszsec * 512; mz = (const mzap_phys_t *) zap_scratch; chunks = size / MZAP_ENT_LEN - 1; for (i = 0; i < chunks; i++) { mze = &mz->mz_chunk[i]; if (value == mze->mze_value) { strcpy(name, mze->mze_name); return (0); } } return (ENOENT); } static void fzap_name_copy(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc, char *name) { size_t namelen; const zap_leaf_chunk_t *nc; char *p; namelen = zc->l_entry.le_name_numints; nc = &ZAP_LEAF_CHUNK(zl, zc->l_entry.le_name_chunk); p = name; while (namelen > 0) { size_t len; len = namelen; if (len > ZAP_LEAF_ARRAY_BYTES) len = ZAP_LEAF_ARRAY_BYTES; memcpy(p, nc->l_array.la_array, len); p += len; namelen -= len; nc = &ZAP_LEAF_CHUNK(zl, nc->l_array.la_next); } *p = '\0'; } static int fzap_rlookup(const spa_t *spa, const dnode_phys_t *dnode, char *name, uint64_t value) { int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; zap_phys_t zh = *(zap_phys_t *) zap_scratch; fat_zap_t z; int i, j; if (zh.zap_magic != ZAP_MAGIC) return (EIO); z.zap_block_shift = ilog2(bsize); z.zap_phys = (zap_phys_t *) zap_scratch; /* * This assumes that the leaf blocks start at block 1. The * documentation isn't exactly clear on this. 
*/ zap_leaf_t zl; zl.l_bs = z.zap_block_shift; for (i = 0; i < zh.zap_num_leafs; i++) { off_t off = (i + 1) << zl.l_bs; if (dnode_read(spa, dnode, off, zap_scratch, bsize)) return (EIO); zl.l_phys = (zap_leaf_phys_t *) zap_scratch; for (j = 0; j < ZAP_LEAF_NUMCHUNKS(&zl); j++) { zap_leaf_chunk_t *zc; zc = &ZAP_LEAF_CHUNK(&zl, j); if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY) continue; if (zc->l_entry.le_value_intlen != 8 || zc->l_entry.le_value_numints != 1) continue; if (fzap_leaf_value(&zl, zc) == value) { fzap_name_copy(&zl, zc, name); return (0); } } } return (ENOENT); } static int zap_rlookup(const spa_t *spa, const dnode_phys_t *dnode, char *name, uint64_t value) { int rc; uint64_t zap_type; size_t size = dnode->dn_datablkszsec * 512; rc = dnode_read(spa, dnode, 0, zap_scratch, size); if (rc) return (rc); zap_type = *(uint64_t *) zap_scratch; if (zap_type == ZBT_MICRO) return mzap_rlookup(spa, dnode, name, value); else return fzap_rlookup(spa, dnode, name, value); } static int zfs_rlookup(const spa_t *spa, uint64_t objnum, char *result) { char name[256]; char component[256]; uint64_t dir_obj, parent_obj, child_dir_zapobj; dnode_phys_t child_dir_zap, dataset, dir, parent; dsl_dir_phys_t *dd; dsl_dataset_phys_t *ds; char *p; int len; p = &name[sizeof(name) - 1]; *p = '\0'; if (objset_get_dnode(spa, &spa->spa_mos, objnum, &dataset)) { printf("ZFS: can't find dataset %ju\n", (uintmax_t)objnum); return (EIO); } ds = (dsl_dataset_phys_t *)&dataset.dn_bonus; dir_obj = ds->ds_dir_obj; for (;;) { if (objset_get_dnode(spa, &spa->spa_mos, dir_obj, &dir) != 0) return (EIO); dd = (dsl_dir_phys_t *)&dir.dn_bonus; /* Actual loop condition. */ parent_obj = dd->dd_parent_obj; if (parent_obj == 0) break; if (objset_get_dnode(spa, &spa->spa_mos, parent_obj, &parent) != 0) return (EIO); dd = (dsl_dir_phys_t *)&parent.dn_bonus; child_dir_zapobj = dd->dd_child_dir_zapobj; if (objset_get_dnode(spa, &spa->spa_mos, child_dir_zapobj, &child_dir_zap) != 0) return (EIO); if (zap_rlookup(spa, &child_dir_zap, component, dir_obj) != 0) return (EIO); len = strlen(component); p -= len; memcpy(p, component, len); --p; *p = '/'; /* Actual loop iteration. */ dir_obj = parent_obj; } if (*p != '\0') ++p; strcpy(result, p); return (0); } static int zfs_lookup_dataset(const spa_t *spa, const char *name, uint64_t *objnum) { char element[256]; uint64_t dir_obj, child_dir_zapobj; dnode_phys_t child_dir_zap, dir; dsl_dir_phys_t *dd; const char *p, *q; if (objset_get_dnode(spa, &spa->spa_mos, DMU_POOL_DIRECTORY_OBJECT, &dir)) return (EIO); if (zap_lookup(spa, &dir, DMU_POOL_ROOT_DATASET, &dir_obj)) return (EIO); p = name; for (;;) { if (objset_get_dnode(spa, &spa->spa_mos, dir_obj, &dir)) return (EIO); dd = (dsl_dir_phys_t *)&dir.dn_bonus; while (*p == '/') p++; /* Actual loop condition #1. */ if (*p == '\0') break; q = strchr(p, '/'); if (q) { memcpy(element, p, q - p); element[q - p] = '\0'; p = q + 1; } else { strcpy(element, p); p += strlen(p); } child_dir_zapobj = dd->dd_child_dir_zapobj; if (objset_get_dnode(spa, &spa->spa_mos, child_dir_zapobj, &child_dir_zap) != 0) return (EIO); /* Actual loop condition #2. 
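 */

/*
 * Illustrative sketch (not from the original source): the component-splitting
 * pattern the lookup loop above applies to a dataset path. Each call peels
 * one "/"-separated element off the front; e.g. for "tank/ROOT/default" the
 * first call stores "tank" and returns a pointer to "ROOT/default". The
 * example_* name is invented.
 */
#include <stddef.h>
#include <string.h>

static const char *
example_next_element(const char *path, char *element, size_t elemsize)
{
        const char *p = path, *q;
        size_t len;

        while (*p == '/')
                p++;
        q = strchr(p, '/');
        len = (q != NULL) ? (size_t)(q - p) : strlen(p);
        if (len >= elemsize)            /* truncate over-long components */
                len = elemsize - 1;
        memcpy(element, p, len);
        element[len] = '\0';
        return ((q != NULL) ? q + 1 : p + strlen(p));
}

/*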
*/ if (zap_lookup(spa, &child_dir_zap, element, &dir_obj) != 0) return (ENOENT); } *objnum = dd->dd_head_dataset_obj; return (0); } #ifndef BOOT2 static int zfs_list_dataset(const spa_t *spa, uint64_t objnum/*, int pos, char *entry*/) { uint64_t dir_obj, child_dir_zapobj; dnode_phys_t child_dir_zap, dir, dataset; dsl_dataset_phys_t *ds; dsl_dir_phys_t *dd; if (objset_get_dnode(spa, &spa->spa_mos, objnum, &dataset)) { printf("ZFS: can't find dataset %ju\n", (uintmax_t)objnum); return (EIO); } ds = (dsl_dataset_phys_t *) &dataset.dn_bonus; dir_obj = ds->ds_dir_obj; if (objset_get_dnode(spa, &spa->spa_mos, dir_obj, &dir)) { printf("ZFS: can't find dirobj %ju\n", (uintmax_t)dir_obj); return (EIO); } dd = (dsl_dir_phys_t *)&dir.dn_bonus; child_dir_zapobj = dd->dd_child_dir_zapobj; if (objset_get_dnode(spa, &spa->spa_mos, child_dir_zapobj, &child_dir_zap) != 0) { printf("ZFS: can't find child zap %ju\n", (uintmax_t)dir_obj); return (EIO); } return (zap_list(spa, &child_dir_zap) != 0); } int -zfs_callback_dataset(const spa_t *spa, uint64_t objnum, int (*callback)(const char *name)) +zfs_callback_dataset(const spa_t *spa, uint64_t objnum, int (*callback)(const char *, uint64_t)) { uint64_t dir_obj, child_dir_zapobj, zap_type; dnode_phys_t child_dir_zap, dir, dataset; dsl_dataset_phys_t *ds; dsl_dir_phys_t *dd; int err; err = objset_get_dnode(spa, &spa->spa_mos, objnum, &dataset); if (err != 0) { printf("ZFS: can't find dataset %ju\n", (uintmax_t)objnum); return (err); } ds = (dsl_dataset_phys_t *) &dataset.dn_bonus; dir_obj = ds->ds_dir_obj; err = objset_get_dnode(spa, &spa->spa_mos, dir_obj, &dir); if (err != 0) { printf("ZFS: can't find dirobj %ju\n", (uintmax_t)dir_obj); return (err); } dd = (dsl_dir_phys_t *)&dir.dn_bonus; child_dir_zapobj = dd->dd_child_dir_zapobj; err = objset_get_dnode(spa, &spa->spa_mos, child_dir_zapobj, &child_dir_zap); if (err != 0) { printf("ZFS: can't find child zap %ju\n", (uintmax_t)dir_obj); return (err); } err = dnode_read(spa, &child_dir_zap, 0, zap_scratch, child_dir_zap.dn_datablkszsec * 512); if (err != 0) return (err); zap_type = *(uint64_t *) zap_scratch; if (zap_type == ZBT_MICRO) return mzap_list(&child_dir_zap, callback); else return fzap_list(spa, &child_dir_zap, callback); } #endif /* * Find the object set given the object number of its dataset object * and return its details in *objset */ static int zfs_mount_dataset(const spa_t *spa, uint64_t objnum, objset_phys_t *objset) { dnode_phys_t dataset; dsl_dataset_phys_t *ds; if (objset_get_dnode(spa, &spa->spa_mos, objnum, &dataset)) { printf("ZFS: can't find dataset %ju\n", (uintmax_t)objnum); return (EIO); } ds = (dsl_dataset_phys_t *) &dataset.dn_bonus; if (zio_read(spa, &ds->ds_bp, objset)) { printf("ZFS: can't read object set for dataset %ju\n", (uintmax_t)objnum); return (EIO); } return (0); } /* * Find the object set pointed to by the BOOTFS property or the root * dataset if there is none and return its details in *objset */ static int zfs_get_root(const spa_t *spa, uint64_t *objid) { dnode_phys_t dir, propdir; uint64_t props, bootfs, root; *objid = 0; /* * Start with the MOS directory object. */ if (objset_get_dnode(spa, &spa->spa_mos, DMU_POOL_DIRECTORY_OBJECT, &dir)) { printf("ZFS: can't read MOS object directory\n"); return (EIO); } /* * Lookup the pool_props and see if we can find a bootfs. 
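 */

/*
 * Illustrative sketch (not part of the original revision): how a caller might
 * use the widened callback signature introduced above. Callbacks now receive
 * the ZAP entry's value alongside its name, and a non-zero return value stops
 * the walk and is propagated back out of mzap_list()/fzap_list(). The
 * callback below is hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

static int
example_print_child(const char *name, uint64_t value)
{
        printf("%-32s %ju\n", name, (uintmax_t)value);
        return (0);             /* keep walking */
}

/*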
*/ if (zap_lookup(spa, &dir, DMU_POOL_PROPS, &props) == 0 && objset_get_dnode(spa, &spa->spa_mos, props, &propdir) == 0 && zap_lookup(spa, &propdir, "bootfs", &bootfs) == 0 && bootfs != 0) { *objid = bootfs; return (0); } /* * Lookup the root dataset directory */ if (zap_lookup(spa, &dir, DMU_POOL_ROOT_DATASET, &root) || objset_get_dnode(spa, &spa->spa_mos, root, &dir)) { printf("ZFS: can't find root dsl_dir\n"); return (EIO); } /* * Use the information from the dataset directory's bonus buffer * to find the dataset object and from that the object set itself. */ dsl_dir_phys_t *dd = (dsl_dir_phys_t *) &dir.dn_bonus; *objid = dd->dd_head_dataset_obj; return (0); } static int zfs_mount(const spa_t *spa, uint64_t rootobj, struct zfsmount *mount) { mount->spa = spa; /* * Find the root object set if not explicitly provided */ if (rootobj == 0 && zfs_get_root(spa, &rootobj)) { printf("ZFS: can't find root filesystem\n"); return (EIO); } if (zfs_mount_dataset(spa, rootobj, &mount->objset)) { printf("ZFS: can't open root filesystem\n"); return (EIO); } mount->rootobj = rootobj; return (0); } +/* + * callback function for feature name checks. + */ static int +check_feature(const char *name, uint64_t value) +{ + int i; + + if (value == 0) + return (0); + if (name[0] == '\0') + return (0); + + for (i = 0; features_for_read[i] != NULL; i++) { + if (strcmp(name, features_for_read[i]) == 0) + return (0); + } + printf("ZFS: unsupported feature: %s\n", name); + return (EIO); +} + +/* + * Checks whether the MOS features that are active are supported. + */ +static int +check_mos_features(const spa_t *spa) +{ + dnode_phys_t dir; + uint64_t objnum, zap_type; + size_t size; + int rc; + + if ((rc = objset_get_dnode(spa, &spa->spa_mos, DMU_OT_OBJECT_DIRECTORY, + &dir)) != 0) + return (rc); + if ((rc = zap_lookup(spa, &dir, DMU_POOL_FEATURES_FOR_READ, &objnum)) != 0) + return (rc); + + if ((rc = objset_get_dnode(spa, &spa->spa_mos, objnum, &dir)) != 0) + return (rc); + + if (dir.dn_type != DMU_OTN_ZAP_METADATA) + return (EIO); + + size = dir.dn_datablkszsec * 512; + if (dnode_read(spa, &dir, 0, zap_scratch, size)) + return (EIO); + + zap_type = *(uint64_t *) zap_scratch; + if (zap_type == ZBT_MICRO) + rc = mzap_list(&dir, check_feature); + else + rc = fzap_list(spa, &dir, check_feature); + + return (rc); +} + +static int zfs_spa_init(spa_t *spa) { + int rc; if (zio_read(spa, &spa->spa_uberblock.ub_rootbp, &spa->spa_mos)) { printf("ZFS: can't read MOS of pool %s\n", spa->spa_name); return (EIO); } if (spa->spa_mos.os_type != DMU_OST_META) { printf("ZFS: corrupted MOS of pool %s\n", spa->spa_name); return (EIO); } - return (0); + + rc = check_mos_features(spa); + if (rc != 0) { + printf("ZFS: pool %s is not supported\n", spa->spa_name); + } + + return (rc); } static int zfs_dnode_stat(const spa_t *spa, dnode_phys_t *dn, struct stat *sb) { if (dn->dn_bonustype != DMU_OT_SA) { znode_phys_t *zp = (znode_phys_t *)dn->dn_bonus; sb->st_mode = zp->zp_mode; sb->st_uid = zp->zp_uid; sb->st_gid = zp->zp_gid; sb->st_size = zp->zp_size; } else { sa_hdr_phys_t *sahdrp; int hdrsize; size_t size = 0; void *buf = NULL; if (dn->dn_bonuslen != 0) sahdrp = (sa_hdr_phys_t *)DN_BONUS(dn); else { if ((dn->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0) { blkptr_t *bp = &dn->dn_spill; int error; size = BP_GET_LSIZE(bp); buf = zfs_alloc(size); error = zio_read(spa, bp, buf); if (error != 0) { zfs_free(buf, size); return (error); } sahdrp = buf; } else { return (EIO); } } hdrsize = SA_HDR_SIZE(sahdrp); sb->st_mode = *(uint64_t *)((char *)sahdrp 
+ hdrsize + SA_MODE_OFFSET); sb->st_uid = *(uint64_t *)((char *)sahdrp + hdrsize + SA_UID_OFFSET); sb->st_gid = *(uint64_t *)((char *)sahdrp + hdrsize + SA_GID_OFFSET); sb->st_size = *(uint64_t *)((char *)sahdrp + hdrsize + SA_SIZE_OFFSET); if (buf != NULL) zfs_free(buf, size); } return (0); } /* * Lookup a file and return its dnode. */ static int zfs_lookup(const struct zfsmount *mount, const char *upath, dnode_phys_t *dnode) { int rc; uint64_t objnum, rootnum, parentnum; const spa_t *spa; dnode_phys_t dn; const char *p, *q; char element[256]; char path[1024]; int symlinks_followed = 0; struct stat sb; spa = mount->spa; if (mount->objset.os_type != DMU_OST_ZFS) { printf("ZFS: unexpected object set type %ju\n", (uintmax_t)mount->objset.os_type); return (EIO); } /* * Get the root directory dnode. */ rc = objset_get_dnode(spa, &mount->objset, MASTER_NODE_OBJ, &dn); if (rc) return (rc); rc = zap_lookup(spa, &dn, ZFS_ROOT_OBJ, &rootnum); if (rc) return (rc); rc = objset_get_dnode(spa, &mount->objset, rootnum, &dn); if (rc) return (rc); objnum = rootnum; p = upath; while (p && *p) { while (*p == '/') p++; if (!*p) break; q = strchr(p, '/'); if (q) { memcpy(element, p, q - p); element[q - p] = 0; p = q; } else { strcpy(element, p); p = 0; } rc = zfs_dnode_stat(spa, &dn, &sb); if (rc) return (rc); if (!S_ISDIR(sb.st_mode)) return (ENOTDIR); parentnum = objnum; rc = zap_lookup(spa, &dn, element, &objnum); if (rc) return (rc); objnum = ZFS_DIRENT_OBJ(objnum); rc = objset_get_dnode(spa, &mount->objset, objnum, &dn); if (rc) return (rc); /* * Check for symlink. */ rc = zfs_dnode_stat(spa, &dn, &sb); if (rc) return (rc); if (S_ISLNK(sb.st_mode)) { if (symlinks_followed > 10) return (EMLINK); symlinks_followed++; /* * Read the link value and copy the tail of our * current path onto the end. */ if (p) strcpy(&path[sb.st_size], p); else path[sb.st_size] = 0; /* * Second test is purely to silence bogus compiler * warning about accessing past the end of dn_bonus. */ if (sb.st_size + sizeof(znode_phys_t) <= dn.dn_bonuslen && sizeof(znode_phys_t) <= sizeof(dn.dn_bonus)) { memcpy(path, &dn.dn_bonus[sizeof(znode_phys_t)], sb.st_size); } else { rc = dnode_read(spa, &dn, 0, path, sb.st_size); if (rc) return (rc); } /* * Restart with the new path, starting either at * the root or at the parent depending whether or * not the link is relative. */ p = path; if (*p == '/') objnum = rootnum; else objnum = parentnum; objset_get_dnode(spa, &mount->objset, objnum, &dn); } } *dnode = dn; return (0); } Index: user/alc/PQ_LAUNDRY/sys/cddl/boot/zfs/zfsimpl.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/cddl/boot/zfs/zfsimpl.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/cddl/boot/zfs/zfsimpl.h (revision 303642) @@ -1,1453 +1,1505 @@ /*- * Copyright (c) 2002 McAfee, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and McAfee Research,, the Security Research Division of * McAfee, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as * part of the DARPA CHATS research program * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright 2013 by Saso Kiselkov. All rights reserved. */ /* * Copyright (c) 2013 by Delphix. All rights reserved. */ #define MAXNAMELEN 256 #define _NOTE(s) +typedef enum { B_FALSE, B_TRUE } boolean_t; + /* CRC64 table */ #define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */ /* * Macros for various sorts of alignment and rounding when the alignment * is known to be a power of 2. */ #define P2ALIGN(x, align) ((x) & -(align)) #define P2PHASE(x, align) ((x) & ((align) - 1)) #define P2NPHASE(x, align) (-(x) & ((align) - 1)) #define P2ROUNDUP(x, align) (-(-(x) & -(align))) #define P2END(x, align) (-(~(x) & -(align))) #define P2PHASEUP(x, align, phase) ((phase) - (((phase) - (x)) & -(align))) #define P2BOUNDARY(off, len, align) (((off) ^ ((off) + (len) - 1)) > (align) - 1) /* * General-purpose 32-bit and 64-bit bitfield encodings. 
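 */

/*
 * Illustrative sketch (not from the original source): what the power-of-two
 * and bitfield helpers defined around here compute, written as plain
 * functions with worked values in the comments. The example_* names are
 * invented.
 */
#include <stdint.h>

/*
 * P2ROUNDUP(x, align): round x up to a power-of-two boundary,
 * e.g. example_p2roundup(3000, 512) == 3072.
 */
static uint64_t
example_p2roundup(uint64_t x, uint64_t align)
{
        return (-(-x & -align));
}

/*
 * BF64_GET(x, low, len): extract a len-bit field starting at bit "low",
 * e.g. example_bf64_get(0xabcd, 8, 4) == 0xb.
 */
static uint64_t
example_bf64_get(uint64_t x, int low, int len)
{
        return ((x >> low) & ((1ULL << len) - 1));
}

/*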
*/ #define BF32_DECODE(x, low, len) P2PHASE((x) >> (low), 1U << (len)) #define BF64_DECODE(x, low, len) P2PHASE((x) >> (low), 1ULL << (len)) #define BF32_ENCODE(x, low, len) (P2PHASE((x), 1U << (len)) << (low)) #define BF64_ENCODE(x, low, len) (P2PHASE((x), 1ULL << (len)) << (low)) #define BF32_GET(x, low, len) BF32_DECODE(x, low, len) #define BF64_GET(x, low, len) BF64_DECODE(x, low, len) #define BF32_SET(x, low, len, val) \ ((x) ^= BF32_ENCODE((x >> low) ^ (val), low, len)) #define BF64_SET(x, low, len, val) \ ((x) ^= BF64_ENCODE((x >> low) ^ (val), low, len)) #define BF32_GET_SB(x, low, len, shift, bias) \ ((BF32_GET(x, low, len) + (bias)) << (shift)) #define BF64_GET_SB(x, low, len, shift, bias) \ ((BF64_GET(x, low, len) + (bias)) << (shift)) #define BF32_SET_SB(x, low, len, shift, bias, val) \ BF32_SET(x, low, len, ((val) >> (shift)) - (bias)) #define BF64_SET_SB(x, low, len, shift, bias, val) \ BF64_SET(x, low, len, ((val) >> (shift)) - (bias)) /* * Macros to reverse byte order */ #define BSWAP_8(x) ((x) & 0xff) #define BSWAP_16(x) ((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8)) #define BSWAP_32(x) ((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16)) #define BSWAP_64(x) ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32)) /* * Note: the boot loader can't actually read blocks larger than 128KB, * due to lack of memory. Therefore its SPA_MAXBLOCKSIZE is still 128KB. */ #define SPA_MINBLOCKSHIFT 9 #define SPA_MAXBLOCKSHIFT 17 #define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT) #define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT) /* * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB. * The ASIZE encoding should be at least 64 times larger (6 more bits) * to support up to 4-way RAID-Z mirror mode with worst-case gang block * overhead, three DVAs per bp, plus one more bit in case we do anything * else that expands the ASIZE. */ #define SPA_LSIZEBITS 16 /* LSIZE up to 32M (2^16 * 512) */ #define SPA_PSIZEBITS 16 /* PSIZE up to 32M (2^16 * 512) */ #define SPA_ASIZEBITS 24 /* ASIZE up to 64 times larger */ /* * All SPA data is represented by 128-bit data virtual addresses (DVAs). * The members of the dva_t should be considered opaque outside the SPA. */ typedef struct dva { uint64_t dva_word[2]; } dva_t; /* * Each block has a 256-bit checksum -- strong enough for cryptographic hashes. */ typedef struct zio_cksum { uint64_t zc_word[4]; } zio_cksum_t; /* * Each block is described by its DVAs, time of birth, checksum, etc. 
* The word-by-word, bit-by-bit layout of the blkptr is as follows: * * 64 56 48 40 32 24 16 8 0 * +-------+-------+-------+-------+-------+-------+-------+-------+ * 0 | vdev1 | GRID | ASIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 1 |G| offset1 | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 2 | vdev2 | GRID | ASIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 3 |G| offset2 | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 4 | vdev3 | GRID | ASIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 5 |G| offset3 | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 7 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 8 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 9 | physical birth txg | * +-------+-------+-------+-------+-------+-------+-------+-------+ * a | logical birth txg | * +-------+-------+-------+-------+-------+-------+-------+-------+ * b | fill count | * +-------+-------+-------+-------+-------+-------+-------+-------+ * c | checksum[0] | * +-------+-------+-------+-------+-------+-------+-------+-------+ * d | checksum[1] | * +-------+-------+-------+-------+-------+-------+-------+-------+ * e | checksum[2] | * +-------+-------+-------+-------+-------+-------+-------+-------+ * f | checksum[3] | * +-------+-------+-------+-------+-------+-------+-------+-------+ * * Legend: * * vdev virtual device ID * offset offset into virtual device * LSIZE logical size * PSIZE physical size (after compression) * ASIZE allocated size (including RAID-Z parity and gang block headers) * GRID RAID-Z layout information (reserved for future use) * cksum checksum function * comp compression function * G gang block indicator * B byteorder (endianness) * D dedup * X encryption (on version 30, which is not supported) * E blkptr_t contains embedded data (see below) * lvl level of indirection * type DMU object type * phys birth txg of block allocation; zero if same as logical birth txg * log. birth transaction group in which the block was logically born * fill count number of non-zero blocks under this bp * checksum[4] 256-bit checksum of the data this bp describes */ /* * "Embedded" blkptr_t's don't actually point to a block, instead they * have a data payload embedded in the blkptr_t itself. See the comment * in blkptr.c for more details. 
* * The blkptr_t is laid out as follows: * * 64 56 48 40 32 24 16 8 0 * +-------+-------+-------+-------+-------+-------+-------+-------+ * 0 | payload | * 1 | payload | * 2 | payload | * 3 | payload | * 4 | payload | * 5 | payload | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 6 |BDX|lvl| type | etype |E| comp| PSIZE| LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 7 | payload | * 8 | payload | * 9 | payload | * +-------+-------+-------+-------+-------+-------+-------+-------+ * a | logical birth txg | * +-------+-------+-------+-------+-------+-------+-------+-------+ * b | payload | * c | payload | * d | payload | * e | payload | * f | payload | * +-------+-------+-------+-------+-------+-------+-------+-------+ * * Legend: * * payload contains the embedded data * B (byteorder) byteorder (endianness) * D (dedup) padding (set to zero) * X encryption (set to zero; see above) * E (embedded) set to one * lvl indirection level * type DMU object type * etype how to interpret embedded data (BP_EMBEDDED_TYPE_*) * comp compression function of payload * PSIZE size of payload after compression, in bytes * LSIZE logical size of payload, in bytes * note that 25 bits is enough to store the largest * "normal" BP's LSIZE (2^16 * 2^9) in bytes * log. birth transaction group in which the block was logically born * * Note that LSIZE and PSIZE are stored in bytes, whereas for non-embedded * bp's they are stored in units of SPA_MINBLOCKSHIFT. * Generally, the generic BP_GET_*() macros can be used on embedded BP's. * The B, D, X, lvl, type, and comp fields are stored the same as with normal * BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must * be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before * other macros, as they assert that they are only used on BP's of the correct * "embedded-ness". */ #define BPE_GET_ETYPE(bp) \ (ASSERT(BP_IS_EMBEDDED(bp)), \ BF64_GET((bp)->blk_prop, 40, 8)) #define BPE_SET_ETYPE(bp, t) do { \ ASSERT(BP_IS_EMBEDDED(bp)); \ BF64_SET((bp)->blk_prop, 40, 8, t); \ _NOTE(CONSTCOND) } while (0) #define BPE_GET_LSIZE(bp) \ (ASSERT(BP_IS_EMBEDDED(bp)), \ BF64_GET_SB((bp)->blk_prop, 0, 25, 0, 1)) #define BPE_SET_LSIZE(bp, x) do { \ ASSERT(BP_IS_EMBEDDED(bp)); \ BF64_SET_SB((bp)->blk_prop, 0, 25, 0, 1, x); \ _NOTE(CONSTCOND) } while (0) #define BPE_GET_PSIZE(bp) \ (ASSERT(BP_IS_EMBEDDED(bp)), \ BF64_GET_SB((bp)->blk_prop, 25, 7, 0, 1)) #define BPE_SET_PSIZE(bp, x) do { \ ASSERT(BP_IS_EMBEDDED(bp)); \ BF64_SET_SB((bp)->blk_prop, 25, 7, 0, 1, x); \ _NOTE(CONSTCOND) } while (0) typedef enum bp_embedded_type { BP_EMBEDDED_TYPE_DATA, BP_EMBEDDED_TYPE_RESERVED, /* Reserved for an unintegrated feature. */ NUM_BP_EMBEDDED_TYPES = BP_EMBEDDED_TYPE_RESERVED } bp_embedded_type_t; #define BPE_NUM_WORDS 14 #define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t)) #define BPE_IS_PAYLOADWORD(bp, wp) \ ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth) #define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */ #define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */ typedef struct blkptr { dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */ uint64_t blk_prop; /* size, compression, type, etc */ uint64_t blk_pad[2]; /* Extra space for the future */ uint64_t blk_phys_birth; /* txg when block was allocated */ uint64_t blk_birth; /* transaction group at birth */ uint64_t blk_fill; /* fill count */ zio_cksum_t blk_cksum; /* 256-bit checksum */ } blkptr_t; /* * Macros to get and set fields in a bp or DVA. 
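 */

/*
 * Illustrative sketch (not part of the original source): decoding one DVA by
 * hand, following the bit layout in the block-pointer diagram above. On-disk
 * sizes and offsets are stored in 512-byte (SPA_MINBLOCKSHIFT) units, hence
 * the << 9 conversions. The example_* names are invented.
 */
#include <stdint.h>

struct example_dva_fields {
        uint64_t vdev;          /* virtual device id */
        uint64_t offset;        /* byte offset into the virtual device */
        uint64_t asize;         /* allocated bytes, incl. parity/gang overhead */
        int      gang;          /* non-zero if this DVA points at a gang block */
};

static void
example_decode_dva(const uint64_t word[2], struct example_dva_fields *out)
{
        out->asize = (word[0] & ((1ULL << 24) - 1)) << 9;       /* bits 0-23 */
        out->vdev = word[0] >> 32;                              /* bits 32-63 */
        out->offset = (word[1] & ((1ULL << 63) - 1)) << 9;      /* bits 0-62 */
        out->gang = (int)(word[1] >> 63);                       /* bit 63 */
}

/*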
*/ #define DVA_GET_ASIZE(dva) \ BF64_GET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, SPA_MINBLOCKSHIFT, 0) #define DVA_SET_ASIZE(dva, x) \ BF64_SET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, \ SPA_MINBLOCKSHIFT, 0, x) #define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8) #define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x) #define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, 32) #define DVA_SET_VDEV(dva, x) BF64_SET((dva)->dva_word[0], 32, 32, x) #define DVA_GET_OFFSET(dva) \ BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0) #define DVA_SET_OFFSET(dva, x) \ BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x) #define DVA_GET_GANG(dva) BF64_GET((dva)->dva_word[1], 63, 1) #define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x) #define BP_GET_LSIZE(bp) \ (BP_IS_EMBEDDED(bp) ? \ (BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA ? BPE_GET_LSIZE(bp) : 0): \ BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)) #define BP_SET_LSIZE(bp, x) do { \ ASSERT(!BP_IS_EMBEDDED(bp)); \ BF64_SET_SB((bp)->blk_prop, \ 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x); \ _NOTE(CONSTCOND) } while (0) #define BP_GET_PSIZE(bp) \ BF64_GET_SB((bp)->blk_prop, 16, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1) #define BP_SET_PSIZE(bp, x) \ BF64_SET_SB((bp)->blk_prop, 16, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x) #define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 7) #define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 7, x) #define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8) #define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x) #define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8) #define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x) #define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) #define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) #define BP_IS_EMBEDDED(bp) BF64_GET((bp)->blk_prop, 39, 1) #define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1) #define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x) #define BP_GET_BYTEORDER(bp) BF64_GET((bp)->blk_prop, 63, 1) #define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x) #define BP_PHYSICAL_BIRTH(bp) \ ((bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth) #define BP_GET_ASIZE(bp) \ (DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ DVA_GET_ASIZE(&(bp)->blk_dva[2])) #define BP_GET_UCSIZE(bp) \ ((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? 
\ BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)); #define BP_GET_NDVAS(bp) \ (!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ !!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ !!DVA_GET_ASIZE(&(bp)->blk_dva[2])) #define DVA_EQUAL(dva1, dva2) \ ((dva1)->dva_word[1] == (dva2)->dva_word[1] && \ (dva1)->dva_word[0] == (dva2)->dva_word[0]) #define ZIO_CHECKSUM_EQUAL(zc1, zc2) \ (0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \ ((zc1).zc_word[1] - (zc2).zc_word[1]) | \ ((zc1).zc_word[2] - (zc2).zc_word[2]) | \ ((zc1).zc_word[3] - (zc2).zc_word[3]))) #define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0) #define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \ { \ (zcp)->zc_word[0] = w0; \ (zcp)->zc_word[1] = w1; \ (zcp)->zc_word[2] = w2; \ (zcp)->zc_word[3] = w3; \ } #define BP_IDENTITY(bp) (&(bp)->blk_dva[0]) #define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp)) #define DVA_IS_EMPTY(dva) ((dva)->dva_word[0] == 0ULL && \ (dva)->dva_word[1] == 0ULL) #define BP_IS_HOLE(bp) DVA_IS_EMPTY(BP_IDENTITY(bp)) #define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg)) #define BP_ZERO(bp) \ { \ (bp)->blk_dva[0].dva_word[0] = 0; \ (bp)->blk_dva[0].dva_word[1] = 0; \ (bp)->blk_dva[1].dva_word[0] = 0; \ (bp)->blk_dva[1].dva_word[1] = 0; \ (bp)->blk_dva[2].dva_word[0] = 0; \ (bp)->blk_dva[2].dva_word[1] = 0; \ (bp)->blk_prop = 0; \ (bp)->blk_pad[0] = 0; \ (bp)->blk_pad[1] = 0; \ (bp)->blk_phys_birth = 0; \ (bp)->blk_birth = 0; \ (bp)->blk_fill = 0; \ ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \ } #define BPE_NUM_WORDS 14 #define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t)) #define BPE_IS_PAYLOADWORD(bp, wp) \ ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth) /* * Embedded checksum */ #define ZEC_MAGIC 0x210da7ab10c7a11ULL typedef struct zio_eck { uint64_t zec_magic; /* for validation, endianness */ zio_cksum_t zec_cksum; /* 256-bit checksum */ } zio_eck_t; /* * Gang block headers are self-checksumming and contain an array * of block pointers. */ #define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE #define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \ sizeof (zio_eck_t)) / sizeof (blkptr_t)) #define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \ sizeof (zio_eck_t) - \ (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\ sizeof (uint64_t)) typedef struct zio_gbh { blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS]; uint64_t zg_filler[SPA_GBH_FILLER]; zio_eck_t zg_tail; } zio_gbh_phys_t; #define VDEV_RAIDZ_MAXPARITY 3 #define VDEV_PAD_SIZE (8 << 10) /* 2 padding areas (vl_pad1 and vl_pad2) to skip */ #define VDEV_SKIP_SIZE VDEV_PAD_SIZE * 2 #define VDEV_PHYS_SIZE (112 << 10) #define VDEV_UBERBLOCK_RING (128 << 10) #define VDEV_UBERBLOCK_SHIFT(vd) \ MAX((vd)->v_top->v_ashift, UBERBLOCK_SHIFT) #define VDEV_UBERBLOCK_COUNT(vd) \ (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(vd)) #define VDEV_UBERBLOCK_OFFSET(vd, n) \ offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(vd)]) #define VDEV_UBERBLOCK_SIZE(vd) (1ULL << VDEV_UBERBLOCK_SHIFT(vd)) typedef struct vdev_phys { char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_eck_t)]; zio_eck_t vp_zbt; } vdev_phys_t; typedef struct vdev_label { char vl_pad1[VDEV_PAD_SIZE]; /* 8K */ char vl_pad2[VDEV_PAD_SIZE]; /* 8K */ vdev_phys_t vl_vdev_phys; /* 112K */ char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */ } vdev_label_t; /* 256K total */ /* * vdev_dirty() flags */ #define VDD_METASLAB 0x01 #define VDD_DTL 0x02 /* * Size and offset of embedded boot loader region on each label. * The total size of the first two labels plus the boot area is 4MB. 
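 */

/*
 * Illustrative sketch (not from the original source): the uberblock-ring
 * arithmetic behind the VDEV_UBERBLOCK_* macros above. Each label reserves a
 * 128K ring, and the slot size tracks the vdev's ashift, so large-sector
 * devices hold fewer, larger uberblock slots. The example_* names are
 * invented.
 */
#define EXAMPLE_UBERBLOCK_SHIFT 10              /* minimum slot size: 1K */
#define EXAMPLE_UBERBLOCK_RING  (128 << 10)     /* 128K ring per label */

static int
example_uberblock_count(int ashift)
{
        int shift = ashift > EXAMPLE_UBERBLOCK_SHIFT ?
            ashift : EXAMPLE_UBERBLOCK_SHIFT;

        /* e.g. ashift 9 -> 128 slots of 1K, ashift 12 -> 32 slots of 4K. */
        return (EXAMPLE_UBERBLOCK_RING >> shift);
}

/*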
*/ #define VDEV_BOOT_OFFSET (2 * sizeof (vdev_label_t)) #define VDEV_BOOT_SIZE (7ULL << 19) /* 3.5M */ /* * Size of label regions at the start and end of each leaf device. */ #define VDEV_LABEL_START_SIZE (2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE) #define VDEV_LABEL_END_SIZE (2 * sizeof (vdev_label_t)) #define VDEV_LABELS 4 enum zio_checksum { ZIO_CHECKSUM_INHERIT = 0, ZIO_CHECKSUM_ON, ZIO_CHECKSUM_OFF, ZIO_CHECKSUM_LABEL, ZIO_CHECKSUM_GANG_HEADER, ZIO_CHECKSUM_ZILOG, ZIO_CHECKSUM_FLETCHER_2, ZIO_CHECKSUM_FLETCHER_4, ZIO_CHECKSUM_SHA256, ZIO_CHECKSUM_ZILOG2, ZIO_CHECKSUM_FUNCTIONS }; #define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_4 #define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON enum zio_compress { ZIO_COMPRESS_INHERIT = 0, ZIO_COMPRESS_ON, ZIO_COMPRESS_OFF, ZIO_COMPRESS_LZJB, ZIO_COMPRESS_EMPTY, ZIO_COMPRESS_GZIP_1, ZIO_COMPRESS_GZIP_2, ZIO_COMPRESS_GZIP_3, ZIO_COMPRESS_GZIP_4, ZIO_COMPRESS_GZIP_5, ZIO_COMPRESS_GZIP_6, ZIO_COMPRESS_GZIP_7, ZIO_COMPRESS_GZIP_8, ZIO_COMPRESS_GZIP_9, ZIO_COMPRESS_ZLE, ZIO_COMPRESS_LZ4, ZIO_COMPRESS_FUNCTIONS }; #define ZIO_COMPRESS_ON_VALUE ZIO_COMPRESS_LZJB #define ZIO_COMPRESS_DEFAULT ZIO_COMPRESS_OFF /* nvlist pack encoding */ #define NV_ENCODE_NATIVE 0 #define NV_ENCODE_XDR 1 typedef enum { DATA_TYPE_UNKNOWN = 0, DATA_TYPE_BOOLEAN, DATA_TYPE_BYTE, DATA_TYPE_INT16, DATA_TYPE_UINT16, DATA_TYPE_INT32, DATA_TYPE_UINT32, DATA_TYPE_INT64, DATA_TYPE_UINT64, DATA_TYPE_STRING, DATA_TYPE_BYTE_ARRAY, DATA_TYPE_INT16_ARRAY, DATA_TYPE_UINT16_ARRAY, DATA_TYPE_INT32_ARRAY, DATA_TYPE_UINT32_ARRAY, DATA_TYPE_INT64_ARRAY, DATA_TYPE_UINT64_ARRAY, DATA_TYPE_STRING_ARRAY, DATA_TYPE_HRTIME, DATA_TYPE_NVLIST, DATA_TYPE_NVLIST_ARRAY, DATA_TYPE_BOOLEAN_VALUE, DATA_TYPE_INT8, DATA_TYPE_UINT8, DATA_TYPE_BOOLEAN_ARRAY, DATA_TYPE_INT8_ARRAY, DATA_TYPE_UINT8_ARRAY } data_type_t; /* * On-disk version number. */ #define SPA_VERSION_1 1ULL #define SPA_VERSION_2 2ULL #define SPA_VERSION_3 3ULL #define SPA_VERSION_4 4ULL #define SPA_VERSION_5 5ULL #define SPA_VERSION_6 6ULL #define SPA_VERSION_7 7ULL #define SPA_VERSION_8 8ULL #define SPA_VERSION_9 9ULL #define SPA_VERSION_10 10ULL #define SPA_VERSION_11 11ULL #define SPA_VERSION_12 12ULL #define SPA_VERSION_13 13ULL #define SPA_VERSION_14 14ULL #define SPA_VERSION_15 15ULL #define SPA_VERSION_16 16ULL #define SPA_VERSION_17 17ULL #define SPA_VERSION_18 18ULL #define SPA_VERSION_19 19ULL #define SPA_VERSION_20 20ULL #define SPA_VERSION_21 21ULL #define SPA_VERSION_22 22ULL #define SPA_VERSION_23 23ULL #define SPA_VERSION_24 24ULL #define SPA_VERSION_25 25ULL #define SPA_VERSION_26 26ULL #define SPA_VERSION_27 27ULL #define SPA_VERSION_28 28ULL #define SPA_VERSION_5000 5000ULL /* * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk * format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*}, * and do the appropriate changes. Also bump the version number in * usr/src/grub/capability. */ #define SPA_VERSION SPA_VERSION_5000 #define SPA_VERSION_STRING "5000" /* * Symbolic names for the changes that caused a SPA_VERSION switch. * Used in the code when checking for presence or absence of a feature. * Feel free to define multiple symbolic names for each version if there * were multiple changes to on-disk structures during that version. * * NOTE: When checking the current SPA_VERSION in your code, be sure * to use spa_version() since it reports the version of the * last synced uberblock. Checking the in-flight version can * be dangerous in some cases. 
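 */

/*
 * Illustrative sketch (not part of the original source): the acceptance rule
 * that SPA_VERSION_IS_SUPPORTED() just below encodes -- legacy on-disk
 * versions 1 through 28 are accepted, after which versioning jumps straight
 * to 5000 once feature flags replace plain version bumps. The example_* name
 * is invented.
 */
#include <stdint.h>

static int
example_version_supported(uint64_t v)
{
        /* SPA_VERSION_BEFORE_FEATURES is 28; SPA_VERSION is currently 5000. */
        return ((v >= 1 && v <= 28) || v == 5000);
}

/*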
*/ #define SPA_VERSION_INITIAL SPA_VERSION_1 #define SPA_VERSION_DITTO_BLOCKS SPA_VERSION_2 #define SPA_VERSION_SPARES SPA_VERSION_3 #define SPA_VERSION_RAID6 SPA_VERSION_3 #define SPA_VERSION_BPLIST_ACCOUNT SPA_VERSION_3 #define SPA_VERSION_RAIDZ_DEFLATE SPA_VERSION_3 #define SPA_VERSION_DNODE_BYTES SPA_VERSION_3 #define SPA_VERSION_ZPOOL_HISTORY SPA_VERSION_4 #define SPA_VERSION_GZIP_COMPRESSION SPA_VERSION_5 #define SPA_VERSION_BOOTFS SPA_VERSION_6 #define SPA_VERSION_SLOGS SPA_VERSION_7 #define SPA_VERSION_DELEGATED_PERMS SPA_VERSION_8 #define SPA_VERSION_FUID SPA_VERSION_9 #define SPA_VERSION_REFRESERVATION SPA_VERSION_9 #define SPA_VERSION_REFQUOTA SPA_VERSION_9 #define SPA_VERSION_UNIQUE_ACCURATE SPA_VERSION_9 #define SPA_VERSION_L2CACHE SPA_VERSION_10 #define SPA_VERSION_NEXT_CLONES SPA_VERSION_11 #define SPA_VERSION_ORIGIN SPA_VERSION_11 #define SPA_VERSION_DSL_SCRUB SPA_VERSION_11 #define SPA_VERSION_SNAP_PROPS SPA_VERSION_12 #define SPA_VERSION_USED_BREAKDOWN SPA_VERSION_13 #define SPA_VERSION_PASSTHROUGH_X SPA_VERSION_14 #define SPA_VERSION_USERSPACE SPA_VERSION_15 #define SPA_VERSION_STMF_PROP SPA_VERSION_16 #define SPA_VERSION_RAIDZ3 SPA_VERSION_17 #define SPA_VERSION_USERREFS SPA_VERSION_18 #define SPA_VERSION_HOLES SPA_VERSION_19 #define SPA_VERSION_ZLE_COMPRESSION SPA_VERSION_20 #define SPA_VERSION_DEDUP SPA_VERSION_21 #define SPA_VERSION_RECVD_PROPS SPA_VERSION_22 #define SPA_VERSION_SLIM_ZIL SPA_VERSION_23 #define SPA_VERSION_SA SPA_VERSION_24 #define SPA_VERSION_SCAN SPA_VERSION_25 #define SPA_VERSION_DIR_CLONES SPA_VERSION_26 #define SPA_VERSION_DEADLISTS SPA_VERSION_26 #define SPA_VERSION_FAST_SNAP SPA_VERSION_27 #define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28 #define SPA_VERSION_BEFORE_FEATURES SPA_VERSION_28 #define SPA_VERSION_FEATURES SPA_VERSION_5000 #define SPA_VERSION_IS_SUPPORTED(v) \ (((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \ ((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION)) /* * The following are configuration names used in the nvlist describing a pool's * configuration. */ #define ZPOOL_CONFIG_VERSION "version" #define ZPOOL_CONFIG_POOL_NAME "name" #define ZPOOL_CONFIG_POOL_STATE "state" #define ZPOOL_CONFIG_POOL_TXG "txg" #define ZPOOL_CONFIG_POOL_GUID "pool_guid" #define ZPOOL_CONFIG_CREATE_TXG "create_txg" #define ZPOOL_CONFIG_TOP_GUID "top_guid" #define ZPOOL_CONFIG_VDEV_TREE "vdev_tree" #define ZPOOL_CONFIG_TYPE "type" #define ZPOOL_CONFIG_CHILDREN "children" #define ZPOOL_CONFIG_ID "id" #define ZPOOL_CONFIG_GUID "guid" #define ZPOOL_CONFIG_PATH "path" #define ZPOOL_CONFIG_DEVID "devid" #define ZPOOL_CONFIG_METASLAB_ARRAY "metaslab_array" #define ZPOOL_CONFIG_METASLAB_SHIFT "metaslab_shift" #define ZPOOL_CONFIG_ASHIFT "ashift" #define ZPOOL_CONFIG_ASIZE "asize" #define ZPOOL_CONFIG_DTL "DTL" #define ZPOOL_CONFIG_STATS "stats" #define ZPOOL_CONFIG_WHOLE_DISK "whole_disk" #define ZPOOL_CONFIG_ERRCOUNT "error_count" #define ZPOOL_CONFIG_NOT_PRESENT "not_present" #define ZPOOL_CONFIG_SPARES "spares" #define ZPOOL_CONFIG_IS_SPARE "is_spare" #define ZPOOL_CONFIG_NPARITY "nparity" #define ZPOOL_CONFIG_HOSTID "hostid" #define ZPOOL_CONFIG_HOSTNAME "hostname" #define ZPOOL_CONFIG_IS_LOG "is_log" #define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ #define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read" /* * The persistent vdev state is stored as separate values rather than a single * 'vdev_state' entry. This is because a device can be in multiple states, such * as offline and degraded. 
*/ #define ZPOOL_CONFIG_OFFLINE "offline" #define ZPOOL_CONFIG_FAULTED "faulted" #define ZPOOL_CONFIG_DEGRADED "degraded" #define ZPOOL_CONFIG_REMOVED "removed" #define ZPOOL_CONFIG_FRU "fru" #define ZPOOL_CONFIG_AUX_STATE "aux_state" #define VDEV_TYPE_ROOT "root" #define VDEV_TYPE_MIRROR "mirror" #define VDEV_TYPE_REPLACING "replacing" #define VDEV_TYPE_RAIDZ "raidz" #define VDEV_TYPE_DISK "disk" #define VDEV_TYPE_FILE "file" #define VDEV_TYPE_MISSING "missing" #define VDEV_TYPE_HOLE "hole" #define VDEV_TYPE_SPARE "spare" #define VDEV_TYPE_LOG "log" #define VDEV_TYPE_L2CACHE "l2cache" /* * This is needed in userland to report the minimum necessary device size. */ #define SPA_MINDEVSIZE (64ULL << 20) /* * The location of the pool configuration repository, shared between kernel and * userland. */ #define ZPOOL_CACHE "/boot/zfs/zpool.cache" /* * vdev states are ordered from least to most healthy. * A vdev that's CANT_OPEN or below is considered unusable. */ typedef enum vdev_state { VDEV_STATE_UNKNOWN = 0, /* Uninitialized vdev */ VDEV_STATE_CLOSED, /* Not currently open */ VDEV_STATE_OFFLINE, /* Not allowed to open */ VDEV_STATE_REMOVED, /* Explicitly removed from system */ VDEV_STATE_CANT_OPEN, /* Tried to open, but failed */ VDEV_STATE_FAULTED, /* External request to fault device */ VDEV_STATE_DEGRADED, /* Replicated vdev with unhealthy kids */ VDEV_STATE_HEALTHY /* Presumed good */ } vdev_state_t; /* * vdev aux states. When a vdev is in the CANT_OPEN state, the aux field * of the vdev stats structure uses these constants to distinguish why. */ typedef enum vdev_aux { VDEV_AUX_NONE, /* no error */ VDEV_AUX_OPEN_FAILED, /* ldi_open_*() or vn_open() failed */ VDEV_AUX_CORRUPT_DATA, /* bad label or disk contents */ VDEV_AUX_NO_REPLICAS, /* insufficient number of replicas */ VDEV_AUX_BAD_GUID_SUM, /* vdev guid sum doesn't match */ VDEV_AUX_TOO_SMALL, /* vdev size is too small */ VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */ VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */ VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */ VDEV_AUX_SPARED /* hot spare used in another pool */ } vdev_aux_t; /* * pool state. The following states are written to disk as part of the normal * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE. The remaining states are * software abstractions used at various levels to communicate pool state. */ typedef enum pool_state { POOL_STATE_ACTIVE = 0, /* In active use */ POOL_STATE_EXPORTED, /* Explicitly exported */ POOL_STATE_DESTROYED, /* Explicitly destroyed */ POOL_STATE_SPARE, /* Reserved for hot spare use */ POOL_STATE_UNINITIALIZED, /* Internal spa_t state */ POOL_STATE_UNAVAIL, /* Internal libzfs state */ POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */ } pool_state_t; /* * The uberblock version is incremented whenever an incompatible on-disk * format change is made to the SPA, DMU, or ZAP. * * Note: the first two fields should never be moved. When a storage pool * is opened, the uberblock must be read off the disk before the version * can be checked. If the ub_version field is moved, we may not detect * version mismatch. If the ub_magic field is moved, applications that * expect the magic number in the first word won't work. */ #define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! 
*/ #define UBERBLOCK_SHIFT 10 /* up to 1K */ struct uberblock { uint64_t ub_magic; /* UBERBLOCK_MAGIC */ uint64_t ub_version; /* SPA_VERSION */ uint64_t ub_txg; /* txg of last sync */ uint64_t ub_guid_sum; /* sum of all vdev guids */ uint64_t ub_timestamp; /* UTC time of last sync */ blkptr_t ub_rootbp; /* MOS objset_phys_t */ }; /* * Flags. */ #define DNODE_MUST_BE_ALLOCATED 1 #define DNODE_MUST_BE_FREE 2 /* * Fixed constants. */ #define DNODE_SHIFT 9 /* 512 bytes */ #define DN_MIN_INDBLKSHIFT 12 /* 4k */ #define DN_MAX_INDBLKSHIFT 14 /* 16k */ #define DNODE_BLOCK_SHIFT 14 /* 16k */ #define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */ #define DN_MAX_OBJECT_SHIFT 48 /* 256 trillion (zfs_fid_t limit) */ #define DN_MAX_OFFSET_SHIFT 64 /* 2^64 bytes in a dnode */ /* * Derived constants. */ #define DNODE_SIZE (1 << DNODE_SHIFT) #define DN_MAX_NBLKPTR ((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT) #define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT)) #define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT) #define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT) #define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT) #define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT) /* The +2 here is a cheesy way to round up */ #define DN_MAX_LEVELS (2 + ((DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT) / \ (DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT))) #define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \ (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t)))) #define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \ (dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT) #define EPB(blkshift, typeshift) (1 << (blkshift - typeshift)) /* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */ #define DNODE_FLAG_USED_BYTES (1<<0) #define DNODE_FLAG_USERUSED_ACCOUNTED (1<<1) /* Does dnode have a SA spill blkptr in bonus? */ #define DNODE_FLAG_SPILL_BLKPTR (1<<2) typedef struct dnode_phys { uint8_t dn_type; /* dmu_object_type_t */ uint8_t dn_indblkshift; /* ln2(indirect block size) */ uint8_t dn_nlevels; /* 1=dn_blkptr->data blocks */ uint8_t dn_nblkptr; /* length of dn_blkptr */ uint8_t dn_bonustype; /* type of data in bonus buffer */ uint8_t dn_checksum; /* ZIO_CHECKSUM type */ uint8_t dn_compress; /* ZIO_COMPRESS type */ uint8_t dn_flags; /* DNODE_FLAG_* */ uint16_t dn_datablkszsec; /* data block size in 512b sectors */ uint16_t dn_bonuslen; /* length of dn_bonus */ uint8_t dn_pad2[4]; /* accounting is protected by dn_dirty_mtx */ uint64_t dn_maxblkid; /* largest allocated block ID */ uint64_t dn_used; /* bytes (or sectors) of disk space */ uint64_t dn_pad3[4]; blkptr_t dn_blkptr[1]; uint8_t dn_bonus[DN_MAX_BONUSLEN - sizeof (blkptr_t)]; blkptr_t dn_spill; } dnode_phys_t; +typedef enum dmu_object_byteswap { + DMU_BSWAP_UINT8, + DMU_BSWAP_UINT16, + DMU_BSWAP_UINT32, + DMU_BSWAP_UINT64, + DMU_BSWAP_ZAP, + DMU_BSWAP_DNODE, + DMU_BSWAP_OBJSET, + DMU_BSWAP_ZNODE, + DMU_BSWAP_OLDACL, + DMU_BSWAP_ACL, + /* + * Allocating a new byteswap type number makes the on-disk format + * incompatible with any other format that uses the same number. + * + * Data can usually be structured to work with one of the + * DMU_BSWAP_UINT* or DMU_BSWAP_ZAP types. + */ + DMU_BSWAP_NUMFUNCS +} dmu_object_byteswap_t; + +#define DMU_OT_NEWTYPE 0x80 +#define DMU_OT_METADATA 0x40 +#define DMU_OT_BYTESWAP_MASK 0x3f + +/* + * Defines a uint8_t object type. 
Object types specify if the data + * in the object is metadata (boolean) and how to byteswap the data + * (dmu_object_byteswap_t). + */ +#define DMU_OT(byteswap, metadata) \ + (DMU_OT_NEWTYPE | \ + ((metadata) ? DMU_OT_METADATA : 0) | \ + ((byteswap) & DMU_OT_BYTESWAP_MASK)) + typedef enum dmu_object_type { DMU_OT_NONE, /* general: */ DMU_OT_OBJECT_DIRECTORY, /* ZAP */ DMU_OT_OBJECT_ARRAY, /* UINT64 */ DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */ DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */ DMU_OT_BPLIST, /* UINT64 */ DMU_OT_BPLIST_HDR, /* UINT64 */ /* spa: */ DMU_OT_SPACE_MAP_HEADER, /* UINT64 */ DMU_OT_SPACE_MAP, /* UINT64 */ /* zil: */ DMU_OT_INTENT_LOG, /* UINT64 */ /* dmu: */ DMU_OT_DNODE, /* DNODE */ DMU_OT_OBJSET, /* OBJSET */ /* dsl: */ DMU_OT_DSL_DIR, /* UINT64 */ DMU_OT_DSL_DIR_CHILD_MAP, /* ZAP */ DMU_OT_DSL_DS_SNAP_MAP, /* ZAP */ DMU_OT_DSL_PROPS, /* ZAP */ DMU_OT_DSL_DATASET, /* UINT64 */ /* zpl: */ DMU_OT_ZNODE, /* ZNODE */ DMU_OT_OLDACL, /* Old ACL */ DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */ DMU_OT_DIRECTORY_CONTENTS, /* ZAP */ DMU_OT_MASTER_NODE, /* ZAP */ DMU_OT_UNLINKED_SET, /* ZAP */ /* zvol: */ DMU_OT_ZVOL, /* UINT8 */ DMU_OT_ZVOL_PROP, /* ZAP */ /* other; for testing only! */ DMU_OT_PLAIN_OTHER, /* UINT8 */ DMU_OT_UINT64_OTHER, /* UINT64 */ DMU_OT_ZAP_OTHER, /* ZAP */ /* new object types: */ DMU_OT_ERROR_LOG, /* ZAP */ DMU_OT_SPA_HISTORY, /* UINT8 */ DMU_OT_SPA_HISTORY_OFFSETS, /* spa_his_phys_t */ DMU_OT_POOL_PROPS, /* ZAP */ DMU_OT_DSL_PERMS, /* ZAP */ DMU_OT_ACL, /* ACL */ DMU_OT_SYSACL, /* SYSACL */ DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */ DMU_OT_FUID_SIZE, /* FUID table size UINT64 */ DMU_OT_NEXT_CLONES, /* ZAP */ DMU_OT_SCAN_QUEUE, /* ZAP */ DMU_OT_USERGROUP_USED, /* ZAP */ DMU_OT_USERGROUP_QUOTA, /* ZAP */ DMU_OT_USERREFS, /* ZAP */ DMU_OT_DDT_ZAP, /* ZAP */ DMU_OT_DDT_STATS, /* ZAP */ DMU_OT_SA, /* System attr */ DMU_OT_SA_MASTER_NODE, /* ZAP */ DMU_OT_SA_ATTR_REGISTRATION, /* ZAP */ DMU_OT_SA_ATTR_LAYOUTS, /* ZAP */ DMU_OT_SCAN_XLATE, /* ZAP */ DMU_OT_DEDUP, /* fake dedup BP from ddt_bp_create() */ - DMU_OT_NUMTYPES + DMU_OT_NUMTYPES, + + /* + * Names for valid types declared with DMU_OT(). + */ + DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE), + DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE), + DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE), + DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE), + DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE), + DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE), + DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE), + DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE), + DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE), + DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE) } dmu_object_type_t; typedef enum dmu_objset_type { DMU_OST_NONE, DMU_OST_META, DMU_OST_ZFS, DMU_OST_ZVOL, DMU_OST_OTHER, /* For testing only! */ DMU_OST_ANY, /* Be careful! */ DMU_OST_NUMTYPES } dmu_objset_type_t; /* * header for all bonus and spill buffers. * The header has a fixed portion with a variable number * of "lengths" depending on the number of variable sized * attribues which are determined by the "layout number" */ #define SA_MAGIC 0x2F505A /* ZFS SA */ typedef struct sa_hdr_phys { uint32_t sa_magic; uint16_t sa_layout_info; /* Encoded with hdrsize and layout number */ uint16_t sa_lengths[1]; /* optional sizes for variable length attrs */ /* ... Data follows the lengths. 
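The new DMU_OT() encoding packs three facts into the single uint8_t stored in dn_type: the high bit flags the new-style encoding, the next bit says whether the object holds metadata, and the low six bits select the byteswap function. A stand-alone sketch of encoding and decoding that byte (the OT_* decode macros below are illustrative only and are not part of this change):

#include <stdio.h>
#include <stdint.h>

/* Same on-disk encoding as the header above. */
#define DMU_OT_NEWTYPE		0x80
#define DMU_OT_METADATA		0x40
#define DMU_OT_BYTESWAP_MASK	0x3f
#define DMU_OT(byteswap, metadata) \
	(DMU_OT_NEWTYPE | ((metadata) ? DMU_OT_METADATA : 0) | \
	((byteswap) & DMU_OT_BYTESWAP_MASK))

/* Hypothetical decode helpers, for illustration only. */
#define OT_IS_NEWTYPE(ot)	(((ot) & DMU_OT_NEWTYPE) != 0)
#define OT_IS_METADATA(ot)	(((ot) & DMU_OT_METADATA) != 0)
#define OT_BYTESWAP(ot)		((ot) & DMU_OT_BYTESWAP_MASK)

int
main(void)
{
	uint8_t ot = DMU_OT(4 /* DMU_BSWAP_ZAP */, 1);

	/* Prints value 0xc4: new-type 1, metadata 1, byteswap func 4. */
	printf("value 0x%02x: new-type %d, metadata %d, byteswap func %d\n",
	    ot, OT_IS_NEWTYPE(ot), OT_IS_METADATA(ot), OT_BYTESWAP(ot));
	return (0);
}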
*/ } sa_hdr_phys_t; /* * sa_hdr_phys -> sa_layout_info * * 16 10 0 * +--------+-------+ * | hdrsz |layout | * +--------+-------+ * * Bits 0-10 are the layout number * Bits 11-16 are the size of the header. * The hdrsize is the number * 8 * * For example. * hdrsz of 1 ==> 8 byte header * 2 ==> 16 byte header * */ #define SA_HDR_LAYOUT_NUM(hdr) BF32_GET(hdr->sa_layout_info, 0, 10) #define SA_HDR_SIZE(hdr) BF32_GET_SB(hdr->sa_layout_info, 10, 16, 3, 0) #define SA_HDR_LAYOUT_INFO_ENCODE(x, num, size) \ { \ BF32_SET_SB(x, 10, 6, 3, 0, size); \ BF32_SET(x, 0, 10, num); \ } #define SA_MODE_OFFSET 0 #define SA_SIZE_OFFSET 8 #define SA_GEN_OFFSET 16 #define SA_UID_OFFSET 24 #define SA_GID_OFFSET 32 #define SA_PARENT_OFFSET 40 /* * Intent log header - this on disk structure holds fields to manage * the log. All fields are 64 bit to easily handle cross architectures. */ typedef struct zil_header { uint64_t zh_claim_txg; /* txg in which log blocks were claimed */ uint64_t zh_replay_seq; /* highest replayed sequence number */ blkptr_t zh_log; /* log chain */ uint64_t zh_claim_seq; /* highest claimed sequence number */ uint64_t zh_pad[5]; } zil_header_t; #define OBJSET_PHYS_SIZE 2048 typedef struct objset_phys { dnode_phys_t os_meta_dnode; zil_header_t os_zil_header; uint64_t os_type; uint64_t os_flags; char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 - sizeof (zil_header_t) - sizeof (uint64_t)*2]; dnode_phys_t os_userused_dnode; dnode_phys_t os_groupused_dnode; } objset_phys_t; typedef struct dsl_dir_phys { uint64_t dd_creation_time; /* not actually used */ uint64_t dd_head_dataset_obj; uint64_t dd_parent_obj; uint64_t dd_clone_parent_obj; uint64_t dd_child_dir_zapobj; /* * how much space our children are accounting for; for leaf * datasets, == physical space used by fs + snaps */ uint64_t dd_used_bytes; uint64_t dd_compressed_bytes; uint64_t dd_uncompressed_bytes; /* Administrative quota setting */ uint64_t dd_quota; /* Administrative reservation setting */ uint64_t dd_reserved; uint64_t dd_props_zapobj; uint64_t dd_pad[21]; /* pad out to 256 bytes for good measure */ } dsl_dir_phys_t; typedef struct dsl_dataset_phys { uint64_t ds_dir_obj; uint64_t ds_prev_snap_obj; uint64_t ds_prev_snap_txg; uint64_t ds_next_snap_obj; uint64_t ds_snapnames_zapobj; /* zap obj of snaps; ==0 for snaps */ uint64_t ds_num_children; /* clone/snap children; ==0 for head */ uint64_t ds_creation_time; /* seconds since 1970 */ uint64_t ds_creation_txg; uint64_t ds_deadlist_obj; uint64_t ds_used_bytes; uint64_t ds_compressed_bytes; uint64_t ds_uncompressed_bytes; uint64_t ds_unique_bytes; /* only relevant to snapshots */ /* * The ds_fsid_guid is a 56-bit ID that can change to avoid * collisions. The ds_guid is a 64-bit ID that will never * change, so there is a small probability that it will collide. */ uint64_t ds_fsid_guid; uint64_t ds_guid; uint64_t ds_flags; blkptr_t ds_bp; uint64_t ds_pad[8]; /* pad out to 320 bytes for good measure */ } dsl_dataset_phys_t; /* * The names of zap entries in the DIRECTORY_OBJECT of the MOS. 
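The SA_HDR_* macros above are plain bit-field accessors over sa_layout_info: following the macro parameters, the low ten bits carry the layout number and the bits above them carry the header size in 8-byte units, so an encoded size of 2 means a 16-byte header. A minimal user-space sketch of the same packing, with the BF32_* calls spelled out as shifts and masks (sa_layout_encode() is an illustrative name, not part of the header):

#include <stdio.h>
#include <stdint.h>

/* Layout number in the low 10 bits, header size / 8 above it. */
static uint16_t
sa_layout_encode(unsigned layout, unsigned hdrsize_bytes)
{
	return ((uint16_t)(((hdrsize_bytes / 8) << 10) | (layout & 0x3ff)));
}

int
main(void)
{
	uint16_t info = sa_layout_encode(3, 16);

	/* Prints: layout 3, header 16 bytes */
	printf("layout %u, header %u bytes\n",
	    info & 0x3ff, (unsigned)((info >> 10) * 8));
	return (0);
}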
*/ #define DMU_POOL_DIRECTORY_OBJECT 1 #define DMU_POOL_CONFIG "config" +#define DMU_POOL_FEATURES_FOR_READ "features_for_read" #define DMU_POOL_ROOT_DATASET "root_dataset" #define DMU_POOL_SYNC_BPLIST "sync_bplist" #define DMU_POOL_ERRLOG_SCRUB "errlog_scrub" #define DMU_POOL_ERRLOG_LAST "errlog_last" #define DMU_POOL_SPARES "spares" #define DMU_POOL_DEFLATE "deflate" #define DMU_POOL_HISTORY "history" #define DMU_POOL_PROPS "pool_props" #define ZAP_MAGIC 0x2F52AB2ABULL #define FZAP_BLOCK_SHIFT(zap) ((zap)->zap_block_shift) #define ZAP_MAXCD (uint32_t)(-1) #define ZAP_HASHBITS 28 #define MZAP_ENT_LEN 64 #define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2) #define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT #define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT) typedef struct mzap_ent_phys { uint64_t mze_value; uint32_t mze_cd; uint16_t mze_pad; /* in case we want to chain them someday */ char mze_name[MZAP_NAME_LEN]; } mzap_ent_phys_t; typedef struct mzap_phys { uint64_t mz_block_type; /* ZBT_MICRO */ uint64_t mz_salt; uint64_t mz_pad[6]; mzap_ent_phys_t mz_chunk[1]; /* actually variable size depending on block size */ } mzap_phys_t; /* * The (fat) zap is stored in one object. It is an array of * 1<= 6] [zap_leaf_t] [ptrtbl] ... * */ #define ZBT_LEAF ((1ULL << 63) + 0) #define ZBT_HEADER ((1ULL << 63) + 1) #define ZBT_MICRO ((1ULL << 63) + 3) /* any other values are ptrtbl blocks */ /* * the embedded pointer table takes up half a block: * block size / entry size (2^3) / 2 */ #define ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1) /* * The embedded pointer table starts half-way through the block. Since * the pointer table itself is half the block, it starts at (64-bit) * word number (1<zap_phys) \ [(idx) + (1<l_bs) - hash entry size (2) * number of hash * entries - header space (2*chunksize) */ #define ZAP_LEAF_NUMCHUNKS(l) \ (((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \ ZAP_LEAF_CHUNKSIZE - 2) /* * The amount of space within the chunk available for the array is: * chunk size - space for type (1) - space for next pointer (2) */ #define ZAP_LEAF_ARRAY_BYTES (ZAP_LEAF_CHUNKSIZE - 3) #define ZAP_LEAF_ARRAY_NCHUNKS(bytes) \ (((bytes)+ZAP_LEAF_ARRAY_BYTES-1)/ZAP_LEAF_ARRAY_BYTES) /* * Low water mark: when there are only this many chunks free, start * growing the ptrtbl. Ideally, this should be larger than a * "reasonably-sized" entry. 20 chunks is more than enough for the * largest directory entry (MAXNAMELEN (256) byte name, 8-byte value), * while still being only around 3% for 16k blocks. */ #define ZAP_LEAF_LOW_WATER (20) /* * The leaf hash table has block size / 2^5 (32) number of entries, * which should be more than enough for the maximum number of entries, * which is less than block size / CHUNKSIZE (24) / minimum number of * chunks per entry (3). */ #define ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5) #define ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l)) /* * The chunks start immediately after the hash table. The end of the * hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a * chunk_t. */ #define ZAP_LEAF_CHUNK(l, idx) \ ((zap_leaf_chunk_t *) \ ((l)->l_phys->l_hash + ZAP_LEAF_HASH_NUMENTRIES(l)))[idx] #define ZAP_LEAF_ENTRY(l, idx) (&ZAP_LEAF_CHUNK(l, idx).l_entry) typedef enum zap_chunk_type { ZAP_CHUNK_FREE = 253, ZAP_CHUNK_ENTRY = 252, ZAP_CHUNK_ARRAY = 251, ZAP_CHUNK_TYPE_MAX = 250 } zap_chunk_type_t; /* * TAKE NOTE: * If zap_leaf_phys_t is modified, zap_leaf_byteswap() must be modified. 
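The leaf and pointer-table sizing macros above are pure arithmetic on the block shift. A quick worked computation for a 16 KB (2^14) ZAP block, using the 24-byte chunk size mentioned in the comments, may make the constants easier to follow (stand-alone sketch, not part of the header):

#include <stdio.h>

int
main(void)
{
	int bs = 14;				/* 16 KB ZAP block */
	int chunksize = 24;			/* per the comments above */
	int hash_entries = 1 << (bs - 5);	/* ZAP_LEAF_HASH_NUMENTRIES */
	/* Hash entries are uint16_t, hence the 2 * hash_entries bytes. */
	int chunks = ((1 << bs) - 2 * hash_entries) / chunksize - 2;
	int ptrtbl_shift = bs - 3 - 1;		/* ZAP_EMBEDDED_PTRTBL_SHIFT */

	/* Prints: 512 hash entries, 638 leaf chunks, 1024 ptrtbl entries */
	printf("hash entries %d, leaf chunks %d, embedded ptrtbl entries %d\n",
	    hash_entries, chunks, 1 << ptrtbl_shift);
	return (0);
}

The 1024 embedded pointer-table entries of 8 bytes each come to 8 KB, which is exactly the "half a block" the comment above promises for a 16 KB block.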
*/ typedef struct zap_leaf_phys { struct zap_leaf_header { uint64_t lh_block_type; /* ZBT_LEAF */ uint64_t lh_pad1; uint64_t lh_prefix; /* hash prefix of this leaf */ uint32_t lh_magic; /* ZAP_LEAF_MAGIC */ uint16_t lh_nfree; /* number free chunks */ uint16_t lh_nentries; /* number of entries */ uint16_t lh_prefix_len; /* num bits used to id this */ /* above is accessable to zap, below is zap_leaf private */ uint16_t lh_freelist; /* chunk head of free list */ uint8_t lh_pad2[12]; } l_hdr; /* 2 24-byte chunks */ /* * The header is followed by a hash table with * ZAP_LEAF_HASH_NUMENTRIES(zap) entries. The hash table is * followed by an array of ZAP_LEAF_NUMCHUNKS(zap) * zap_leaf_chunk structures. These structures are accessed * with the ZAP_LEAF_CHUNK() macro. */ uint16_t l_hash[1]; } zap_leaf_phys_t; typedef union zap_leaf_chunk { struct zap_leaf_entry { uint8_t le_type; /* always ZAP_CHUNK_ENTRY */ uint8_t le_value_intlen; /* size of ints */ uint16_t le_next; /* next entry in hash chain */ uint16_t le_name_chunk; /* first chunk of the name */ uint16_t le_name_numints; /* bytes in name, incl null */ uint16_t le_value_chunk; /* first chunk of the value */ uint16_t le_value_numints; /* value length in ints */ uint32_t le_cd; /* collision differentiator */ uint64_t le_hash; /* hash value of the name */ } l_entry; struct zap_leaf_array { uint8_t la_type; /* always ZAP_CHUNK_ARRAY */ uint8_t la_array[ZAP_LEAF_ARRAY_BYTES]; uint16_t la_next; /* next blk or CHAIN_END */ } l_array; struct zap_leaf_free { uint8_t lf_type; /* always ZAP_CHUNK_FREE */ uint8_t lf_pad[ZAP_LEAF_ARRAY_BYTES]; uint16_t lf_next; /* next in free list, or CHAIN_END */ } l_free; } zap_leaf_chunk_t; typedef struct zap_leaf { int l_bs; /* block size shift */ zap_leaf_phys_t *l_phys; } zap_leaf_t; /* * Define special zfs pflags */ #define ZFS_XATTR 0x1 /* is an extended attribute */ #define ZFS_INHERIT_ACE 0x2 /* ace has inheritable ACEs */ #define ZFS_ACL_TRIVIAL 0x4 /* files ACL is trivial */ #define MASTER_NODE_OBJ 1 /* * special attributes for master node. */ #define ZFS_FSID "FSID" #define ZFS_UNLINKED_SET "DELETE_QUEUE" #define ZFS_ROOT_OBJ "ROOT" #define ZPL_VERSION_OBJ "VERSION" #define ZFS_PROP_BLOCKPERPAGE "BLOCKPERPAGE" #define ZFS_PROP_NOGROWBLOCKS "NOGROWBLOCKS" #define ZFS_FLAG_BLOCKPERPAGE 0x1 #define ZFS_FLAG_NOGROWBLOCKS 0x2 /* * ZPL version - rev'd whenever an incompatible on-disk format change * occurs. Independent of SPA/DMU/ZAP versioning. */ #define ZPL_VERSION 1ULL /* * The directory entry has the type (currently unused on Solaris) in the * top 4 bits, and the object number in the low 48 bits. The "middle" * 12 bits are unused. */ #define ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4) #define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48) #define ZFS_DIRENT_MAKE(type, obj) (((uint64_t)type << 60) | obj) typedef struct ace { uid_t a_who; /* uid or gid */ uint32_t a_access_mask; /* read,write,... */ uint16_t a_flags; /* see below */ uint16_t a_type; /* allow or deny */ } ace_t; #define ACE_SLOT_CNT 6 typedef struct zfs_znode_acl { uint64_t z_acl_extern_obj; /* ext acl pieces */ uint32_t z_acl_count; /* Number of ACEs */ uint16_t z_acl_version; /* acl version */ uint16_t z_acl_pad; /* pad */ ace_t z_ace_data[ACE_SLOT_CNT]; /* 6 standard ACEs */ } zfs_znode_acl_t; /* * This is the persistent portion of the znode. It is stored * in the "bonus buffer" of the file. Short symbolic links * are also stored in the bonus buffer. 
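The directory-entry macros above pack the object number into the low 48 bits of a single uint64_t and the type into the top 4 bits, leaving the middle 12 bits unused. A stand-alone round-trip of that packing, with BF64_GET written out as plain shifts and masks (the DIRENT_* names here are illustrative only):

#include <stdio.h>
#include <stdint.h>

#define DIRENT_MAKE(type, obj)	(((uint64_t)(type) << 60) | (obj))
#define DIRENT_TYPE(de)		(((de) >> 60) & 0xf)		/* top 4 bits */
#define DIRENT_OBJ(de)		((de) & ((1ULL << 48) - 1))	/* low 48 bits */

int
main(void)
{
	uint64_t de = DIRENT_MAKE(8 /* e.g. a regular file type */, 0x1234ULL);

	/* Prints: type 8 obj 0x1234 */
	printf("type %llu obj 0x%llx\n",
	    (unsigned long long)DIRENT_TYPE(de),
	    (unsigned long long)DIRENT_OBJ(de));
	return (0);
}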
*/ typedef struct znode_phys { uint64_t zp_atime[2]; /* 0 - last file access time */ uint64_t zp_mtime[2]; /* 16 - last file modification time */ uint64_t zp_ctime[2]; /* 32 - last file change time */ uint64_t zp_crtime[2]; /* 48 - creation time */ uint64_t zp_gen; /* 64 - generation (txg of creation) */ uint64_t zp_mode; /* 72 - file mode bits */ uint64_t zp_size; /* 80 - size of file */ uint64_t zp_parent; /* 88 - directory parent (`..') */ uint64_t zp_links; /* 96 - number of links to file */ uint64_t zp_xattr; /* 104 - DMU object for xattrs */ uint64_t zp_rdev; /* 112 - dev_t for VBLK & VCHR files */ uint64_t zp_flags; /* 120 - persistent flags */ uint64_t zp_uid; /* 128 - file owner */ uint64_t zp_gid; /* 136 - owning group */ uint64_t zp_pad[4]; /* 144 - future */ zfs_znode_acl_t zp_acl; /* 176 - 263 ACL */ /* * Data may pad out any remaining bytes in the znode buffer, eg: * * |<---------------------- dnode_phys (512) ------------------------>| * |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->| * |<---- znode (264) ---->|<---- data (56) ---->| * * At present, we only use this space to store symbolic links. */ } znode_phys_t; /* * In-core vdev representation. */ struct vdev; typedef int vdev_phys_read_t(struct vdev *vdev, void *priv, off_t offset, void *buf, size_t bytes); typedef int vdev_read_t(struct vdev *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t bytes); typedef STAILQ_HEAD(vdev_list, vdev) vdev_list_t; typedef struct vdev { STAILQ_ENTRY(vdev) v_childlink; /* link in parent's child list */ STAILQ_ENTRY(vdev) v_alllink; /* link in global vdev list */ vdev_list_t v_children; /* children of this vdev */ const char *v_name; /* vdev name */ uint64_t v_guid; /* vdev guid */ int v_id; /* index in parent */ int v_ashift; /* offset to block shift */ int v_nparity; /* # parity for raidz */ struct vdev *v_top; /* parent vdev */ int v_nchildren; /* # children */ vdev_state_t v_state; /* current state */ vdev_phys_read_t *v_phys_read; /* read from raw leaf vdev */ vdev_read_t *v_read; /* read from vdev */ void *v_read_priv; /* private data for read function */ } vdev_t; /* * In-core pool representation. */ typedef STAILQ_HEAD(spa_list, spa) spa_list_t; typedef struct spa { STAILQ_ENTRY(spa) spa_link; /* link in global pool list */ char *spa_name; /* pool name */ uint64_t spa_guid; /* pool guid */ uint64_t spa_txg; /* most recent transaction */ struct uberblock spa_uberblock; /* best uberblock so far */ vdev_list_t spa_vdevs; /* list of all toplevel vdevs */ objset_phys_t spa_mos; /* MOS for this pool */ int spa_inited; /* initialized */ } spa_t; static void decode_embedded_bp_compressed(const blkptr_t *, void *); Index: user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c (revision 303642) @@ -1,2755 +1,2763 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. 
* * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Portions Copyright 2010 The FreeBSD Foundation * * $FreeBSD$ */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2015, Joyent, Inc. All rights reserved. */ #include #include #include #include #include #include #ifdef illumos #include #endif #include #include #include #ifdef illumos #include #endif #include #include #include #include #include #include #include #include #ifdef illumos #include #endif #include #include #ifndef illumos #include #include #include +#include #include #include #include #include #include #include #include #include #endif /* * User-Land Trap-Based Tracing * ---------------------------- * * The fasttrap provider allows DTrace consumers to instrument any user-level * instruction to gather data; this includes probes with semantic * signifigance like entry and return as well as simple offsets into the * function. While the specific techniques used are very ISA specific, the * methodology is generalizable to any architecture. * * * The General Methodology * ----------------------- * * With the primary goal of tracing every user-land instruction and the * limitation that we can't trust user space so don't want to rely on much * information there, we begin by replacing the instructions we want to trace * with trap instructions. Each instruction we overwrite is saved into a hash * table keyed by process ID and pc address. When we enter the kernel due to * this trap instruction, we need the effects of the replaced instruction to * appear to have occurred before we proceed with the user thread's * execution. * * Each user level thread is represented by a ulwp_t structure which is * always easily accessible through a register. The most basic way to produce * the effects of the instruction we replaced is to copy that instruction out * to a bit of scratch space reserved in the user thread's ulwp_t structure * (a sort of kernel-private thread local storage), set the PC to that * scratch space and single step. When we reenter the kernel after single * stepping the instruction we must then adjust the PC to point to what would * normally be the next instruction. Of course, special care must be taken * for branches and jumps, but these represent such a small fraction of any * instruction set that writing the code to emulate these in the kernel is * not too difficult. * * Return probes may require several tracepoints to trace every return site, * and, conversely, each tracepoint may activate several probes (the entry * and offset 0 probes, for example). To solve this muliplexing problem, * tracepoints contain lists of probes to activate and probes contain lists * of tracepoints to enable. If a probe is activated, it adds its ID to * existing tracepoints or creates new ones as necessary. * * Most probes are activated _before_ the instruction is executed, but return * probes are activated _after_ the effects of the last instruction of the * function are visible. Return probes must be fired _after_ we have * single-stepped the instruction whereas all other probes are fired * beforehand. 
* * * Lock Ordering * ------------- * * The lock ordering below -- both internally and with respect to the DTrace * framework -- is a little tricky and bears some explanation. Each provider * has a lock (ftp_mtx) that protects its members including reference counts * for enabled probes (ftp_rcount), consumers actively creating probes * (ftp_ccount) and USDT consumers (ftp_mcount); all three prevent a provider * from being freed. A provider is looked up by taking the bucket lock for the * provider hash table, and is returned with its lock held. The provider lock * may be taken in functions invoked by the DTrace framework, but may not be * held while calling functions in the DTrace framework. * * To ensure consistency over multiple calls to the DTrace framework, the * creation lock (ftp_cmtx) should be held. Naturally, the creation lock may * not be taken when holding the provider lock as that would create a cyclic * lock ordering. In situations where one would naturally take the provider * lock and then the creation lock, we instead up a reference count to prevent * the provider from disappearing, drop the provider lock, and acquire the * creation lock. * * Briefly: * bucket lock before provider lock * DTrace before provider lock * creation lock before DTrace * never hold the provider lock and creation lock simultaneously */ static d_open_t fasttrap_open; static d_ioctl_t fasttrap_ioctl; static struct cdevsw fasttrap_cdevsw = { .d_version = D_VERSION, .d_open = fasttrap_open, .d_ioctl = fasttrap_ioctl, .d_name = "fasttrap", }; static struct cdev *fasttrap_cdev; static dtrace_meta_provider_id_t fasttrap_meta_id; static struct proc *fasttrap_cleanup_proc; static struct mtx fasttrap_cleanup_mtx; static uint_t fasttrap_cleanup_work, fasttrap_cleanup_drain, fasttrap_cleanup_cv; /* * Generation count on modifications to the global tracepoint lookup table. */ static volatile uint64_t fasttrap_mod_gen; /* * When the fasttrap provider is loaded, fasttrap_max is set to either * FASTTRAP_MAX_DEFAULT, or the value for fasttrap-max-probes in the * fasttrap.conf file (Illumos), or the value provied in the loader.conf (FreeBSD). * Each time a probe is created, fasttrap_total is incremented by the number * of tracepoints that may be associated with that probe; fasttrap_total is capped * at fasttrap_max. */ #define FASTTRAP_MAX_DEFAULT 250000 static uint32_t fasttrap_max = FASTTRAP_MAX_DEFAULT; static uint32_t fasttrap_total; /* * Copyright (c) 2011, Joyent, Inc. All rights reserved. 
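The ordering rules above lead to a recurring pattern in this file: code holding the provider lock that needs the creation lock cannot simply take it, because that would invert the documented order. Instead it bumps a reference count to keep the provider alive, drops the provider lock, and only then acquires the creation lock. A rough user-space analogue of that dance, using pthreads purely for illustration (none of these names exist in the kernel code):

#include <pthread.h>
#include <stdio.h>

struct provider {
	pthread_mutex_t	p_mtx;		/* stands in for ftp_mtx */
	pthread_mutex_t	p_cmtx;		/* stands in for ftp_cmtx */
	int		p_ccount;	/* consumers keeping it alive */
};

static void
create_probes(struct provider *p)
{
	printf("creating probes, ccount=%d\n", p->p_ccount);
}

int
main(void)
{
	struct provider p = { PTHREAD_MUTEX_INITIALIZER,
	    PTHREAD_MUTEX_INITIALIZER, 0 };

	/* Take a reference under the provider lock... */
	pthread_mutex_lock(&p.p_mtx);
	p.p_ccount++;
	pthread_mutex_unlock(&p.p_mtx);

	/* ...so the creation lock is never taken while p_mtx is held. */
	pthread_mutex_lock(&p.p_cmtx);
	create_probes(&p);
	pthread_mutex_unlock(&p.p_cmtx);

	/* Drop the reference once the creation lock has been released. */
	pthread_mutex_lock(&p.p_mtx);
	p.p_ccount--;
	pthread_mutex_unlock(&p.p_mtx);
	return (0);
}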
*/ #define FASTTRAP_TPOINTS_DEFAULT_SIZE 0x4000 #define FASTTRAP_PROVIDERS_DEFAULT_SIZE 0x100 #define FASTTRAP_PROCS_DEFAULT_SIZE 0x100 #define FASTTRAP_PID_NAME "pid" fasttrap_hash_t fasttrap_tpoints; static fasttrap_hash_t fasttrap_provs; static fasttrap_hash_t fasttrap_procs; static uint64_t fasttrap_pid_count; /* pid ref count */ static kmutex_t fasttrap_count_mtx; /* lock on ref count */ #define FASTTRAP_ENABLE_FAIL 1 #define FASTTRAP_ENABLE_PARTIAL 2 static int fasttrap_tracepoint_enable(proc_t *, fasttrap_probe_t *, uint_t); static void fasttrap_tracepoint_disable(proc_t *, fasttrap_probe_t *, uint_t); static fasttrap_provider_t *fasttrap_provider_lookup(pid_t, const char *, const dtrace_pattr_t *); static void fasttrap_provider_retire(pid_t, const char *, int); static void fasttrap_provider_free(fasttrap_provider_t *); static fasttrap_proc_t *fasttrap_proc_lookup(pid_t); static void fasttrap_proc_release(fasttrap_proc_t *); #ifndef illumos static void fasttrap_thread_dtor(void *, struct thread *); #endif #define FASTTRAP_PROVS_INDEX(pid, name) \ ((fasttrap_hash_str(name) + (pid)) & fasttrap_provs.fth_mask) #define FASTTRAP_PROCS_INDEX(pid) ((pid) & fasttrap_procs.fth_mask) #ifndef illumos struct rmlock fasttrap_tp_lock; static eventhandler_tag fasttrap_thread_dtor_tag; #endif static unsigned long tpoints_hash_size = FASTTRAP_TPOINTS_DEFAULT_SIZE; #ifdef __FreeBSD__ SYSCTL_DECL(_kern_dtrace); SYSCTL_NODE(_kern_dtrace, OID_AUTO, fasttrap, CTLFLAG_RD, 0, "DTrace fasttrap parameters"); SYSCTL_UINT(_kern_dtrace_fasttrap, OID_AUTO, max_probes, CTLFLAG_RWTUN, &fasttrap_max, FASTTRAP_MAX_DEFAULT, "Maximum number of fasttrap probes"); SYSCTL_ULONG(_kern_dtrace_fasttrap, OID_AUTO, tpoints_hash_size, CTLFLAG_RDTUN, &tpoints_hash_size, FASTTRAP_TPOINTS_DEFAULT_SIZE, "Size of the tracepoint hash table"); #endif static int fasttrap_highbit(ulong_t i) { int h = 1; if (i == 0) return (0); #ifdef _LP64 if (i & 0xffffffff00000000ul) { h += 32; i >>= 32; } #endif if (i & 0xffff0000) { h += 16; i >>= 16; } if (i & 0xff00) { h += 8; i >>= 8; } if (i & 0xf0) { h += 4; i >>= 4; } if (i & 0xc) { h += 2; i >>= 2; } if (i & 0x2) { h += 1; } return (h); } static uint_t fasttrap_hash_str(const char *p) { unsigned int g; uint_t hval = 0; while (*p) { hval = (hval << 4) + *p++; if ((g = (hval & 0xf0000000)) != 0) hval ^= g >> 24; hval &= ~g; } return (hval); } void fasttrap_sigtrap(proc_t *p, kthread_t *t, uintptr_t pc) { #ifdef illumos sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); sqp->sq_info.si_signo = SIGTRAP; sqp->sq_info.si_code = TRAP_DTRACE; sqp->sq_info.si_addr = (caddr_t)pc; mutex_enter(&p->p_lock); sigaddqa(p, t, sqp); mutex_exit(&p->p_lock); if (t != NULL) aston(t); #else ksiginfo_t *ksi = kmem_zalloc(sizeof (ksiginfo_t), KM_SLEEP); ksiginfo_init(ksi); ksi->ksi_signo = SIGTRAP; ksi->ksi_code = TRAP_DTRACE; ksi->ksi_addr = (caddr_t)pc; PROC_LOCK(p); (void) tdsendsignal(p, t, SIGTRAP, ksi); PROC_UNLOCK(p); #endif } #ifndef illumos /* * Obtain a chunk of scratch space in the address space of the target process. */ fasttrap_scrspace_t * fasttrap_scraddr(struct thread *td, fasttrap_proc_t *fprc) { fasttrap_scrblock_t *scrblk; fasttrap_scrspace_t *scrspc; struct proc *p; vm_offset_t addr; int error, i; scrspc = NULL; if (td->t_dtrace_sscr != NULL) { /* If the thread already has scratch space, we're done. 
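fasttrap_hash_str() above is a classic shift-and-fold string hash, and the *_INDEX macros mix in the pid and mask the result with fth_mask; that only distributes well because the table sizes are powers of two, so the mask is size - 1. A small stand-alone illustration of computing a provider bucket index, assuming the default table size of 0x100 from above (the helper names are illustrative):

#include <stdio.h>

/* Same hash function as fasttrap_hash_str() above. */
static unsigned int
hash_str(const char *p)
{
	unsigned int g, hval = 0;

	while (*p) {
		hval = (hval << 4) + *p++;
		if ((g = (hval & 0xf0000000)) != 0)
			hval ^= g >> 24;
		hval &= ~g;
	}
	return (hval);
}

int
main(void)
{
	unsigned int nent = 0x100;	/* FASTTRAP_PROVIDERS_DEFAULT_SIZE */
	unsigned int mask = nent - 1;	/* valid because nent is a power of two */
	unsigned int pid = 1234;

	/* Equivalent of FASTTRAP_PROVS_INDEX(pid, "pid"). */
	printf("bucket %u of %u\n", (hash_str("pid") + pid) & mask, nent);
	return (0);
}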
*/ scrspc = (fasttrap_scrspace_t *)td->t_dtrace_sscr; return (scrspc); } p = td->td_proc; mutex_enter(&fprc->ftpc_mtx); if (LIST_EMPTY(&fprc->ftpc_fscr)) { /* * No scratch space is available, so we'll map a new scratch * space block into the traced process' address space. */ addr = 0; error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, FASTTRAP_SCRBLOCK_SIZE, 0, VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error != KERN_SUCCESS) goto done; scrblk = malloc(sizeof(*scrblk), M_SOLARIS, M_WAITOK); scrblk->ftsb_addr = addr; LIST_INSERT_HEAD(&fprc->ftpc_scrblks, scrblk, ftsb_next); /* * Carve the block up into chunks and put them on the free list. */ for (i = 0; i < FASTTRAP_SCRBLOCK_SIZE / FASTTRAP_SCRSPACE_SIZE; i++) { scrspc = malloc(sizeof(*scrspc), M_SOLARIS, M_WAITOK); scrspc->ftss_addr = addr + i * FASTTRAP_SCRSPACE_SIZE; LIST_INSERT_HEAD(&fprc->ftpc_fscr, scrspc, ftss_next); } } /* * Take the first scratch chunk off the free list, put it on the * allocated list, and return its address. */ scrspc = LIST_FIRST(&fprc->ftpc_fscr); LIST_REMOVE(scrspc, ftss_next); LIST_INSERT_HEAD(&fprc->ftpc_ascr, scrspc, ftss_next); /* * This scratch space is reserved for use by td until the thread exits. */ td->t_dtrace_sscr = scrspc; done: mutex_exit(&fprc->ftpc_mtx); return (scrspc); } /* * Return any allocated per-thread scratch space chunks back to the process' * free list. */ static void fasttrap_thread_dtor(void *arg __unused, struct thread *td) { fasttrap_bucket_t *bucket; fasttrap_proc_t *fprc; fasttrap_scrspace_t *scrspc; pid_t pid; if (td->t_dtrace_sscr == NULL) return; pid = td->td_proc->p_pid; bucket = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)]; fprc = NULL; /* Look up the fasttrap process handle for this process. */ mutex_enter(&bucket->ftb_mtx); for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) { if (fprc->ftpc_pid == pid) { mutex_enter(&fprc->ftpc_mtx); mutex_exit(&bucket->ftb_mtx); break; } } if (fprc == NULL) { mutex_exit(&bucket->ftb_mtx); return; } scrspc = (fasttrap_scrspace_t *)td->t_dtrace_sscr; LIST_REMOVE(scrspc, ftss_next); LIST_INSERT_HEAD(&fprc->ftpc_fscr, scrspc, ftss_next); mutex_exit(&fprc->ftpc_mtx); } #endif /* * This function ensures that no threads are actively using the memory * associated with probes that were formerly live. */ static void fasttrap_mod_barrier(uint64_t gen) { int i; if (gen < fasttrap_mod_gen) return; fasttrap_mod_gen++; #ifdef illumos CPU_FOREACH(i) { mutex_enter(&fasttrap_cpuc_pid_lock[i]); mutex_exit(&fasttrap_cpuc_pid_lock[i]); } #else rm_wlock(&fasttrap_tp_lock); rm_wunlock(&fasttrap_tp_lock); #endif } /* * This function performs asynchronous cleanup of fasttrap providers. The * Solaris implementation of this mechanism use a timeout that's activated in * fasttrap_pid_cleanup(), but this doesn't work in FreeBSD: one may sleep while * holding the DTrace mutexes, but it is unsafe to sleep in a callout handler. * Thus we use a dedicated process to perform the cleanup when requested. */ /*ARGSUSED*/ static void fasttrap_pid_cleanup_cb(void *data) { fasttrap_provider_t **fpp, *fp; fasttrap_bucket_t *bucket; dtrace_provider_id_t provid; int i, later = 0, rval; mtx_lock(&fasttrap_cleanup_mtx); while (!fasttrap_cleanup_drain || later > 0) { fasttrap_cleanup_work = 0; mtx_unlock(&fasttrap_cleanup_mtx); later = 0; /* * Iterate over all the providers trying to remove the marked * ones. If a provider is marked but not retired, we just * have to take a crack at removing it -- it's no big deal if * we can't. 
*/ for (i = 0; i < fasttrap_provs.fth_nent; i++) { bucket = &fasttrap_provs.fth_table[i]; mutex_enter(&bucket->ftb_mtx); fpp = (fasttrap_provider_t **)&bucket->ftb_data; while ((fp = *fpp) != NULL) { if (!fp->ftp_marked) { fpp = &fp->ftp_next; continue; } mutex_enter(&fp->ftp_mtx); /* * If this provider has consumers actively * creating probes (ftp_ccount) or is a USDT * provider (ftp_mcount), we can't unregister * or even condense. */ if (fp->ftp_ccount != 0 || fp->ftp_mcount != 0) { mutex_exit(&fp->ftp_mtx); fp->ftp_marked = 0; continue; } if (!fp->ftp_retired || fp->ftp_rcount != 0) fp->ftp_marked = 0; mutex_exit(&fp->ftp_mtx); /* * If we successfully unregister this * provider we can remove it from the hash * chain and free the memory. If our attempt * to unregister fails and this is a retired * provider, increment our flag to try again * pretty soon. If we've consumed more than * half of our total permitted number of * probes call dtrace_condense() to try to * clean out the unenabled probes. */ provid = fp->ftp_provid; if ((rval = dtrace_unregister(provid)) != 0) { if (fasttrap_total > fasttrap_max / 2) (void) dtrace_condense(provid); if (rval == EAGAIN) fp->ftp_marked = 1; later += fp->ftp_marked; fpp = &fp->ftp_next; } else { *fpp = fp->ftp_next; fasttrap_provider_free(fp); } } mutex_exit(&bucket->ftb_mtx); } mtx_lock(&fasttrap_cleanup_mtx); /* * If we were unable to retire a provider, try again after a * second. This situation can occur in certain circumstances * where providers cannot be unregistered even though they have * no probes enabled because of an execution of dtrace -l or * something similar. */ if (later > 0 || fasttrap_cleanup_work || fasttrap_cleanup_drain) { mtx_unlock(&fasttrap_cleanup_mtx); pause("ftclean", hz); mtx_lock(&fasttrap_cleanup_mtx); } else mtx_sleep(&fasttrap_cleanup_cv, &fasttrap_cleanup_mtx, 0, "ftcl", 0); } /* * Wake up the thread in fasttrap_unload() now that we're done. */ wakeup(&fasttrap_cleanup_drain); mtx_unlock(&fasttrap_cleanup_mtx); kthread_exit(); } /* * Activates the asynchronous cleanup mechanism. */ static void fasttrap_pid_cleanup(void) { mtx_lock(&fasttrap_cleanup_mtx); if (!fasttrap_cleanup_work) { fasttrap_cleanup_work = 1; wakeup(&fasttrap_cleanup_cv); } mtx_unlock(&fasttrap_cleanup_mtx); } /* * This is called from cfork() via dtrace_fasttrap_fork(). The child * process's address space is (roughly) a copy of the parent process's so * we have to remove all the instrumentation we had previously enabled in the * parent. */ static void fasttrap_fork(proc_t *p, proc_t *cp) { #ifndef illumos fasttrap_scrblock_t *scrblk; fasttrap_proc_t *fprc = NULL; #endif pid_t ppid = p->p_pid; int i; #ifdef illumos ASSERT(curproc == p); ASSERT(p->p_proc_flag & P_PR_LOCK); #else PROC_LOCK_ASSERT(p, MA_OWNED); #endif #ifdef illumos ASSERT(p->p_dtrace_count > 0); #else if (p->p_dtrace_helpers) { /* * dtrace_helpers_duplicate() allocates memory. */ _PHOLD(cp); PROC_UNLOCK(p); PROC_UNLOCK(cp); dtrace_helpers_duplicate(p, cp); PROC_LOCK(cp); PROC_LOCK(p); _PRELE(cp); } /* * This check is purposely here instead of in kern_fork.c because, * for legal resons, we cannot include the dtrace_cddl.h header * inside kern_fork.c and insert if-clause there. */ if (p->p_dtrace_count == 0) return; #endif ASSERT(cp->p_dtrace_count == 0); /* * This would be simpler and faster if we maintained per-process * hash tables of enabled tracepoints. It could, however, potentially * slow down execution of a tracepoint since we'd need to go * through two levels of indirection. 
In the future, we should * consider either maintaining per-process ancillary lists of * enabled tracepoints or hanging a pointer to a per-process hash * table of enabled tracepoints off the proc structure. */ /* * We don't have to worry about the child process disappearing * because we're in fork(). */ #ifdef illumos mtx_lock_spin(&cp->p_slock); sprlock_proc(cp); mtx_unlock_spin(&cp->p_slock); #else /* * fasttrap_tracepoint_remove() expects the child process to be * unlocked and the VM then expects curproc to be unlocked. */ _PHOLD(cp); PROC_UNLOCK(cp); PROC_UNLOCK(p); #endif /* * Iterate over every tracepoint looking for ones that belong to the * parent process, and remove each from the child process. */ for (i = 0; i < fasttrap_tpoints.fth_nent; i++) { fasttrap_tracepoint_t *tp; fasttrap_bucket_t *bucket = &fasttrap_tpoints.fth_table[i]; mutex_enter(&bucket->ftb_mtx); for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (tp->ftt_pid == ppid && tp->ftt_proc->ftpc_acount != 0) { int ret = fasttrap_tracepoint_remove(cp, tp); ASSERT(ret == 0); /* * The count of active providers can only be * decremented (i.e. to zero) during exec, * exit, and removal of a meta provider so it * should be impossible to drop the count * mid-fork. */ ASSERT(tp->ftt_proc->ftpc_acount != 0); #ifndef illumos fprc = tp->ftt_proc; #endif } } mutex_exit(&bucket->ftb_mtx); #ifndef illumos /* * Unmap any scratch space inherited from the parent's address * space. */ if (fprc != NULL) { mutex_enter(&fprc->ftpc_mtx); LIST_FOREACH(scrblk, &fprc->ftpc_scrblks, ftsb_next) { vm_map_remove(&cp->p_vmspace->vm_map, scrblk->ftsb_addr, scrblk->ftsb_addr + FASTTRAP_SCRBLOCK_SIZE); } mutex_exit(&fprc->ftpc_mtx); } #endif } #ifdef illumos mutex_enter(&cp->p_lock); sprunlock(cp); #else PROC_LOCK(p); PROC_LOCK(cp); _PRELE(cp); #endif } /* * This is called from proc_exit() or from exec_common() if p_dtrace_probes * is set on the proc structure to indicate that there is a pid provider * associated with this process. */ static void fasttrap_exec_exit(proc_t *p) { #ifndef illumos struct thread *td; #endif #ifdef illumos ASSERT(p == curproc); #else PROC_LOCK_ASSERT(p, MA_OWNED); _PHOLD(p); /* * Since struct threads may be recycled, we cannot rely on t_dtrace_sscr * fields to be zeroed by kdtrace_thread_ctor. Thus we must zero it * ourselves when a process exits. */ FOREACH_THREAD_IN_PROC(p, td) td->t_dtrace_sscr = NULL; PROC_UNLOCK(p); #endif /* * We clean up the pid provider for this process here; user-land * static probes are handled by the meta-provider remove entry point. */ fasttrap_provider_retire(p->p_pid, FASTTRAP_PID_NAME, 0); #ifndef illumos if (p->p_dtrace_helpers) dtrace_helpers_destroy(p); PROC_LOCK(p); _PRELE(p); #endif } /*ARGSUSED*/ static void fasttrap_pid_provide(void *arg, dtrace_probedesc_t *desc) { /* * There are no "default" pid probes. */ } static int fasttrap_tracepoint_enable(proc_t *p, fasttrap_probe_t *probe, uint_t index) { fasttrap_tracepoint_t *tp, *new_tp = NULL; fasttrap_bucket_t *bucket; fasttrap_id_t *id; pid_t pid; uintptr_t pc; ASSERT(index < probe->ftp_ntps); pid = probe->ftp_pid; pc = probe->ftp_tps[index].fit_tp->ftt_pc; id = &probe->ftp_tps[index].fit_id; ASSERT(probe->ftp_tps[index].fit_tp->ftt_pid == pid); #ifdef illumos ASSERT(!(p->p_flag & SVFORK)); #endif /* * Before we make any modifications, make sure we've imposed a barrier * on the generation in which this probe was last modified. 
*/ fasttrap_mod_barrier(probe->ftp_gen); bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; /* * If the tracepoint has already been enabled, just add our id to the * list of interested probes. This may be our second time through * this path in which case we'll have constructed the tracepoint we'd * like to install. If we can't find a match, and have an allocated * tracepoint ready to go, enable that one now. * * A tracepoint whose process is defunct is also considered defunct. */ again: mutex_enter(&bucket->ftb_mtx); for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { /* * Note that it's safe to access the active count on the * associated proc structure because we know that at least one * provider (this one) will still be around throughout this * operation. */ if (tp->ftt_pid != pid || tp->ftt_pc != pc || tp->ftt_proc->ftpc_acount == 0) continue; /* * Now that we've found a matching tracepoint, it would be * a decent idea to confirm that the tracepoint is still * enabled and the trap instruction hasn't been overwritten. * Since this is a little hairy, we'll punt for now. */ /* * This can't be the first interested probe. We don't have * to worry about another thread being in the midst of * deleting this tracepoint (which would be the only valid * reason for a tracepoint to have no interested probes) * since we're holding P_PR_LOCK for this process. */ ASSERT(tp->ftt_ids != NULL || tp->ftt_retids != NULL); switch (id->fti_ptype) { case DTFTP_ENTRY: case DTFTP_OFFSETS: case DTFTP_IS_ENABLED: id->fti_next = tp->ftt_ids; membar_producer(); tp->ftt_ids = id; membar_producer(); break; case DTFTP_RETURN: case DTFTP_POST_OFFSETS: id->fti_next = tp->ftt_retids; membar_producer(); tp->ftt_retids = id; membar_producer(); break; default: ASSERT(0); } mutex_exit(&bucket->ftb_mtx); if (new_tp != NULL) { new_tp->ftt_ids = NULL; new_tp->ftt_retids = NULL; } return (0); } /* * If we have a good tracepoint ready to go, install it now while * we have the lock held and no one can screw with us. */ if (new_tp != NULL) { int rc = 0; new_tp->ftt_next = bucket->ftb_data; membar_producer(); bucket->ftb_data = new_tp; membar_producer(); mutex_exit(&bucket->ftb_mtx); /* * Activate the tracepoint in the ISA-specific manner. * If this fails, we need to report the failure, but * indicate that this tracepoint must still be disabled * by calling fasttrap_tracepoint_disable(). */ if (fasttrap_tracepoint_install(p, new_tp) != 0) rc = FASTTRAP_ENABLE_PARTIAL; /* * Increment the count of the number of tracepoints active in * the victim process. */ #ifdef illumos ASSERT(p->p_proc_flag & P_PR_LOCK); #endif p->p_dtrace_count++; return (rc); } mutex_exit(&bucket->ftb_mtx); /* * Initialize the tracepoint that's been preallocated with the probe. */ new_tp = probe->ftp_tps[index].fit_tp; ASSERT(new_tp->ftt_pid == pid); ASSERT(new_tp->ftt_pc == pc); ASSERT(new_tp->ftt_proc == probe->ftp_prov->ftp_proc); ASSERT(new_tp->ftt_ids == NULL); ASSERT(new_tp->ftt_retids == NULL); switch (id->fti_ptype) { case DTFTP_ENTRY: case DTFTP_OFFSETS: case DTFTP_IS_ENABLED: id->fti_next = NULL; new_tp->ftt_ids = id; break; case DTFTP_RETURN: case DTFTP_POST_OFFSETS: id->fti_next = NULL; new_tp->ftt_retids = id; break; default: ASSERT(0); } + +#ifdef __FreeBSD__ + if (SV_PROC_FLAG(p, SV_LP64)) + p->p_model = DATAMODEL_LP64; + else + p->p_model = DATAMODEL_ILP32; +#endif /* * If the ISA-dependent initialization goes to plan, go back to the * beginning and try to install this freshly made tracepoint. 
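The enable path above initializes the new tracepoint (or id) completely and only then publishes the pointer, with membar_producer() between the two stores, so that probe context walking these lists without the bucket lock never observes a half-built entry. A rough user-space analogue of that publish ordering using C11 release/acquire fences (illustrative only, not kernel code):

#include <stdatomic.h>
#include <stddef.h>

struct tp {
	int		tp_pc;		/* payload filled in before publishing */
	struct tp	*tp_next;
};

static _Atomic(struct tp *) head;

/* Writer: fully initialize the node, fence, then make it reachable. */
static void
publish(struct tp *new_tp, int pc)
{
	new_tp->tp_pc = pc;
	new_tp->tp_next = atomic_load_explicit(&head, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);	/* like membar_producer() */
	atomic_store_explicit(&head, new_tp, memory_order_relaxed);
}

/* Reader: never sees a node before its fields were initialized. */
static int
first_pc(void)
{
	struct tp *tp = atomic_load_explicit(&head, memory_order_acquire);

	return (tp != NULL ? tp->tp_pc : -1);
}

int
main(void)
{
	static struct tp t;

	publish(&t, 0x1000);
	return (first_pc() == 0x1000 ? 0 : 1);
}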
*/ if (fasttrap_tracepoint_init(p, new_tp, pc, id->fti_ptype) == 0) goto again; new_tp->ftt_ids = NULL; new_tp->ftt_retids = NULL; return (FASTTRAP_ENABLE_FAIL); } static void fasttrap_tracepoint_disable(proc_t *p, fasttrap_probe_t *probe, uint_t index) { fasttrap_bucket_t *bucket; fasttrap_provider_t *provider = probe->ftp_prov; fasttrap_tracepoint_t **pp, *tp; fasttrap_id_t *id, **idp = NULL; pid_t pid; uintptr_t pc; ASSERT(index < probe->ftp_ntps); pid = probe->ftp_pid; pc = probe->ftp_tps[index].fit_tp->ftt_pc; id = &probe->ftp_tps[index].fit_id; ASSERT(probe->ftp_tps[index].fit_tp->ftt_pid == pid); /* * Find the tracepoint and make sure that our id is one of the * ones registered with it. */ bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; mutex_enter(&bucket->ftb_mtx); for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (tp->ftt_pid == pid && tp->ftt_pc == pc && tp->ftt_proc == provider->ftp_proc) break; } /* * If we somehow lost this tracepoint, we're in a world of hurt. */ ASSERT(tp != NULL); switch (id->fti_ptype) { case DTFTP_ENTRY: case DTFTP_OFFSETS: case DTFTP_IS_ENABLED: ASSERT(tp->ftt_ids != NULL); idp = &tp->ftt_ids; break; case DTFTP_RETURN: case DTFTP_POST_OFFSETS: ASSERT(tp->ftt_retids != NULL); idp = &tp->ftt_retids; break; default: ASSERT(0); } while ((*idp)->fti_probe != probe) { idp = &(*idp)->fti_next; ASSERT(*idp != NULL); } id = *idp; *idp = id->fti_next; membar_producer(); ASSERT(id->fti_probe == probe); /* * If there are other registered enablings of this tracepoint, we're * all done, but if this was the last probe assocated with this * this tracepoint, we need to remove and free it. */ if (tp->ftt_ids != NULL || tp->ftt_retids != NULL) { /* * If the current probe's tracepoint is in use, swap it * for an unused tracepoint. */ if (tp == probe->ftp_tps[index].fit_tp) { fasttrap_probe_t *tmp_probe; fasttrap_tracepoint_t **tmp_tp; uint_t tmp_index; if (tp->ftt_ids != NULL) { tmp_probe = tp->ftt_ids->fti_probe; /* LINTED - alignment */ tmp_index = FASTTRAP_ID_INDEX(tp->ftt_ids); tmp_tp = &tmp_probe->ftp_tps[tmp_index].fit_tp; } else { tmp_probe = tp->ftt_retids->fti_probe; /* LINTED - alignment */ tmp_index = FASTTRAP_ID_INDEX(tp->ftt_retids); tmp_tp = &tmp_probe->ftp_tps[tmp_index].fit_tp; } ASSERT(*tmp_tp != NULL); ASSERT(*tmp_tp != probe->ftp_tps[index].fit_tp); ASSERT((*tmp_tp)->ftt_ids == NULL); ASSERT((*tmp_tp)->ftt_retids == NULL); probe->ftp_tps[index].fit_tp = *tmp_tp; *tmp_tp = tp; } mutex_exit(&bucket->ftb_mtx); /* * Tag the modified probe with the generation in which it was * changed. */ probe->ftp_gen = fasttrap_mod_gen; return; } mutex_exit(&bucket->ftb_mtx); /* * We can't safely remove the tracepoint from the set of active * tracepoints until we've actually removed the fasttrap instruction * from the process's text. We can, however, operate on this * tracepoint secure in the knowledge that no other thread is going to * be looking at it since we hold P_PR_LOCK on the process if it's * live or we hold the provider lock on the process if it's dead and * gone. */ /* * We only need to remove the actual instruction if we're looking * at an existing process */ if (p != NULL) { /* * If we fail to restore the instruction we need to kill * this process since it's in a completely unrecoverable * state. */ if (fasttrap_tracepoint_remove(p, tp) != 0) fasttrap_sigtrap(p, NULL, pc); /* * Decrement the count of the number of tracepoints active * in the victim process. 
*/ #ifdef illumos ASSERT(p->p_proc_flag & P_PR_LOCK); #endif p->p_dtrace_count--; } /* * Remove the probe from the hash table of active tracepoints. */ mutex_enter(&bucket->ftb_mtx); pp = (fasttrap_tracepoint_t **)&bucket->ftb_data; ASSERT(*pp != NULL); while (*pp != tp) { pp = &(*pp)->ftt_next; ASSERT(*pp != NULL); } *pp = tp->ftt_next; membar_producer(); mutex_exit(&bucket->ftb_mtx); /* * Tag the modified probe with the generation in which it was changed. */ probe->ftp_gen = fasttrap_mod_gen; } static void fasttrap_enable_callbacks(void) { /* * We don't have to play the rw lock game here because we're * providing something rather than taking something away -- * we can be sure that no threads have tried to follow this * function pointer yet. */ mutex_enter(&fasttrap_count_mtx); if (fasttrap_pid_count == 0) { ASSERT(dtrace_pid_probe_ptr == NULL); ASSERT(dtrace_return_probe_ptr == NULL); dtrace_pid_probe_ptr = &fasttrap_pid_probe; dtrace_return_probe_ptr = &fasttrap_return_probe; } ASSERT(dtrace_pid_probe_ptr == &fasttrap_pid_probe); ASSERT(dtrace_return_probe_ptr == &fasttrap_return_probe); fasttrap_pid_count++; mutex_exit(&fasttrap_count_mtx); } static void fasttrap_disable_callbacks(void) { #ifdef illumos ASSERT(MUTEX_HELD(&cpu_lock)); #endif mutex_enter(&fasttrap_count_mtx); ASSERT(fasttrap_pid_count > 0); fasttrap_pid_count--; if (fasttrap_pid_count == 0) { #ifdef illumos cpu_t *cur, *cpu = CPU; for (cur = cpu->cpu_next_onln; cur != cpu; cur = cur->cpu_next_onln) { rw_enter(&cur->cpu_ft_lock, RW_WRITER); } #endif dtrace_pid_probe_ptr = NULL; dtrace_return_probe_ptr = NULL; #ifdef illumos for (cur = cpu->cpu_next_onln; cur != cpu; cur = cur->cpu_next_onln) { rw_exit(&cur->cpu_ft_lock); } #endif } mutex_exit(&fasttrap_count_mtx); } /*ARGSUSED*/ static void fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) { fasttrap_probe_t *probe = parg; proc_t *p = NULL; int i, rc; ASSERT(probe != NULL); ASSERT(!probe->ftp_enabled); ASSERT(id == probe->ftp_id); #ifdef illumos ASSERT(MUTEX_HELD(&cpu_lock)); #endif /* * Increment the count of enabled probes on this probe's provider; * the provider can't go away while the probe still exists. We * must increment this even if we aren't able to properly enable * this probe. */ mutex_enter(&probe->ftp_prov->ftp_mtx); probe->ftp_prov->ftp_rcount++; mutex_exit(&probe->ftp_prov->ftp_mtx); /* * If this probe's provider is retired (meaning it was valid in a * previously exec'ed incarnation of this address space), bail out. The * provider can't go away while we're in this code path. */ if (probe->ftp_prov->ftp_retired) return; /* * If we can't find the process, it may be that we're in the context of * a fork in which the traced process is being born and we're copying * USDT probes. Otherwise, the process is gone so bail. */ #ifdef illumos if ((p = sprlock(probe->ftp_pid)) == NULL) { if ((curproc->p_flag & SFORKING) == 0) return; mutex_enter(&pidlock); p = prfind(probe->ftp_pid); if (p == NULL) { /* * So it's not that the target process is being born, * it's that it isn't there at all (and we simply * happen to be forking). Anyway, we know that the * target is definitely gone, so bail out. */ mutex_exit(&pidlock); return (0); } /* * Confirm that curproc is indeed forking the process in which * we're trying to enable probes. 
*/ ASSERT(p->p_parent == curproc); ASSERT(p->p_stat == SIDL); mutex_enter(&p->p_lock); mutex_exit(&pidlock); sprlock_proc(p); } ASSERT(!(p->p_flag & SVFORK)); mutex_exit(&p->p_lock); #else if ((p = pfind(probe->ftp_pid)) == NULL) return; #endif /* * We have to enable the trap entry point before any user threads have * the chance to execute the trap instruction we're about to place * in their process's text. */ #ifdef __FreeBSD__ /* * pfind() returns a locked process. */ _PHOLD(p); PROC_UNLOCK(p); #endif fasttrap_enable_callbacks(); /* * Enable all the tracepoints and add this probe's id to each * tracepoint's list of active probes. */ for (i = 0; i < probe->ftp_ntps; i++) { if ((rc = fasttrap_tracepoint_enable(p, probe, i)) != 0) { /* * If enabling the tracepoint failed completely, * we don't have to disable it; if the failure * was only partial we must disable it. */ if (rc == FASTTRAP_ENABLE_FAIL) i--; else ASSERT(rc == FASTTRAP_ENABLE_PARTIAL); /* * Back up and pull out all the tracepoints we've * created so far for this probe. */ while (i >= 0) { fasttrap_tracepoint_disable(p, probe, i); i--; } #ifdef illumos mutex_enter(&p->p_lock); sprunlock(p); #else PRELE(p); #endif /* * Since we're not actually enabling this probe, * drop our reference on the trap table entry. */ fasttrap_disable_callbacks(); return; } } #ifdef illumos mutex_enter(&p->p_lock); sprunlock(p); #else PRELE(p); #endif probe->ftp_enabled = 1; } /*ARGSUSED*/ static void fasttrap_pid_disable(void *arg, dtrace_id_t id, void *parg) { fasttrap_probe_t *probe = parg; fasttrap_provider_t *provider = probe->ftp_prov; proc_t *p; int i, whack = 0; ASSERT(id == probe->ftp_id); mutex_enter(&provider->ftp_mtx); /* * We won't be able to acquire a /proc-esque lock on the process * iff the process is dead and gone. In this case, we rely on the * provider lock as a point of mutual exclusion to prevent other * DTrace consumers from disabling this probe. */ if ((p = pfind(probe->ftp_pid)) != NULL) { #ifdef __FreeBSD__ if (p->p_flag & P_WEXIT) { PROC_UNLOCK(p); p = NULL; } else { _PHOLD(p); PROC_UNLOCK(p); } #endif } /* * Disable all the associated tracepoints (for fully enabled probes). */ if (probe->ftp_enabled) { for (i = 0; i < probe->ftp_ntps; i++) { fasttrap_tracepoint_disable(p, probe, i); } } ASSERT(provider->ftp_rcount > 0); provider->ftp_rcount--; if (p != NULL) { /* * Even though we may not be able to remove it entirely, we * mark this retired provider to get a chance to remove some * of the associated probes. */ if (provider->ftp_retired && !provider->ftp_marked) whack = provider->ftp_marked = 1; mutex_exit(&provider->ftp_mtx); } else { /* * If the process is dead, we're just waiting for the * last probe to be disabled to be able to free it. */ if (provider->ftp_rcount == 0 && !provider->ftp_marked) whack = provider->ftp_marked = 1; mutex_exit(&provider->ftp_mtx); } if (whack) fasttrap_pid_cleanup(); #ifdef __FreeBSD__ if (p != NULL) PRELE(p); #endif if (!probe->ftp_enabled) return; probe->ftp_enabled = 0; #ifdef illumos ASSERT(MUTEX_HELD(&cpu_lock)); #endif fasttrap_disable_callbacks(); } /*ARGSUSED*/ static void fasttrap_pid_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) { fasttrap_probe_t *probe = parg; char *str; int i, ndx; desc->dtargd_native[0] = '\0'; desc->dtargd_xlate[0] = '\0'; if (probe->ftp_prov->ftp_retired != 0 || desc->dtargd_ndx >= probe->ftp_nargs) { desc->dtargd_ndx = DTRACE_ARGNONE; return; } ndx = (probe->ftp_argmap != NULL) ? 
probe->ftp_argmap[desc->dtargd_ndx] : desc->dtargd_ndx; str = probe->ftp_ntypes; for (i = 0; i < ndx; i++) { str += strlen(str) + 1; } ASSERT(strlen(str + 1) < sizeof (desc->dtargd_native)); (void) strcpy(desc->dtargd_native, str); if (probe->ftp_xtypes == NULL) return; str = probe->ftp_xtypes; for (i = 0; i < desc->dtargd_ndx; i++) { str += strlen(str) + 1; } ASSERT(strlen(str + 1) < sizeof (desc->dtargd_xlate)); (void) strcpy(desc->dtargd_xlate, str); } /*ARGSUSED*/ static void fasttrap_pid_destroy(void *arg, dtrace_id_t id, void *parg) { fasttrap_probe_t *probe = parg; int i; size_t size; ASSERT(probe != NULL); ASSERT(!probe->ftp_enabled); ASSERT(fasttrap_total >= probe->ftp_ntps); atomic_add_32(&fasttrap_total, -probe->ftp_ntps); size = offsetof(fasttrap_probe_t, ftp_tps[probe->ftp_ntps]); if (probe->ftp_gen + 1 >= fasttrap_mod_gen) fasttrap_mod_barrier(probe->ftp_gen); for (i = 0; i < probe->ftp_ntps; i++) { kmem_free(probe->ftp_tps[i].fit_tp, sizeof (fasttrap_tracepoint_t)); } kmem_free(probe, size); } static const dtrace_pattr_t pid_attr = { { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, }; static dtrace_pops_t pid_pops = { fasttrap_pid_provide, NULL, fasttrap_pid_enable, fasttrap_pid_disable, NULL, NULL, fasttrap_pid_getargdesc, fasttrap_pid_getarg, NULL, fasttrap_pid_destroy }; static dtrace_pops_t usdt_pops = { fasttrap_pid_provide, NULL, fasttrap_pid_enable, fasttrap_pid_disable, NULL, NULL, fasttrap_pid_getargdesc, fasttrap_usdt_getarg, NULL, fasttrap_pid_destroy }; static fasttrap_proc_t * fasttrap_proc_lookup(pid_t pid) { fasttrap_bucket_t *bucket; fasttrap_proc_t *fprc, *new_fprc; bucket = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)]; mutex_enter(&bucket->ftb_mtx); for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) { if (fprc->ftpc_pid == pid && fprc->ftpc_acount != 0) { mutex_enter(&fprc->ftpc_mtx); mutex_exit(&bucket->ftb_mtx); fprc->ftpc_rcount++; atomic_inc_64(&fprc->ftpc_acount); ASSERT(fprc->ftpc_acount <= fprc->ftpc_rcount); mutex_exit(&fprc->ftpc_mtx); return (fprc); } } /* * Drop the bucket lock so we don't try to perform a sleeping * allocation under it. */ mutex_exit(&bucket->ftb_mtx); new_fprc = kmem_zalloc(sizeof (fasttrap_proc_t), KM_SLEEP); new_fprc->ftpc_pid = pid; new_fprc->ftpc_rcount = 1; new_fprc->ftpc_acount = 1; #ifndef illumos mutex_init(&new_fprc->ftpc_mtx, "fasttrap proc mtx", MUTEX_DEFAULT, NULL); #endif mutex_enter(&bucket->ftb_mtx); /* * Take another lap through the list to make sure a proc hasn't * been created for this pid while we weren't under the bucket lock. 
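fasttrap_proc_lookup() here, and fasttrap_provider_lookup() below, follow the same lookup-or-create discipline: search under the bucket lock, drop the lock to perform a sleeping allocation, then re-take the lock and search again before inserting, freeing the new object if another thread won the race. A compact user-space sketch of that discipline with a single pthread mutex (all names are illustrative):

#include <pthread.h>
#include <stdlib.h>

struct node {
	int		n_pid;
	struct node	*n_next;
};

static pthread_mutex_t bucket_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct node *bucket;

static struct node *
find_locked(int pid)
{
	struct node *n;

	for (n = bucket; n != NULL; n = n->n_next)
		if (n->n_pid == pid)
			return (n);
	return (NULL);
}

static struct node *
lookup_or_create(int pid)
{
	struct node *n, *new_n;

	pthread_mutex_lock(&bucket_mtx);
	if ((n = find_locked(pid)) != NULL) {
		pthread_mutex_unlock(&bucket_mtx);
		return (n);
	}
	/* Drop the lock: the allocation may sleep. */
	pthread_mutex_unlock(&bucket_mtx);
	if ((new_n = calloc(1, sizeof(*new_n))) == NULL)
		abort();
	new_n->n_pid = pid;

	/* Take another lap in case someone raced us while unlocked. */
	pthread_mutex_lock(&bucket_mtx);
	if ((n = find_locked(pid)) != NULL) {
		pthread_mutex_unlock(&bucket_mtx);
		free(new_n);		/* lost the race */
		return (n);
	}
	new_n->n_next = bucket;
	bucket = new_n;
	pthread_mutex_unlock(&bucket_mtx);
	return (new_n);
}

int
main(void)
{
	return (lookup_or_create(1234) == lookup_or_create(1234) ? 0 : 1);
}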
*/ for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) { if (fprc->ftpc_pid == pid && fprc->ftpc_acount != 0) { mutex_enter(&fprc->ftpc_mtx); mutex_exit(&bucket->ftb_mtx); fprc->ftpc_rcount++; atomic_inc_64(&fprc->ftpc_acount); ASSERT(fprc->ftpc_acount <= fprc->ftpc_rcount); mutex_exit(&fprc->ftpc_mtx); kmem_free(new_fprc, sizeof (fasttrap_proc_t)); return (fprc); } } new_fprc->ftpc_next = bucket->ftb_data; bucket->ftb_data = new_fprc; mutex_exit(&bucket->ftb_mtx); return (new_fprc); } static void fasttrap_proc_release(fasttrap_proc_t *proc) { fasttrap_bucket_t *bucket; fasttrap_proc_t *fprc, **fprcp; pid_t pid = proc->ftpc_pid; #ifndef illumos fasttrap_scrblock_t *scrblk, *scrblktmp; fasttrap_scrspace_t *scrspc, *scrspctmp; struct proc *p; struct thread *td; #endif mutex_enter(&proc->ftpc_mtx); ASSERT(proc->ftpc_rcount != 0); ASSERT(proc->ftpc_acount <= proc->ftpc_rcount); if (--proc->ftpc_rcount != 0) { mutex_exit(&proc->ftpc_mtx); return; } #ifndef illumos /* * Free all structures used to manage per-thread scratch space. */ LIST_FOREACH_SAFE(scrblk, &proc->ftpc_scrblks, ftsb_next, scrblktmp) { LIST_REMOVE(scrblk, ftsb_next); free(scrblk, M_SOLARIS); } LIST_FOREACH_SAFE(scrspc, &proc->ftpc_fscr, ftss_next, scrspctmp) { LIST_REMOVE(scrspc, ftss_next); free(scrspc, M_SOLARIS); } LIST_FOREACH_SAFE(scrspc, &proc->ftpc_ascr, ftss_next, scrspctmp) { LIST_REMOVE(scrspc, ftss_next); free(scrspc, M_SOLARIS); } if ((p = pfind(pid)) != NULL) { FOREACH_THREAD_IN_PROC(p, td) td->t_dtrace_sscr = NULL; PROC_UNLOCK(p); } #endif mutex_exit(&proc->ftpc_mtx); /* * There should definitely be no live providers associated with this * process at this point. */ ASSERT(proc->ftpc_acount == 0); bucket = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)]; mutex_enter(&bucket->ftb_mtx); fprcp = (fasttrap_proc_t **)&bucket->ftb_data; while ((fprc = *fprcp) != NULL) { if (fprc == proc) break; fprcp = &fprc->ftpc_next; } /* * Something strange has happened if we can't find the proc. */ ASSERT(fprc != NULL); *fprcp = fprc->ftpc_next; mutex_exit(&bucket->ftb_mtx); kmem_free(fprc, sizeof (fasttrap_proc_t)); } /* * Lookup a fasttrap-managed provider based on its name and associated pid. * If the pattr argument is non-NULL, this function instantiates the provider * if it doesn't exist otherwise it returns NULL. The provider is returned * with its lock held. */ static fasttrap_provider_t * fasttrap_provider_lookup(pid_t pid, const char *name, const dtrace_pattr_t *pattr) { fasttrap_provider_t *fp, *new_fp = NULL; fasttrap_bucket_t *bucket; char provname[DTRACE_PROVNAMELEN]; proc_t *p; cred_t *cred; ASSERT(strlen(name) < sizeof (fp->ftp_name)); ASSERT(pattr != NULL); bucket = &fasttrap_provs.fth_table[FASTTRAP_PROVS_INDEX(pid, name)]; mutex_enter(&bucket->ftb_mtx); /* * Take a lap through the list and return the match if we find it. */ for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) { if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 && !fp->ftp_retired) { mutex_enter(&fp->ftp_mtx); mutex_exit(&bucket->ftb_mtx); return (fp); } } /* * Drop the bucket lock so we don't try to perform a sleeping * allocation under it. */ mutex_exit(&bucket->ftb_mtx); /* * Make sure the process exists, isn't a child created as the result * of a vfork(2), and isn't a zombie (but may be in fork). */ if ((p = pfind(pid)) == NULL) return (NULL); /* * Increment p_dtrace_probes so that the process knows to inform us * when it exits or execs. 
fasttrap_provider_free() decrements this * when we're done with this provider. */ p->p_dtrace_probes++; /* * Grab the credentials for this process so we have * something to pass to dtrace_register(). */ PROC_LOCK_ASSERT(p, MA_OWNED); crhold(p->p_ucred); cred = p->p_ucred; PROC_UNLOCK(p); new_fp = kmem_zalloc(sizeof (fasttrap_provider_t), KM_SLEEP); new_fp->ftp_pid = pid; new_fp->ftp_proc = fasttrap_proc_lookup(pid); #ifndef illumos mutex_init(&new_fp->ftp_mtx, "provider mtx", MUTEX_DEFAULT, NULL); mutex_init(&new_fp->ftp_cmtx, "lock on creating", MUTEX_DEFAULT, NULL); #endif ASSERT(new_fp->ftp_proc != NULL); mutex_enter(&bucket->ftb_mtx); /* * Take another lap through the list to make sure a provider hasn't * been created for this pid while we weren't under the bucket lock. */ for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) { if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 && !fp->ftp_retired) { mutex_enter(&fp->ftp_mtx); mutex_exit(&bucket->ftb_mtx); fasttrap_provider_free(new_fp); crfree(cred); return (fp); } } (void) strcpy(new_fp->ftp_name, name); /* * Fail and return NULL if either the provider name is too long * or we fail to register this new provider with the DTrace * framework. Note that this is the only place we ever construct * the full provider name -- we keep it in pieces in the provider * structure. */ if (snprintf(provname, sizeof (provname), "%s%u", name, (uint_t)pid) >= sizeof (provname) || dtrace_register(provname, pattr, DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER | DTRACE_PRIV_ZONEOWNER, cred, pattr == &pid_attr ? &pid_pops : &usdt_pops, new_fp, &new_fp->ftp_provid) != 0) { mutex_exit(&bucket->ftb_mtx); fasttrap_provider_free(new_fp); crfree(cred); return (NULL); } new_fp->ftp_next = bucket->ftb_data; bucket->ftb_data = new_fp; mutex_enter(&new_fp->ftp_mtx); mutex_exit(&bucket->ftb_mtx); crfree(cred); return (new_fp); } static void fasttrap_provider_free(fasttrap_provider_t *provider) { pid_t pid = provider->ftp_pid; proc_t *p; /* * There need to be no associated enabled probes, no consumers * creating probes, and no meta providers referencing this provider. */ ASSERT(provider->ftp_rcount == 0); ASSERT(provider->ftp_ccount == 0); ASSERT(provider->ftp_mcount == 0); /* * If this provider hasn't been retired, we need to explicitly drop the * count of active providers on the associated process structure. */ if (!provider->ftp_retired) { atomic_dec_64(&provider->ftp_proc->ftpc_acount); ASSERT(provider->ftp_proc->ftpc_acount < provider->ftp_proc->ftpc_rcount); } fasttrap_proc_release(provider->ftp_proc); #ifndef illumos mutex_destroy(&provider->ftp_mtx); mutex_destroy(&provider->ftp_cmtx); #endif kmem_free(provider, sizeof (fasttrap_provider_t)); /* * Decrement p_dtrace_probes on the process whose provider we're * freeing. We don't have to worry about clobbering somone else's * modifications to it because we have locked the bucket that * corresponds to this process's hash chain in the provider hash * table. Don't sweat it if we can't find the process. 
*/ if ((p = pfind(pid)) == NULL) { return; } p->p_dtrace_probes--; #ifndef illumos PROC_UNLOCK(p); #endif } static void fasttrap_provider_retire(pid_t pid, const char *name, int mprov) { fasttrap_provider_t *fp; fasttrap_bucket_t *bucket; dtrace_provider_id_t provid; ASSERT(strlen(name) < sizeof (fp->ftp_name)); bucket = &fasttrap_provs.fth_table[FASTTRAP_PROVS_INDEX(pid, name)]; mutex_enter(&bucket->ftb_mtx); for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) { if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 && !fp->ftp_retired) break; } if (fp == NULL) { mutex_exit(&bucket->ftb_mtx); return; } mutex_enter(&fp->ftp_mtx); ASSERT(!mprov || fp->ftp_mcount > 0); if (mprov && --fp->ftp_mcount != 0) { mutex_exit(&fp->ftp_mtx); mutex_exit(&bucket->ftb_mtx); return; } /* * Mark the provider to be removed in our post-processing step, mark it * retired, and drop the active count on its proc. Marking it indicates * that we should try to remove it; setting the retired flag indicates * that we're done with this provider; dropping the active the proc * releases our hold, and when this reaches zero (as it will during * exit or exec) the proc and associated providers become defunct. * * We obviously need to take the bucket lock before the provider lock * to perform the lookup, but we need to drop the provider lock * before calling into the DTrace framework since we acquire the * provider lock in callbacks invoked from the DTrace framework. The * bucket lock therefore protects the integrity of the provider hash * table. */ atomic_dec_64(&fp->ftp_proc->ftpc_acount); ASSERT(fp->ftp_proc->ftpc_acount < fp->ftp_proc->ftpc_rcount); fp->ftp_retired = 1; fp->ftp_marked = 1; provid = fp->ftp_provid; mutex_exit(&fp->ftp_mtx); /* * We don't have to worry about invalidating the same provider twice * since fasttrap_provider_lookup() will ignore provider that have * been marked as retired. */ dtrace_invalidate(provid); mutex_exit(&bucket->ftb_mtx); fasttrap_pid_cleanup(); } static int fasttrap_uint32_cmp(const void *ap, const void *bp) { return (*(const uint32_t *)ap - *(const uint32_t *)bp); } static int fasttrap_uint64_cmp(const void *ap, const void *bp) { return (*(const uint64_t *)ap - *(const uint64_t *)bp); } static int fasttrap_add_probe(fasttrap_probe_spec_t *pdata) { fasttrap_provider_t *provider; fasttrap_probe_t *pp; fasttrap_tracepoint_t *tp; char *name; int i, aframes = 0, whack; /* * There needs to be at least one desired trace point. */ if (pdata->ftps_noffs == 0) return (EINVAL); switch (pdata->ftps_type) { case DTFTP_ENTRY: name = "entry"; aframes = FASTTRAP_ENTRY_AFRAMES; break; case DTFTP_RETURN: name = "return"; aframes = FASTTRAP_RETURN_AFRAMES; break; case DTFTP_OFFSETS: name = NULL; break; default: return (EINVAL); } if ((provider = fasttrap_provider_lookup(pdata->ftps_pid, FASTTRAP_PID_NAME, &pid_attr)) == NULL) return (ESRCH); /* * Increment this reference count to indicate that a consumer is * actively adding a new probe associated with this provider. This * prevents the provider from being deleted -- we'll need to check * for pending deletions when we drop this reference count. */ provider->ftp_ccount++; mutex_exit(&provider->ftp_mtx); /* * Grab the creation lock to ensure consistency between calls to * dtrace_probe_lookup() and dtrace_probe_create() in the face of * other threads creating probes. We must drop the provider lock * before taking this lock to avoid a three-way deadlock with the * DTrace framework. 
*/ mutex_enter(&provider->ftp_cmtx); if (name == NULL) { for (i = 0; i < pdata->ftps_noffs; i++) { char name_str[17]; (void) sprintf(name_str, "%llx", (unsigned long long)pdata->ftps_offs[i]); if (dtrace_probe_lookup(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name_str) != 0) continue; atomic_inc_32(&fasttrap_total); if (fasttrap_total > fasttrap_max) { atomic_dec_32(&fasttrap_total); goto no_mem; } pp = kmem_zalloc(sizeof (fasttrap_probe_t), KM_SLEEP); pp->ftp_prov = provider; pp->ftp_faddr = pdata->ftps_pc; pp->ftp_fsize = pdata->ftps_size; pp->ftp_pid = pdata->ftps_pid; pp->ftp_ntps = 1; tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = pdata->ftps_offs[i] + pdata->ftps_pc; tp->ftt_pid = pdata->ftps_pid; pp->ftp_tps[0].fit_tp = tp; pp->ftp_tps[0].fit_id.fti_probe = pp; pp->ftp_tps[0].fit_id.fti_ptype = pdata->ftps_type; pp->ftp_id = dtrace_probe_create(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name_str, FASTTRAP_OFFSET_AFRAMES, pp); } } else if (dtrace_probe_lookup(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name) == 0) { atomic_add_32(&fasttrap_total, pdata->ftps_noffs); if (fasttrap_total > fasttrap_max) { atomic_add_32(&fasttrap_total, -pdata->ftps_noffs); goto no_mem; } /* * Make sure all tracepoint program counter values are unique. * We later assume that each probe has exactly one tracepoint * for a given pc. */ qsort(pdata->ftps_offs, pdata->ftps_noffs, sizeof (uint64_t), fasttrap_uint64_cmp); for (i = 1; i < pdata->ftps_noffs; i++) { if (pdata->ftps_offs[i] > pdata->ftps_offs[i - 1]) continue; atomic_add_32(&fasttrap_total, -pdata->ftps_noffs); goto no_mem; } ASSERT(pdata->ftps_noffs > 0); pp = kmem_zalloc(offsetof(fasttrap_probe_t, ftp_tps[pdata->ftps_noffs]), KM_SLEEP); pp->ftp_prov = provider; pp->ftp_faddr = pdata->ftps_pc; pp->ftp_fsize = pdata->ftps_size; pp->ftp_pid = pdata->ftps_pid; pp->ftp_ntps = pdata->ftps_noffs; for (i = 0; i < pdata->ftps_noffs; i++) { tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = pdata->ftps_offs[i] + pdata->ftps_pc; tp->ftt_pid = pdata->ftps_pid; pp->ftp_tps[i].fit_tp = tp; pp->ftp_tps[i].fit_id.fti_probe = pp; pp->ftp_tps[i].fit_id.fti_ptype = pdata->ftps_type; } pp->ftp_id = dtrace_probe_create(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name, aframes, pp); } mutex_exit(&provider->ftp_cmtx); /* * We know that the provider is still valid since we incremented the * creation reference count. If someone tried to clean up this provider * while we were using it (e.g. because the process called exec(2) or * exit(2)), take note of that and try to clean it up now. */ mutex_enter(&provider->ftp_mtx); provider->ftp_ccount--; whack = provider->ftp_retired; mutex_exit(&provider->ftp_mtx); if (whack) fasttrap_pid_cleanup(); return (0); no_mem: /* * If we've exhausted the allowable resources, we'll try to remove * this provider to free some up. This is to cover the case where * the user has accidentally created many more probes than was * intended (e.g. pid123:::). 
*/ mutex_exit(&provider->ftp_cmtx); mutex_enter(&provider->ftp_mtx); provider->ftp_ccount--; provider->ftp_marked = 1; mutex_exit(&provider->ftp_mtx); fasttrap_pid_cleanup(); return (ENOMEM); } /*ARGSUSED*/ static void * fasttrap_meta_provide(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) { fasttrap_provider_t *provider; /* * A 32-bit unsigned integer (like a pid for example) can be * expressed in 10 or fewer decimal digits. Make sure that we'll * have enough space for the provider name. */ if (strlen(dhpv->dthpv_provname) + 10 >= sizeof (provider->ftp_name)) { printf("failed to instantiate provider %s: " "name too long to accomodate pid", dhpv->dthpv_provname); return (NULL); } /* * Don't let folks spoof the true pid provider. */ if (strcmp(dhpv->dthpv_provname, FASTTRAP_PID_NAME) == 0) { printf("failed to instantiate provider %s: " "%s is an invalid name", dhpv->dthpv_provname, FASTTRAP_PID_NAME); return (NULL); } /* * The highest stability class that fasttrap supports is ISA; cap * the stability of the new provider accordingly. */ if (dhpv->dthpv_pattr.dtpa_provider.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_provider.dtat_class = DTRACE_CLASS_ISA; if (dhpv->dthpv_pattr.dtpa_mod.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_mod.dtat_class = DTRACE_CLASS_ISA; if (dhpv->dthpv_pattr.dtpa_func.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_func.dtat_class = DTRACE_CLASS_ISA; if (dhpv->dthpv_pattr.dtpa_name.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_name.dtat_class = DTRACE_CLASS_ISA; if (dhpv->dthpv_pattr.dtpa_args.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_args.dtat_class = DTRACE_CLASS_ISA; if ((provider = fasttrap_provider_lookup(pid, dhpv->dthpv_provname, &dhpv->dthpv_pattr)) == NULL) { printf("failed to instantiate provider %s for " "process %u", dhpv->dthpv_provname, (uint_t)pid); return (NULL); } /* * Up the meta provider count so this provider isn't removed until * the meta provider has been told to remove it. */ provider->ftp_mcount++; mutex_exit(&provider->ftp_mtx); return (provider); } /* * We know a few things about our context here: we know that the probe being * created doesn't already exist (DTrace won't load DOF at the same address * twice, even if explicitly told to do so) and we know that we are * single-threaded with respect to the meta provider machinery. Knowing that * this is a new probe and that there is no way for us to race with another * operation on this provider allows us an important optimization: we need not * lookup a probe before adding it. Saving this lookup is important because * this code is in the fork path for processes with USDT probes, and lookups * here are potentially very expensive because of long hash conflicts on * module, function and name (DTrace doesn't hash on provider name). */ /*ARGSUSED*/ static void fasttrap_meta_create_probe(void *arg, void *parg, dtrace_helper_probedesc_t *dhpb) { fasttrap_provider_t *provider = parg; fasttrap_probe_t *pp; fasttrap_tracepoint_t *tp; int i, j; uint32_t ntps; /* * Since the meta provider count is non-zero we don't have to worry * about this provider disappearing. */ ASSERT(provider->ftp_mcount > 0); /* * The offsets must be unique. 
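 * For example, if the same offset appears twice in dthpb_offs, the sorted
 * scan below sees a non-increasing pair and returns without creating the
 * probe.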
*/ qsort(dhpb->dthpb_offs, dhpb->dthpb_noffs, sizeof (uint32_t), fasttrap_uint32_cmp); for (i = 1; i < dhpb->dthpb_noffs; i++) { if (dhpb->dthpb_base + dhpb->dthpb_offs[i] <= dhpb->dthpb_base + dhpb->dthpb_offs[i - 1]) return; } qsort(dhpb->dthpb_enoffs, dhpb->dthpb_nenoffs, sizeof (uint32_t), fasttrap_uint32_cmp); for (i = 1; i < dhpb->dthpb_nenoffs; i++) { if (dhpb->dthpb_base + dhpb->dthpb_enoffs[i] <= dhpb->dthpb_base + dhpb->dthpb_enoffs[i - 1]) return; } ntps = dhpb->dthpb_noffs + dhpb->dthpb_nenoffs; ASSERT(ntps > 0); atomic_add_32(&fasttrap_total, ntps); if (fasttrap_total > fasttrap_max) { atomic_add_32(&fasttrap_total, -ntps); return; } pp = kmem_zalloc(offsetof(fasttrap_probe_t, ftp_tps[ntps]), KM_SLEEP); pp->ftp_prov = provider; pp->ftp_pid = provider->ftp_pid; pp->ftp_ntps = ntps; pp->ftp_nargs = dhpb->dthpb_xargc; pp->ftp_xtypes = dhpb->dthpb_xtypes; pp->ftp_ntypes = dhpb->dthpb_ntypes; /* * First create a tracepoint for each actual point of interest. */ for (i = 0; i < dhpb->dthpb_noffs; i++) { tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = dhpb->dthpb_base + dhpb->dthpb_offs[i]; tp->ftt_pid = provider->ftp_pid; pp->ftp_tps[i].fit_tp = tp; pp->ftp_tps[i].fit_id.fti_probe = pp; #ifdef __sparc pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_POST_OFFSETS; #else pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_OFFSETS; #endif } /* * Then create a tracepoint for each is-enabled point. */ for (j = 0; i < ntps; i++, j++) { tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = dhpb->dthpb_base + dhpb->dthpb_enoffs[j]; tp->ftt_pid = provider->ftp_pid; pp->ftp_tps[i].fit_tp = tp; pp->ftp_tps[i].fit_id.fti_probe = pp; pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_IS_ENABLED; } /* * If the arguments are shuffled around we set the argument remapping * table. Later, when the probe fires, we only remap the arguments * if the table is non-NULL. */ for (i = 0; i < dhpb->dthpb_xargc; i++) { if (dhpb->dthpb_args[i] != i) { pp->ftp_argmap = dhpb->dthpb_args; break; } } /* * The probe is fully constructed -- register it with DTrace. */ pp->ftp_id = dtrace_probe_create(provider->ftp_provid, dhpb->dthpb_mod, dhpb->dthpb_func, dhpb->dthpb_name, FASTTRAP_OFFSET_AFRAMES, pp); } /*ARGSUSED*/ static void fasttrap_meta_remove(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) { /* * Clean up the USDT provider. There may be active consumers of the * provider busy adding probes, no damage will actually befall the * provider until that count has dropped to zero. This just puts * the provider on death row. */ fasttrap_provider_retire(pid, dhpv->dthpv_provname, 1); } static dtrace_mops_t fasttrap_mops = { fasttrap_meta_create_probe, fasttrap_meta_provide, fasttrap_meta_remove }; /*ARGSUSED*/ static int fasttrap_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) { return (0); } /*ARGSUSED*/ static int fasttrap_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int fflag, struct thread *td) { #ifdef notyet struct kinfo_proc kp; const cred_t *cr = td->td_ucred; #endif if (!dtrace_attached()) return (EAGAIN); if (cmd == FASTTRAPIOC_MAKEPROBE) { fasttrap_probe_spec_t *uprobe = *(fasttrap_probe_spec_t **)arg; fasttrap_probe_spec_t *probe; uint64_t noffs; size_t size; int ret, err; if (copyin(&uprobe->ftps_noffs, &noffs, sizeof (uprobe->ftps_noffs))) return (EFAULT); /* * Probes must have at least one tracepoint. 
*/ if (noffs == 0) return (EINVAL); size = sizeof (fasttrap_probe_spec_t) + sizeof (probe->ftps_offs[0]) * (noffs - 1); if (size > 1024 * 1024) return (ENOMEM); probe = kmem_alloc(size, KM_SLEEP); if (copyin(uprobe, probe, size) != 0 || probe->ftps_noffs != noffs) { kmem_free(probe, size); return (EFAULT); } /* * Verify that the function and module strings contain no * funny characters. */ if (u8_validate(probe->ftps_func, strlen(probe->ftps_func), NULL, U8_VALIDATE_ENTIRE, &err) < 0) { ret = EINVAL; goto err; } if (u8_validate(probe->ftps_mod, strlen(probe->ftps_mod), NULL, U8_VALIDATE_ENTIRE, &err) < 0) { ret = EINVAL; goto err; } #ifdef notyet if (!PRIV_POLICY_CHOICE(cr, PRIV_ALL, B_FALSE)) { proc_t *p; pid_t pid = probe->ftps_pid; #ifdef illumos mutex_enter(&pidlock); #endif /* * Report an error if the process doesn't exist * or is actively being birthed. */ sx_slock(&proctree_lock); p = pfind(pid); if (p) fill_kinfo_proc(p, &kp); sx_sunlock(&proctree_lock); if (p == NULL || kp.ki_stat == SIDL) { #ifdef illumos mutex_exit(&pidlock); #endif return (ESRCH); } #ifdef illumos mutex_enter(&p->p_lock); mutex_exit(&pidlock); #else PROC_LOCK_ASSERT(p, MA_OWNED); #endif #ifdef notyet if ((ret = priv_proc_cred_perm(cr, p, NULL, VREAD | VWRITE)) != 0) { #ifdef illumos mutex_exit(&p->p_lock); #else PROC_UNLOCK(p); #endif return (ret); } #endif /* notyet */ #ifdef illumos mutex_exit(&p->p_lock); #else PROC_UNLOCK(p); #endif } #endif /* notyet */ ret = fasttrap_add_probe(probe); err: kmem_free(probe, size); return (ret); } else if (cmd == FASTTRAPIOC_GETINSTR) { fasttrap_instr_query_t instr; fasttrap_tracepoint_t *tp; uint_t index; #ifdef illumos int ret; #endif #ifdef illumos if (copyin((void *)arg, &instr, sizeof (instr)) != 0) return (EFAULT); #endif #ifdef notyet if (!PRIV_POLICY_CHOICE(cr, PRIV_ALL, B_FALSE)) { proc_t *p; pid_t pid = instr.ftiq_pid; #ifdef illumos mutex_enter(&pidlock); #endif /* * Report an error if the process doesn't exist * or is actively being birthed. */ sx_slock(&proctree_lock); p = pfind(pid); if (p) fill_kinfo_proc(p, &kp); sx_sunlock(&proctree_lock); if (p == NULL || kp.ki_stat == SIDL) { #ifdef illumos mutex_exit(&pidlock); #endif return (ESRCH); } #ifdef illumos mutex_enter(&p->p_lock); mutex_exit(&pidlock); #else PROC_LOCK_ASSERT(p, MA_OWNED); #endif #ifdef notyet if ((ret = priv_proc_cred_perm(cr, p, NULL, VREAD)) != 0) { #ifdef illumos mutex_exit(&p->p_lock); #else PROC_UNLOCK(p); #endif return (ret); } #endif /* notyet */ #ifdef illumos mutex_exit(&p->p_lock); #else PROC_UNLOCK(p); #endif } #endif /* notyet */ index = FASTTRAP_TPOINTS_INDEX(instr.ftiq_pid, instr.ftiq_pc); mutex_enter(&fasttrap_tpoints.fth_table[index].ftb_mtx); tp = fasttrap_tpoints.fth_table[index].ftb_data; while (tp != NULL) { if (instr.ftiq_pid == tp->ftt_pid && instr.ftiq_pc == tp->ftt_pc && tp->ftt_proc->ftpc_acount != 0) break; tp = tp->ftt_next; } if (tp == NULL) { mutex_exit(&fasttrap_tpoints.fth_table[index].ftb_mtx); return (ENOENT); } bcopy(&tp->ftt_instr, &instr.ftiq_instr, sizeof (instr.ftiq_instr)); mutex_exit(&fasttrap_tpoints.fth_table[index].ftb_mtx); if (copyout(&instr, (void *)arg, sizeof (instr)) != 0) return (EFAULT); return (0); } return (EINVAL); } static int fasttrap_load(void) { ulong_t nent; int i, ret; /* Create the /dev/dtrace/fasttrap entry. 
*/ fasttrap_cdev = make_dev(&fasttrap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "dtrace/fasttrap"); mtx_init(&fasttrap_cleanup_mtx, "fasttrap clean", "dtrace", MTX_DEF); mutex_init(&fasttrap_count_mtx, "fasttrap count mtx", MUTEX_DEFAULT, NULL); #ifdef illumos fasttrap_max = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, "fasttrap-max-probes", FASTTRAP_MAX_DEFAULT); #endif fasttrap_total = 0; /* * Conjure up the tracepoints hashtable... */ #ifdef illumos nent = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, "fasttrap-hash-size", FASTTRAP_TPOINTS_DEFAULT_SIZE); #else nent = tpoints_hash_size; #endif if (nent == 0 || nent > 0x1000000) nent = FASTTRAP_TPOINTS_DEFAULT_SIZE; tpoints_hash_size = nent; if (ISP2(nent)) fasttrap_tpoints.fth_nent = nent; else fasttrap_tpoints.fth_nent = 1 << fasttrap_highbit(nent); ASSERT(fasttrap_tpoints.fth_nent > 0); fasttrap_tpoints.fth_mask = fasttrap_tpoints.fth_nent - 1; fasttrap_tpoints.fth_table = kmem_zalloc(fasttrap_tpoints.fth_nent * sizeof (fasttrap_bucket_t), KM_SLEEP); #ifndef illumos for (i = 0; i < fasttrap_tpoints.fth_nent; i++) mutex_init(&fasttrap_tpoints.fth_table[i].ftb_mtx, "tracepoints bucket mtx", MUTEX_DEFAULT, NULL); #endif /* * ... and the providers hash table... */ nent = FASTTRAP_PROVIDERS_DEFAULT_SIZE; if (ISP2(nent)) fasttrap_provs.fth_nent = nent; else fasttrap_provs.fth_nent = 1 << fasttrap_highbit(nent); ASSERT(fasttrap_provs.fth_nent > 0); fasttrap_provs.fth_mask = fasttrap_provs.fth_nent - 1; fasttrap_provs.fth_table = kmem_zalloc(fasttrap_provs.fth_nent * sizeof (fasttrap_bucket_t), KM_SLEEP); #ifndef illumos for (i = 0; i < fasttrap_provs.fth_nent; i++) mutex_init(&fasttrap_provs.fth_table[i].ftb_mtx, "providers bucket mtx", MUTEX_DEFAULT, NULL); #endif ret = kproc_create(fasttrap_pid_cleanup_cb, NULL, &fasttrap_cleanup_proc, 0, 0, "ftcleanup"); if (ret != 0) { destroy_dev(fasttrap_cdev); #ifndef illumos for (i = 0; i < fasttrap_provs.fth_nent; i++) mutex_destroy(&fasttrap_provs.fth_table[i].ftb_mtx); for (i = 0; i < fasttrap_tpoints.fth_nent; i++) mutex_destroy(&fasttrap_tpoints.fth_table[i].ftb_mtx); #endif kmem_free(fasttrap_provs.fth_table, fasttrap_provs.fth_nent * sizeof (fasttrap_bucket_t)); mtx_destroy(&fasttrap_cleanup_mtx); mutex_destroy(&fasttrap_count_mtx); return (ret); } /* * ... and the procs hash table. */ nent = FASTTRAP_PROCS_DEFAULT_SIZE; if (ISP2(nent)) fasttrap_procs.fth_nent = nent; else fasttrap_procs.fth_nent = 1 << fasttrap_highbit(nent); ASSERT(fasttrap_procs.fth_nent > 0); fasttrap_procs.fth_mask = fasttrap_procs.fth_nent - 1; fasttrap_procs.fth_table = kmem_zalloc(fasttrap_procs.fth_nent * sizeof (fasttrap_bucket_t), KM_SLEEP); #ifndef illumos for (i = 0; i < fasttrap_procs.fth_nent; i++) mutex_init(&fasttrap_procs.fth_table[i].ftb_mtx, "processes bucket mtx", MUTEX_DEFAULT, NULL); rm_init(&fasttrap_tp_lock, "fasttrap tracepoint"); /* * This event handler must run before kdtrace_thread_dtor() since it * accesses the thread's struct kdtrace_thread. */ fasttrap_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, fasttrap_thread_dtor, NULL, EVENTHANDLER_PRI_FIRST); #endif /* * Install our hooks into fork(2), exec(2), and exit(2). 
*/ dtrace_fasttrap_fork = &fasttrap_fork; dtrace_fasttrap_exit = &fasttrap_exec_exit; dtrace_fasttrap_exec = &fasttrap_exec_exit; (void) dtrace_meta_register("fasttrap", &fasttrap_mops, NULL, &fasttrap_meta_id); return (0); } static int fasttrap_unload(void) { int i, fail = 0; /* * Unregister the meta-provider to make sure no new fasttrap- * managed providers come along while we're trying to close up * shop. If we fail to detach, we'll need to re-register as a * meta-provider. We can fail to unregister as a meta-provider * if providers we manage still exist. */ if (fasttrap_meta_id != DTRACE_METAPROVNONE && dtrace_meta_unregister(fasttrap_meta_id) != 0) return (-1); /* * Iterate over all of our providers. If there's still a process * that corresponds to that pid, fail to detach. */ for (i = 0; i < fasttrap_provs.fth_nent; i++) { fasttrap_provider_t **fpp, *fp; fasttrap_bucket_t *bucket = &fasttrap_provs.fth_table[i]; mutex_enter(&bucket->ftb_mtx); fpp = (fasttrap_provider_t **)&bucket->ftb_data; while ((fp = *fpp) != NULL) { /* * Acquire and release the lock as a simple way of * waiting for any other consumer to finish with * this provider. A thread must first acquire the * bucket lock so there's no chance of another thread * blocking on the provider's lock. */ mutex_enter(&fp->ftp_mtx); mutex_exit(&fp->ftp_mtx); if (dtrace_unregister(fp->ftp_provid) != 0) { fail = 1; fpp = &fp->ftp_next; } else { *fpp = fp->ftp_next; fasttrap_provider_free(fp); } } mutex_exit(&bucket->ftb_mtx); } if (fail) { (void) dtrace_meta_register("fasttrap", &fasttrap_mops, NULL, &fasttrap_meta_id); return (-1); } /* * Stop new processes from entering these hooks now, before the * fasttrap_cleanup thread runs. That way all processes will hopefully * be out of these hooks before we free fasttrap_provs.fth_table */ ASSERT(dtrace_fasttrap_fork == &fasttrap_fork); dtrace_fasttrap_fork = NULL; ASSERT(dtrace_fasttrap_exec == &fasttrap_exec_exit); dtrace_fasttrap_exec = NULL; ASSERT(dtrace_fasttrap_exit == &fasttrap_exec_exit); dtrace_fasttrap_exit = NULL; mtx_lock(&fasttrap_cleanup_mtx); fasttrap_cleanup_drain = 1; /* Wait for the cleanup thread to finish up and signal us. 
*/ wakeup(&fasttrap_cleanup_cv); mtx_sleep(&fasttrap_cleanup_drain, &fasttrap_cleanup_mtx, 0, "ftcld", 0); fasttrap_cleanup_proc = NULL; mtx_destroy(&fasttrap_cleanup_mtx); #ifdef DEBUG mutex_enter(&fasttrap_count_mtx); ASSERT(fasttrap_pid_count == 0); mutex_exit(&fasttrap_count_mtx); #endif #ifndef illumos EVENTHANDLER_DEREGISTER(thread_dtor, fasttrap_thread_dtor_tag); for (i = 0; i < fasttrap_tpoints.fth_nent; i++) mutex_destroy(&fasttrap_tpoints.fth_table[i].ftb_mtx); for (i = 0; i < fasttrap_provs.fth_nent; i++) mutex_destroy(&fasttrap_provs.fth_table[i].ftb_mtx); for (i = 0; i < fasttrap_procs.fth_nent; i++) mutex_destroy(&fasttrap_procs.fth_table[i].ftb_mtx); #endif kmem_free(fasttrap_tpoints.fth_table, fasttrap_tpoints.fth_nent * sizeof (fasttrap_bucket_t)); fasttrap_tpoints.fth_nent = 0; kmem_free(fasttrap_provs.fth_table, fasttrap_provs.fth_nent * sizeof (fasttrap_bucket_t)); fasttrap_provs.fth_nent = 0; kmem_free(fasttrap_procs.fth_table, fasttrap_procs.fth_nent * sizeof (fasttrap_bucket_t)); fasttrap_procs.fth_nent = 0; #ifndef illumos destroy_dev(fasttrap_cdev); mutex_destroy(&fasttrap_count_mtx); rm_destroy(&fasttrap_tp_lock); #endif return (0); } /* ARGSUSED */ static int fasttrap_modevent(module_t mod __unused, int type, void *data __unused) { int error = 0; switch (type) { case MOD_LOAD: break; case MOD_UNLOAD: break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } SYSINIT(fasttrap_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fasttrap_load, NULL); SYSUNINIT(fasttrap_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fasttrap_unload, NULL); DEV_MODULE(fasttrap, fasttrap_modevent, NULL); MODULE_VERSION(fasttrap, 1); MODULE_DEPEND(fasttrap, dtrace, 1, 1, 1); MODULE_DEPEND(fasttrap, opensolaris, 1, 1, 1); Index: user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c (revision 303642) @@ -1,661 +1,673 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2012, 2014 by Delphix. All rights reserved. */ #include #include #include #include #include /* * Virtual device vector for mirroring. 
*/ typedef struct mirror_child { vdev_t *mc_vd; uint64_t mc_offset; int mc_error; int mc_load; uint8_t mc_tried; uint8_t mc_skipped; uint8_t mc_speculative; } mirror_child_t; typedef struct mirror_map { int *mm_preferred; int mm_preferred_cnt; int mm_children; boolean_t mm_replacing; boolean_t mm_root; mirror_child_t mm_child[]; } mirror_map_t; static int vdev_mirror_shift = 21; +#ifdef _KERNEL SYSCTL_DECL(_vfs_zfs_vdev); static SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, mirror, CTLFLAG_RD, 0, "ZFS VDEV Mirror"); +#endif /* * The load configuration settings below are tuned by default for * the case where all devices are of the same rotational type. * * If there is a mixture of rotating and non-rotating media, setting * non_rotating_seek_inc to 0 may well provide better results as it * will direct more reads to the non-rotating vdevs which are more * likely to have a higher performance. */ /* Rotating media load calculation configuration. */ static int rotating_inc = 0; +#ifdef _KERNEL SYSCTL_INT(_vfs_zfs_vdev_mirror, OID_AUTO, rotating_inc, CTLFLAG_RWTUN, &rotating_inc, 0, "Rotating media load increment for non-seeking I/O's"); +#endif static int rotating_seek_inc = 5; +#ifdef _KERNEL SYSCTL_INT(_vfs_zfs_vdev_mirror, OID_AUTO, rotating_seek_inc, CTLFLAG_RWTUN, &rotating_seek_inc, 0, "Rotating media load increment for seeking I/O's"); +#endif static int rotating_seek_offset = 1 * 1024 * 1024; +#ifdef _KERNEL SYSCTL_INT(_vfs_zfs_vdev_mirror, OID_AUTO, rotating_seek_offset, CTLFLAG_RWTUN, &rotating_seek_offset, 0, "Offset in bytes from the last I/O which " "triggers a reduced rotating media seek increment"); +#endif /* Non-rotating media load calculation configuration. */ static int non_rotating_inc = 0; +#ifdef _KERNEL SYSCTL_INT(_vfs_zfs_vdev_mirror, OID_AUTO, non_rotating_inc, CTLFLAG_RWTUN, &non_rotating_inc, 0, "Non-rotating media load increment for non-seeking I/O's"); +#endif static int non_rotating_seek_inc = 1; +#ifdef _KERNEL SYSCTL_INT(_vfs_zfs_vdev_mirror, OID_AUTO, non_rotating_seek_inc, CTLFLAG_RWTUN, &non_rotating_seek_inc, 0, "Non-rotating media load increment for seeking I/O's"); +#endif static inline size_t vdev_mirror_map_size(int children) { return (offsetof(mirror_map_t, mm_child[children]) + sizeof(int) * children); } static inline mirror_map_t * vdev_mirror_map_alloc(int children, boolean_t replacing, boolean_t root) { mirror_map_t *mm; mm = kmem_zalloc(vdev_mirror_map_size(children), KM_SLEEP); mm->mm_children = children; mm->mm_replacing = replacing; mm->mm_root = root; mm->mm_preferred = (int *)((uintptr_t)mm + offsetof(mirror_map_t, mm_child[children])); return mm; } static void vdev_mirror_map_free(zio_t *zio) { mirror_map_t *mm = zio->io_vsd; kmem_free(mm, vdev_mirror_map_size(mm->mm_children)); } static const zio_vsd_ops_t vdev_mirror_vsd_ops = { vdev_mirror_map_free, zio_vsd_default_cksum_report }; static int vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset) { uint64_t lastoffset; int load; /* All DVAs have equal weight at the root. */ if (mm->mm_root) return (INT_MAX); /* * We don't return INT_MAX if the device is resilvering i.e. * vdev_resilver_txg != 0 as when tested performance was slightly * worse overall when resilvering with compared to without. */ /* Standard load based on pending queue length. */ load = vdev_queue_length(vd); lastoffset = vdev_queue_lastoffset(vd); if (vd->vdev_rotation_rate == VDEV_RATE_NON_ROTATING) { /* Non-rotating media. 
*/ if (lastoffset == zio_offset) return (load + non_rotating_inc); /* * Apply a seek penalty even for non-rotating devices as * sequential I/O'a can be aggregated into fewer operations * on the device, thus avoiding unnecessary per-command * overhead and boosting performance. */ return (load + non_rotating_seek_inc); } /* Rotating media I/O's which directly follow the last I/O. */ if (lastoffset == zio_offset) return (load + rotating_inc); /* * Apply half the seek increment to I/O's within seek offset * of the last I/O queued to this vdev as they should incure less * of a seek increment. */ if (ABS(lastoffset - zio_offset) < rotating_seek_offset) return (load + (rotating_seek_inc / 2)); /* Apply the full seek increment to all other I/O's. */ return (load + rotating_seek_inc); } static mirror_map_t * vdev_mirror_map_init(zio_t *zio) { mirror_map_t *mm = NULL; mirror_child_t *mc; vdev_t *vd = zio->io_vd; int c; if (vd == NULL) { dva_t *dva = zio->io_bp->blk_dva; spa_t *spa = zio->io_spa; mm = vdev_mirror_map_alloc(BP_GET_NDVAS(zio->io_bp), B_FALSE, B_TRUE); for (c = 0; c < mm->mm_children; c++) { mc = &mm->mm_child[c]; mc->mc_vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[c])); mc->mc_offset = DVA_GET_OFFSET(&dva[c]); } } else { mm = vdev_mirror_map_alloc(vd->vdev_children, (vd->vdev_ops == &vdev_replacing_ops || vd->vdev_ops == &vdev_spare_ops), B_FALSE); for (c = 0; c < mm->mm_children; c++) { mc = &mm->mm_child[c]; mc->mc_vd = vd->vdev_child[c]; mc->mc_offset = zio->io_offset; } } zio->io_vsd = mm; zio->io_vsd_ops = &vdev_mirror_vsd_ops; return (mm); } static int vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, uint64_t *logical_ashift, uint64_t *physical_ashift) { int numerrors = 0; int lasterror = 0; if (vd->vdev_children == 0) { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return (SET_ERROR(EINVAL)); } vdev_open_children(vd); for (int c = 0; c < vd->vdev_children; c++) { vdev_t *cvd = vd->vdev_child[c]; if (cvd->vdev_open_error) { lasterror = cvd->vdev_open_error; numerrors++; continue; } *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1; *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1; *logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift); *physical_ashift = MAX(*physical_ashift, cvd->vdev_physical_ashift); } if (numerrors == vd->vdev_children) { vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; return (lasterror); } return (0); } static void vdev_mirror_close(vdev_t *vd) { for (int c = 0; c < vd->vdev_children; c++) vdev_close(vd->vdev_child[c]); } static void vdev_mirror_child_done(zio_t *zio) { mirror_child_t *mc = zio->io_private; mc->mc_error = zio->io_error; mc->mc_tried = 1; mc->mc_skipped = 0; } static void vdev_mirror_scrub_done(zio_t *zio) { mirror_child_t *mc = zio->io_private; if (zio->io_error == 0) { zio_t *pio; mutex_enter(&zio->io_lock); while ((pio = zio_walk_parents(zio)) != NULL) { mutex_enter(&pio->io_lock); ASSERT3U(zio->io_size, >=, pio->io_size); bcopy(zio->io_data, pio->io_data, pio->io_size); mutex_exit(&pio->io_lock); } mutex_exit(&zio->io_lock); } zio_buf_free(zio->io_data, zio->io_size); mc->mc_error = zio->io_error; mc->mc_tried = 1; mc->mc_skipped = 0; } /* * Check the other, lower-index DVAs to see if they're on the same * vdev as the child we picked. If they are, use them since they * are likely to have been allocated from the primary metaslab in * use at the time, and hence are more likely to have locality with * single-copy data. 
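 * For example, if the child initially picked and a lower-numbered DVA both
 * map to the same top-level vdev, the lower-numbered DVA is used instead.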
*/ static int vdev_mirror_dva_select(zio_t *zio, int p) { dva_t *dva = zio->io_bp->blk_dva; mirror_map_t *mm = zio->io_vsd; int preferred; int c; preferred = mm->mm_preferred[p]; for (p-- ; p >= 0; p--) { c = mm->mm_preferred[p]; if (DVA_GET_VDEV(&dva[c]) == DVA_GET_VDEV(&dva[preferred])) preferred = c; } return (preferred); } static int vdev_mirror_preferred_child_randomize(zio_t *zio) { mirror_map_t *mm = zio->io_vsd; int p; if (mm->mm_root) { p = spa_get_random(mm->mm_preferred_cnt); return (vdev_mirror_dva_select(zio, p)); } /* * To ensure we don't always favour the first matching vdev, * which could lead to wear leveling issues on SSD's, we * use the I/O offset as a pseudo random seed into the vdevs * which have the lowest load. */ p = (zio->io_offset >> vdev_mirror_shift) % mm->mm_preferred_cnt; return (mm->mm_preferred[p]); } /* * Try to find a vdev whose DTL doesn't contain the block we want to read * prefering vdevs based on determined load. * * If we can't, try the read on any vdev we haven't already tried. */ static int vdev_mirror_child_select(zio_t *zio) { mirror_map_t *mm = zio->io_vsd; uint64_t txg = zio->io_txg; int c, lowest_load; ASSERT(zio->io_bp == NULL || BP_PHYSICAL_BIRTH(zio->io_bp) == txg); lowest_load = INT_MAX; mm->mm_preferred_cnt = 0; for (c = 0; c < mm->mm_children; c++) { mirror_child_t *mc; mc = &mm->mm_child[c]; if (mc->mc_tried || mc->mc_skipped) continue; if (!vdev_readable(mc->mc_vd)) { mc->mc_error = SET_ERROR(ENXIO); mc->mc_tried = 1; /* don't even try */ mc->mc_skipped = 1; continue; } if (vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1)) { mc->mc_error = SET_ERROR(ESTALE); mc->mc_skipped = 1; mc->mc_speculative = 1; continue; } mc->mc_load = vdev_mirror_load(mm, mc->mc_vd, mc->mc_offset); if (mc->mc_load > lowest_load) continue; if (mc->mc_load < lowest_load) { lowest_load = mc->mc_load; mm->mm_preferred_cnt = 0; } mm->mm_preferred[mm->mm_preferred_cnt] = c; mm->mm_preferred_cnt++; } if (mm->mm_preferred_cnt == 1) { vdev_queue_register_lastoffset( mm->mm_child[mm->mm_preferred[0]].mc_vd, zio); return (mm->mm_preferred[0]); } if (mm->mm_preferred_cnt > 1) { int c = vdev_mirror_preferred_child_randomize(zio); vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd, zio); return (c); } /* * Every device is either missing or has this txg in its DTL. * Look for any child we haven't already tried before giving up. */ for (c = 0; c < mm->mm_children; c++) { if (!mm->mm_child[c].mc_tried) { vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd, zio); return (c); } } /* * Every child failed. There's no place left to look. */ return (-1); } static void vdev_mirror_io_start(zio_t *zio) { mirror_map_t *mm; mirror_child_t *mc; int c, children; mm = vdev_mirror_map_init(zio); if (zio->io_type == ZIO_TYPE_READ) { if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_replacing && mm->mm_children > 1) { /* * For scrubbing reads we need to allocate a read * buffer for each child and issue reads to all * children. If any child succeeds, it will copy its * data into zio->io_data in vdev_mirror_scrub_done. */ for (c = 0; c < mm->mm_children; c++) { mc = &mm->mm_child[c]; zio_nowait(zio_vdev_child_io(zio, zio->io_bp, mc->mc_vd, mc->mc_offset, zio_buf_alloc(zio->io_size), zio->io_size, zio->io_type, zio->io_priority, 0, vdev_mirror_scrub_done, mc)); } zio_execute(zio); return; } /* * For normal reads just pick one child. 
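 * vdev_mirror_child_select() picks the least-loaded readable child whose
 * DTL does not contain this txg, falling back to any child not yet tried.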
*/ c = vdev_mirror_child_select(zio); children = (c >= 0); } else { ASSERT(zio->io_type == ZIO_TYPE_WRITE || zio->io_type == ZIO_TYPE_FREE); /* * Writes and frees go to all children. */ c = 0; children = mm->mm_children; } while (children--) { mc = &mm->mm_child[c]; zio_nowait(zio_vdev_child_io(zio, zio->io_bp, mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, zio->io_type, zio->io_priority, 0, vdev_mirror_child_done, mc)); c++; } zio_execute(zio); } static int vdev_mirror_worst_error(mirror_map_t *mm) { int error[2] = { 0, 0 }; for (int c = 0; c < mm->mm_children; c++) { mirror_child_t *mc = &mm->mm_child[c]; int s = mc->mc_speculative; error[s] = zio_worst_error(error[s], mc->mc_error); } return (error[0] ? error[0] : error[1]); } static void vdev_mirror_io_done(zio_t *zio) { mirror_map_t *mm = zio->io_vsd; mirror_child_t *mc; int c; int good_copies = 0; int unexpected_errors = 0; for (c = 0; c < mm->mm_children; c++) { mc = &mm->mm_child[c]; if (mc->mc_error) { if (!mc->mc_skipped) unexpected_errors++; } else if (mc->mc_tried) { good_copies++; } } if (zio->io_type == ZIO_TYPE_WRITE) { /* * XXX -- for now, treat partial writes as success. * * Now that we support write reallocation, it would be better * to treat partial failure as real failure unless there are * no non-degraded top-level vdevs left, and not update DTLs * if we intend to reallocate. */ /* XXPOLICY */ if (good_copies != mm->mm_children) { /* * Always require at least one good copy. * * For ditto blocks (io_vd == NULL), require * all copies to be good. * * XXX -- for replacing vdevs, there's no great answer. * If the old device is really dead, we may not even * be able to access it -- so we only want to * require good writes to the new device. But if * the new device turns out to be flaky, we want * to be able to detach it -- which requires all * writes to the old device to have succeeded. */ if (good_copies == 0 || zio->io_vd == NULL) zio->io_error = vdev_mirror_worst_error(mm); } return; } else if (zio->io_type == ZIO_TYPE_FREE) { return; } ASSERT(zio->io_type == ZIO_TYPE_READ); /* * If we don't have a good copy yet, keep trying other children. */ /* XXPOLICY */ if (good_copies == 0 && (c = vdev_mirror_child_select(zio)) != -1) { ASSERT(c >= 0 && c < mm->mm_children); mc = &mm->mm_child[c]; zio_vdev_io_redone(zio); zio_nowait(zio_vdev_child_io(zio, zio->io_bp, mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, ZIO_TYPE_READ, zio->io_priority, 0, vdev_mirror_child_done, mc)); return; } /* XXPOLICY */ if (good_copies == 0) { zio->io_error = vdev_mirror_worst_error(mm); ASSERT(zio->io_error != 0); } if (good_copies && spa_writeable(zio->io_spa) && (unexpected_errors || (zio->io_flags & ZIO_FLAG_RESILVER) || ((zio->io_flags & ZIO_FLAG_SCRUB) && mm->mm_replacing))) { /* * Use the good data we have in hand to repair damaged children. */ for (c = 0; c < mm->mm_children; c++) { /* * Don't rewrite known good children. * Not only is it unnecessary, it could * actually be harmful: if the system lost * power while rewriting the only good copy, * there would be no good copies left! */ mc = &mm->mm_child[c]; if (mc->mc_error == 0) { if (mc->mc_tried) continue; if (!(zio->io_flags & ZIO_FLAG_SCRUB) && !vdev_dtl_contains(mc->mc_vd, DTL_PARTIAL, zio->io_txg, 1)) continue; mc->mc_error = SET_ERROR(ESTALE); } zio_nowait(zio_vdev_child_io(zio, zio->io_bp, mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, ZIO_TYPE_WRITE, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_IO_REPAIR | (unexpected_errors ? 
ZIO_FLAG_SELF_HEAL : 0), NULL, NULL)); } } } static void vdev_mirror_state_change(vdev_t *vd, int faulted, int degraded) { if (faulted == vd->vdev_children) vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_NO_REPLICAS); else if (degraded + faulted != 0) vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE); else vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE); } vdev_ops_t vdev_mirror_ops = { vdev_mirror_open, vdev_mirror_close, vdev_default_asize, vdev_mirror_io_start, vdev_mirror_io_done, vdev_mirror_state_change, NULL, NULL, VDEV_TYPE_MIRROR, /* name of this vdev type */ B_FALSE /* not a leaf vdev */ }; vdev_ops_t vdev_replacing_ops = { vdev_mirror_open, vdev_mirror_close, vdev_default_asize, vdev_mirror_io_start, vdev_mirror_io_done, vdev_mirror_state_change, NULL, NULL, VDEV_TYPE_REPLACING, /* name of this vdev type */ B_FALSE /* not a leaf vdev */ }; vdev_ops_t vdev_spare_ops = { vdev_mirror_open, vdev_mirror_close, vdev_default_asize, vdev_mirror_io_start, vdev_mirror_io_done, vdev_mirror_state_change, NULL, NULL, VDEV_TYPE_SPARE, /* name of this vdev type */ B_FALSE /* not a leaf vdev */ }; Index: user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c (revision 303642) @@ -1,916 +1,918 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] */ #include #include #include #include #include #include /* * ZFS I/O Scheduler * --------------- * * ZFS issues I/O operations to leaf vdevs to satisfy and complete zios. The * I/O scheduler determines when and in what order those operations are * issued. The I/O scheduler divides operations into six I/O classes * prioritized in the following order: sync read, sync write, async read, * async write, scrub/resilver and trim. Each queue defines the minimum and * maximum number of concurrent operations that may be issued to the device. * In addition, the device has an aggregate maximum. Note that the sum of the * per-queue minimums must not exceed the aggregate maximum, and if the * aggregate maximum is equal to or greater than the sum of the per-queue * maximums, the per-queue minimum has no effect. * * For many physical devices, throughput increases with the number of * concurrent operations, but latency typically suffers. 
Further, physical * devices typically have a limit at which more concurrent operations have no * effect on throughput or can actually cause it to decrease. * * The scheduler selects the next operation to issue by first looking for an * I/O class whose minimum has not been satisfied. Once all are satisfied and * the aggregate maximum has not been hit, the scheduler looks for classes * whose maximum has not been satisfied. Iteration through the I/O classes is * done in the order specified above. No further operations are issued if the * aggregate maximum number of concurrent operations has been hit or if there * are no operations queued for an I/O class that has not hit its maximum. * Every time an I/O is queued or an operation completes, the I/O scheduler * looks for new operations to issue. * * All I/O classes have a fixed maximum number of outstanding operations * except for the async write class. Asynchronous writes represent the data * that is committed to stable storage during the syncing stage for * transaction groups (see txg.c). Transaction groups enter the syncing state * periodically so the number of queued async writes will quickly burst up and * then bleed down to zero. Rather than servicing them as quickly as possible, * the I/O scheduler changes the maximum number of active async write I/Os * according to the amount of dirty data in the pool (see dsl_pool.c). Since * both throughput and latency typically increase with the number of * concurrent operations issued to physical devices, reducing the burstiness * in the number of concurrent operations also stabilizes the response time of * operations from other -- and in particular synchronous -- queues. In broad * strokes, the I/O scheduler will issue more concurrent operations from the * async write queue as there's more dirty data in the pool. * * Async Writes * * The number of concurrent operations issued for the async write I/O class * follows a piece-wise linear function defined by a few adjustable points. * * | o---------| <-- zfs_vdev_async_write_max_active * ^ | /^ | * | | / | | * active | / | | * I/O | / | | * count | / | | * | / | | * |------------o | | <-- zfs_vdev_async_write_min_active * 0|____________^______|_________| * 0% | | 100% of zfs_dirty_data_max * | | * | `-- zfs_vdev_async_write_active_max_dirty_percent * `--------- zfs_vdev_async_write_active_min_dirty_percent * * Until the amount of dirty data exceeds a minimum percentage of the dirty * data allowed in the pool, the I/O scheduler will limit the number of * concurrent operations to the minimum. As that threshold is crossed, the * number of concurrent operations issued increases linearly to the maximum at * the specified maximum percentage of the dirty data allowed in the pool. * * Ideally, the amount of dirty data on a busy pool will stay in the sloped * part of the function between zfs_vdev_async_write_active_min_dirty_percent * and zfs_vdev_async_write_active_max_dirty_percent. If it exceeds the * maximum percentage, this indicates that the rate of incoming data is * greater than the rate that the backend storage can handle. In this case, we * must further throttle incoming writes (see dmu_tx_delay() for details). */ /* * The maximum number of I/Os active to each device. Ideally, this will be >= * the sum of each queue's max_active. It must be at least the sum of each * queue's min_active. */ uint32_t zfs_vdev_max_active = 1000; /* * Per-queue limits on the number of I/Os active to each device. 
If the * sum of the queue's max_active is < zfs_vdev_max_active, then the * min_active comes into play. We will send min_active from each queue, * and then select from queues in the order defined by zio_priority_t. * * In general, smaller max_active's will lead to lower latency of synchronous * operations. Larger max_active's may lead to higher overall throughput, * depending on underlying storage. * * The ratio of the queues' max_actives determines the balance of performance * between reads, writes, and scrubs. E.g., increasing * zfs_vdev_scrub_max_active will cause the scrub or resilver to complete * more quickly, but reads and writes to have higher latency and lower * throughput. */ uint32_t zfs_vdev_sync_read_min_active = 10; uint32_t zfs_vdev_sync_read_max_active = 10; uint32_t zfs_vdev_sync_write_min_active = 10; uint32_t zfs_vdev_sync_write_max_active = 10; uint32_t zfs_vdev_async_read_min_active = 1; uint32_t zfs_vdev_async_read_max_active = 3; uint32_t zfs_vdev_async_write_min_active = 1; uint32_t zfs_vdev_async_write_max_active = 10; uint32_t zfs_vdev_scrub_min_active = 1; uint32_t zfs_vdev_scrub_max_active = 2; uint32_t zfs_vdev_trim_min_active = 1; /* * TRIM max active is large in comparison to the other values due to the fact * that TRIM IOs are coalesced at the device layer. This value is set such * that a typical SSD can process the queued IOs in a single request. */ uint32_t zfs_vdev_trim_max_active = 64; /* * When the pool has less than zfs_vdev_async_write_active_min_dirty_percent * dirty data, use zfs_vdev_async_write_min_active. When it has more than * zfs_vdev_async_write_active_max_dirty_percent, use * zfs_vdev_async_write_max_active. The value is linearly interpolated * between min and max. */ int zfs_vdev_async_write_active_min_dirty_percent = 30; int zfs_vdev_async_write_active_max_dirty_percent = 60; /* * To reduce IOPs, we aggregate small adjacent I/Os into one large I/O. * For read I/Os, we also aggregate across small adjacency gaps; for writes * we include spans of optional I/Os to aid aggregation at the disk even when * they aren't able to help us aggregate at this level. 
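 * For example, with the default zfs_vdev_read_gap_limit of 32KB defined
 * below, two reads separated by a 16KB gap may be issued as a single larger
 * read that also covers the gap.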
*/ int zfs_vdev_aggregation_limit = SPA_OLD_MAXBLOCKSIZE; int zfs_vdev_read_gap_limit = 32 << 10; int zfs_vdev_write_gap_limit = 4 << 10; #ifdef __FreeBSD__ +#ifdef _KERNEL SYSCTL_DECL(_vfs_zfs_vdev); static int sysctl_zfs_async_write_active_min_dirty_percent(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vfs_zfs_vdev, OID_AUTO, async_write_active_min_dirty_percent, CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, 0, sizeof(int), sysctl_zfs_async_write_active_min_dirty_percent, "I", "Percentage of async write dirty data below which " "async_write_min_active is used."); static int sysctl_zfs_async_write_active_max_dirty_percent(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vfs_zfs_vdev, OID_AUTO, async_write_active_max_dirty_percent, CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, 0, sizeof(int), sysctl_zfs_async_write_active_max_dirty_percent, "I", "Percentage of async write dirty data above which " "async_write_max_active is used."); SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, max_active, CTLFLAG_RWTUN, &zfs_vdev_max_active, 0, "The maximum number of I/Os of all types active for each device."); #define ZFS_VDEV_QUEUE_KNOB_MIN(name) \ SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, name ## _min_active, CTLFLAG_RWTUN,\ &zfs_vdev_ ## name ## _min_active, 0, \ "Initial number of I/O requests of type " #name \ " active for each device"); #define ZFS_VDEV_QUEUE_KNOB_MAX(name) \ SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, name ## _max_active, CTLFLAG_RWTUN,\ &zfs_vdev_ ## name ## _max_active, 0, \ "Maximum number of I/O requests of type " #name \ " active for each device"); ZFS_VDEV_QUEUE_KNOB_MIN(sync_read); ZFS_VDEV_QUEUE_KNOB_MAX(sync_read); ZFS_VDEV_QUEUE_KNOB_MIN(sync_write); ZFS_VDEV_QUEUE_KNOB_MAX(sync_write); ZFS_VDEV_QUEUE_KNOB_MIN(async_read); ZFS_VDEV_QUEUE_KNOB_MAX(async_read); ZFS_VDEV_QUEUE_KNOB_MIN(async_write); ZFS_VDEV_QUEUE_KNOB_MAX(async_write); ZFS_VDEV_QUEUE_KNOB_MIN(scrub); ZFS_VDEV_QUEUE_KNOB_MAX(scrub); ZFS_VDEV_QUEUE_KNOB_MIN(trim); ZFS_VDEV_QUEUE_KNOB_MAX(trim); #undef ZFS_VDEV_QUEUE_KNOB SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, aggregation_limit, CTLFLAG_RWTUN, &zfs_vdev_aggregation_limit, 0, "I/O requests are aggregated up to this size"); SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, read_gap_limit, CTLFLAG_RWTUN, &zfs_vdev_read_gap_limit, 0, "Acceptable gap between two reads being aggregated"); SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, write_gap_limit, CTLFLAG_RWTUN, &zfs_vdev_write_gap_limit, 0, "Acceptable gap between two writes being aggregated"); static int sysctl_zfs_async_write_active_min_dirty_percent(SYSCTL_HANDLER_ARGS) { int val, err; val = zfs_vdev_async_write_active_min_dirty_percent; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val < 0 || val > 100 || val >= zfs_vdev_async_write_active_max_dirty_percent) return (EINVAL); zfs_vdev_async_write_active_min_dirty_percent = val; return (0); } static int sysctl_zfs_async_write_active_max_dirty_percent(SYSCTL_HANDLER_ARGS) { int val, err; val = zfs_vdev_async_write_active_max_dirty_percent; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val < 0 || val > 100 || val <= zfs_vdev_async_write_active_min_dirty_percent) return (EINVAL); zfs_vdev_async_write_active_max_dirty_percent = val; return (0); } +#endif #endif int vdev_queue_offset_compare(const void *x1, const void *x2) { const zio_t *z1 = x1; const zio_t *z2 = x2; if (z1->io_offset < z2->io_offset) return (-1); if (z1->io_offset > z2->io_offset) return (1); if (z1 < z2) return (-1); if (z1 > z2) return (1); return (0); } 
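/*
 * An illustrative, standalone restatement of the piecewise-linear
 * async-write scaling described in the scheduler comment above, using only
 * the default tunables defined in this file (min_active 1, max_active 10,
 * thresholds at 30% and 60% of zfs_dirty_data_max).  The authoritative
 * logic lives in vdev_queue_max_async_writes() below; this helper and its
 * name are hypothetical and exist only to make the arithmetic explicit.
 */
static int
async_writes_for_dirty_pct_sketch(int dirty_pct)
{
	int min_active = 1, max_active = 10;	/* async_write_{min,max}_active */
	int min_pct = 30, max_pct = 60;		/* active_{min,max}_dirty_percent */

	if (dirty_pct < min_pct)	/* below the low threshold: use the minimum */
		return (min_active);
	if (dirty_pct > max_pct)	/* above the high threshold: use the maximum */
		return (max_active);
	/* Linear interpolation; e.g. 45% dirty yields 4 + 1 = 5 writes. */
	return ((dirty_pct - min_pct) * (max_active - min_active) /
	    (max_pct - min_pct) + min_active);
}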
static inline avl_tree_t * vdev_queue_class_tree(vdev_queue_t *vq, zio_priority_t p) { return (&vq->vq_class[p].vqc_queued_tree); } static inline avl_tree_t * vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t) { if (t == ZIO_TYPE_READ) return (&vq->vq_read_offset_tree); else if (t == ZIO_TYPE_WRITE) return (&vq->vq_write_offset_tree); else return (NULL); } int vdev_queue_timestamp_compare(const void *x1, const void *x2) { const zio_t *z1 = x1; const zio_t *z2 = x2; if (z1->io_timestamp < z2->io_timestamp) return (-1); if (z1->io_timestamp > z2->io_timestamp) return (1); if (z1->io_offset < z2->io_offset) return (-1); if (z1->io_offset > z2->io_offset) return (1); if (z1 < z2) return (-1); if (z1 > z2) return (1); return (0); } void vdev_queue_init(vdev_t *vd) { vdev_queue_t *vq = &vd->vdev_queue; mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL); vq->vq_vdev = vd; avl_create(&vq->vq_active_tree, vdev_queue_offset_compare, sizeof (zio_t), offsetof(struct zio, io_queue_node)); avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_READ), vdev_queue_offset_compare, sizeof (zio_t), offsetof(struct zio, io_offset_node)); avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE), vdev_queue_offset_compare, sizeof (zio_t), offsetof(struct zio, io_offset_node)); for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { int (*compfn) (const void *, const void *); /* * The synchronous i/o queues are dispatched in FIFO rather * than LBA order. This provides more consistent latency for * these i/os. */ if (p == ZIO_PRIORITY_SYNC_READ || p == ZIO_PRIORITY_SYNC_WRITE) compfn = vdev_queue_timestamp_compare; else compfn = vdev_queue_offset_compare; avl_create(vdev_queue_class_tree(vq, p), compfn, sizeof (zio_t), offsetof(struct zio, io_queue_node)); } vq->vq_lastoffset = 0; } void vdev_queue_fini(vdev_t *vd) { vdev_queue_t *vq = &vd->vdev_queue; for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) avl_destroy(vdev_queue_class_tree(vq, p)); avl_destroy(&vq->vq_active_tree); avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_READ)); avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE)); mutex_destroy(&vq->vq_lock); } static void vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio) { spa_t *spa = zio->io_spa; avl_tree_t *qtt; ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio); qtt = vdev_queue_type_tree(vq, zio->io_type); if (qtt) avl_add(qtt, zio); #ifdef illumos mutex_enter(&spa->spa_iokstat_lock); spa->spa_queue_stats[zio->io_priority].spa_queued++; if (spa->spa_iokstat != NULL) kstat_waitq_enter(spa->spa_iokstat->ks_data); mutex_exit(&spa->spa_iokstat_lock); #endif } static void vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio) { spa_t *spa = zio->io_spa; avl_tree_t *qtt; ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio); qtt = vdev_queue_type_tree(vq, zio->io_type); if (qtt) avl_remove(qtt, zio); #ifdef illumos mutex_enter(&spa->spa_iokstat_lock); ASSERT3U(spa->spa_queue_stats[zio->io_priority].spa_queued, >, 0); spa->spa_queue_stats[zio->io_priority].spa_queued--; if (spa->spa_iokstat != NULL) kstat_waitq_exit(spa->spa_iokstat->ks_data); mutex_exit(&spa->spa_iokstat_lock); #endif } static void vdev_queue_pending_add(vdev_queue_t *vq, zio_t *zio) { spa_t *spa = zio->io_spa; ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); 
vq->vq_class[zio->io_priority].vqc_active++; avl_add(&vq->vq_active_tree, zio); #ifdef illumos mutex_enter(&spa->spa_iokstat_lock); spa->spa_queue_stats[zio->io_priority].spa_active++; if (spa->spa_iokstat != NULL) kstat_runq_enter(spa->spa_iokstat->ks_data); mutex_exit(&spa->spa_iokstat_lock); #endif } static void vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio) { spa_t *spa = zio->io_spa; ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); vq->vq_class[zio->io_priority].vqc_active--; avl_remove(&vq->vq_active_tree, zio); #ifdef illumos mutex_enter(&spa->spa_iokstat_lock); ASSERT3U(spa->spa_queue_stats[zio->io_priority].spa_active, >, 0); spa->spa_queue_stats[zio->io_priority].spa_active--; if (spa->spa_iokstat != NULL) { kstat_io_t *ksio = spa->spa_iokstat->ks_data; kstat_runq_exit(spa->spa_iokstat->ks_data); if (zio->io_type == ZIO_TYPE_READ) { ksio->reads++; ksio->nread += zio->io_size; } else if (zio->io_type == ZIO_TYPE_WRITE) { ksio->writes++; ksio->nwritten += zio->io_size; } } mutex_exit(&spa->spa_iokstat_lock); #endif } static void vdev_queue_agg_io_done(zio_t *aio) { if (aio->io_type == ZIO_TYPE_READ) { zio_t *pio; while ((pio = zio_walk_parents(aio)) != NULL) { bcopy((char *)aio->io_data + (pio->io_offset - aio->io_offset), pio->io_data, pio->io_size); } } zio_buf_free(aio->io_data, aio->io_size); } static int vdev_queue_class_min_active(zio_priority_t p) { switch (p) { case ZIO_PRIORITY_SYNC_READ: return (zfs_vdev_sync_read_min_active); case ZIO_PRIORITY_SYNC_WRITE: return (zfs_vdev_sync_write_min_active); case ZIO_PRIORITY_ASYNC_READ: return (zfs_vdev_async_read_min_active); case ZIO_PRIORITY_ASYNC_WRITE: return (zfs_vdev_async_write_min_active); case ZIO_PRIORITY_SCRUB: return (zfs_vdev_scrub_min_active); case ZIO_PRIORITY_TRIM: return (zfs_vdev_trim_min_active); default: panic("invalid priority %u", p); return (0); } } static __noinline int vdev_queue_max_async_writes(spa_t *spa) { int writes; uint64_t dirty = spa->spa_dsl_pool->dp_dirty_total; uint64_t min_bytes = zfs_dirty_data_max * zfs_vdev_async_write_active_min_dirty_percent / 100; uint64_t max_bytes = zfs_dirty_data_max * zfs_vdev_async_write_active_max_dirty_percent / 100; /* * Sync tasks correspond to interactive user actions. To reduce the * execution time of those actions we push data out as fast as possible. 
*/ if (spa_has_pending_synctask(spa)) { return (zfs_vdev_async_write_max_active); } if (dirty < min_bytes) return (zfs_vdev_async_write_min_active); if (dirty > max_bytes) return (zfs_vdev_async_write_max_active); /* * linear interpolation: * slope = (max_writes - min_writes) / (max_bytes - min_bytes) * move right by min_bytes * move up by min_writes */ writes = (dirty - min_bytes) * (zfs_vdev_async_write_max_active - zfs_vdev_async_write_min_active) / (max_bytes - min_bytes) + zfs_vdev_async_write_min_active; ASSERT3U(writes, >=, zfs_vdev_async_write_min_active); ASSERT3U(writes, <=, zfs_vdev_async_write_max_active); return (writes); } static int vdev_queue_class_max_active(spa_t *spa, zio_priority_t p) { switch (p) { case ZIO_PRIORITY_SYNC_READ: return (zfs_vdev_sync_read_max_active); case ZIO_PRIORITY_SYNC_WRITE: return (zfs_vdev_sync_write_max_active); case ZIO_PRIORITY_ASYNC_READ: return (zfs_vdev_async_read_max_active); case ZIO_PRIORITY_ASYNC_WRITE: return (vdev_queue_max_async_writes(spa)); case ZIO_PRIORITY_SCRUB: return (zfs_vdev_scrub_max_active); case ZIO_PRIORITY_TRIM: return (zfs_vdev_trim_max_active); default: panic("invalid priority %u", p); return (0); } } /* * Return the i/o class to issue from, or ZIO_PRIORITY_MAX_QUEUEABLE if * there is no eligible class. */ static zio_priority_t vdev_queue_class_to_issue(vdev_queue_t *vq) { spa_t *spa = vq->vq_vdev->vdev_spa; zio_priority_t p; ASSERT(MUTEX_HELD(&vq->vq_lock)); if (avl_numnodes(&vq->vq_active_tree) >= zfs_vdev_max_active) return (ZIO_PRIORITY_NUM_QUEUEABLE); /* find a queue that has not reached its minimum # outstanding i/os */ for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 && vq->vq_class[p].vqc_active < vdev_queue_class_min_active(p)) return (p); } /* * If we haven't found a queue, look for one that hasn't reached its * maximum # outstanding i/os. */ for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 && vq->vq_class[p].vqc_active < vdev_queue_class_max_active(spa, p)) return (p); } /* No eligible queued i/os */ return (ZIO_PRIORITY_NUM_QUEUEABLE); } /* * Compute the range spanned by two i/os, which is the endpoint of the last * (lio->io_offset + lio->io_size) minus start of the first (fio->io_offset). * Conveniently, the gap between fio and lio is given by -IO_SPAN(lio, fio); * thus fio and lio are adjacent if and only if IO_SPAN(lio, fio) == 0. */ #define IO_SPAN(fio, lio) ((lio)->io_offset + (lio)->io_size - (fio)->io_offset) #define IO_GAP(fio, lio) (-IO_SPAN(lio, fio)) static zio_t * vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) { zio_t *first, *last, *aio, *dio, *mandatory, *nio; uint64_t maxgap = 0; uint64_t size; boolean_t stretch; avl_tree_t *t; enum zio_flag flags; ASSERT(MUTEX_HELD(&vq->vq_lock)); if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE) return (NULL); first = last = zio; if (zio->io_type == ZIO_TYPE_READ) maxgap = zfs_vdev_read_gap_limit; /* * We can aggregate I/Os that are sufficiently adjacent and of * the same flavor, as expressed by the AGG_INHERIT flags. * The latter requirement is necessary so that certain * attributes of the I/O, such as whether it's a normal I/O * or a scrub/resilver, can be preserved in the aggregate. * We can include optional I/Os, but don't allow them * to begin a range as they add no benefit in that situation. */ /* * We keep track of the last non-optional I/O. */ mandatory = (first->io_flags & ZIO_FLAG_OPTIONAL) ? 
NULL : first; /* * Walk backwards through sufficiently contiguous I/Os * recording the last non-option I/O. */ flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT; t = vdev_queue_type_tree(vq, zio->io_type); while (t != NULL && (dio = AVL_PREV(t, first)) != NULL && (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags && IO_SPAN(dio, last) <= zfs_vdev_aggregation_limit && IO_GAP(dio, first) <= maxgap) { first = dio; if (mandatory == NULL && !(first->io_flags & ZIO_FLAG_OPTIONAL)) mandatory = first; } /* * Skip any initial optional I/Os. */ while ((first->io_flags & ZIO_FLAG_OPTIONAL) && first != last) { first = AVL_NEXT(t, first); ASSERT(first != NULL); } /* * Walk forward through sufficiently contiguous I/Os. */ while ((dio = AVL_NEXT(t, last)) != NULL && (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags && IO_SPAN(first, dio) <= zfs_vdev_aggregation_limit && IO_GAP(last, dio) <= maxgap) { last = dio; if (!(last->io_flags & ZIO_FLAG_OPTIONAL)) mandatory = last; } /* * Now that we've established the range of the I/O aggregation * we must decide what to do with trailing optional I/Os. * For reads, there's nothing to do. While we are unable to * aggregate further, it's possible that a trailing optional * I/O would allow the underlying device to aggregate with * subsequent I/Os. We must therefore determine if the next * non-optional I/O is close enough to make aggregation * worthwhile. */ stretch = B_FALSE; if (zio->io_type == ZIO_TYPE_WRITE && mandatory != NULL) { zio_t *nio = last; while ((dio = AVL_NEXT(t, nio)) != NULL && IO_GAP(nio, dio) == 0 && IO_GAP(mandatory, dio) <= zfs_vdev_write_gap_limit) { nio = dio; if (!(nio->io_flags & ZIO_FLAG_OPTIONAL)) { stretch = B_TRUE; break; } } } if (stretch) { /* This may be a no-op. */ dio = AVL_NEXT(t, last); dio->io_flags &= ~ZIO_FLAG_OPTIONAL; } else { while (last != mandatory && last != first) { ASSERT(last->io_flags & ZIO_FLAG_OPTIONAL); last = AVL_PREV(t, last); ASSERT(last != NULL); } } if (first == last) return (NULL); size = IO_SPAN(first, last); ASSERT3U(size, <=, zfs_vdev_aggregation_limit); aio = zio_vdev_delegated_io(first->io_vd, first->io_offset, zio_buf_alloc(size), size, first->io_type, zio->io_priority, flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE, vdev_queue_agg_io_done, NULL); aio->io_timestamp = first->io_timestamp; nio = first; do { dio = nio; nio = AVL_NEXT(t, dio); ASSERT3U(dio->io_type, ==, aio->io_type); if (dio->io_flags & ZIO_FLAG_NODATA) { ASSERT3U(dio->io_type, ==, ZIO_TYPE_WRITE); bzero((char *)aio->io_data + (dio->io_offset - aio->io_offset), dio->io_size); } else if (dio->io_type == ZIO_TYPE_WRITE) { bcopy(dio->io_data, (char *)aio->io_data + (dio->io_offset - aio->io_offset), dio->io_size); } zio_add_child(dio, aio); vdev_queue_io_remove(vq, dio); zio_vdev_io_bypass(dio); zio_execute(dio); } while (dio != last); return (aio); } static zio_t * vdev_queue_io_to_issue(vdev_queue_t *vq) { zio_t *zio, *aio; zio_priority_t p; avl_index_t idx; avl_tree_t *tree; zio_t search; again: ASSERT(MUTEX_HELD(&vq->vq_lock)); p = vdev_queue_class_to_issue(vq); if (p == ZIO_PRIORITY_NUM_QUEUEABLE) { /* No eligible queued i/os */ return (NULL); } /* * For LBA-ordered queues (async / scrub), issue the i/o which follows * the most recently issued i/o in LBA (offset) order. * * For FIFO queues (sync), issue the i/o with the lowest timestamp. 
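 * (Both cases are handled by the avl_find()/avl_nearest() pair below:
 * searching with io_timestamp == 0 and io_offset == vq_last_offset + 1
 * positions the cursor before any queued entry in a FIFO tree and just
 * past the last issued offset in an LBA tree, and AVL_AFTER then yields
 * the next eligible i/o, falling back to avl_first() on wrap-around.)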
*/ tree = vdev_queue_class_tree(vq, p); search.io_timestamp = 0; search.io_offset = vq->vq_last_offset + 1; VERIFY3P(avl_find(tree, &search, &idx), ==, NULL); zio = avl_nearest(tree, idx, AVL_AFTER); if (zio == NULL) zio = avl_first(tree); ASSERT3U(zio->io_priority, ==, p); aio = vdev_queue_aggregate(vq, zio); if (aio != NULL) zio = aio; else vdev_queue_io_remove(vq, zio); /* * If the I/O is or was optional and therefore has no data, we need to * simply discard it. We need to drop the vdev queue's lock to avoid a * deadlock that we could encounter since this I/O will complete * immediately. */ if (zio->io_flags & ZIO_FLAG_NODATA) { mutex_exit(&vq->vq_lock); zio_vdev_io_bypass(zio); zio_execute(zio); mutex_enter(&vq->vq_lock); goto again; } vdev_queue_pending_add(vq, zio); vq->vq_last_offset = zio->io_offset; return (zio); } zio_t * vdev_queue_io(zio_t *zio) { vdev_queue_t *vq = &zio->io_vd->vdev_queue; zio_t *nio; if (zio->io_flags & ZIO_FLAG_DONT_QUEUE) return (zio); /* * Children i/os inherent their parent's priority, which might * not match the child's i/o type. Fix it up here. */ if (zio->io_type == ZIO_TYPE_READ) { if (zio->io_priority != ZIO_PRIORITY_SYNC_READ && zio->io_priority != ZIO_PRIORITY_ASYNC_READ && zio->io_priority != ZIO_PRIORITY_SCRUB) zio->io_priority = ZIO_PRIORITY_ASYNC_READ; } else if (zio->io_type == ZIO_TYPE_WRITE) { if (zio->io_priority != ZIO_PRIORITY_SYNC_WRITE && zio->io_priority != ZIO_PRIORITY_ASYNC_WRITE) zio->io_priority = ZIO_PRIORITY_ASYNC_WRITE; } else { ASSERT(zio->io_type == ZIO_TYPE_FREE); zio->io_priority = ZIO_PRIORITY_TRIM; } zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE; mutex_enter(&vq->vq_lock); zio->io_timestamp = gethrtime(); vdev_queue_io_add(vq, zio); nio = vdev_queue_io_to_issue(vq); mutex_exit(&vq->vq_lock); if (nio == NULL) return (NULL); if (nio->io_done == vdev_queue_agg_io_done) { zio_nowait(nio); return (NULL); } return (nio); } void vdev_queue_io_done(zio_t *zio) { vdev_queue_t *vq = &zio->io_vd->vdev_queue; zio_t *nio; mutex_enter(&vq->vq_lock); vdev_queue_pending_remove(vq, zio); vq->vq_io_complete_ts = gethrtime(); while ((nio = vdev_queue_io_to_issue(vq)) != NULL) { mutex_exit(&vq->vq_lock); if (nio->io_done == vdev_queue_agg_io_done) { zio_nowait(nio); } else { zio_vdev_io_reissue(nio); zio_execute(nio); } mutex_enter(&vq->vq_lock); } mutex_exit(&vq->vq_lock); } /* * As these three methods are only used for load calculations we're not concerned * if we get an incorrect value on 32bit platforms due to lack of vq_lock mutex * use here, instead we prefer to keep it lock free for performance. */ int vdev_queue_length(vdev_t *vd) { return (avl_numnodes(&vd->vdev_queue.vq_active_tree)); } uint64_t vdev_queue_lastoffset(vdev_t *vd) { return (vd->vdev_queue.vq_lastoffset); } void vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio) { vd->vdev_queue.vq_lastoffset = zio->io_offset + zio->io_size; } Index: user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c (revision 303642) @@ -1,1935 +1,1922 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Portions Copyright 2010 The FreeBSD Foundation * * $FreeBSD$ */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifdef illumos #pragma ident "%Z%%M% %I% %E% SMI" #endif #include #include #include #include #include #ifdef illumos #include #include #include #include #else -#include #include +#include #include #include -#include +#include #include #include #include #include #endif #include #ifdef illumos #include #include #else #include static int uread(proc_t *p, void *kaddr, size_t len, uintptr_t uaddr) { ssize_t n; PHOLD(p); n = proc_readmem(curthread, p, uaddr, kaddr, len); PRELE(p); if (n != len) return (ENOMEM); return (0); } static int uwrite(proc_t *p, void *kaddr, size_t len, uintptr_t uaddr) { ssize_t n; PHOLD(p); n = proc_writemem(curthread, p, uaddr, kaddr, len); PRELE(p); if (n != len) return (ENOMEM); return (0); } #endif /* illumos */ #ifdef __i386__ #define r_rax r_eax #define r_rbx r_ebx #define r_rip r_eip #define r_rflags r_eflags #define r_rsp r_esp #define r_rbp r_ebp #endif /* * Lossless User-Land Tracing on x86 * --------------------------------- * * The execution of most instructions is not dependent on the address; for * these instructions it is sufficient to copy them into the user process's * address space and execute them. To effectively single-step an instruction * in user-land, we copy out the following sequence of instructions to scratch * space in the user thread's ulwp_t structure. * * We then set the program counter (%eip or %rip) to point to this scratch * space. Once execution resumes, the original instruction is executed and * then control flow is redirected to what was originally the subsequent * instruction. If the kernel attemps to deliver a signal while single- * stepping, the signal is deferred and the program counter is moved into the * second sequence of instructions. The second sequence ends in a trap into * the kernel where the deferred signal is then properly handled and delivered. * * For instructions whose execute is position dependent, we perform simple * emulation. These instructions are limited to control transfer * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle * of %rip-relative addressing that means that almost any instruction can be * position dependent. For all the details on how we emulate generic * instructions included %rip-relative instructions, see the code in * fasttrap_pid_probe() below where we handle instructions of type * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing). 
*/ #define FASTTRAP_MODRM_MOD(modrm) (((modrm) >> 6) & 0x3) #define FASTTRAP_MODRM_REG(modrm) (((modrm) >> 3) & 0x7) #define FASTTRAP_MODRM_RM(modrm) ((modrm) & 0x7) #define FASTTRAP_MODRM(mod, reg, rm) (((mod) << 6) | ((reg) << 3) | (rm)) #define FASTTRAP_SIB_SCALE(sib) (((sib) >> 6) & 0x3) #define FASTTRAP_SIB_INDEX(sib) (((sib) >> 3) & 0x7) #define FASTTRAP_SIB_BASE(sib) ((sib) & 0x7) #define FASTTRAP_REX_W(rex) (((rex) >> 3) & 1) #define FASTTRAP_REX_R(rex) (((rex) >> 2) & 1) #define FASTTRAP_REX_X(rex) (((rex) >> 1) & 1) #define FASTTRAP_REX_B(rex) ((rex) & 1) #define FASTTRAP_REX(w, r, x, b) \ (0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b)) /* * Single-byte op-codes. */ #define FASTTRAP_PUSHL_EBP 0x55 #define FASTTRAP_JO 0x70 #define FASTTRAP_JNO 0x71 #define FASTTRAP_JB 0x72 #define FASTTRAP_JAE 0x73 #define FASTTRAP_JE 0x74 #define FASTTRAP_JNE 0x75 #define FASTTRAP_JBE 0x76 #define FASTTRAP_JA 0x77 #define FASTTRAP_JS 0x78 #define FASTTRAP_JNS 0x79 #define FASTTRAP_JP 0x7a #define FASTTRAP_JNP 0x7b #define FASTTRAP_JL 0x7c #define FASTTRAP_JGE 0x7d #define FASTTRAP_JLE 0x7e #define FASTTRAP_JG 0x7f #define FASTTRAP_NOP 0x90 #define FASTTRAP_MOV_EAX 0xb8 #define FASTTRAP_MOV_ECX 0xb9 #define FASTTRAP_RET16 0xc2 #define FASTTRAP_RET 0xc3 #define FASTTRAP_LOOPNZ 0xe0 #define FASTTRAP_LOOPZ 0xe1 #define FASTTRAP_LOOP 0xe2 #define FASTTRAP_JCXZ 0xe3 #define FASTTRAP_CALL 0xe8 #define FASTTRAP_JMP32 0xe9 #define FASTTRAP_JMP8 0xeb #define FASTTRAP_INT3 0xcc #define FASTTRAP_INT 0xcd #define FASTTRAP_2_BYTE_OP 0x0f #define FASTTRAP_GROUP5_OP 0xff /* * Two-byte op-codes (second byte only). */ #define FASTTRAP_0F_JO 0x80 #define FASTTRAP_0F_JNO 0x81 #define FASTTRAP_0F_JB 0x82 #define FASTTRAP_0F_JAE 0x83 #define FASTTRAP_0F_JE 0x84 #define FASTTRAP_0F_JNE 0x85 #define FASTTRAP_0F_JBE 0x86 #define FASTTRAP_0F_JA 0x87 #define FASTTRAP_0F_JS 0x88 #define FASTTRAP_0F_JNS 0x89 #define FASTTRAP_0F_JP 0x8a #define FASTTRAP_0F_JNP 0x8b #define FASTTRAP_0F_JL 0x8c #define FASTTRAP_0F_JGE 0x8d #define FASTTRAP_0F_JLE 0x8e #define FASTTRAP_0F_JG 0x8f #define FASTTRAP_EFLAGS_OF 0x800 #define FASTTRAP_EFLAGS_DF 0x400 #define FASTTRAP_EFLAGS_SF 0x080 #define FASTTRAP_EFLAGS_ZF 0x040 #define FASTTRAP_EFLAGS_AF 0x010 #define FASTTRAP_EFLAGS_PF 0x004 #define FASTTRAP_EFLAGS_CF 0x001 /* * Instruction prefixes. */ #define FASTTRAP_PREFIX_OPERAND 0x66 #define FASTTRAP_PREFIX_ADDRESS 0x67 #define FASTTRAP_PREFIX_CS 0x2E #define FASTTRAP_PREFIX_DS 0x3E #define FASTTRAP_PREFIX_ES 0x26 #define FASTTRAP_PREFIX_FS 0x64 #define FASTTRAP_PREFIX_GS 0x65 #define FASTTRAP_PREFIX_SS 0x36 #define FASTTRAP_PREFIX_LOCK 0xF0 #define FASTTRAP_PREFIX_REP 0xF3 #define FASTTRAP_PREFIX_REPNE 0xF2 #define FASTTRAP_NOREG 0xff /* * Map between instruction register encodings and the kernel constants which * correspond to indicies into struct regs. */ #ifdef __amd64 static const uint8_t regmap[16] = { REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15, }; #else static const uint8_t regmap[8] = { EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI }; #endif static ulong_t fasttrap_getreg(struct reg *, uint_t); static uint64_t fasttrap_anarg(struct reg *rp, int function_entry, int argno) { uint64_t value = 0; int shift = function_entry ? 1 : 0; #ifdef __amd64 if (curproc->p_model == DATAMODEL_LP64) { uintptr_t *stack; /* * In 64-bit mode, the first six arguments are stored in * registers. 
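	 * Arguments beyond the sixth are read from the user stack below; for
	 * a 32-bit process every argument comes from the stack.  At function
	 * entry the index is shifted by one slot to step over the saved
	 * return address.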
*/ if (argno < 6) switch (argno) { case 0: return (rp->r_rdi); case 1: return (rp->r_rsi); case 2: return (rp->r_rdx); case 3: return (rp->r_rcx); case 4: return (rp->r_r8); case 5: return (rp->r_r9); } stack = (uintptr_t *)rp->r_rsp; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); value = dtrace_fulword(&stack[argno - 6 + shift]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); } else { #endif -#ifdef __i386 - uint32_t *stack = (uint32_t *)rp->r_esp; + uint32_t *stack = (uint32_t *)rp->r_rsp; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); value = dtrace_fuword32(&stack[argno + shift]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); -#endif #ifdef __amd64 } #endif return (value); } /*ARGSUSED*/ int fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc, fasttrap_probe_type_t type) { uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10]; size_t len = FASTTRAP_MAX_INSTR_SIZE; size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET)); uint_t start = 0; int rmindex, size; uint8_t seg, rex = 0; /* * Read the instruction at the given address out of the process's * address space. We don't have to worry about a debugger * changing this instruction before we overwrite it with our trap * instruction since P_PR_LOCK is set. Since instructions can span * pages, we potentially read the instruction in two parts. If the * second part fails, we just zero out that part of the instruction. */ if (uread(p, &instr[0], first, pc) != 0) return (-1); if (len > first && uread(p, &instr[first], len - first, pc + first) != 0) { bzero(&instr[first], len - first); len = first; } /* * If the disassembly fails, then we have a malformed instruction. */ if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0) return (-1); /* * Make sure the disassembler isn't completely broken. */ ASSERT(-1 <= rmindex && rmindex < size); /* * If the computed size is greater than the number of bytes read, * then it was a malformed instruction possibly because it fell on a * page boundary and the subsequent page was missing or because of * some malicious user. */ if (size > len) return (-1); tp->ftt_size = (uint8_t)size; tp->ftt_segment = FASTTRAP_SEG_NONE; /* * Find the start of the instruction's opcode by processing any * legacy prefixes. */ for (;;) { seg = 0; switch (instr[start]) { case FASTTRAP_PREFIX_SS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_GS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_FS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_ES: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_DS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_CS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_OPERAND: case FASTTRAP_PREFIX_ADDRESS: case FASTTRAP_PREFIX_LOCK: case FASTTRAP_PREFIX_REP: case FASTTRAP_PREFIX_REPNE: if (seg != 0) { /* * It's illegal for an instruction to specify * two segment prefixes -- give up on this * illegal instruction. */ if (tp->ftt_segment != FASTTRAP_SEG_NONE) return (-1); tp->ftt_segment = seg; } start++; continue; } break; } #ifdef __amd64 /* * Identify the REX prefix on 64-bit processes. */ if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40) rex = instr[start++]; #endif /* * Now that we're pretty sure that the instruction is okay, copy the * valid part to the tracepoint. 
*/ bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE); tp->ftt_type = FASTTRAP_T_COMMON; if (instr[start] == FASTTRAP_2_BYTE_OP) { switch (instr[start + 1]) { case FASTTRAP_0F_JO: case FASTTRAP_0F_JNO: case FASTTRAP_0F_JB: case FASTTRAP_0F_JAE: case FASTTRAP_0F_JE: case FASTTRAP_0F_JNE: case FASTTRAP_0F_JBE: case FASTTRAP_0F_JA: case FASTTRAP_0F_JS: case FASTTRAP_0F_JNS: case FASTTRAP_0F_JP: case FASTTRAP_0F_JNP: case FASTTRAP_0F_JL: case FASTTRAP_0F_JGE: case FASTTRAP_0F_JLE: case FASTTRAP_0F_JG: tp->ftt_type = FASTTRAP_T_JCC; tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO; tp->ftt_dest = pc + tp->ftt_size + /* LINTED - alignment */ *(int32_t *)&instr[start + 2]; break; } } else if (instr[start] == FASTTRAP_GROUP5_OP) { uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]); uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]); uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]); if (reg == 2 || reg == 4) { uint_t i, sz; if (reg == 2) tp->ftt_type = FASTTRAP_T_CALL; else tp->ftt_type = FASTTRAP_T_JMP; if (mod == 3) tp->ftt_code = 2; else tp->ftt_code = 1; ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); /* * See AMD x86-64 Architecture Programmer's Manual * Volume 3, Section 1.2.7, Table 1-12, and * Appendix A.3.1, Table A-15. */ if (mod != 3 && rm == 4) { uint8_t sib = instr[start + 2]; uint_t index = FASTTRAP_SIB_INDEX(sib); uint_t base = FASTTRAP_SIB_BASE(sib); tp->ftt_scale = FASTTRAP_SIB_SCALE(sib); tp->ftt_index = (index == 4) ? FASTTRAP_NOREG : regmap[index | (FASTTRAP_REX_X(rex) << 3)]; tp->ftt_base = (mod == 0 && base == 5) ? FASTTRAP_NOREG : regmap[base | (FASTTRAP_REX_B(rex) << 3)]; i = 3; sz = mod == 1 ? 1 : 4; } else { /* * In 64-bit mode, mod == 0 and r/m == 5 * denotes %rip-relative addressing; in 32-bit * mode, the base register isn't used. In both * modes, there is a 32-bit operand. */ if (mod == 0 && rm == 5) { #ifdef __amd64 if (p->p_model == DATAMODEL_LP64) tp->ftt_base = REG_RIP; else #endif tp->ftt_base = FASTTRAP_NOREG; sz = 4; } else { uint8_t base = rm | (FASTTRAP_REX_B(rex) << 3); tp->ftt_base = regmap[base]; sz = mod == 1 ? 1 : mod == 2 ? 
4 : 0; } tp->ftt_index = FASTTRAP_NOREG; i = 2; } if (sz == 1) { tp->ftt_dest = *(int8_t *)&instr[start + i]; } else if (sz == 4) { /* LINTED - alignment */ tp->ftt_dest = *(int32_t *)&instr[start + i]; } else { tp->ftt_dest = 0; } } } else { switch (instr[start]) { case FASTTRAP_RET: tp->ftt_type = FASTTRAP_T_RET; break; case FASTTRAP_RET16: tp->ftt_type = FASTTRAP_T_RET16; /* LINTED - alignment */ tp->ftt_dest = *(uint16_t *)&instr[start + 1]; break; case FASTTRAP_JO: case FASTTRAP_JNO: case FASTTRAP_JB: case FASTTRAP_JAE: case FASTTRAP_JE: case FASTTRAP_JNE: case FASTTRAP_JBE: case FASTTRAP_JA: case FASTTRAP_JS: case FASTTRAP_JNS: case FASTTRAP_JP: case FASTTRAP_JNP: case FASTTRAP_JL: case FASTTRAP_JGE: case FASTTRAP_JLE: case FASTTRAP_JG: tp->ftt_type = FASTTRAP_T_JCC; tp->ftt_code = instr[start]; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_LOOPNZ: case FASTTRAP_LOOPZ: case FASTTRAP_LOOP: tp->ftt_type = FASTTRAP_T_LOOP; tp->ftt_code = instr[start]; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_JCXZ: tp->ftt_type = FASTTRAP_T_JCXZ; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_CALL: tp->ftt_type = FASTTRAP_T_CALL; tp->ftt_dest = pc + tp->ftt_size + /* LINTED - alignment */ *(int32_t *)&instr[start + 1]; tp->ftt_code = 0; break; case FASTTRAP_JMP32: tp->ftt_type = FASTTRAP_T_JMP; tp->ftt_dest = pc + tp->ftt_size + /* LINTED - alignment */ *(int32_t *)&instr[start + 1]; break; case FASTTRAP_JMP8: tp->ftt_type = FASTTRAP_T_JMP; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_PUSHL_EBP: if (start == 0) tp->ftt_type = FASTTRAP_T_PUSHL_EBP; break; case FASTTRAP_NOP: #ifdef __amd64 ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); /* * On amd64 we have to be careful not to confuse a nop * (actually xchgl %eax, %eax) with an instruction using * the same opcode, but that does something different * (e.g. xchgl %r8d, %eax or xcghq %r8, %rax). */ if (FASTTRAP_REX_B(rex) == 0) #endif tp->ftt_type = FASTTRAP_T_NOP; break; case FASTTRAP_INT3: /* * The pid provider shares the int3 trap with debugger * breakpoints so we can't instrument them. */ ASSERT(instr[start] == FASTTRAP_INSTR); return (-1); case FASTTRAP_INT: /* * Interrupts seem like they could be traced with * no negative implications, but it's possible that * a thread could be redirected by the trap handling * code which would eventually return to the * instruction after the interrupt. If the interrupt * were in our scratch space, the subsequent * instruction might be overwritten before we return. * Accordingly we refuse to instrument any interrupt. */ return (-1); } } #ifdef __amd64 if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) { /* * If the process is 64-bit and the instruction type is still * FASTTRAP_T_COMMON -- meaning we're going to copy it out an * execute it -- we need to watch for %rip-relative * addressing mode. See the portion of fasttrap_pid_probe() * below where we handle tracepoints with type * FASTTRAP_T_COMMON for how we emulate instructions that * employ %rip-relative addressing. */ if (rmindex != -1) { uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]); uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]); uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]); ASSERT(rmindex > start); if (mod == 0 && rm == 5) { /* * We need to be sure to avoid other * registers used by this instruction. 
While * the reg field may determine the op code * rather than denoting a register, assuming * that it denotes a register is always safe. * We leave the REX field intact and use * whatever value's there for simplicity. */ if (reg != 0) { tp->ftt_ripmode = FASTTRAP_RIP_1 | (FASTTRAP_RIP_X * FASTTRAP_REX_B(rex)); rm = 0; } else { tp->ftt_ripmode = FASTTRAP_RIP_2 | (FASTTRAP_RIP_X * FASTTRAP_REX_B(rex)); rm = 1; } tp->ftt_modrm = tp->ftt_instr[rmindex]; tp->ftt_instr[rmindex] = FASTTRAP_MODRM(2, reg, rm); } } } #endif return (0); } int fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp) { fasttrap_instr_t instr = FASTTRAP_INSTR; if (uwrite(p, &instr, 1, tp->ftt_pc) != 0) return (-1); return (0); } int fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp) { uint8_t instr; /* * Distinguish between read or write failures and a changed * instruction. */ if (uread(p, &instr, 1, tp->ftt_pc) != 0) return (0); if (instr != FASTTRAP_INSTR) return (0); if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0) return (-1); return (0); } #ifdef __amd64 static uintptr_t fasttrap_fulword_noerr(const void *uaddr) { uintptr_t ret; if ((ret = fasttrap_fulword(uaddr)) != -1) return (ret); return (0); } #endif -#ifdef __i386__ static uint32_t fasttrap_fuword32_noerr(const void *uaddr) { uint32_t ret; if ((ret = fasttrap_fuword32(uaddr)) != -1) return (ret); return (0); } -#endif static void fasttrap_return_common(struct reg *rp, uintptr_t pc, pid_t pid, uintptr_t new_pc) { fasttrap_tracepoint_t *tp; fasttrap_bucket_t *bucket; fasttrap_id_t *id; #ifdef illumos kmutex_t *pid_mtx; pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; mutex_enter(pid_mtx); #else struct rm_priotracker tracker; rm_rlock(&fasttrap_tp_lock, &tracker); #endif bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && pc == tp->ftt_pc && tp->ftt_proc->ftpc_acount != 0) break; } /* * Don't sweat it if we can't find the tracepoint again; unlike * when we're in fasttrap_pid_probe(), finding the tracepoint here * is not essential to the correct execution of the process. */ if (tp == NULL) { #ifdef illumos mutex_exit(pid_mtx); #else rm_runlock(&fasttrap_tp_lock, &tracker); #endif return; } for (id = tp->ftt_retids; id != NULL; id = id->fti_next) { /* * If there's a branch that could act as a return site, we * need to trace it, and check here if the program counter is * external to the function. 
*/ if (tp->ftt_type != FASTTRAP_T_RET && tp->ftt_type != FASTTRAP_T_RET16 && new_pc - id->fti_probe->ftp_faddr < id->fti_probe->ftp_fsize) continue; dtrace_probe(id->fti_probe->ftp_id, pc - id->fti_probe->ftp_faddr, rp->r_rax, rp->r_rbx, 0, 0); } #ifdef illumos mutex_exit(pid_mtx); #else rm_runlock(&fasttrap_tp_lock, &tracker); #endif } static void fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr) { #ifdef illumos sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); sqp->sq_info.si_signo = SIGSEGV; sqp->sq_info.si_code = SEGV_MAPERR; sqp->sq_info.si_addr = (caddr_t)addr; mutex_enter(&p->p_lock); sigaddqa(p, t, sqp); mutex_exit(&p->p_lock); if (t != NULL) aston(t); #else ksiginfo_t *ksi = kmem_zalloc(sizeof (ksiginfo_t), KM_SLEEP); ksiginfo_init(ksi); ksi->ksi_signo = SIGSEGV; ksi->ksi_code = SEGV_MAPERR; ksi->ksi_addr = (caddr_t)addr; (void) tdksignal(t, SIGSEGV, ksi); #endif } #ifdef __amd64 static void fasttrap_usdt_args64(fasttrap_probe_t *probe, struct reg *rp, int argc, uintptr_t *argv) { int i, x, cap = MIN(argc, probe->ftp_nargs); uintptr_t *stack = (uintptr_t *)rp->r_rsp; for (i = 0; i < cap; i++) { x = probe->ftp_argmap[i]; if (x < 6) argv[i] = (&rp->r_rdi)[x]; else argv[i] = fasttrap_fulword_noerr(&stack[x]); } for (; i < argc; i++) { argv[i] = 0; } } #endif -#ifdef __i386__ static void fasttrap_usdt_args32(fasttrap_probe_t *probe, struct reg *rp, int argc, uint32_t *argv) { int i, x, cap = MIN(argc, probe->ftp_nargs); uint32_t *stack = (uint32_t *)rp->r_rsp; for (i = 0; i < cap; i++) { x = probe->ftp_argmap[i]; argv[i] = fasttrap_fuword32_noerr(&stack[x]); } for (; i < argc; i++) { argv[i] = 0; } } -#endif static int fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct reg *rp, uintptr_t *addr) { proc_t *p = curproc; #ifdef __i386__ struct segment_descriptor *desc; #else struct user_segment_descriptor *desc; #endif uint16_t sel = 0, ndx, type; uintptr_t limit; switch (tp->ftt_segment) { case FASTTRAP_SEG_CS: sel = rp->r_cs; break; case FASTTRAP_SEG_DS: sel = rp->r_ds; break; case FASTTRAP_SEG_ES: sel = rp->r_es; break; case FASTTRAP_SEG_FS: sel = rp->r_fs; break; case FASTTRAP_SEG_GS: sel = rp->r_gs; break; case FASTTRAP_SEG_SS: sel = rp->r_ss; break; } /* * Make sure the given segment register specifies a user priority * selector rather than a kernel selector. */ if (ISPL(sel) != SEL_UPL) return (-1); ndx = IDXSEL(sel); /* * Check the bounds and grab the descriptor out of the specified * descriptor table. */ if (ISLDT(sel)) { #ifdef __i386__ if (ndx > p->p_md.md_ldt->ldt_len) return (-1); desc = (struct segment_descriptor *) p->p_md.md_ldt[ndx].ldt_base; #else if (ndx > max_ldt_segment) return (-1); desc = (struct user_segment_descriptor *) p->p_md.md_ldt[ndx].ldt_base; #endif } else { if (ndx >= NGDT) return (-1); #ifdef __i386__ desc = &gdt[ndx].sd; #else desc = &gdt[ndx]; #endif } /* * The descriptor must have user privilege level and it must be * present in memory. */ if (desc->sd_dpl != SEL_UPL || desc->sd_p != 1) return (-1); type = desc->sd_type; /* * If the S bit in the type field is not set, this descriptor can * only be used in system context. */ if ((type & 0x10) != 0x10) return (-1); limit = USD_GETLIMIT(desc) * (desc->sd_gran ? PAGESIZE : 1); if (tp->ftt_segment == FASTTRAP_SEG_CS) { /* * The code/data bit and readable bit must both be set. */ if ((type & 0xa) != 0xa) return (-1); if (*addr > limit) return (-1); } else { /* * The code/data bit must be clear. 
*/ if ((type & 0x8) != 0) return (-1); /* * If the expand-down bit is clear, we just check the limit as * it would naturally be applied. Otherwise, we need to check * that the address is the range [limit + 1 .. 0xffff] or * [limit + 1 ... 0xffffffff] depending on if the default * operand size bit is set. */ if ((type & 0x4) == 0) { if (*addr > limit) return (-1); } else if (desc->sd_def32) { if (*addr < limit + 1 || 0xffff < *addr) return (-1); } else { if (*addr < limit + 1 || 0xffffffff < *addr) return (-1); } } *addr += USD_GETBASE(desc); return (0); } int fasttrap_pid_probe(struct reg *rp) { proc_t *p = curproc; #ifndef illumos struct rm_priotracker tracker; proc_t *pp; #endif uintptr_t pc = rp->r_rip - 1; uintptr_t new_pc = 0; fasttrap_bucket_t *bucket; #ifdef illumos kmutex_t *pid_mtx; #endif fasttrap_tracepoint_t *tp, tp_local; pid_t pid; dtrace_icookie_t cookie; uint_t is_enabled = 0; /* * It's possible that a user (in a veritable orgy of bad planning) * could redirect this thread's flow of control before it reached the * return probe fasttrap. In this case we need to kill the process * since it's in a unrecoverable state. */ if (curthread->t_dtrace_step) { ASSERT(curthread->t_dtrace_on); fasttrap_sigtrap(p, curthread, pc); return (0); } /* * Clear all user tracing flags. */ curthread->t_dtrace_ft = 0; curthread->t_dtrace_pc = 0; curthread->t_dtrace_npc = 0; curthread->t_dtrace_scrpc = 0; curthread->t_dtrace_astpc = 0; #ifdef __amd64 curthread->t_dtrace_regv = 0; #endif /* * Treat a child created by a call to vfork(2) as if it were its * parent. We know that there's only one thread of control in such a * process: this one. */ #ifdef illumos while (p->p_flag & SVFORK) { p = p->p_parent; } pid = p->p_pid; pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; mutex_enter(pid_mtx); #else pp = p; sx_slock(&proctree_lock); while (pp->p_vmspace == pp->p_pptr->p_vmspace) pp = pp->p_pptr; pid = pp->p_pid; sx_sunlock(&proctree_lock); pp = NULL; rm_rlock(&fasttrap_tp_lock, &tracker); #endif bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; /* * Lookup the tracepoint that the process just hit. */ for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && pc == tp->ftt_pc && tp->ftt_proc->ftpc_acount != 0) break; } /* * If we couldn't find a matching tracepoint, either a tracepoint has * been inserted without using the pid ioctl interface (see * fasttrap_ioctl), or somehow we have mislaid this tracepoint. */ if (tp == NULL) { #ifdef illumos mutex_exit(pid_mtx); #else rm_runlock(&fasttrap_tp_lock, &tracker); #endif return (-1); } /* * Set the program counter to the address of the traced instruction * so that it looks right in ustack() output. */ rp->r_rip = pc; if (tp->ftt_ids != NULL) { fasttrap_id_t *id; #ifdef __amd64 if (p->p_model == DATAMODEL_LP64) { for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { fasttrap_probe_t *probe = id->fti_probe; if (id->fti_ptype == DTFTP_ENTRY) { /* * We note that this was an entry * probe to help ustack() find the * first caller. */ cookie = dtrace_interrupt_disable(); DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); dtrace_probe(probe->ftp_id, rp->r_rdi, rp->r_rsi, rp->r_rdx, rp->r_rcx, rp->r_r8); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); dtrace_interrupt_enable(cookie); } else if (id->fti_ptype == DTFTP_IS_ENABLED) { /* * Note that in this case, we don't * call dtrace_probe() since it's only * an artificial probe meant to change * the flow of control so that it * encounters the true probe. 
*/ is_enabled = 1; } else if (probe->ftp_argmap == NULL) { dtrace_probe(probe->ftp_id, rp->r_rdi, rp->r_rsi, rp->r_rdx, rp->r_rcx, rp->r_r8); } else { uintptr_t t[5]; fasttrap_usdt_args64(probe, rp, sizeof (t) / sizeof (t[0]), t); dtrace_probe(probe->ftp_id, t[0], t[1], t[2], t[3], t[4]); } } } else { -#else /* __amd64 */ +#endif uintptr_t s0, s1, s2, s3, s4, s5; - uint32_t *stack = (uint32_t *)rp->r_esp; + uint32_t *stack = (uint32_t *)rp->r_rsp; /* * In 32-bit mode, all arguments are passed on the * stack. If this is a function entry probe, we need * to skip the first entry on the stack as it * represents the return address rather than a * parameter to the function. */ s0 = fasttrap_fuword32_noerr(&stack[0]); s1 = fasttrap_fuword32_noerr(&stack[1]); s2 = fasttrap_fuword32_noerr(&stack[2]); s3 = fasttrap_fuword32_noerr(&stack[3]); s4 = fasttrap_fuword32_noerr(&stack[4]); s5 = fasttrap_fuword32_noerr(&stack[5]); for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { fasttrap_probe_t *probe = id->fti_probe; if (id->fti_ptype == DTFTP_ENTRY) { /* * We note that this was an entry * probe to help ustack() find the * first caller. */ cookie = dtrace_interrupt_disable(); DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); dtrace_probe(probe->ftp_id, s1, s2, s3, s4, s5); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); dtrace_interrupt_enable(cookie); } else if (id->fti_ptype == DTFTP_IS_ENABLED) { /* * Note that in this case, we don't * call dtrace_probe() since it's only * an artificial probe meant to change * the flow of control so that it * encounters the true probe. */ is_enabled = 1; } else if (probe->ftp_argmap == NULL) { dtrace_probe(probe->ftp_id, s0, s1, s2, s3, s4); } else { uint32_t t[5]; fasttrap_usdt_args32(probe, rp, sizeof (t) / sizeof (t[0]), t); dtrace_probe(probe->ftp_id, t[0], t[1], t[2], t[3], t[4]); } } -#endif /* __amd64 */ #ifdef __amd64 } #endif } /* * We're about to do a bunch of work so we cache a local copy of * the tracepoint to emulate the instruction, and then find the * tracepoint again later if we need to light up any return probes. */ tp_local = *tp; #ifdef illumos mutex_exit(pid_mtx); #else rm_runlock(&fasttrap_tp_lock, &tracker); #endif tp = &tp_local; /* * Set the program counter to appear as though the traced instruction * had completely executed. This ensures that fasttrap_getreg() will * report the expected value for REG_RIP. */ rp->r_rip = pc + tp->ftt_size; /* * If there's an is-enabled probe connected to this tracepoint it * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax' * instruction that was placed there by DTrace when the binary was * linked. As this probe is, in fact, enabled, we need to stuff 1 * into %eax or %rax. Accordingly, we can bypass all the instruction * emulation logic since we know the inevitable result. It's possible * that a user could construct a scenario where the 'is-enabled' * probe was on some other instruction, but that would be a rather * exotic way to shoot oneself in the foot. */ if (is_enabled) { rp->r_rax = 1; new_pc = rp->r_rip; goto done; } /* * We emulate certain types of instructions to ensure correctness * (in the case of position dependent instructions) or optimize * common cases. The rest we have the thread execute back in user- * land. */ switch (tp->ftt_type) { case FASTTRAP_T_RET: case FASTTRAP_T_RET16: { uintptr_t dst = 0; uintptr_t addr = 0; int ret = 0; /* * We have to emulate _every_ facet of the behavior of a ret * instruction including what happens if the load from %esp * fails; in that case, we send a SIGSEGV. 
*/ #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { ret = dst = fasttrap_fulword((void *)rp->r_rsp); addr = rp->r_rsp + sizeof (uintptr_t); } else { #endif -#ifdef __i386__ uint32_t dst32; - ret = dst32 = fasttrap_fuword32((void *)rp->r_esp); + ret = dst32 = fasttrap_fuword32((void *)rp->r_rsp); dst = dst32; - addr = rp->r_esp + sizeof (uint32_t); -#endif + addr = rp->r_rsp + sizeof (uint32_t); #ifdef __amd64 } #endif if (ret == -1) { fasttrap_sigsegv(p, curthread, rp->r_rsp); new_pc = pc; break; } if (tp->ftt_type == FASTTRAP_T_RET16) addr += tp->ftt_dest; rp->r_rsp = addr; new_pc = dst; break; } case FASTTRAP_T_JCC: { uint_t taken = 0; switch (tp->ftt_code) { case FASTTRAP_JO: taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) != 0; break; case FASTTRAP_JNO: taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0; break; case FASTTRAP_JB: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0; break; case FASTTRAP_JAE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0; break; case FASTTRAP_JE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0; break; case FASTTRAP_JNE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0; break; case FASTTRAP_JBE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0 || (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0; break; case FASTTRAP_JA: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0 && (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0; break; case FASTTRAP_JS: taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) != 0; break; case FASTTRAP_JNS: taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0; break; case FASTTRAP_JP: taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) != 0; break; case FASTTRAP_JNP: taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) == 0; break; case FASTTRAP_JL: taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) != ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; case FASTTRAP_JGE: taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) == ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; case FASTTRAP_JLE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 || ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) != ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; case FASTTRAP_JG: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 && ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) == ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; } if (taken) new_pc = tp->ftt_dest; else new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_LOOP: { uint_t taken = 0; #ifdef __amd64 greg_t cx = rp->r_rcx--; #else greg_t cx = rp->r_ecx--; #endif switch (tp->ftt_code) { case FASTTRAP_LOOPNZ: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 && cx != 0; break; case FASTTRAP_LOOPZ: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 && cx != 0; break; case FASTTRAP_LOOP: taken = (cx != 0); break; } if (taken) new_pc = tp->ftt_dest; else new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_JCXZ: { #ifdef __amd64 greg_t cx = rp->r_rcx; #else greg_t cx = rp->r_ecx; #endif if (cx == 0) new_pc = tp->ftt_dest; else new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_PUSHL_EBP: { int ret = 0; #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { rp->r_rsp -= sizeof (uintptr_t); ret = fasttrap_sulword((void *)rp->r_rsp, rp->r_rbp); } else { #endif -#ifdef __i386__ rp->r_rsp -= sizeof (uint32_t); ret = fasttrap_suword32((void *)rp->r_rsp, rp->r_rbp); -#endif #ifdef __amd64 } #endif if (ret == -1) { fasttrap_sigsegv(p, curthread, rp->r_rsp); new_pc = pc; break; } new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_NOP: new_pc = pc + tp->ftt_size; break; case FASTTRAP_T_JMP: case FASTTRAP_T_CALL: if (tp->ftt_code == 0) { new_pc = tp->ftt_dest; } 
else { uintptr_t value, addr = tp->ftt_dest; if (tp->ftt_base != FASTTRAP_NOREG) addr += fasttrap_getreg(rp, tp->ftt_base); if (tp->ftt_index != FASTTRAP_NOREG) addr += fasttrap_getreg(rp, tp->ftt_index) << tp->ftt_scale; if (tp->ftt_code == 1) { /* * If there's a segment prefix for this * instruction, we'll need to check permissions * and bounds on the given selector, and adjust * the address accordingly. */ if (tp->ftt_segment != FASTTRAP_SEG_NONE && fasttrap_do_seg(tp, rp, &addr) != 0) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { #endif if ((value = fasttrap_fulword((void *)addr)) == -1) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } new_pc = value; #ifdef __amd64 } else { uint32_t value32; addr = (uintptr_t)(uint32_t)addr; if ((value32 = fasttrap_fuword32((void *)addr)) == -1) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } new_pc = value32; } #endif } else { new_pc = addr; } } /* * If this is a call instruction, we need to push the return * address onto the stack. If this fails, we send the process * a SIGSEGV and reset the pc to emulate what would happen if * this instruction weren't traced. */ if (tp->ftt_type == FASTTRAP_T_CALL) { int ret = 0; uintptr_t addr = 0, pcps; #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { addr = rp->r_rsp - sizeof (uintptr_t); pcps = pc + tp->ftt_size; ret = fasttrap_sulword((void *)addr, pcps); } else { #endif addr = rp->r_rsp - sizeof (uint32_t); pcps = (uint32_t)(pc + tp->ftt_size); ret = fasttrap_suword32((void *)addr, pcps); #ifdef __amd64 } #endif if (ret == -1) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } rp->r_rsp = addr; } break; case FASTTRAP_T_COMMON: { uintptr_t addr; #if defined(__amd64) uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22]; #else uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7]; #endif uint_t i = 0; #ifdef illumos klwp_t *lwp = ttolwp(curthread); /* * Compute the address of the ulwp_t and step over the * ul_self pointer. The method used to store the user-land * thread pointer is very different on 32- and 64-bit * kernels. */ #if defined(__amd64) if (p->p_model == DATAMODEL_LP64) { addr = lwp->lwp_pcb.pcb_fsbase; addr += sizeof (void *); } else { addr = lwp->lwp_pcb.pcb_gsbase; addr += sizeof (caddr32_t); } #else addr = USD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc); addr += sizeof (void *); #endif #else /* !illumos */ fasttrap_scrspace_t *scrspace; scrspace = fasttrap_scraddr(curthread, tp->ftt_proc); if (scrspace == NULL) { /* * We failed to allocate scratch space for this thread. * Try to write the original instruction back out and * reset the pc. */ if (fasttrap_copyout(tp->ftt_instr, (void *)pc, tp->ftt_size)) fasttrap_sigtrap(p, curthread, pc); new_pc = pc; break; } addr = scrspace->ftss_addr; #endif /* illumos */ /* * Generic Instruction Tracing * --------------------------- * * This is the layout of the scratch space in the user-land * thread structure for our generated instructions. * * 32-bit mode bytes * ------------------------ ----- * a: <= 15 * jmp ftt_size> 5 * b: <= 15 * int T_DTRACE_RET 2 * ----- * <= 37 * * 64-bit mode bytes * ------------------------ ----- * a: <= 15 * jmp 0(%rip) 6 * ftt_size> 8 * b: <= 15 * int T_DTRACE_RET 2 * ----- * <= 46 * * The %pc is set to a, and curthread->t_dtrace_astpc is set * to b. 
If we encounter a signal on the way out of the * kernel, trap() will set %pc to curthread->t_dtrace_astpc * so that we execute the original instruction and re-enter * the kernel rather than redirecting to the next instruction. * * If there are return probes (so we know that we're going to * need to reenter the kernel after executing the original * instruction), the scratch space will just contain the * original instruction followed by an interrupt -- the same * data as at b. * * %rip-relative Addressing * ------------------------ * * There's a further complication in 64-bit mode due to %rip- * relative addressing. While this is clearly a beneficial * architectural decision for position independent code, it's * hard not to see it as a personal attack against the pid * provider since before there was a relatively small set of * instructions to emulate; with %rip-relative addressing, * almost every instruction can potentially depend on the * address at which it's executed. Rather than emulating * the broad spectrum of instructions that can now be * position dependent, we emulate jumps and others as in * 32-bit mode, and take a different tack for instructions * using %rip-relative addressing. * * For every instruction that uses the ModRM byte, the * in-kernel disassembler reports its location. We use the * ModRM byte to identify that an instruction uses * %rip-relative addressing and to see what other registers * the instruction uses. To emulate those instructions, * we modify the instruction to be %rax-relative rather than * %rip-relative (or %rcx-relative if the instruction uses * %rax; or %r8- or %r9-relative if the REX.B is present so * we don't have to rewrite the REX prefix). We then load * the value that %rip would have been into the scratch * register and generate an instruction to reset the scratch * register back to its original value. The instruction * sequence looks like this: * * 64-mode %rip-relative bytes * ------------------------ ----- * a: <= 15 * movq $, % 6 * jmp 0(%rip) 6 * ftt_size> 8 * b: <= 15 * int T_DTRACE_RET 2 * ----- * 52 * * We set curthread->t_dtrace_regv so that upon receiving * a signal we can reset the value of the scratch register. */ ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE); curthread->t_dtrace_scrpc = addr; bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); i += tp->ftt_size; #ifdef __amd64 if (tp->ftt_ripmode != 0) { greg_t *reg = NULL; ASSERT(p->p_model == DATAMODEL_LP64); ASSERT(tp->ftt_ripmode & (FASTTRAP_RIP_1 | FASTTRAP_RIP_2)); /* * If this was a %rip-relative instruction, we change * it to be either a %rax- or %rcx-relative * instruction (depending on whether those registers * are used as another operand; or %r8- or %r9- * relative depending on the value of REX.B). We then * set that register and generate a movq instruction * to reset the value. 
*/ if (tp->ftt_ripmode & FASTTRAP_RIP_X) scratch[i++] = FASTTRAP_REX(1, 0, 0, 1); else scratch[i++] = FASTTRAP_REX(1, 0, 0, 0); if (tp->ftt_ripmode & FASTTRAP_RIP_1) scratch[i++] = FASTTRAP_MOV_EAX; else scratch[i++] = FASTTRAP_MOV_ECX; switch (tp->ftt_ripmode) { case FASTTRAP_RIP_1: reg = &rp->r_rax; curthread->t_dtrace_reg = REG_RAX; break; case FASTTRAP_RIP_2: reg = &rp->r_rcx; curthread->t_dtrace_reg = REG_RCX; break; case FASTTRAP_RIP_1 | FASTTRAP_RIP_X: reg = &rp->r_r8; curthread->t_dtrace_reg = REG_R8; break; case FASTTRAP_RIP_2 | FASTTRAP_RIP_X: reg = &rp->r_r9; curthread->t_dtrace_reg = REG_R9; break; } /* LINTED - alignment */ *(uint64_t *)&scratch[i] = *reg; curthread->t_dtrace_regv = *reg; *reg = pc + tp->ftt_size; i += sizeof (uint64_t); } #endif /* * Generate the branch instruction to what would have * normally been the subsequent instruction. In 32-bit mode, * this is just a relative branch; in 64-bit mode this is a * %rip-relative branch that loads the 64-bit pc value * immediately after the jmp instruction. */ #ifdef __amd64 if (p->p_model == DATAMODEL_LP64) { scratch[i++] = FASTTRAP_GROUP5_OP; scratch[i++] = FASTTRAP_MODRM(0, 4, 5); /* LINTED - alignment */ *(uint32_t *)&scratch[i] = 0; i += sizeof (uint32_t); /* LINTED - alignment */ *(uint64_t *)&scratch[i] = pc + tp->ftt_size; i += sizeof (uint64_t); } else { #endif -#ifdef __i386__ /* * Set up the jmp to the next instruction; note that * the size of the traced instruction cancels out. */ scratch[i++] = FASTTRAP_JMP32; /* LINTED - alignment */ *(uint32_t *)&scratch[i] = pc - addr - 5; i += sizeof (uint32_t); -#endif #ifdef __amd64 } #endif curthread->t_dtrace_astpc = addr + i; bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); i += tp->ftt_size; scratch[i++] = FASTTRAP_INT; scratch[i++] = T_DTRACE_RET; ASSERT(i <= sizeof (scratch)); #ifdef illumos if (fasttrap_copyout(scratch, (char *)addr, i)) { #else if (uwrite(p, scratch, i, addr)) { #endif fasttrap_sigtrap(p, curthread, pc); new_pc = pc; break; } if (tp->ftt_retids != NULL) { curthread->t_dtrace_step = 1; curthread->t_dtrace_ret = 1; new_pc = curthread->t_dtrace_astpc; } else { new_pc = curthread->t_dtrace_scrpc; } curthread->t_dtrace_pc = pc; curthread->t_dtrace_npc = pc + tp->ftt_size; curthread->t_dtrace_on = 1; break; } default: panic("fasttrap: mishandled an instruction"); } done: /* * If there were no return probes when we first found the tracepoint, * we should feel no obligation to honor any return probes that were * subsequently enabled -- they'll just have to wait until the next * time around. */ if (tp->ftt_retids != NULL) { /* * We need to wait until the results of the instruction are * apparent before invoking any return probes. If this * instruction was emulated we can just call * fasttrap_return_common(); if it needs to be executed, we * need to wait until the user thread returns to the kernel. */ if (tp->ftt_type != FASTTRAP_T_COMMON) { /* * Set the program counter to the address of the traced * instruction so that it looks right in ustack() * output. We had previously set it to the end of the * instruction to simplify %rip-relative addressing. 
*/ rp->r_rip = pc; fasttrap_return_common(rp, pc, pid, new_pc); } else { ASSERT(curthread->t_dtrace_ret != 0); ASSERT(curthread->t_dtrace_pc == pc); ASSERT(curthread->t_dtrace_scrpc != 0); ASSERT(new_pc == curthread->t_dtrace_astpc); } } rp->r_rip = new_pc; #ifndef illumos PROC_LOCK(p); proc_write_regs(curthread, rp); PROC_UNLOCK(p); #endif return (0); } int fasttrap_return_probe(struct reg *rp) { proc_t *p = curproc; uintptr_t pc = curthread->t_dtrace_pc; uintptr_t npc = curthread->t_dtrace_npc; curthread->t_dtrace_pc = 0; curthread->t_dtrace_npc = 0; curthread->t_dtrace_scrpc = 0; curthread->t_dtrace_astpc = 0; #ifdef illumos /* * Treat a child created by a call to vfork(2) as if it were its * parent. We know that there's only one thread of control in such a * process: this one. */ while (p->p_flag & SVFORK) { p = p->p_parent; } #endif /* * We set rp->r_rip to the address of the traced instruction so * that it appears to dtrace_probe() that we're on the original * instruction, and so that the user can't easily detect our * complex web of lies. dtrace_return_probe() (our caller) * will correctly set %pc after we return. */ rp->r_rip = pc; fasttrap_return_common(rp, pc, p->p_pid, npc); return (0); } /*ARGSUSED*/ uint64_t fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) { struct reg r; fill_regs(curthread, &r); return (fasttrap_anarg(&r, 1, argno)); } /*ARGSUSED*/ uint64_t fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) { struct reg r; fill_regs(curthread, &r); return (fasttrap_anarg(&r, 0, argno)); } static ulong_t fasttrap_getreg(struct reg *rp, uint_t reg) { #ifdef __amd64 switch (reg) { case REG_R15: return (rp->r_r15); case REG_R14: return (rp->r_r14); case REG_R13: return (rp->r_r13); case REG_R12: return (rp->r_r12); case REG_R11: return (rp->r_r11); case REG_R10: return (rp->r_r10); case REG_R9: return (rp->r_r9); case REG_R8: return (rp->r_r8); case REG_RDI: return (rp->r_rdi); case REG_RSI: return (rp->r_rsi); case REG_RBP: return (rp->r_rbp); case REG_RBX: return (rp->r_rbx); case REG_RDX: return (rp->r_rdx); case REG_RCX: return (rp->r_rcx); case REG_RAX: return (rp->r_rax); case REG_TRAPNO: return (rp->r_trapno); case REG_ERR: return (rp->r_err); case REG_RIP: return (rp->r_rip); case REG_CS: return (rp->r_cs); #ifdef illumos case REG_RFL: return (rp->r_rfl); #endif case REG_RSP: return (rp->r_rsp); case REG_SS: return (rp->r_ss); case REG_FS: return (rp->r_fs); case REG_GS: return (rp->r_gs); case REG_DS: return (rp->r_ds); case REG_ES: return (rp->r_es); case REG_FSBASE: return (rdmsr(MSR_FSBASE)); case REG_GSBASE: return (rdmsr(MSR_GSBASE)); } panic("dtrace: illegal register constant"); /*NOTREACHED*/ #else #define _NGREG 19 if (reg >= _NGREG) panic("dtrace: illegal register constant"); return (((greg_t *)&rp->r_gs)[reg]); #endif } Index: user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris =================================================================== --- user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris (revision 303642) Property changes on: user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/cddl/contrib/opensolaris:r303053-303641 Index: user/alc/PQ_LAUNDRY/sys/conf/files.arm =================================================================== --- user/alc/PQ_LAUNDRY/sys/conf/files.arm (revision 303641) +++ 
user/alc/PQ_LAUNDRY/sys/conf/files.arm (revision 303642) @@ -1,144 +1,145 @@ # $FreeBSD$ arm/arm/autoconf.c standard arm/arm/bcopy_page.S standard arm/arm/bcopyinout.S standard arm/arm/blockio.S standard arm/arm/bus_space_asm_generic.S standard arm/arm/bus_space_base.c optional fdt arm/arm/bus_space_generic.c standard arm/arm/busdma_machdep-v4.c optional !armv6 arm/arm/busdma_machdep-v6.c optional armv6 arm/arm/copystr.S standard arm/arm/cpufunc.c standard arm/arm/cpufunc_asm.S standard arm/arm/cpufunc_asm_arm9.S optional cpu_arm9 | cpu_arm9e arm/arm/cpufunc_asm_arm11.S optional cpu_arm1176 arm/arm/cpufunc_asm_arm11x6.S optional cpu_arm1176 arm/arm/cpufunc_asm_armv4.S optional cpu_arm9 | cpu_arm9e | cpu_fa526 | cpu_xscale_pxa2x0 | cpu_xscale_ixp425 | cpu_xscale_81342 arm/arm/cpufunc_asm_armv5_ec.S optional cpu_arm9e arm/arm/cpufunc_asm_armv6.S optional cpu_arm1176 arm/arm/cpufunc_asm_armv7.S optional cpu_cortexa | cpu_krait | cpu_mv_pj4b arm/arm/cpufunc_asm_fa526.S optional cpu_fa526 arm/arm/cpufunc_asm_pj4b.S optional cpu_mv_pj4b arm/arm/cpufunc_asm_sheeva.S optional cpu_arm9e arm/arm/cpufunc_asm_xscale.S optional cpu_xscale_pxa2x0 | cpu_xscale_ixp425 | cpu_xscale_81342 arm/arm/cpufunc_asm_xscale_c3.S optional cpu_xscale_81342 arm/arm/cpuinfo.c standard arm/arm/cpu_asm-v6.S optional armv6 arm/arm/db_disasm.c optional ddb arm/arm/db_interface.c optional ddb arm/arm/db_trace.c optional ddb arm/arm/debug_monitor.c optional ddb armv6 arm/arm/disassem.c optional ddb arm/arm/dump_machdep.c standard arm/arm/elf_machdep.c standard arm/arm/elf_note.S standard arm/arm/exception.S standard arm/arm/fiq.c standard arm/arm/fiq_subr.S standard arm/arm/fusu.S standard arm/arm/gdb_machdep.c optional gdb arm/arm/generic_timer.c optional generic_timer arm/arm/gic.c optional gic +arm/arm/gic_fdt.c optional gic fdt arm/arm/hdmi_if.m optional hdmi arm/arm/identcpu.c standard arm/arm/in_cksum.c optional inet | inet6 arm/arm/in_cksum_arm.S optional inet | inet6 arm/arm/intr.c optional !intrng kern/subr_intr.c optional intrng arm/arm/locore.S standard no-obj arm/arm/machdep.c standard arm/arm/machdep_intr.c standard arm/arm/mem.c optional mem arm/arm/minidump_machdep.c optional mem arm/arm/mp_machdep.c optional smp arm/arm/mpcore_timer.c optional mpcore_timer arm/arm/nexus.c standard arm/arm/ofw_machdep.c optional fdt arm/arm/physmem.c standard arm/arm/pl190.c optional pl190 arm/arm/pl310.c optional pl310 arm/arm/platform.c optional platform arm/arm/platform_if.m optional platform arm/arm/pmap-v4.c optional !armv6 arm/arm/pmap-v6.c optional armv6 arm/arm/pmu.c optional pmu | fdt hwpmc arm/arm/sc_machdep.c optional sc arm/arm/setcpsr.S standard arm/arm/setstack.s standard arm/arm/stack_machdep.c optional ddb | stack arm/arm/stdatomic.c standard \ compile-with "${NORMAL_C:N-Wmissing-prototypes}" arm/arm/support.S standard arm/arm/swtch.S standard arm/arm/swtch-v4.S optional !armv6 arm/arm/swtch-v6.S optional armv6 arm/arm/sys_machdep.c standard arm/arm/syscall.c standard arm/arm/trap-v4.c optional !armv6 arm/arm/trap-v6.c optional armv6 arm/arm/uio_machdep.c standard arm/arm/undefined.c standard arm/arm/unwind.c optional ddb | kdtrace_hooks arm/arm/vm_machdep.c standard arm/arm/vfp.c standard board_id.h standard \ dependency "$S/arm/conf/genboardid.awk $S/arm/conf/mach-types" \ compile-with "${AWK} -f $S/arm/conf/genboardid.awk $S/arm/conf/mach-types > board_id.h" \ no-obj no-implicit-rule before-depend \ clean "board_id.h" cddl/compat/opensolaris/kern/opensolaris_atomic.c optional zfs | dtrace compile-with 
"${CDDL_C}" cddl/dev/dtrace/arm/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/arm/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/arm/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" crypto/blowfish/bf_enc.c optional crypto | ipsec crypto/des/des_enc.c optional crypto | ipsec | netsmb dev/dwc/if_dwc.c optional dwc dev/dwc/if_dwc_if.m optional dwc dev/fb/fb.c optional sc dev/fdt/fdt_arm_platform.c optional platform fdt dev/hwpmc/hwpmc_arm.c optional hwpmc dev/hwpmc/hwpmc_armv7.c optional hwpmc armv6 dev/iicbus/twsi/twsi.c optional twsi dev/ofw/ofw_cpu.c optional fdt dev/ofw/ofwpci.c optional fdt pci dev/pci/pci_host_generic.c optional pci_host_generic pci fdt dev/psci/psci.c optional psci dev/psci/psci_arm.S optional psci dev/syscons/scgfbrndr.c optional sc dev/syscons/scterm-teken.c optional sc dev/syscons/scvtb.c optional sc dev/uart/uart_cpu_fdt.c optional uart fdt font.h optional sc \ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ no-obj no-implicit-rule before-depend \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" kern/msi_if.m optional intrng kern/pic_if.m optional intrng kern/subr_busdma_bufalloc.c standard kern/subr_devmap.c standard kern/subr_sfbuf.c standard libkern/arm/aeabi_unwind.c standard libkern/arm/divsi3.S standard libkern/arm/ffs.S standard libkern/arm/ldivmod.S standard libkern/arm/ldivmod_helper.c standard libkern/arm/memclr.S standard libkern/arm/memcpy.S standard libkern/arm/memset.S standard libkern/arm/muldi3.c standard libkern/ashldi3.c standard libkern/ashrdi3.c standard libkern/divdi3.c standard libkern/ffsl.c standard libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/lshrdi3.c standard libkern/moddi3.c standard libkern/qdivrem.c standard libkern/ucmpdi2.c standard libkern/udivdi3.c standard libkern/umoddi3.c standard Index: user/alc/PQ_LAUNDRY/sys/conf/files.arm64 =================================================================== --- user/alc/PQ_LAUNDRY/sys/conf/files.arm64 (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/conf/files.arm64 (revision 303642) @@ -1,101 +1,102 @@ # $FreeBSD$ arm/arm/generic_timer.c standard -arm/arm/gic.c optional intrng +arm/arm/gic.c standard +arm/arm/gic_fdt.c optional fdt arm/arm/pmu.c standard arm64/acpica/acpi_machdep.c optional acpi arm64/acpica/OsdEnvironment.c optional acpi arm64/acpica/acpi_wakeup.c optional acpi arm64/acpica/pci_cfgreg.c optional acpi pci arm64/arm64/autoconf.c standard arm64/arm64/bcopy.c standard arm64/arm64/bus_machdep.c standard arm64/arm64/bus_space_asm.S standard arm64/arm64/busdma_bounce.c standard arm64/arm64/busdma_machdep.c standard arm64/arm64/bzero.S standard arm64/arm64/clock.c standard arm64/arm64/copyinout.S standard arm64/arm64/copystr.c standard arm64/arm64/cpufunc_asm.S standard arm64/arm64/db_disasm.c optional ddb arm64/arm64/db_interface.c optional ddb arm64/arm64/db_trace.c optional ddb arm64/arm64/debug_monitor.c optional kdb arm64/arm64/disassem.c optional ddb arm64/arm64/dump_machdep.c standard arm64/arm64/elf_machdep.c standard 
arm64/arm64/exception.S standard arm64/arm64/gicv3_its.c optional intrng arm64/arm64/gic_v3.c standard arm64/arm64/gic_v3_fdt.c optional fdt arm64/arm64/identcpu.c standard arm64/arm64/in_cksum.c optional inet | inet6 arm64/arm64/locore.S standard no-obj arm64/arm64/machdep.c standard arm64/arm64/mem.c standard arm64/arm64/minidump_machdep.c standard arm64/arm64/mp_machdep.c optional smp arm64/arm64/nexus.c standard arm64/arm64/ofw_machdep.c optional fdt arm64/arm64/pmap.c standard arm64/arm64/stack_machdep.c optional ddb | stack arm64/arm64/support.S standard arm64/arm64/swtch.S standard arm64/arm64/sys_machdep.c standard arm64/arm64/trap.c standard arm64/arm64/uio_machdep.c standard arm64/arm64/uma_machdep.c standard arm64/arm64/unwind.c optional ddb | kdtrace_hooks | stack arm64/arm64/vfp.c standard arm64/arm64/vm_machdep.c standard arm64/cavium/thunder_pcie_fdt.c optional soc_cavm_thunderx pci fdt arm64/cavium/thunder_pcie_pem.c optional soc_cavm_thunderx pci arm64/cavium/thunder_pcie_pem_fdt.c optional soc_cavm_thunderx pci fdt arm64/cavium/thunder_pcie_common.c optional soc_cavm_thunderx pci arm64/cloudabi64/cloudabi64_sysvec.c optional compat_cloudabi64 crypto/blowfish/bf_enc.c optional crypto | ipsec crypto/des/des_enc.c optional crypto | ipsec | netsmb dev/acpica/acpi_if.m optional acpi dev/ahci/ahci_generic.c optional ahci fdt dev/hwpmc/hwpmc_arm64.c optional hwpmc dev/hwpmc/hwpmc_arm64_md.c optional hwpmc dev/mmc/host/dwmmc.c optional dwmmc fdt dev/mmc/host/dwmmc_hisi.c optional dwmmc fdt soc_hisi_hi6220 dev/ofw/ofw_cpu.c optional fdt dev/ofw/ofwpci.c optional fdt pci dev/pci/pci_host_generic.c optional pci fdt dev/psci/psci.c optional psci dev/psci/psci_arm64.S optional psci dev/uart/uart_cpu_fdt.c optional uart fdt dev/uart/uart_dev_pl011.c optional uart pl011 dev/usb/controller/dwc_otg_hisi.c optional dwcotg fdt soc_hisi_hi6220 dev/usb/controller/generic_ehci.c optional ehci acpi dev/usb/controller/generic_ohci.c optional ohci fdt dev/usb/controller/generic_usb_if.m optional ohci fdt dev/vnic/mrml_bridge.c optional vnic fdt dev/vnic/nic_main.c optional vnic pci dev/vnic/nicvf_main.c optional vnic pci pci_iov dev/vnic/nicvf_queues.c optional vnic pci pci_iov dev/vnic/thunder_bgx_fdt.c optional vnic fdt dev/vnic/thunder_bgx.c optional vnic pci dev/vnic/thunder_mdio_fdt.c optional vnic fdt dev/vnic/thunder_mdio.c optional vnic dev/vnic/lmac_if.m optional inet | inet6 | vnic kern/kern_clocksource.c standard kern/msi_if.m optional intrng kern/pic_if.m optional intrng kern/subr_devmap.c standard kern/subr_intr.c optional intrng libkern/bcmp.c standard libkern/ffs.c standard libkern/ffsl.c standard libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/memmove.c standard libkern/memset.c standard cddl/contrib/opensolaris/common/atomic/aarch64/opensolaris_atomic.S optional zfs | dtrace compile-with "${CDDL_C}" cddl/dev/dtrace/aarch64/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/aarch64/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/aarch64/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" Index: user/alc/PQ_LAUNDRY/sys/contrib/libnv/nvlist.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/contrib/libnv/nvlist.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/contrib/libnv/nvlist.c (revision 303642) @@ -1,2026 +1,2025 @@ /*- * Copyright (c) 2009-2013 The FreeBSD Foundation * Copyright (c) 2013-2015 Mariusz Zaborski * All 
rights reserved. * * This software was developed by Pawel Jakub Dawidek under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #ifdef _KERNEL #include #include #include #include #include #include #else #include #include #include #include #include -#define _WITH_DPRINTF #include #include #include #include #include "msgio.h" #endif #ifdef HAVE_PJDLOG #include #endif #include #include "nv_impl.h" #include "nvlist_impl.h" #include "nvpair_impl.h" #ifndef HAVE_PJDLOG #ifdef _KERNEL #define PJDLOG_ASSERT(...) MPASS(__VA_ARGS__) #define PJDLOG_RASSERT(expr, ...) KASSERT(expr, (__VA_ARGS__)) #define PJDLOG_ABORT(...) panic(__VA_ARGS__) #else #include #define PJDLOG_ASSERT(...) assert(__VA_ARGS__) #define PJDLOG_RASSERT(expr, ...) assert(expr) #define PJDLOG_ABORT(...) 
do { \ fprintf(stderr, "%s:%u: ", __FILE__, __LINE__); \ fprintf(stderr, __VA_ARGS__); \ fprintf(stderr, "\n"); \ abort(); \ } while (0) #endif #endif #define NV_FLAG_PRIVATE_MASK (NV_FLAG_BIG_ENDIAN | NV_FLAG_IN_ARRAY) #define NV_FLAG_PUBLIC_MASK (NV_FLAG_IGNORE_CASE | NV_FLAG_NO_UNIQUE) #define NV_FLAG_ALL_MASK (NV_FLAG_PRIVATE_MASK | NV_FLAG_PUBLIC_MASK) #define NVLIST_MAGIC 0x6e766c /* "nvl" */ struct nvlist { int nvl_magic; int nvl_error; int nvl_flags; nvpair_t *nvl_parent; nvpair_t *nvl_array_next; struct nvl_head nvl_head; }; #define NVLIST_ASSERT(nvl) do { \ PJDLOG_ASSERT((nvl) != NULL); \ PJDLOG_ASSERT((nvl)->nvl_magic == NVLIST_MAGIC); \ } while (0) #ifdef _KERNEL MALLOC_DEFINE(M_NVLIST, "nvlist", "kernel nvlist"); #endif #define NVPAIR_ASSERT(nvp) nvpair_assert(nvp) #define NVLIST_HEADER_MAGIC 0x6c #define NVLIST_HEADER_VERSION 0x00 struct nvlist_header { uint8_t nvlh_magic; uint8_t nvlh_version; uint8_t nvlh_flags; uint64_t nvlh_descriptors; uint64_t nvlh_size; } __packed; nvlist_t * nvlist_create(int flags) { nvlist_t *nvl; PJDLOG_ASSERT((flags & ~(NV_FLAG_PUBLIC_MASK)) == 0); nvl = nv_malloc(sizeof(*nvl)); if (nvl == NULL) return (NULL); nvl->nvl_error = 0; nvl->nvl_flags = flags; nvl->nvl_parent = NULL; nvl->nvl_array_next = NULL; TAILQ_INIT(&nvl->nvl_head); nvl->nvl_magic = NVLIST_MAGIC; return (nvl); } void nvlist_destroy(nvlist_t *nvl) { nvpair_t *nvp; if (nvl == NULL) return; ERRNO_SAVE(); NVLIST_ASSERT(nvl); while ((nvp = nvlist_first_nvpair(nvl)) != NULL) { nvlist_remove_nvpair(nvl, nvp); nvpair_free(nvp); } if (nvl->nvl_array_next != NULL) nvpair_free_structure(nvl->nvl_array_next); nvl->nvl_array_next = NULL; nvl->nvl_parent = NULL; nvl->nvl_magic = 0; nv_free(nvl); ERRNO_RESTORE(); } void nvlist_set_error(nvlist_t *nvl, int error) { PJDLOG_ASSERT(error != 0); /* * Check for error != 0 so that we don't do the wrong thing if somebody * tries to abuse this API when asserts are disabled. 
*/ if (nvl != NULL && error != 0 && nvl->nvl_error == 0) nvl->nvl_error = error; } int nvlist_error(const nvlist_t *nvl) { if (nvl == NULL) return (ENOMEM); NVLIST_ASSERT(nvl); return (nvl->nvl_error); } nvpair_t * nvlist_get_nvpair_parent(const nvlist_t *nvl) { NVLIST_ASSERT(nvl); return (nvl->nvl_parent); } const nvlist_t * nvlist_get_parent(const nvlist_t *nvl, void **cookiep) { nvpair_t *nvp; NVLIST_ASSERT(nvl); nvp = nvl->nvl_parent; if (cookiep != NULL) *cookiep = nvp; if (nvp == NULL) return (NULL); return (nvpair_nvlist(nvp)); } void nvlist_set_parent(nvlist_t *nvl, nvpair_t *parent) { NVLIST_ASSERT(nvl); nvl->nvl_parent = parent; } void nvlist_set_array_next(nvlist_t *nvl, nvpair_t *ele) { NVLIST_ASSERT(nvl); if (ele != NULL) { nvl->nvl_flags |= NV_FLAG_IN_ARRAY; } else { nvl->nvl_flags &= ~NV_FLAG_IN_ARRAY; nv_free(nvl->nvl_array_next); } nvl->nvl_array_next = ele; } bool nvlist_in_array(const nvlist_t *nvl) { NVLIST_ASSERT(nvl); return ((nvl->nvl_flags & NV_FLAG_IN_ARRAY) != 0); } const nvlist_t * nvlist_get_array_next(const nvlist_t *nvl) { nvpair_t *nvp; NVLIST_ASSERT(nvl); nvp = nvl->nvl_array_next; if (nvp == NULL) return (NULL); return (nvpair_get_nvlist(nvp)); } const nvlist_t * nvlist_get_pararr(const nvlist_t *nvl, void **cookiep) { const nvlist_t *ret; ret = nvlist_get_array_next(nvl); if (ret != NULL) { if (cookiep != NULL) *cookiep = NULL; return (ret); } ret = nvlist_get_parent(nvl, cookiep); return (ret); } bool nvlist_empty(const nvlist_t *nvl) { NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); return (nvlist_first_nvpair(nvl) == NULL); } int nvlist_flags(const nvlist_t *nvl) { NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); return (nvl->nvl_flags & NV_FLAG_PUBLIC_MASK); } void nvlist_set_flags(nvlist_t *nvl, int flags) { NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); nvl->nvl_flags = flags; } static void nvlist_report_missing(int type, const char *name) { PJDLOG_ABORT("Element '%s' of type %s doesn't exist.", name, nvpair_type_string(type)); } static nvpair_t * nvlist_find(const nvlist_t *nvl, int type, const char *name) { nvpair_t *nvp; NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); PJDLOG_ASSERT(type == NV_TYPE_NONE || (type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST)); for (nvp = nvlist_first_nvpair(nvl); nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) { if (type != NV_TYPE_NONE && nvpair_type(nvp) != type) continue; if ((nvl->nvl_flags & NV_FLAG_IGNORE_CASE) != 0) { if (strcasecmp(nvpair_name(nvp), name) != 0) continue; } else { if (strcmp(nvpair_name(nvp), name) != 0) continue; } break; } if (nvp == NULL) ERRNO_SET(ENOENT); return (nvp); } bool nvlist_exists_type(const nvlist_t *nvl, const char *name, int type) { NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); PJDLOG_ASSERT(type == NV_TYPE_NONE || (type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST)); return (nvlist_find(nvl, type, name) != NULL); } void nvlist_free_type(nvlist_t *nvl, const char *name, int type) { nvpair_t *nvp; NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); PJDLOG_ASSERT(type == NV_TYPE_NONE || (type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST)); nvp = nvlist_find(nvl, type, name); if (nvp != NULL) nvlist_free_nvpair(nvl, nvp); else nvlist_report_missing(type, name); } nvlist_t * nvlist_clone(const nvlist_t *nvl) { nvlist_t *newnvl; nvpair_t *nvp, *newnvp; NVLIST_ASSERT(nvl); if (nvl->nvl_error != 0) { ERRNO_SET(nvl->nvl_error); return (NULL); } newnvl = nvlist_create(nvl->nvl_flags & NV_FLAG_PUBLIC_MASK); for (nvp = nvlist_first_nvpair(nvl); nvp != 
NULL; nvp = nvlist_next_nvpair(nvl, nvp)) { newnvp = nvpair_clone(nvp); if (newnvp == NULL) break; (void)nvlist_move_nvpair(newnvl, newnvp); } if (nvp != NULL) { nvlist_destroy(newnvl); return (NULL); } return (newnvl); } #ifndef _KERNEL static bool nvlist_dump_error_check(const nvlist_t *nvl, int fd, int level) { if (nvlist_error(nvl) != 0) { dprintf(fd, "%*serror: %d\n", level * 4, "", nvlist_error(nvl)); return (true); } return (false); } /* * Dump content of nvlist. */ void nvlist_dump(const nvlist_t *nvl, int fd) { const nvlist_t *tmpnvl; nvpair_t *nvp, *tmpnvp; void *cookie; int level; level = 0; if (nvlist_dump_error_check(nvl, fd, level)) return; nvp = nvlist_first_nvpair(nvl); while (nvp != NULL) { dprintf(fd, "%*s%s (%s):", level * 4, "", nvpair_name(nvp), nvpair_type_string(nvpair_type(nvp))); switch (nvpair_type(nvp)) { case NV_TYPE_NULL: dprintf(fd, " null\n"); break; case NV_TYPE_BOOL: dprintf(fd, " %s\n", nvpair_get_bool(nvp) ? "TRUE" : "FALSE"); break; case NV_TYPE_NUMBER: dprintf(fd, " %ju (%jd) (0x%jx)\n", (uintmax_t)nvpair_get_number(nvp), (intmax_t)nvpair_get_number(nvp), (uintmax_t)nvpair_get_number(nvp)); break; case NV_TYPE_STRING: dprintf(fd, " [%s]\n", nvpair_get_string(nvp)); break; case NV_TYPE_NVLIST: dprintf(fd, "\n"); tmpnvl = nvpair_get_nvlist(nvp); if (nvlist_dump_error_check(tmpnvl, fd, level + 1)) break; tmpnvp = nvlist_first_nvpair(tmpnvl); if (tmpnvp != NULL) { nvl = tmpnvl; nvp = tmpnvp; level++; continue; } break; case NV_TYPE_DESCRIPTOR: dprintf(fd, " %d\n", nvpair_get_descriptor(nvp)); break; case NV_TYPE_BINARY: { const unsigned char *binary; unsigned int ii; size_t size; binary = nvpair_get_binary(nvp, &size); dprintf(fd, " %zu ", size); for (ii = 0; ii < size; ii++) dprintf(fd, "%02hhx", binary[ii]); dprintf(fd, "\n"); break; } case NV_TYPE_BOOL_ARRAY: { const bool *value; unsigned int ii; size_t nitems; value = nvpair_get_bool_array(nvp, &nitems); dprintf(fd, " [ "); for (ii = 0; ii < nitems; ii++) { dprintf(fd, "%s", value[ii] ? 
"TRUE" : "FALSE"); if (ii != nitems - 1) dprintf(fd, ", "); } dprintf(fd, " ]\n"); break; } case NV_TYPE_STRING_ARRAY: { const char * const *value; unsigned int ii; size_t nitems; value = nvpair_get_string_array(nvp, &nitems); dprintf(fd, " [ "); for (ii = 0; ii < nitems; ii++) { if (value[ii] == NULL) dprintf(fd, "NULL"); else dprintf(fd, "\"%s\"", value[ii]); if (ii != nitems - 1) dprintf(fd, ", "); } dprintf(fd, " ]\n"); break; } case NV_TYPE_NUMBER_ARRAY: { const uint64_t *value; unsigned int ii; size_t nitems; value = nvpair_get_number_array(nvp, &nitems); dprintf(fd, " [ "); for (ii = 0; ii < nitems; ii++) { dprintf(fd, "%ju (%jd) (0x%jx)", value[ii], value[ii], value[ii]); if (ii != nitems - 1) dprintf(fd, ", "); } dprintf(fd, " ]\n"); break; } case NV_TYPE_DESCRIPTOR_ARRAY: { const int *value; unsigned int ii; size_t nitems; value = nvpair_get_descriptor_array(nvp, &nitems); dprintf(fd, " [ "); for (ii = 0; ii < nitems; ii++) { dprintf(fd, "%d", value[ii]); if (ii != nitems - 1) dprintf(fd, ", "); } dprintf(fd, " ]\n"); break; } case NV_TYPE_NVLIST_ARRAY: { const nvlist_t * const *value; unsigned int ii; size_t nitems; value = nvpair_get_nvlist_array(nvp, &nitems); dprintf(fd, " %zu\n", nitems); tmpnvl = NULL; tmpnvp = NULL; for (ii = 0; ii < nitems; ii++) { if (nvlist_dump_error_check(value[ii], fd, level + 1)) { break; } if (tmpnvl == NULL) { tmpnvp = nvlist_first_nvpair(value[ii]); if (tmpnvp != NULL) { tmpnvl = value[ii]; } else { dprintf(fd, "%*s,\n", (level + 1) * 4, ""); } } } if (tmpnvp != NULL) { nvl = tmpnvl; nvp = tmpnvp; level++; continue; } break; } default: PJDLOG_ABORT("Unknown type: %d.", nvpair_type(nvp)); } while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) { do { cookie = NULL; if (nvlist_in_array(nvl)) dprintf(fd, "%*s,\n", level * 4, ""); nvl = nvlist_get_pararr(nvl, &cookie); if (nvl == NULL) return; if (nvlist_in_array(nvl) && cookie == NULL) { nvp = nvlist_first_nvpair(nvl); } else { nvp = cookie; level--; } } while (nvp == NULL); if (nvlist_in_array(nvl) && cookie == NULL) break; } } } void nvlist_fdump(const nvlist_t *nvl, FILE *fp) { fflush(fp); nvlist_dump(nvl, fileno(fp)); } #endif /* * The function obtains size of the nvlist after nvlist_pack(). 
*/ size_t nvlist_size(const nvlist_t *nvl) { const nvlist_t *tmpnvl; const nvlist_t * const *nvlarray; const nvpair_t *nvp, *tmpnvp; void *cookie; size_t size, nitems; unsigned int ii; NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); size = sizeof(struct nvlist_header); nvp = nvlist_first_nvpair(nvl); while (nvp != NULL) { size += nvpair_header_size(); size += strlen(nvpair_name(nvp)) + 1; if (nvpair_type(nvp) == NV_TYPE_NVLIST) { size += sizeof(struct nvlist_header); size += nvpair_header_size() + 1; tmpnvl = nvpair_get_nvlist(nvp); PJDLOG_ASSERT(tmpnvl->nvl_error == 0); tmpnvp = nvlist_first_nvpair(tmpnvl); if (tmpnvp != NULL) { nvl = tmpnvl; nvp = tmpnvp; continue; } } else if (nvpair_type(nvp) == NV_TYPE_NVLIST_ARRAY) { nvlarray = nvpair_get_nvlist_array(nvp, &nitems); PJDLOG_ASSERT(nitems > 0); size += (nvpair_header_size() + 1) * nitems; size += sizeof(struct nvlist_header) * nitems; tmpnvl = NULL; tmpnvp = NULL; for (ii = 0; ii < nitems; ii++) { PJDLOG_ASSERT(nvlarray[ii]->nvl_error == 0); tmpnvp = nvlist_first_nvpair(nvlarray[ii]); if (tmpnvp != NULL) { tmpnvl = nvlarray[ii]; break; } } if (tmpnvp != NULL) { nvp = tmpnvp; nvl = tmpnvl; continue; } } else { size += nvpair_size(nvp); } while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) { do { cookie = NULL; nvl = nvlist_get_pararr(nvl, &cookie); if (nvl == NULL) goto out; if (nvlist_in_array(nvl) && cookie == NULL) { nvp = nvlist_first_nvpair(nvl); } else { nvp = cookie; } } while (nvp == NULL); if (nvlist_in_array(nvl) && cookie == NULL) break; } } out: return (size); } #ifndef _KERNEL static int * nvlist_xdescriptors(const nvlist_t *nvl, int *descs) { nvpair_t *nvp; const char *name; int type; NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); nvp = NULL; do { while ((name = nvlist_next(nvl, &type, (void**)&nvp)) != NULL) { switch (type) { case NV_TYPE_DESCRIPTOR: *descs = nvpair_get_descriptor(nvp); descs++; break; case NV_TYPE_DESCRIPTOR_ARRAY: { const int *value; size_t nitems; unsigned int ii; value = nvpair_get_descriptor_array(nvp, &nitems); for (ii = 0; ii < nitems; ii++) { *descs = value[ii]; descs++; } break; } case NV_TYPE_NVLIST: nvl = nvpair_get_nvlist(nvp); nvp = NULL; break; case NV_TYPE_NVLIST_ARRAY: { const nvlist_t * const *value; size_t nitems; value = nvpair_get_nvlist_array(nvp, &nitems); PJDLOG_ASSERT(value != NULL); PJDLOG_ASSERT(nitems > 0); nvl = value[0]; nvp = NULL; break; } } } } while ((nvl = nvlist_get_pararr(nvl, (void**)&nvp)) != NULL); return (descs); } #endif #ifndef _KERNEL int * nvlist_descriptors(const nvlist_t *nvl, size_t *nitemsp) { size_t nitems; int *fds; nitems = nvlist_ndescriptors(nvl); fds = nv_malloc(sizeof(fds[0]) * (nitems + 1)); if (fds == NULL) return (NULL); if (nitems > 0) nvlist_xdescriptors(nvl, fds); fds[nitems] = -1; if (nitemsp != NULL) *nitemsp = nitems; return (fds); } #endif size_t nvlist_ndescriptors(const nvlist_t *nvl) { #ifndef _KERNEL nvpair_t *nvp; const char *name; size_t ndescs; int type; NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); ndescs = 0; nvp = NULL; do { while ((name = nvlist_next(nvl, &type, (void**)&nvp)) != NULL) { switch (type) { case NV_TYPE_DESCRIPTOR: ndescs++; break; case NV_TYPE_NVLIST: nvl = nvpair_get_nvlist(nvp); nvp = NULL; break; case NV_TYPE_NVLIST_ARRAY: { const nvlist_t * const *value; size_t nitems; value = nvpair_get_nvlist_array(nvp, &nitems); PJDLOG_ASSERT(value != NULL); PJDLOG_ASSERT(nitems > 0); nvl = value[0]; nvp = NULL; break; } case NV_TYPE_DESCRIPTOR_ARRAY: { size_t nitems; 
(void)nvpair_get_descriptor_array(nvp, &nitems); ndescs += nitems; break; } } } } while ((nvl = nvlist_get_pararr(nvl, (void**)&nvp)) != NULL); return (ndescs); #else return (0); #endif } static unsigned char * nvlist_pack_header(const nvlist_t *nvl, unsigned char *ptr, size_t *leftp) { struct nvlist_header nvlhdr; NVLIST_ASSERT(nvl); nvlhdr.nvlh_magic = NVLIST_HEADER_MAGIC; nvlhdr.nvlh_version = NVLIST_HEADER_VERSION; nvlhdr.nvlh_flags = nvl->nvl_flags; #if BYTE_ORDER == BIG_ENDIAN nvlhdr.nvlh_flags |= NV_FLAG_BIG_ENDIAN; #endif nvlhdr.nvlh_descriptors = nvlist_ndescriptors(nvl); nvlhdr.nvlh_size = *leftp - sizeof(nvlhdr); PJDLOG_ASSERT(*leftp >= sizeof(nvlhdr)); memcpy(ptr, &nvlhdr, sizeof(nvlhdr)); ptr += sizeof(nvlhdr); *leftp -= sizeof(nvlhdr); return (ptr); } static void * nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep) { unsigned char *buf, *ptr; size_t left, size; const nvlist_t *tmpnvl; nvpair_t *nvp, *tmpnvp; void *cookie; NVLIST_ASSERT(nvl); if (nvl->nvl_error != 0) { ERRNO_SET(nvl->nvl_error); return (NULL); } size = nvlist_size(nvl); buf = nv_malloc(size); if (buf == NULL) return (NULL); ptr = buf; left = size; ptr = nvlist_pack_header(nvl, ptr, &left); nvp = nvlist_first_nvpair(nvl); while (nvp != NULL) { NVPAIR_ASSERT(nvp); nvpair_init_datasize(nvp); ptr = nvpair_pack_header(nvp, ptr, &left); if (ptr == NULL) goto fail; switch (nvpair_type(nvp)) { case NV_TYPE_NULL: ptr = nvpair_pack_null(nvp, ptr, &left); break; case NV_TYPE_BOOL: ptr = nvpair_pack_bool(nvp, ptr, &left); break; case NV_TYPE_NUMBER: ptr = nvpair_pack_number(nvp, ptr, &left); break; case NV_TYPE_STRING: ptr = nvpair_pack_string(nvp, ptr, &left); break; case NV_TYPE_NVLIST: tmpnvl = nvpair_get_nvlist(nvp); ptr = nvlist_pack_header(tmpnvl, ptr, &left); if (ptr == NULL) goto fail; tmpnvp = nvlist_first_nvpair(tmpnvl); if (tmpnvp != NULL) { nvl = tmpnvl; nvp = tmpnvp; continue; } ptr = nvpair_pack_nvlist_up(ptr, &left); break; #ifndef _KERNEL case NV_TYPE_DESCRIPTOR: ptr = nvpair_pack_descriptor(nvp, ptr, fdidxp, &left); break; case NV_TYPE_DESCRIPTOR_ARRAY: ptr = nvpair_pack_descriptor_array(nvp, ptr, fdidxp, &left); break; #endif case NV_TYPE_BINARY: ptr = nvpair_pack_binary(nvp, ptr, &left); break; case NV_TYPE_BOOL_ARRAY: ptr = nvpair_pack_bool_array(nvp, ptr, &left); break; case NV_TYPE_NUMBER_ARRAY: ptr = nvpair_pack_number_array(nvp, ptr, &left); break; case NV_TYPE_STRING_ARRAY: ptr = nvpair_pack_string_array(nvp, ptr, &left); break; case NV_TYPE_NVLIST_ARRAY: { const nvlist_t * const * value; size_t nitems; unsigned int ii; tmpnvl = NULL; value = nvpair_get_nvlist_array(nvp, &nitems); for (ii = 0; ii < nitems; ii++) { ptr = nvlist_pack_header(value[ii], ptr, &left); if (ptr == NULL) goto out; tmpnvp = nvlist_first_nvpair(value[ii]); if (tmpnvp != NULL) { tmpnvl = value[ii]; break; } ptr = nvpair_pack_nvlist_array_next(ptr, &left); if (ptr == NULL) goto out; } if (tmpnvl != NULL) { nvl = tmpnvl; nvp = tmpnvp; continue; } break; } default: PJDLOG_ABORT("Invalid type (%d).", nvpair_type(nvp)); } if (ptr == NULL) goto fail; while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) { do { cookie = NULL; if (nvlist_in_array(nvl)) { ptr = nvpair_pack_nvlist_array_next(ptr, &left); if (ptr == NULL) goto fail; } nvl = nvlist_get_pararr(nvl, &cookie); if (nvl == NULL) goto out; if (nvlist_in_array(nvl) && cookie == NULL) { nvp = nvlist_first_nvpair(nvl); ptr = nvlist_pack_header(nvl, ptr, &left); if (ptr == NULL) goto fail; } else if (nvpair_type((nvpair_t *)cookie) != NV_TYPE_NVLIST_ARRAY) { ptr = 
nvpair_pack_nvlist_up(ptr, &left); if (ptr == NULL) goto fail; nvp = cookie; } else { nvp = cookie; } } while (nvp == NULL); if (nvlist_in_array(nvl) && cookie == NULL) break; } } out: if (sizep != NULL) *sizep = size; return (buf); fail: nv_free(buf); return (NULL); } void * nvlist_pack(const nvlist_t *nvl, size_t *sizep) { NVLIST_ASSERT(nvl); if (nvl->nvl_error != 0) { ERRNO_SET(nvl->nvl_error); return (NULL); } if (nvlist_ndescriptors(nvl) > 0) { ERRNO_SET(EOPNOTSUPP); return (NULL); } return (nvlist_xpack(nvl, NULL, sizep)); } static bool nvlist_check_header(struct nvlist_header *nvlhdrp) { if (nvlhdrp->nvlh_magic != NVLIST_HEADER_MAGIC) { ERRNO_SET(EINVAL); return (false); } if ((nvlhdrp->nvlh_flags & ~NV_FLAG_ALL_MASK) != 0) { ERRNO_SET(EINVAL); return (false); } #if BYTE_ORDER == BIG_ENDIAN if ((nvlhdrp->nvlh_flags & NV_FLAG_BIG_ENDIAN) == 0) { nvlhdrp->nvlh_size = le64toh(nvlhdrp->nvlh_size); nvlhdrp->nvlh_descriptors = le64toh(nvlhdrp->nvlh_descriptors); } #else if ((nvlhdrp->nvlh_flags & NV_FLAG_BIG_ENDIAN) != 0) { nvlhdrp->nvlh_size = be64toh(nvlhdrp->nvlh_size); nvlhdrp->nvlh_descriptors = be64toh(nvlhdrp->nvlh_descriptors); } #endif return (true); } const unsigned char * nvlist_unpack_header(nvlist_t *nvl, const unsigned char *ptr, size_t nfds, bool *isbep, size_t *leftp) { struct nvlist_header nvlhdr; int inarrayf; if (*leftp < sizeof(nvlhdr)) goto failed; memcpy(&nvlhdr, ptr, sizeof(nvlhdr)); if (!nvlist_check_header(&nvlhdr)) goto failed; if (nvlhdr.nvlh_size != *leftp - sizeof(nvlhdr)) goto failed; /* * nvlh_descriptors might be smaller than nfds in embedded nvlists. */ if (nvlhdr.nvlh_descriptors > nfds) goto failed; if ((nvlhdr.nvlh_flags & ~NV_FLAG_ALL_MASK) != 0) goto failed; inarrayf = (nvl->nvl_flags & NV_FLAG_IN_ARRAY); nvl->nvl_flags = (nvlhdr.nvlh_flags & NV_FLAG_PUBLIC_MASK) | inarrayf; ptr += sizeof(nvlhdr); if (isbep != NULL) *isbep = (((int)nvlhdr.nvlh_flags & NV_FLAG_BIG_ENDIAN) != 0); *leftp -= sizeof(nvlhdr); return (ptr); failed: ERRNO_SET(EINVAL); return (NULL); } static nvlist_t * nvlist_xunpack(const void *buf, size_t size, const int *fds, size_t nfds, int flags) { const unsigned char *ptr; nvlist_t *nvl, *retnvl, *tmpnvl, *array; nvpair_t *nvp; size_t left; bool isbe; PJDLOG_ASSERT((flags & ~(NV_FLAG_PUBLIC_MASK)) == 0); left = size; ptr = buf; tmpnvl = array = NULL; nvl = retnvl = nvlist_create(0); if (nvl == NULL) goto failed; ptr = nvlist_unpack_header(nvl, ptr, nfds, &isbe, &left); if (ptr == NULL) goto failed; if (nvl->nvl_flags != flags) { ERRNO_SET(EILSEQ); goto failed; } while (left > 0) { ptr = nvpair_unpack(isbe, ptr, &left, &nvp); if (ptr == NULL) goto failed; switch (nvpair_type(nvp)) { case NV_TYPE_NULL: ptr = nvpair_unpack_null(isbe, nvp, ptr, &left); break; case NV_TYPE_BOOL: ptr = nvpair_unpack_bool(isbe, nvp, ptr, &left); break; case NV_TYPE_NUMBER: ptr = nvpair_unpack_number(isbe, nvp, ptr, &left); break; case NV_TYPE_STRING: ptr = nvpair_unpack_string(isbe, nvp, ptr, &left); break; case NV_TYPE_NVLIST: ptr = nvpair_unpack_nvlist(isbe, nvp, ptr, &left, nfds, &tmpnvl); if (tmpnvl == NULL || ptr == NULL) goto failed; nvlist_set_parent(tmpnvl, nvp); break; #ifndef _KERNEL case NV_TYPE_DESCRIPTOR: ptr = nvpair_unpack_descriptor(isbe, nvp, ptr, &left, fds, nfds); break; case NV_TYPE_DESCRIPTOR_ARRAY: ptr = nvpair_unpack_descriptor_array(isbe, nvp, ptr, &left, fds, nfds); break; #endif case NV_TYPE_BINARY: ptr = nvpair_unpack_binary(isbe, nvp, ptr, &left); break; case NV_TYPE_NVLIST_UP: if (nvl->nvl_parent == NULL) goto failed; nvl = 
nvpair_nvlist(nvl->nvl_parent); nvpair_free_structure(nvp); continue; case NV_TYPE_NVLIST_ARRAY_NEXT: if (nvl->nvl_array_next == NULL) { if (nvl->nvl_parent == NULL) goto failed; nvl = nvpair_nvlist(nvl->nvl_parent); } else { nvl = __DECONST(nvlist_t *, nvlist_get_array_next(nvl)); ptr = nvlist_unpack_header(nvl, ptr, nfds, &isbe, &left); if (ptr == NULL) goto failed; } nvpair_free_structure(nvp); continue; case NV_TYPE_BOOL_ARRAY: ptr = nvpair_unpack_bool_array(isbe, nvp, ptr, &left); break; case NV_TYPE_NUMBER_ARRAY: ptr = nvpair_unpack_number_array(isbe, nvp, ptr, &left); break; case NV_TYPE_STRING_ARRAY: ptr = nvpair_unpack_string_array(isbe, nvp, ptr, &left); break; case NV_TYPE_NVLIST_ARRAY: ptr = nvpair_unpack_nvlist_array(isbe, nvp, ptr, &left, &array); if (ptr == NULL) goto failed; tmpnvl = array; while (array != NULL) { nvlist_set_parent(array, nvp); array = __DECONST(nvlist_t *, nvlist_get_array_next(array)); } ptr = nvlist_unpack_header(tmpnvl, ptr, nfds, &isbe, &left); break; default: PJDLOG_ABORT("Invalid type (%d).", nvpair_type(nvp)); } if (ptr == NULL) goto failed; if (!nvlist_move_nvpair(nvl, nvp)) goto failed; if (tmpnvl != NULL) { nvl = tmpnvl; tmpnvl = NULL; } } return (retnvl); failed: nvlist_destroy(retnvl); return (NULL); } nvlist_t * nvlist_unpack(const void *buf, size_t size, int flags) { return (nvlist_xunpack(buf, size, NULL, 0, flags)); } #ifndef _KERNEL int nvlist_send(int sock, const nvlist_t *nvl) { size_t datasize, nfds; int *fds; void *data; int64_t fdidx; int ret; if (nvlist_error(nvl) != 0) { ERRNO_SET(nvlist_error(nvl)); return (-1); } fds = nvlist_descriptors(nvl, &nfds); if (fds == NULL) return (-1); ret = -1; data = NULL; fdidx = 0; data = nvlist_xpack(nvl, &fdidx, &datasize); if (data == NULL) goto out; if (buf_send(sock, data, datasize) == -1) goto out; if (nfds > 0) { if (fd_send(sock, fds, nfds) == -1) goto out; } ret = 0; out: ERRNO_SAVE(); nv_free(fds); nv_free(data); ERRNO_RESTORE(); return (ret); } nvlist_t * nvlist_recv(int sock, int flags) { struct nvlist_header nvlhdr; nvlist_t *nvl, *ret; unsigned char *buf; size_t nfds, size, i; int *fds; if (buf_recv(sock, &nvlhdr, sizeof(nvlhdr)) == -1) return (NULL); if (!nvlist_check_header(&nvlhdr)) return (NULL); nfds = (size_t)nvlhdr.nvlh_descriptors; size = sizeof(nvlhdr) + (size_t)nvlhdr.nvlh_size; buf = nv_malloc(size); if (buf == NULL) return (NULL); memcpy(buf, &nvlhdr, sizeof(nvlhdr)); ret = NULL; fds = NULL; if (buf_recv(sock, buf + sizeof(nvlhdr), size - sizeof(nvlhdr)) == -1) goto out; if (nfds > 0) { fds = nv_malloc(nfds * sizeof(fds[0])); if (fds == NULL) goto out; if (fd_recv(sock, fds, nfds) == -1) goto out; } nvl = nvlist_xunpack(buf, size, fds, nfds, flags); if (nvl == NULL) { ERRNO_SAVE(); for (i = 0; i < nfds; i++) close(fds[i]); ERRNO_RESTORE(); goto out; } ret = nvl; out: ERRNO_SAVE(); nv_free(buf); nv_free(fds); ERRNO_RESTORE(); return (ret); } nvlist_t * nvlist_xfer(int sock, nvlist_t *nvl, int flags) { if (nvlist_send(sock, nvl) < 0) { nvlist_destroy(nvl); return (NULL); } nvlist_destroy(nvl); return (nvlist_recv(sock, flags)); } #endif nvpair_t * nvlist_first_nvpair(const nvlist_t *nvl) { NVLIST_ASSERT(nvl); return (TAILQ_FIRST(&nvl->nvl_head)); } nvpair_t * nvlist_next_nvpair(const nvlist_t *nvl, const nvpair_t *nvp) { nvpair_t *retnvp; NVLIST_ASSERT(nvl); NVPAIR_ASSERT(nvp); PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl); retnvp = nvpair_next(nvp); PJDLOG_ASSERT(retnvp == NULL || nvpair_nvlist(retnvp) == nvl); return (retnvp); } nvpair_t * nvlist_prev_nvpair(const nvlist_t *nvl, 
const nvpair_t *nvp) { nvpair_t *retnvp; NVLIST_ASSERT(nvl); NVPAIR_ASSERT(nvp); PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl); retnvp = nvpair_prev(nvp); PJDLOG_ASSERT(nvpair_nvlist(retnvp) == nvl); return (retnvp); } const char * nvlist_next(const nvlist_t *nvl, int *typep, void **cookiep) { nvpair_t *nvp; NVLIST_ASSERT(nvl); if (cookiep == NULL || *cookiep == NULL) nvp = nvlist_first_nvpair(nvl); else nvp = nvlist_next_nvpair(nvl, *cookiep); if (nvp == NULL) return (NULL); if (typep != NULL) *typep = nvpair_type(nvp); if (cookiep != NULL) *cookiep = nvp; return (nvpair_name(nvp)); } bool nvlist_exists(const nvlist_t *nvl, const char *name) { return (nvlist_find(nvl, NV_TYPE_NONE, name) != NULL); } #define NVLIST_EXISTS(type, TYPE) \ bool \ nvlist_exists_##type(const nvlist_t *nvl, const char *name) \ { \ \ return (nvlist_find(nvl, NV_TYPE_##TYPE, name) != NULL); \ } NVLIST_EXISTS(null, NULL) NVLIST_EXISTS(bool, BOOL) NVLIST_EXISTS(number, NUMBER) NVLIST_EXISTS(string, STRING) NVLIST_EXISTS(nvlist, NVLIST) NVLIST_EXISTS(binary, BINARY) NVLIST_EXISTS(bool_array, BOOL_ARRAY) NVLIST_EXISTS(number_array, NUMBER_ARRAY) NVLIST_EXISTS(string_array, STRING_ARRAY) NVLIST_EXISTS(nvlist_array, NVLIST_ARRAY) #ifndef _KERNEL NVLIST_EXISTS(descriptor, DESCRIPTOR) NVLIST_EXISTS(descriptor_array, DESCRIPTOR_ARRAY) #endif #undef NVLIST_EXISTS void nvlist_add_nvpair(nvlist_t *nvl, const nvpair_t *nvp) { nvpair_t *newnvp; NVPAIR_ASSERT(nvp); if (nvlist_error(nvl) != 0) { ERRNO_SET(nvlist_error(nvl)); return; } if ((nvl->nvl_flags & NV_FLAG_NO_UNIQUE) == 0) { if (nvlist_exists(nvl, nvpair_name(nvp))) { nvl->nvl_error = EEXIST; ERRNO_SET(nvlist_error(nvl)); return; } } newnvp = nvpair_clone(nvp); if (newnvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvlist_error(nvl)); return; } nvpair_insert(&nvl->nvl_head, newnvp, nvl); } void nvlist_add_stringf(nvlist_t *nvl, const char *name, const char *valuefmt, ...) 
{ va_list valueap; va_start(valueap, valuefmt); nvlist_add_stringv(nvl, name, valuefmt, valueap); va_end(valueap); } void nvlist_add_stringv(nvlist_t *nvl, const char *name, const char *valuefmt, va_list valueap) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_create_stringv(name, valuefmt, valueap); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } void nvlist_add_null(nvlist_t *nvl, const char *name) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_create_null(name); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } void nvlist_add_binary(nvlist_t *nvl, const char *name, const void *value, size_t size) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_create_binary(name, value, size); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } #define NVLIST_ADD(vtype, type) \ void \ nvlist_add_##type(nvlist_t *nvl, const char *name, vtype value) \ { \ nvpair_t *nvp; \ \ if (nvlist_error(nvl) != 0) { \ ERRNO_SET(nvlist_error(nvl)); \ return; \ } \ \ nvp = nvpair_create_##type(name, value); \ if (nvp == NULL) { \ nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); \ ERRNO_SET(nvl->nvl_error); \ } else { \ (void)nvlist_move_nvpair(nvl, nvp); \ } \ } NVLIST_ADD(bool, bool) NVLIST_ADD(uint64_t, number) NVLIST_ADD(const char *, string) NVLIST_ADD(const nvlist_t *, nvlist) #ifndef _KERNEL NVLIST_ADD(int, descriptor); #endif #undef NVLIST_ADD #define NVLIST_ADD_ARRAY(vtype, type) \ void \ nvlist_add_##type##_array(nvlist_t *nvl, const char *name, vtype value, \ size_t nitems) \ { \ nvpair_t *nvp; \ \ if (nvlist_error(nvl) != 0) { \ ERRNO_SET(nvlist_error(nvl)); \ return; \ } \ \ nvp = nvpair_create_##type##_array(name, value, nitems); \ if (nvp == NULL) { \ nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); \ ERRNO_SET(nvl->nvl_error); \ } else { \ (void)nvlist_move_nvpair(nvl, nvp); \ } \ } NVLIST_ADD_ARRAY(const bool *, bool) NVLIST_ADD_ARRAY(const uint64_t *, number) NVLIST_ADD_ARRAY(const char * const *, string) NVLIST_ADD_ARRAY(const nvlist_t * const *, nvlist) #ifndef _KERNEL NVLIST_ADD_ARRAY(const int *, descriptor) #endif #undef NVLIST_ADD_ARRAY bool nvlist_move_nvpair(nvlist_t *nvl, nvpair_t *nvp) { NVPAIR_ASSERT(nvp); PJDLOG_ASSERT(nvpair_nvlist(nvp) == NULL); if (nvlist_error(nvl) != 0) { nvpair_free(nvp); ERRNO_SET(nvlist_error(nvl)); return (false); } if ((nvl->nvl_flags & NV_FLAG_NO_UNIQUE) == 0) { if (nvlist_exists(nvl, nvpair_name(nvp))) { nvpair_free(nvp); nvl->nvl_error = EEXIST; ERRNO_SET(nvl->nvl_error); return (false); } } nvpair_insert(&nvl->nvl_head, nvp, nvl); return (true); } void nvlist_move_string(nvlist_t *nvl, const char *name, char *value) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { nv_free(value); ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_move_string(name, value); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } void nvlist_move_nvlist(nvlist_t *nvl, const char *name, nvlist_t *value) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { if (value != NULL && nvlist_get_nvpair_parent(value) != NULL) nvlist_destroy(value); ERRNO_SET(nvlist_error(nvl)); return; } nvp = 
nvpair_move_nvlist(name, value); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } #ifndef _KERNEL void nvlist_move_descriptor(nvlist_t *nvl, const char *name, int value) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { close(value); ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_move_descriptor(name, value); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } #endif void nvlist_move_binary(nvlist_t *nvl, const char *name, void *value, size_t size) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { nv_free(value); ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_move_binary(name, value, size); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } void nvlist_move_bool_array(nvlist_t *nvl, const char *name, bool *value, size_t nitems) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { nv_free(value); ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_move_bool_array(name, value, nitems); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } void nvlist_move_string_array(nvlist_t *nvl, const char *name, char **value, size_t nitems) { nvpair_t *nvp; size_t i; if (nvlist_error(nvl) != 0) { if (value != NULL) { for (i = 0; i < nitems; i++) nv_free(value[i]); nv_free(value); } ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_move_string_array(name, value, nitems); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } void nvlist_move_nvlist_array(nvlist_t *nvl, const char *name, nvlist_t **value, size_t nitems) { nvpair_t *nvp; size_t i; if (nvlist_error(nvl) != 0) { if (value != NULL) { for (i = 0; i < nitems; i++) { if (nvlist_get_pararr(value[i], NULL) == NULL) nvlist_destroy(value[i]); } } nv_free(value); ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_move_nvlist_array(name, value, nitems); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } void nvlist_move_number_array(nvlist_t *nvl, const char *name, uint64_t *value, size_t nitems) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { nv_free(value); ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_move_number_array(name, value, nitems); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } #ifndef _KERNEL void nvlist_move_descriptor_array(nvlist_t *nvl, const char *name, int *value, size_t nitems) { nvpair_t *nvp; size_t i; if (nvlist_error(nvl) != 0) { if (value != 0) { for (i = 0; i < nitems; i++) close(value[i]); nv_free(value); } ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_move_descriptor_array(name, value, nitems); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } #endif const nvpair_t * nvlist_get_nvpair(const nvlist_t *nvl, const char *name) { return (nvlist_find(nvl, NV_TYPE_NONE, name)); } #define NVLIST_GET(ftype, type, TYPE) \ ftype \ nvlist_get_##type(const nvlist_t *nvl, const char *name) \ { \ const nvpair_t *nvp; \ \ nvp = nvlist_find(nvl, NV_TYPE_##TYPE, name); \ if (nvp == NULL) \ nvlist_report_missing(NV_TYPE_##TYPE, name); \ 
return (nvpair_get_##type(nvp)); \ } NVLIST_GET(bool, bool, BOOL) NVLIST_GET(uint64_t, number, NUMBER) NVLIST_GET(const char *, string, STRING) NVLIST_GET(const nvlist_t *, nvlist, NVLIST) #ifndef _KERNEL NVLIST_GET(int, descriptor, DESCRIPTOR) #endif #undef NVLIST_GET const void * nvlist_get_binary(const nvlist_t *nvl, const char *name, size_t *sizep) { nvpair_t *nvp; nvp = nvlist_find(nvl, NV_TYPE_BINARY, name); if (nvp == NULL) nvlist_report_missing(NV_TYPE_BINARY, name); return (nvpair_get_binary(nvp, sizep)); } #define NVLIST_GET_ARRAY(ftype, type, TYPE) \ ftype \ nvlist_get_##type##_array(const nvlist_t *nvl, const char *name, \ size_t *nitems) \ { \ const nvpair_t *nvp; \ \ nvp = nvlist_find(nvl, NV_TYPE_##TYPE##_ARRAY, name); \ if (nvp == NULL) \ nvlist_report_missing(NV_TYPE_##TYPE##_ARRAY, name); \ return (nvpair_get_##type##_array(nvp, nitems)); \ } NVLIST_GET_ARRAY(const bool *, bool, BOOL) NVLIST_GET_ARRAY(const uint64_t *, number, NUMBER) NVLIST_GET_ARRAY(const char * const *, string, STRING) NVLIST_GET_ARRAY(const nvlist_t * const *, nvlist, NVLIST) #ifndef _KERNEL NVLIST_GET_ARRAY(const int *, descriptor, DESCRIPTOR) #endif #undef NVLIST_GET_ARRAY #define NVLIST_TAKE(ftype, type, TYPE) \ ftype \ nvlist_take_##type(nvlist_t *nvl, const char *name) \ { \ nvpair_t *nvp; \ ftype value; \ \ nvp = nvlist_find(nvl, NV_TYPE_##TYPE, name); \ if (nvp == NULL) \ nvlist_report_missing(NV_TYPE_##TYPE, name); \ value = (ftype)(intptr_t)nvpair_get_##type(nvp); \ nvlist_remove_nvpair(nvl, nvp); \ nvpair_free_structure(nvp); \ return (value); \ } NVLIST_TAKE(bool, bool, BOOL) NVLIST_TAKE(uint64_t, number, NUMBER) NVLIST_TAKE(char *, string, STRING) NVLIST_TAKE(nvlist_t *, nvlist, NVLIST) #ifndef _KERNEL NVLIST_TAKE(int, descriptor, DESCRIPTOR) #endif #undef NVLIST_TAKE void * nvlist_take_binary(nvlist_t *nvl, const char *name, size_t *sizep) { nvpair_t *nvp; void *value; nvp = nvlist_find(nvl, NV_TYPE_BINARY, name); if (nvp == NULL) nvlist_report_missing(NV_TYPE_BINARY, name); value = (void *)(intptr_t)nvpair_get_binary(nvp, sizep); nvlist_remove_nvpair(nvl, nvp); nvpair_free_structure(nvp); return (value); } #define NVLIST_TAKE_ARRAY(ftype, type, TYPE) \ ftype \ nvlist_take_##type##_array(nvlist_t *nvl, const char *name, \ size_t *nitems) \ { \ nvpair_t *nvp; \ ftype value; \ \ nvp = nvlist_find(nvl, NV_TYPE_##TYPE##_ARRAY, name); \ if (nvp == NULL) \ nvlist_report_missing(NV_TYPE_##TYPE##_ARRAY, name); \ value = (ftype)(intptr_t)nvpair_get_##type##_array(nvp, nitems);\ nvlist_remove_nvpair(nvl, nvp); \ nvpair_free_structure(nvp); \ return (value); \ } NVLIST_TAKE_ARRAY(bool *, bool, BOOL) NVLIST_TAKE_ARRAY(uint64_t *, number, NUMBER) NVLIST_TAKE_ARRAY(char **, string, STRING) NVLIST_TAKE_ARRAY(nvlist_t **, nvlist, NVLIST) #ifndef _KERNEL NVLIST_TAKE_ARRAY(int *, descriptor, DESCRIPTOR) #endif void nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp) { NVLIST_ASSERT(nvl); NVPAIR_ASSERT(nvp); PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl); nvpair_remove(&nvl->nvl_head, nvp, nvl); } void nvlist_free(nvlist_t *nvl, const char *name) { nvlist_free_type(nvl, name, NV_TYPE_NONE); } #define NVLIST_FREE(type, TYPE) \ void \ nvlist_free_##type(nvlist_t *nvl, const char *name) \ { \ \ nvlist_free_type(nvl, name, NV_TYPE_##TYPE); \ } NVLIST_FREE(null, NULL) NVLIST_FREE(bool, BOOL) NVLIST_FREE(number, NUMBER) NVLIST_FREE(string, STRING) NVLIST_FREE(nvlist, NVLIST) NVLIST_FREE(binary, BINARY) NVLIST_FREE(bool_array, BOOL_ARRAY) NVLIST_FREE(number_array, NUMBER_ARRAY) NVLIST_FREE(string_array, STRING_ARRAY) 
NVLIST_FREE(nvlist_array, NVLIST_ARRAY) #ifndef _KERNEL NVLIST_FREE(descriptor, DESCRIPTOR) NVLIST_FREE(descriptor_array, DESCRIPTOR_ARRAY) #endif #undef NVLIST_FREE void nvlist_free_nvpair(nvlist_t *nvl, nvpair_t *nvp) { NVLIST_ASSERT(nvl); NVPAIR_ASSERT(nvp); PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl); nvlist_remove_nvpair(nvl, nvp); nvpair_free(nvp); } Index: user/alc/PQ_LAUNDRY/sys/dev/ath/if_athioctl.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/ath/if_athioctl.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/ath/if_athioctl.h (revision 303642) @@ -1,452 +1,452 @@ /*- * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. * * $FreeBSD$ */ /* * Ioctl-related defintions for the Atheros Wireless LAN controller driver. 
*/ #ifndef _DEV_ATH_ATHIOCTL_H #define _DEV_ATH_ATHIOCTL_H struct ath_tx_aggr_stats { u_int32_t aggr_pkts[64]; u_int32_t aggr_single_pkt; u_int32_t aggr_nonbaw_pkt; u_int32_t aggr_aggr_pkt; u_int32_t aggr_baw_closed_single_pkt; u_int32_t aggr_low_hwq_single_pkt; u_int32_t aggr_sched_nopkt; u_int32_t aggr_rts_aggr_limited; }; struct ath_intr_stats { u_int32_t sync_intr[32]; }; struct ath_stats { u_int32_t ast_watchdog; /* device reset by watchdog */ u_int32_t ast_hardware; /* fatal hardware error interrupts */ u_int32_t ast_bmiss; /* beacon miss interrupts */ u_int32_t ast_bmiss_phantom;/* beacon miss interrupts */ u_int32_t ast_bstuck; /* beacon stuck interrupts */ u_int32_t ast_rxorn; /* rx overrun interrupts */ u_int32_t ast_rxeol; /* rx eol interrupts */ u_int32_t ast_txurn; /* tx underrun interrupts */ u_int32_t ast_mib; /* mib interrupts */ u_int32_t ast_intrcoal; /* interrupts coalesced */ u_int32_t ast_tx_packets; /* packet sent on the interface */ u_int32_t ast_tx_mgmt; /* management frames transmitted */ u_int32_t ast_tx_discard; /* frames discarded prior to assoc */ u_int32_t ast_tx_qstop; /* output stopped 'cuz no buffer */ u_int32_t ast_tx_encap; /* tx encapsulation failed */ u_int32_t ast_tx_nonode; /* tx failed 'cuz no node */ u_int32_t ast_tx_nombuf; /* tx failed 'cuz no mbuf */ u_int32_t ast_tx_nomcl; /* tx failed 'cuz no cluster */ u_int32_t ast_tx_linear; /* tx linearized to cluster */ u_int32_t ast_tx_nodata; /* tx discarded empty frame */ u_int32_t ast_tx_busdma; /* tx failed for dma resrcs */ u_int32_t ast_tx_xretries;/* tx failed 'cuz too many retries */ u_int32_t ast_tx_fifoerr; /* tx failed 'cuz FIFO underrun */ u_int32_t ast_tx_filtered;/* tx failed 'cuz xmit filtered */ u_int32_t ast_tx_shortretry;/* tx on-chip retries (short) */ u_int32_t ast_tx_longretry;/* tx on-chip retries (long) */ u_int32_t ast_tx_badrate; /* tx failed 'cuz bogus xmit rate */ u_int32_t ast_tx_noack; /* tx frames with no ack marked */ u_int32_t ast_tx_rts; /* tx frames with rts enabled */ u_int32_t ast_tx_cts; /* tx frames with cts enabled */ u_int32_t ast_tx_shortpre;/* tx frames with short preamble */ u_int32_t ast_tx_altrate; /* tx frames with alternate rate */ u_int32_t ast_tx_protect; /* tx frames with protection */ u_int32_t ast_tx_ctsburst;/* tx frames with cts and bursting */ u_int32_t ast_tx_ctsext; /* tx frames with cts extension */ u_int32_t ast_rx_nombuf; /* rx setup failed 'cuz no mbuf */ u_int32_t ast_rx_busdma; /* rx setup failed for dma resrcs */ u_int32_t ast_rx_orn; /* rx failed 'cuz of desc overrun */ u_int32_t ast_rx_crcerr; /* rx failed 'cuz of bad CRC */ u_int32_t ast_rx_fifoerr; /* rx failed 'cuz of FIFO overrun */ u_int32_t ast_rx_badcrypt;/* rx failed 'cuz decryption */ u_int32_t ast_rx_badmic; /* rx failed 'cuz MIC failure */ u_int32_t ast_rx_phyerr; /* rx failed 'cuz of PHY err */ u_int32_t ast_rx_phy[64]; /* rx PHY error per-code counts */ u_int32_t ast_rx_tooshort;/* rx discarded 'cuz frame too short */ u_int32_t ast_rx_toobig; /* rx discarded 'cuz frame too large */ u_int32_t ast_rx_packets; /* packet recv on the interface */ u_int32_t ast_rx_mgt; /* management frames received */ u_int32_t ast_rx_ctl; /* rx discarded 'cuz ctl frame */ int8_t ast_tx_rssi; /* tx rssi of last ack */ int8_t ast_rx_rssi; /* rx rssi from histogram */ u_int8_t ast_tx_rate; /* IEEE rate of last unicast tx */ u_int32_t ast_be_xmit; /* beacons transmitted */ u_int32_t ast_be_nombuf; /* beacon setup failed 'cuz no mbuf */ u_int32_t ast_per_cal; /* periodic calibration calls */ u_int32_t 
ast_per_calfail;/* periodic calibration failed */ u_int32_t ast_per_rfgain; /* periodic calibration rfgain reset */ u_int32_t ast_rate_calls; /* rate control checks */ u_int32_t ast_rate_raise; /* rate control raised xmit rate */ u_int32_t ast_rate_drop; /* rate control dropped xmit rate */ u_int32_t ast_ant_defswitch;/* rx/default antenna switches */ u_int32_t ast_ant_txswitch;/* tx antenna switches */ u_int32_t ast_ant_rx[8]; /* rx frames with antenna */ u_int32_t ast_ant_tx[8]; /* tx frames with antenna */ u_int32_t ast_cabq_xmit; /* cabq frames transmitted */ u_int32_t ast_cabq_busy; /* cabq found busy */ u_int32_t ast_tx_raw; /* tx frames through raw api */ u_int32_t ast_ff_txok; /* fast frames tx'd successfully */ u_int32_t ast_ff_txerr; /* fast frames tx'd w/ error */ u_int32_t ast_ff_rx; /* fast frames rx'd */ u_int32_t ast_ff_flush; /* fast frames flushed from staging q */ u_int32_t ast_tx_qfull; /* tx dropped 'cuz of queue limit */ int8_t ast_rx_noise; /* rx noise floor */ u_int32_t ast_tx_nobuf; /* tx dropped 'cuz no ath buffer */ u_int32_t ast_tdma_update;/* TDMA slot timing updates */ u_int32_t ast_tdma_timers;/* TDMA slot update set beacon timers */ u_int32_t ast_tdma_tsf; /* TDMA slot update set TSF */ u_int16_t ast_tdma_tsfadjp;/* TDMA slot adjust+ (usec, smoothed)*/ u_int16_t ast_tdma_tsfadjm;/* TDMA slot adjust- (usec, smoothed)*/ u_int32_t ast_tdma_ack; /* TDMA tx failed 'cuz ACK required */ u_int32_t ast_tx_raw_fail;/* raw tx failed 'cuz h/w down */ u_int32_t ast_tx_nofrag; /* tx dropped 'cuz no ath frag buffer */ u_int32_t ast_be_missed; /* missed beacons */ u_int32_t ast_ani_cal; /* ANI calibrations performed */ u_int32_t ast_rx_agg; /* number of aggregate frames RX'ed */ u_int32_t ast_rx_halfgi; /* RX half-GI */ u_int32_t ast_rx_2040; /* RX 40mhz frame */ u_int32_t ast_rx_pre_crc_err; /* RX pre-delimiter CRC error */ u_int32_t ast_rx_post_crc_err; /* RX post-delimiter CRC error */ u_int32_t ast_rx_decrypt_busy_err; /* RX decrypt engine busy error */ u_int32_t ast_rx_hi_rx_chain; u_int32_t ast_tx_htprotect; /* HT tx frames with protection */ u_int32_t ast_rx_hitqueueend; /* RX hit descr queue end */ u_int32_t ast_tx_timeout; /* Global TX timeout */ u_int32_t ast_tx_cst; /* Carrier sense timeout */ u_int32_t ast_tx_xtxop; /* tx exceeded TXOP */ u_int32_t ast_tx_timerexpired; /* tx exceeded TX_TIMER */ u_int32_t ast_tx_desccfgerr; /* tx desc cfg error */ u_int32_t ast_tx_swretries; /* software TX retries */ u_int32_t ast_tx_swretrymax; /* software TX retry max limit reach */ u_int32_t ast_tx_data_underrun; u_int32_t ast_tx_delim_underrun; u_int32_t ast_tx_aggr_failall; /* aggregate TX failed in its entirety */ u_int32_t ast_tx_getnobuf; u_int32_t ast_tx_getbusybuf; u_int32_t ast_tx_intr; u_int32_t ast_rx_intr; u_int32_t ast_tx_aggr_ok; /* aggregate TX ok */ u_int32_t ast_tx_aggr_fail; /* aggregate TX failed */ u_int32_t ast_tx_mcastq_overflow; /* multicast queue overflow */ u_int32_t ast_rx_keymiss; u_int32_t ast_tx_swfiltered; u_int32_t ast_tx_node_psq_overflow; u_int32_t ast_rx_stbc; /* RX STBC frame */ u_int32_t ast_tx_nodeq_overflow; /* node sw queue overflow */ u_int32_t ast_tx_ldpc; /* TX LDPC frame */ u_int32_t ast_tx_stbc; /* TX STBC frame */ u_int32_t ast_pad[10]; }; #define SIOCGATHSTATS _IOWR('i', 137, struct ifreq) #define SIOCZATHSTATS _IOWR('i', 139, struct ifreq) #define SIOCGATHAGSTATS _IOWR('i', 141, struct ifreq) struct ath_diag { char ad_name[IFNAMSIZ]; /* if name, e.g. 
"ath0" */ u_int16_t ad_id; #define ATH_DIAG_DYN 0x8000 /* allocate buffer in caller */ #define ATH_DIAG_IN 0x4000 /* copy in parameters */ #define ATH_DIAG_OUT 0x0000 /* copy out results (always) */ #define ATH_DIAG_ID 0x0fff u_int16_t ad_in_size; /* pack to fit, yech */ caddr_t ad_in_data; caddr_t ad_out_data; u_int ad_out_size; }; #define SIOCGATHDIAG _IOWR('i', 138, struct ath_diag) #define SIOCGATHPHYERR _IOWR('i', 140, struct ath_diag) /* * The rate control ioctl has to support multiple potential rate * control classes. For now, instead of trying to support an * abstraction for this in the API, let's just use a TLV * representation for the payload and let userspace sort it out. */ struct ath_rateioctl_tlv { uint16_t tlv_id; uint16_t tlv_len; /* length excluding TLV header */ }; /* * This is purely the six byte MAC address. */ #define ATH_RATE_TLV_MACADDR 0xaab0 /* * The rate control modules may decide to push a mapping table * of rix -> net80211 ratecode as part of the update. */ #define ATH_RATE_TLV_RATETABLE_NENTRIES 64 struct ath_rateioctl_rt { uint16_t nentries; uint16_t pad[1]; uint8_t ratecode[ATH_RATE_TLV_RATETABLE_NENTRIES]; }; #define ATH_RATE_TLV_RATETABLE 0xaab1 /* * This is the sample node statistics structure. * More in ath_rate/sample/sample.h. */ #define ATH_RATE_TLV_SAMPLENODE 0xaab2 struct ath_rateioctl { char if_name[IFNAMSIZ]; /* if name */ union { uint8_t macaddr[IEEE80211_ADDR_LEN]; uint64_t pad; } is_u; uint32_t len; caddr_t buf; }; #define SIOCGATHNODERATESTATS _IOWR('i', 149, struct ath_rateioctl) #define SIOCGATHRATESTATS _IOWR('i', 150, struct ath_rateioctl) /* * Radio capture format. */ #define ATH_RX_RADIOTAP_PRESENT_BASE ( \ (1 << IEEE80211_RADIOTAP_TSFT) | \ (1 << IEEE80211_RADIOTAP_FLAGS) | \ (1 << IEEE80211_RADIOTAP_RATE) | \ (1 << IEEE80211_RADIOTAP_ANTENNA) | \ (1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL) | \ (1 << IEEE80211_RADIOTAP_DBM_ANTNOISE) | \ (1 << IEEE80211_RADIOTAP_XCHANNEL) | \ 0) #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT #define ATH_RX_RADIOTAP_PRESENT \ (ATH_RX_RADIOTAP_PRESENT_BASE | \ (1 << IEEE80211_RADIOTAP_VENDOREXT) | \ (1 << IEEE80211_RADIOTAP_EXT) | \ 0) #else #define ATH_RX_RADIOTAP_PRESENT ATH_RX_RADIOTAP_PRESENT_BASE #endif /* ATH_ENABLE_RADIOTAP_PRESENT */ #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT /* * This is higher than the vendor bitmap used inside * the Atheros reference codebase. */ /* Bit 8 */ #define ATH_RADIOTAP_VENDOR_HEADER 8 /* * Using four chains makes all the fields in the * per-chain info header be 4-byte aligned. */ #define ATH_RADIOTAP_MAX_CHAINS 4 /* * AR9380 and later chips are 3x3, which requires * 5 EVM DWORDs in HT40 mode. */ #define ATH_RADIOTAP_MAX_EVM 5 /* * The vendor radiotap header data needs to be: * * + Aligned to a 4 byte address * + .. so all internal fields are 4 bytes aligned; * + .. and no 64 bit fields are allowed. * * So padding is required to ensure this is the case. * * Note that because of the lack of alignment with the * vendor header (6 bytes), the first field must be * two bytes so it can be accessed by alignment-strict * platform (eg MIPS.) 
*/ struct ath_radiotap_vendor_hdr { /* 30 bytes */ uint8_t vh_version; /* 1 */ uint8_t vh_rx_chainmask; /* 1 */ /* At this point it should be 4 byte aligned */ uint32_t evm[ATH_RADIOTAP_MAX_EVM]; /* 5 * 4 = 20 */ - uint8_t rssi_ctl[ATH_RADIOTAP_MAX_CHAINS]; /* 4 */ - uint8_t rssi_ext[ATH_RADIOTAP_MAX_CHAINS]; /* 4 */ + uint8_t rssi_ctl[ATH_RADIOTAP_MAX_CHAINS]; /* 4 * 4 = 16 */ + uint8_t rssi_ext[ATH_RADIOTAP_MAX_CHAINS]; /* 4 * 4 = 16 */ uint8_t vh_phyerr_code; /* Phy error code, or 0xff */ uint8_t vh_rs_status; /* RX status */ uint8_t vh_rssi; /* Raw RSSI */ uint8_t vh_flags; /* General flags */ #define ATH_VENDOR_PKT_RX 0x01 #define ATH_VENDOR_PKT_TX 0x02 #define ATH_VENDOR_PKT_RXPHYERR 0x04 #define ATH_VENDOR_PKT_ISAGGR 0x08 #define ATH_VENDOR_PKT_MOREAGGR 0x10 uint8_t vh_rx_hwrate; /* hardware RX ratecode */ uint8_t vh_rs_flags; /* RX HAL flags */ uint8_t vh_pad[2]; /* pad to DWORD boundary */ } __packed; #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ struct ath_rx_radiotap_header { struct ieee80211_radiotap_header wr_ihdr; #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT /* Vendor extension header bitmap */ uint32_t wr_ext_bitmap; /* 4 */ /* * This padding is needed because: * + the radiotap header is 8 bytes; * + the extension bitmap is 4 bytes; * + the tsf is 8 bytes, so it must start on an 8 byte * boundary. */ uint32_t wr_pad1; #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ /* Normal radiotap fields */ u_int64_t wr_tsf; u_int8_t wr_flags; u_int8_t wr_rate; int8_t wr_antsignal; int8_t wr_antnoise; u_int8_t wr_antenna; u_int8_t wr_pad[3]; u_int32_t wr_chan_flags; u_int16_t wr_chan_freq; u_int8_t wr_chan_ieee; int8_t wr_chan_maxpow; #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT /* * Vendor header section, as required by the * presence of the vendor extension bit and bitmap * entry. * * XXX This must be aligned to a 4 byte address? * XXX or 8 byte address? */ struct ieee80211_radiotap_vendor_header wr_vh; /* 6 bytes */ /* * Because of the lack of alignment enforced by the above * header, this vendor section won't be aligned in any * useful way. So, this will include a two-byte version * value which will force the structure to be 4-byte aligned. 
*/ struct ath_radiotap_vendor_hdr wr_v; #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ } __packed; #define ATH_TX_RADIOTAP_PRESENT ( \ (1 << IEEE80211_RADIOTAP_TSFT) | \ (1 << IEEE80211_RADIOTAP_FLAGS) | \ (1 << IEEE80211_RADIOTAP_RATE) | \ (1 << IEEE80211_RADIOTAP_DBM_TX_POWER) | \ (1 << IEEE80211_RADIOTAP_ANTENNA) | \ (1 << IEEE80211_RADIOTAP_XCHANNEL) | \ 0) struct ath_tx_radiotap_header { struct ieee80211_radiotap_header wt_ihdr; u_int64_t wt_tsf; u_int8_t wt_flags; u_int8_t wt_rate; u_int8_t wt_txpower; u_int8_t wt_antenna; u_int32_t wt_chan_flags; u_int16_t wt_chan_freq; u_int8_t wt_chan_ieee; int8_t wt_chan_maxpow; } __packed; /* * DFS ioctl commands */ #define DFS_SET_THRESH 2 #define DFS_GET_THRESH 3 #define DFS_RADARDETECTS 6 /* * DFS ioctl parameter types */ #define DFS_PARAM_FIRPWR 1 #define DFS_PARAM_RRSSI 2 #define DFS_PARAM_HEIGHT 3 #define DFS_PARAM_PRSSI 4 #define DFS_PARAM_INBAND 5 #define DFS_PARAM_NOL 6 /* XXX not used in FreeBSD */ #define DFS_PARAM_RELSTEP_EN 7 #define DFS_PARAM_RELSTEP 8 #define DFS_PARAM_RELPWR_EN 9 #define DFS_PARAM_RELPWR 10 #define DFS_PARAM_MAXLEN 11 #define DFS_PARAM_USEFIR128 12 #define DFS_PARAM_BLOCKRADAR 13 #define DFS_PARAM_MAXRSSI_EN 14 /* FreeBSD-specific start at 32 */ #define DFS_PARAM_ENABLE 32 #define DFS_PARAM_EN_EXTCH 33 /* * Spectral ioctl parameter types */ #define SPECTRAL_PARAM_FFT_PERIOD 1 #define SPECTRAL_PARAM_SS_PERIOD 2 #define SPECTRAL_PARAM_SS_COUNT 3 #define SPECTRAL_PARAM_SS_SHORT_RPT 4 #define SPECTRAL_PARAM_ENABLED 5 #define SPECTRAL_PARAM_ACTIVE 6 /* * Spectral control parameters */ #define SIOCGATHSPECTRAL _IOWR('i', 151, struct ath_diag) #define SPECTRAL_CONTROL_ENABLE 2 #define SPECTRAL_CONTROL_DISABLE 3 #define SPECTRAL_CONTROL_START 4 #define SPECTRAL_CONTROL_STOP 5 #define SPECTRAL_CONTROL_GET_PARAMS 6 #define SPECTRAL_CONTROL_SET_PARAMS 7 #define SPECTRAL_CONTROL_ENABLE_AT_RESET 8 #define SPECTRAL_CONTROL_DISABLE_AT_RESET 9 #endif /* _DEV_ATH_ATHIOCTL_H */ Index: user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api.c (revision 303642) @@ -1,538 +1,558 @@ -/* $NetBSD: cfe_api.c,v 1.5 2005/12/11 12:18:07 christos Exp $ */ -/* from: SiByte Id: cfe_api.c,v 1.16 2002/07/09 23:29:11 cgd Exp $ */ +/* from: Broadcom Id: cfe_api.c,v 1.18 2006/08/24 02:13:56 binh Exp $ */ /*- * Copyright 2000, 2001, 2002 * Broadcom Corporation. All rights reserved. * * This software is furnished under license and may be used and copied only * in accordance with the following terms and conditions. Subject to these * conditions, you may download, copy, install, use, modify and distribute * modified or unmodified copies of this software in source and/or binary * form. No title or ownership is transferred hereby. * * 1) Any source code used, modified or distributed must reproduce and * retain this copyright notice and list of conditions as they appear in * the source file. * * 2) No right is granted to use any trade name, trademark, or logo of * Broadcom Corporation. The "Broadcom Corporation" name may not be * used to endorse or promote products derived from this software * without the prior written permission of Broadcom Corporation. * * 3) THIS SOFTWARE IS PROVIDED "AS-IS" AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING BUT NOT LIMITED TO, ANY IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR * NON-INFRINGEMENT ARE DISCLAIMED. 
IN NO EVENT SHALL BROADCOM BE LIABLE * FOR ANY DAMAGES WHATSOEVER, AND IN PARTICULAR, BROADCOM SHALL NOT BE * LIABLE FOR DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE), EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* ********************************************************************* * * Broadcom Common Firmware Environment (CFE) * * Device Function stubs File: cfe_api.c * * This module contains device function stubs (small routines to * call the standard "iocb" interface entry point to CFE). * There should be one routine here per iocb function call. * * Authors: Mitch Lichtenberg, Chris Demetriou * ********************************************************************* */ #include __FBSDID("$FreeBSD$"); #include #include /* Cast from a native pointer to a cfe_xptr_t and back. */ #define XPTR_FROM_NATIVE(n) ((cfe_xptr_t) (intptr_t) (n)) #define NATIVE_FROM_XPTR(x) ((void *) (intptr_t) (x)) #ifdef CFE_API_IMPL_NAMESPACE #define cfe_iocb_dispatch(a) __cfe_iocb_dispatch(a) #endif int cfe_iocb_dispatch(cfe_xiocb_t *xiocb); #if defined(CFE_API_common) || defined(CFE_API_ALL) /* * Declare the dispatch function with args of "intptr_t". * This makes sure whatever model we're compiling in * puts the pointers in a single register. For example, * combining -mlong64 and -mips1 or -mips2 would lead to * trouble, since the handle and IOCB pointer will be * passed in two registers each, and CFE expects one. */ static int (*cfe_dispfunc)(intptr_t handle, intptr_t xiocb) = 0; static cfe_xuint_t cfe_handle = 0; int cfe_init(cfe_xuint_t handle, cfe_xuint_t ept) { cfe_dispfunc = NATIVE_FROM_XPTR(ept); cfe_handle = handle; return 0; } int cfe_iocb_dispatch(cfe_xiocb_t *xiocb) { if (!cfe_dispfunc) return -1; return (*cfe_dispfunc)((intptr_t)cfe_handle, (intptr_t)xiocb); } #endif /* CFE_API_common || CFE_API_ALL */ #if defined(CFE_API_close) || defined(CFE_API_ALL) int cfe_close(int handle) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_DEV_CLOSE; xiocb.xiocb_status = 0; xiocb.xiocb_handle = handle; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = 0; cfe_iocb_dispatch(&xiocb); return xiocb.xiocb_status; } #endif /* CFE_API_close || CFE_API_ALL */ #if defined(CFE_API_cpu_start) || defined(CFE_API_ALL) int cfe_cpu_start(int cpu, void (*fn)(void), long sp, long gp, long a1) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_FW_CPUCTL; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_cpuctl_t); xiocb.plist.xiocb_cpuctl.cpu_number = cpu; xiocb.plist.xiocb_cpuctl.cpu_command = CFE_CPU_CMD_START; xiocb.plist.xiocb_cpuctl.gp_val = gp; xiocb.plist.xiocb_cpuctl.sp_val = sp; xiocb.plist.xiocb_cpuctl.a1_val = a1; xiocb.plist.xiocb_cpuctl.start_addr = (long)fn; cfe_iocb_dispatch(&xiocb); return xiocb.xiocb_status; } #endif /* CFE_API_cpu_start || CFE_API_ALL */ #if defined(CFE_API_cpu_stop) || defined(CFE_API_ALL) int cfe_cpu_stop(int cpu) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_FW_CPUCTL; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_cpuctl_t); xiocb.plist.xiocb_cpuctl.cpu_number = cpu; xiocb.plist.xiocb_cpuctl.cpu_command = CFE_CPU_CMD_STOP; cfe_iocb_dispatch(&xiocb); return xiocb.xiocb_status; } #endif 
/* CFE_API_cpu_stop || CFE_API_ALL */ #if defined(CFE_API_enumenv) || defined(CFE_API_ALL) int cfe_enumenv(int idx, char *name, int namelen, char *val, int vallen) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_ENV_ENUM; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_envbuf_t); xiocb.plist.xiocb_envbuf.enum_idx = idx; xiocb.plist.xiocb_envbuf.name_ptr = XPTR_FROM_NATIVE(name); xiocb.plist.xiocb_envbuf.name_length = namelen; xiocb.plist.xiocb_envbuf.val_ptr = XPTR_FROM_NATIVE(val); xiocb.plist.xiocb_envbuf.val_length = vallen; cfe_iocb_dispatch(&xiocb); return xiocb.xiocb_status; } #endif /* CFE_API_enumenv || CFE_API_ALL */ + +#if defined(CFE_API_enumdev) || defined(CFE_API_ALL) +int +cfe_enumdev(int idx, char *name, int namelen) +{ + cfe_xiocb_t xiocb; + + xiocb.xiocb_fcode = CFE_CMD_DEV_ENUM; + xiocb.xiocb_status = 0; + xiocb.xiocb_handle = 0; + xiocb.xiocb_flags = 0; + xiocb.xiocb_psize = sizeof(xiocb_envbuf_t); + xiocb.plist.xiocb_envbuf.enum_idx = idx; + xiocb.plist.xiocb_envbuf.name_ptr = XPTR_FROM_NATIVE(name); + xiocb.plist.xiocb_envbuf.name_length = namelen; + + cfe_iocb_dispatch(&xiocb); + + return xiocb.xiocb_status; +} +#endif /* CFE_API_enumdev || CFE_API_ALL */ #if defined(CFE_API_enummem) || defined(CFE_API_ALL) int cfe_enummem(int idx, int flags, cfe_xuint_t *start, cfe_xuint_t *length, cfe_xuint_t *type) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_FW_MEMENUM; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = flags; xiocb.xiocb_psize = sizeof(xiocb_meminfo_t); xiocb.plist.xiocb_meminfo.mi_idx = idx; cfe_iocb_dispatch(&xiocb); if (xiocb.xiocb_status < 0) return xiocb.xiocb_status; *start = xiocb.plist.xiocb_meminfo.mi_addr; *length = xiocb.plist.xiocb_meminfo.mi_size; *type = xiocb.plist.xiocb_meminfo.mi_type; return 0; } #endif /* CFE_API_enummem || CFE_API_ALL */ #if defined(CFE_API_exit) || defined(CFE_API_ALL) int cfe_exit(int warm, int status) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_FW_RESTART; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = warm ? 
CFE_FLG_WARMSTART : 0; xiocb.xiocb_psize = sizeof(xiocb_exitstat_t); xiocb.plist.xiocb_exitstat.status = status; cfe_iocb_dispatch(&xiocb); return xiocb.xiocb_status; } #endif /* CFE_API_exit || CFE_API_ALL */ #if defined(CFE_API_flushcache) || defined(CFE_API_ALL) int cfe_flushcache(int flg) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_FW_FLUSHCACHE; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = flg; xiocb.xiocb_psize = 0; cfe_iocb_dispatch(&xiocb); return xiocb.xiocb_status; } #endif /* CFE_API_flushcache || CFE_API_ALL */ #if defined(CFE_API_getdevinfo) || defined(CFE_API_ALL) int cfe_getdevinfo(char *name) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_DEV_GETINFO; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_buffer_t); xiocb.plist.xiocb_buffer.buf_offset = 0; xiocb.plist.xiocb_buffer.buf_ptr = XPTR_FROM_NATIVE(name); xiocb.plist.xiocb_buffer.buf_length = cfe_strlen(name); cfe_iocb_dispatch(&xiocb); if (xiocb.xiocb_status < 0) return xiocb.xiocb_status; return xiocb.plist.xiocb_buffer.buf_devflags; } #endif /* CFE_API_getdevinfo || CFE_API_ALL */ #if defined(CFE_API_getenv) || defined(CFE_API_ALL) int cfe_getenv(char *name, char *dest, int destlen) { cfe_xiocb_t xiocb; *dest = 0; xiocb.xiocb_fcode = CFE_CMD_ENV_GET; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_envbuf_t); xiocb.plist.xiocb_envbuf.enum_idx = 0; xiocb.plist.xiocb_envbuf.name_ptr = XPTR_FROM_NATIVE(name); xiocb.plist.xiocb_envbuf.name_length = cfe_strlen(name); xiocb.plist.xiocb_envbuf.val_ptr = XPTR_FROM_NATIVE(dest); xiocb.plist.xiocb_envbuf.val_length = destlen; cfe_iocb_dispatch(&xiocb); return xiocb.xiocb_status; } #endif /* CFE_API_getenv || CFE_API_ALL */ #if defined(CFE_API_getfwinfo) || defined(CFE_API_ALL) int cfe_getfwinfo(cfe_fwinfo_t *info) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_FW_GETINFO; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_fwinfo_t); cfe_iocb_dispatch(&xiocb); if (xiocb.xiocb_status < 0) return xiocb.xiocb_status; info->fwi_version = xiocb.plist.xiocb_fwinfo.fwi_version; info->fwi_totalmem = xiocb.plist.xiocb_fwinfo.fwi_totalmem; info->fwi_flags = xiocb.plist.xiocb_fwinfo.fwi_flags; info->fwi_boardid = xiocb.plist.xiocb_fwinfo.fwi_boardid; info->fwi_bootarea_va = xiocb.plist.xiocb_fwinfo.fwi_bootarea_va; info->fwi_bootarea_pa = xiocb.plist.xiocb_fwinfo.fwi_bootarea_pa; info->fwi_bootarea_size = xiocb.plist.xiocb_fwinfo.fwi_bootarea_size; #if 0 info->fwi_reserved1 = xiocb.plist.xiocb_fwinfo.fwi_reserved1; info->fwi_reserved2 = xiocb.plist.xiocb_fwinfo.fwi_reserved2; info->fwi_reserved3 = xiocb.plist.xiocb_fwinfo.fwi_reserved3; #endif return 0; } #endif /* CFE_API_getfwinfo || CFE_API_ALL */ #if defined(CFE_API_getstdhandle) || defined(CFE_API_ALL) int cfe_getstdhandle(int flg) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_DEV_GETHANDLE; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = flg; xiocb.xiocb_psize = 0; cfe_iocb_dispatch(&xiocb); if (xiocb.xiocb_status < 0) return xiocb.xiocb_status; return xiocb.xiocb_handle; } #endif /* CFE_API_getstdhandle || CFE_API_ALL */ #if defined(CFE_API_getticks) || defined(CFE_API_ALL) int64_t #ifdef CFE_API_IMPL_NAMESPACE __cfe_getticks(void) #else cfe_getticks(void) #endif { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_FW_GETTIME; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = 
sizeof(xiocb_time_t); xiocb.plist.xiocb_time.ticks = 0; cfe_iocb_dispatch(&xiocb); return xiocb.plist.xiocb_time.ticks; } #endif /* CFE_API_getticks || CFE_API_ALL */ #if defined(CFE_API_inpstat) || defined(CFE_API_ALL) int cfe_inpstat(int handle) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_DEV_INPSTAT; xiocb.xiocb_status = 0; xiocb.xiocb_handle = handle; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_inpstat_t); xiocb.plist.xiocb_inpstat.inp_status = 0; cfe_iocb_dispatch(&xiocb); if (xiocb.xiocb_status < 0) return xiocb.xiocb_status; return xiocb.plist.xiocb_inpstat.inp_status; } #endif /* CFE_API_inpstat || CFE_API_ALL */ #if defined(CFE_API_ioctl) || defined(CFE_API_ALL) int cfe_ioctl(int handle, unsigned int ioctlnum, unsigned char *buffer, int length, int *retlen, cfe_xuint_t offset) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_DEV_IOCTL; xiocb.xiocb_status = 0; xiocb.xiocb_handle = handle; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_buffer_t); xiocb.plist.xiocb_buffer.buf_offset = offset; xiocb.plist.xiocb_buffer.buf_ioctlcmd = ioctlnum; xiocb.plist.xiocb_buffer.buf_ptr = XPTR_FROM_NATIVE(buffer); xiocb.plist.xiocb_buffer.buf_length = length; cfe_iocb_dispatch(&xiocb); if (retlen) *retlen = xiocb.plist.xiocb_buffer.buf_retlen; return xiocb.xiocb_status; } #endif /* CFE_API_ioctl || CFE_API_ALL */ #if defined(CFE_API_open) || defined(CFE_API_ALL) int cfe_open(char *name) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_DEV_OPEN; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_buffer_t); xiocb.plist.xiocb_buffer.buf_offset = 0; xiocb.plist.xiocb_buffer.buf_ptr = XPTR_FROM_NATIVE(name); xiocb.plist.xiocb_buffer.buf_length = cfe_strlen(name); cfe_iocb_dispatch(&xiocb); if (xiocb.xiocb_status < 0) return xiocb.xiocb_status; return xiocb.xiocb_handle; } #endif /* CFE_API_open || CFE_API_ALL */ #if defined(CFE_API_read) || defined(CFE_API_ALL) int cfe_read(int handle, unsigned char *buffer, int length) { return cfe_readblk(handle, 0, buffer, length); } #endif /* CFE_API_read || CFE_API_ALL */ #if defined(CFE_API_readblk) || defined(CFE_API_ALL) int cfe_readblk(int handle, cfe_xint_t offset, unsigned char *buffer, int length) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_DEV_READ; xiocb.xiocb_status = 0; xiocb.xiocb_handle = handle; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_buffer_t); xiocb.plist.xiocb_buffer.buf_offset = offset; xiocb.plist.xiocb_buffer.buf_ptr = XPTR_FROM_NATIVE(buffer); xiocb.plist.xiocb_buffer.buf_length = length; cfe_iocb_dispatch(&xiocb); if (xiocb.xiocb_status < 0) return xiocb.xiocb_status; return xiocb.plist.xiocb_buffer.buf_retlen; } #endif /* CFE_API_readblk || CFE_API_ALL */ #if defined(CFE_API_setenv) || defined(CFE_API_ALL) int cfe_setenv(char *name, char *val) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_ENV_SET; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_envbuf_t); xiocb.plist.xiocb_envbuf.enum_idx = 0; xiocb.plist.xiocb_envbuf.name_ptr = XPTR_FROM_NATIVE(name); xiocb.plist.xiocb_envbuf.name_length = cfe_strlen(name); xiocb.plist.xiocb_envbuf.val_ptr = XPTR_FROM_NATIVE(val); xiocb.plist.xiocb_envbuf.val_length = cfe_strlen(val); cfe_iocb_dispatch(&xiocb); return xiocb.xiocb_status; } #endif /* CFE_API_setenv || CFE_API_ALL */ #if (defined(CFE_API_strlen) || defined(CFE_API_ALL)) \ && !defined(CFE_API_STRLEN_CUSTOM) int cfe_strlen(char *name) { int count = 0; while (*name++) count++; return 
count; } #endif /* CFE_API_strlen || CFE_API_ALL */ #if defined(CFE_API_write) || defined(CFE_API_ALL) int cfe_write(int handle, unsigned char *buffer, int length) { return cfe_writeblk(handle, 0, buffer, length); } #endif /* CFE_API_write || CFE_API_ALL */ #if defined(CFE_API_writeblk) || defined(CFE_API_ALL) int cfe_writeblk(int handle, cfe_xint_t offset, unsigned char *buffer, int length) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_DEV_WRITE; xiocb.xiocb_status = 0; xiocb.xiocb_handle = handle; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_buffer_t); xiocb.plist.xiocb_buffer.buf_offset = offset; xiocb.plist.xiocb_buffer.buf_ptr = XPTR_FROM_NATIVE(buffer); xiocb.plist.xiocb_buffer.buf_length = length; cfe_iocb_dispatch(&xiocb); if (xiocb.xiocb_status < 0) return xiocb.xiocb_status; return xiocb.plist.xiocb_buffer.buf_retlen; } #endif /* CFE_API_writeblk || CFE_API_ALL */ Index: user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api.h (revision 303642) @@ -1,199 +1,200 @@ -/* $NetBSD: cfe_api.h,v 1.3 2003/02/07 17:38:48 cgd Exp $ */ -/* from: SiByte Id: cfe_api.h,v 1.29 2002/07/09 23:29:11 cgd Exp $ */ +/* from: Broadcom Id: cfe_api.h,v 1.31 2006/08/24 02:13:56 binh Exp $ */ /*- * Copyright 2000, 2001, 2002 * Broadcom Corporation. All rights reserved. * * This software is furnished under license and may be used and copied only * in accordance with the following terms and conditions. Subject to these * conditions, you may download, copy, install, use, modify and distribute * modified or unmodified copies of this software in source and/or binary * form. No title or ownership is transferred hereby. * * 1) Any source code used, modified or distributed must reproduce and * retain this copyright notice and list of conditions as they appear in * the source file. * * 2) No right is granted to use any trade name, trademark, or logo of * Broadcom Corporation. The "Broadcom Corporation" name may not be * used to endorse or promote products derived from this software * without the prior written permission of Broadcom Corporation. * * 3) THIS SOFTWARE IS PROVIDED "AS-IS" AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING BUT NOT LIMITED TO, ANY IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL BROADCOM BE LIABLE * FOR ANY DAMAGES WHATSOEVER, AND IN PARTICULAR, BROADCOM SHALL NOT BE * LIABLE FOR DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE), EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* ********************************************************************* * * Broadcom Common Firmware Environment (CFE) * * Device function prototypes File: cfe_api.h * * This file contains declarations for doing callbacks to * cfe from an application. It should be the only header * needed by the application to use this library * * Authors: Mitch Lichtenberg, Chris Demetriou * ********************************************************************* */ #ifndef CFE_API_H #define CFE_API_H /* * Apply customizations here for different OSes. 
These need to: * * typedef uint64_t, int64_t, intptr_t, uintptr_t. * * define cfe_strlen() if use of an existing function is desired. * * define CFE_API_IMPL_NAMESPACE if API functions are to use * names in the implementation namespace. * Also, optionally, if the build environment does not do so automatically, * CFE_API_* can be defined here as desired. */ /* Begin customization. */ #include /* All of the typedefs. */ #include /* strlen() prototype. */ #define CFE_API_ALL #define cfe_strlen(x) strlen(x) /* End customization. */ /* ********************************************************************* * Constants ********************************************************************* */ /* Seal indicating CFE's presence, passed to user program. */ #define CFE_EPTSEAL 0x43464531 #define CFE_MI_RESERVED 0 /* memory is reserved, do not use */ #define CFE_MI_AVAILABLE 1 /* memory is available */ #define CFE_FLG_WARMSTART 0x00000001 #define CFE_FLG_FULL_ARENA 0x00000001 #define CFE_FLG_ENV_PERMANENT 0x00000001 #define CFE_CPU_CMD_START 1 #define CFE_CPU_CMD_STOP 0 #define CFE_STDHANDLE_CONSOLE 0 #define CFE_DEV_NETWORK 1 #define CFE_DEV_DISK 2 #define CFE_DEV_FLASH 3 #define CFE_DEV_SERIAL 4 #define CFE_DEV_CPU 5 #define CFE_DEV_NVRAM 6 #define CFE_DEV_CLOCK 7 #define CFE_DEV_OTHER 8 #define CFE_DEV_MASK 0x0F #define CFE_CACHE_FLUSH_D 1 #define CFE_CACHE_INVAL_I 2 #define CFE_CACHE_INVAL_D 4 #define CFE_CACHE_INVAL_L2 8 #define CFE_FWI_64BIT 0x00000001 #define CFE_FWI_32BIT 0x00000002 #define CFE_FWI_RELOC 0x00000004 #define CFE_FWI_UNCACHED 0x00000008 #define CFE_FWI_MULTICPU 0x00000010 #define CFE_FWI_FUNCSIM 0x00000020 #define CFE_FWI_RTLSIM 0x00000040 typedef struct { int64_t fwi_version; /* major, minor, eco version */ int64_t fwi_totalmem; /* total installed mem */ int64_t fwi_flags; /* various flags */ int64_t fwi_boardid; /* board ID */ int64_t fwi_bootarea_va; /* VA of boot area */ int64_t fwi_bootarea_pa; /* PA of boot area */ int64_t fwi_bootarea_size; /* size of boot area */ } cfe_fwinfo_t; /* * cfe_strlen is handled specially: If already defined, it has been * overridden in this environment with a standard strlen-like function. */ #ifdef cfe_strlen # define CFE_API_STRLEN_CUSTOM #else # ifdef CFE_API_IMPL_NAMESPACE # define cfe_strlen(a) __cfe_strlen(a) # endif int cfe_strlen(char *name); #endif /* * Defines and prototypes for functions which take no arguments. */ #ifdef CFE_API_IMPL_NAMESPACE int64_t __cfe_getticks(void); #define cfe_getticks() __cfe_getticks() #else int64_t cfe_getticks(void); #endif /* * Defines and prototypes for the rest of the functions. 
*/ #ifdef CFE_API_IMPL_NAMESPACE #define cfe_close(a) __cfe_close(a) #define cfe_cpu_start(a,b,c,d,e) __cfe_cpu_start(a,b,c,d,e) #define cfe_cpu_stop(a) __cfe_cpu_stop(a) #define cfe_enumenv(a,b,d,e,f) __cfe_enumenv(a,b,d,e,f) +#define cfe_enumdev(a,b,c) __cfe_enumdev(a,b,c) #define cfe_enummem(a,b,c,d,e) __cfe_enummem(a,b,c,d,e) #define cfe_exit(a,b) __cfe_exit(a,b) #define cfe_flushcache(a) __cfe_cacheflush(a) #define cfe_getdevinfo(a) __cfe_getdevinfo(a) #define cfe_getenv(a,b,c) __cfe_getenv(a,b,c) #define cfe_getfwinfo(a) __cfe_getfwinfo(a) #define cfe_getstdhandle(a) __cfe_getstdhandle(a) #define cfe_init(a,b) __cfe_init(a,b) #define cfe_inpstat(a) __cfe_inpstat(a) #define cfe_ioctl(a,b,c,d,e,f) __cfe_ioctl(a,b,c,d,e,f) #define cfe_open(a) __cfe_open(a) #define cfe_read(a,b,c) __cfe_read(a,b,c) #define cfe_readblk(a,b,c,d) __cfe_readblk(a,b,c,d) #define cfe_setenv(a,b) __cfe_setenv(a,b) #define cfe_write(a,b,c) __cfe_write(a,b,c) #define cfe_writeblk(a,b,c,d) __cfe_writeblk(a,b,c,d) #endif /* CFE_API_IMPL_NAMESPACE */ int cfe_close(int handle); int cfe_cpu_start(int cpu, void (*fn)(void), long sp, long gp, long a1); int cfe_cpu_stop(int cpu); int cfe_enumenv(int idx, char *name, int namelen, char *val, int vallen); +int cfe_enumdev(int idx, char *name, int namelen); int cfe_enummem(int idx, int flags, uint64_t *start, uint64_t *length, uint64_t *type); int cfe_exit(int warm,int status); int cfe_flushcache(int flg); int cfe_getdevinfo(char *name); int cfe_getenv(char *name, char *dest, int destlen); int cfe_getfwinfo(cfe_fwinfo_t *info); int cfe_getstdhandle(int flg); int cfe_init(uint64_t handle,uint64_t ept); int cfe_inpstat(int handle); int cfe_ioctl(int handle, unsigned int ioctlnum, unsigned char *buffer, int length, int *retlen, uint64_t offset); int cfe_open(char *name); int cfe_read(int handle, unsigned char *buffer, int length); int cfe_readblk(int handle, int64_t offset, unsigned char *buffer, int length); int cfe_setenv(char *name, char *val); int cfe_write(int handle, unsigned char *buffer, int length); int cfe_writeblk(int handle, int64_t offset, unsigned char *buffer, int length); #endif /* CFE_API_H */ Index: user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api_int.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api_int.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api_int.h (revision 303642) @@ -1,171 +1,170 @@ -/* $NetBSD: cfe_api_int.h,v 1.2 2003/02/07 17:38:48 cgd Exp $ */ -/* from: SiByte Id: cfe_api_int.h,v 1.21 2002/07/09 23:29:11 cgd Exp $ */ +/* from: Broadcom Id: cfe_api_int.h,v 1.22 2003/02/07 17:27:56 cgd Exp $ */ /*- * Copyright 2000, 2001, 2002 * Broadcom Corporation. All rights reserved. * * This software is furnished under license and may be used and copied only * in accordance with the following terms and conditions. Subject to these * conditions, you may download, copy, install, use, modify and distribute * modified or unmodified copies of this software in source and/or binary * form. No title or ownership is transferred hereby. * * 1) Any source code used, modified or distributed must reproduce and * retain this copyright notice and list of conditions as they appear in * the source file. * * 2) No right is granted to use any trade name, trademark, or logo of * Broadcom Corporation. The "Broadcom Corporation" name may not be * used to endorse or promote products derived from this software * without the prior written permission of Broadcom Corporation. 
* * 3) THIS SOFTWARE IS PROVIDED "AS-IS" AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING BUT NOT LIMITED TO, ANY IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL BROADCOM BE LIABLE * FOR ANY DAMAGES WHATSOEVER, AND IN PARTICULAR, BROADCOM SHALL NOT BE * LIABLE FOR DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE), EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* ********************************************************************* * * Broadcom Common Firmware Environment (CFE) * * Device function prototypes File: cfe_api_int.h * * This header defines all internal types and macros for the * library. This is stuff that's not exported to an app * using the library. * * Authors: Mitch Lichtenberg, Chris Demetriou * ********************************************************************* */ #ifndef CFE_API_INT_H #define CFE_API_INT_H /* ********************************************************************* * Constants ********************************************************************* */ #define CFE_CMD_FW_GETINFO 0 #define CFE_CMD_FW_RESTART 1 #define CFE_CMD_FW_BOOT 2 #define CFE_CMD_FW_CPUCTL 3 #define CFE_CMD_FW_GETTIME 4 #define CFE_CMD_FW_MEMENUM 5 #define CFE_CMD_FW_FLUSHCACHE 6 #define CFE_CMD_DEV_GETHANDLE 9 #define CFE_CMD_DEV_ENUM 10 #define CFE_CMD_DEV_OPEN 11 #define CFE_CMD_DEV_INPSTAT 12 #define CFE_CMD_DEV_READ 13 #define CFE_CMD_DEV_WRITE 14 #define CFE_CMD_DEV_IOCTL 15 #define CFE_CMD_DEV_CLOSE 16 #define CFE_CMD_DEV_GETINFO 17 #define CFE_CMD_ENV_ENUM 20 #define CFE_CMD_ENV_GET 22 #define CFE_CMD_ENV_SET 23 #define CFE_CMD_ENV_DEL 24 #define CFE_CMD_MAX 32 #define CFE_CMD_VENDOR_USE 0x8000 /* codes above this are for customer use */ /* ********************************************************************* * Structures ********************************************************************* */ typedef uint64_t cfe_xuint_t; typedef int64_t cfe_xint_t; typedef int64_t cfe_xptr_t; typedef struct xiocb_buffer_s { cfe_xuint_t buf_offset; /* offset on device (bytes) */ cfe_xptr_t buf_ptr; /* pointer to a buffer */ cfe_xuint_t buf_length; /* length of this buffer */ cfe_xuint_t buf_retlen; /* returned length (for read ops) */ cfe_xuint_t buf_ioctlcmd; /* IOCTL command (used only for IOCTLs) */ } xiocb_buffer_t; #define buf_devflags buf_ioctlcmd /* returned device info flags */ typedef struct xiocb_inpstat_s { cfe_xuint_t inp_status; /* 1 means input available */ } xiocb_inpstat_t; typedef struct xiocb_envbuf_s { cfe_xint_t enum_idx; /* 0-based enumeration index */ cfe_xptr_t name_ptr; /* name string buffer */ cfe_xint_t name_length; /* size of name buffer */ cfe_xptr_t val_ptr; /* value string buffer */ cfe_xint_t val_length; /* size of value string buffer */ } xiocb_envbuf_t; typedef struct xiocb_cpuctl_s { cfe_xuint_t cpu_number; /* cpu number to control */ cfe_xuint_t cpu_command; /* command to issue to CPU */ cfe_xuint_t start_addr; /* CPU start address */ cfe_xuint_t gp_val; /* starting GP value */ cfe_xuint_t sp_val; /* starting SP value */ cfe_xuint_t a1_val; /* starting A1 value */ } xiocb_cpuctl_t; typedef struct xiocb_time_s { cfe_xint_t ticks; /* current time in ticks */ } 
xiocb_time_t; typedef struct xiocb_exitstat_s { cfe_xint_t status; } xiocb_exitstat_t; typedef struct xiocb_meminfo_s { cfe_xint_t mi_idx; /* 0-based enumeration index */ cfe_xint_t mi_type; /* type of memory block */ cfe_xuint_t mi_addr; /* physical start address */ cfe_xuint_t mi_size; /* block size */ } xiocb_meminfo_t; typedef struct xiocb_fwinfo_s { cfe_xint_t fwi_version; /* major, minor, eco version */ cfe_xint_t fwi_totalmem; /* total installed mem */ cfe_xint_t fwi_flags; /* various flags */ cfe_xint_t fwi_boardid; /* board ID */ cfe_xint_t fwi_bootarea_va; /* VA of boot area */ cfe_xint_t fwi_bootarea_pa; /* PA of boot area */ cfe_xint_t fwi_bootarea_size; /* size of boot area */ cfe_xint_t fwi_reserved1; cfe_xint_t fwi_reserved2; cfe_xint_t fwi_reserved3; } xiocb_fwinfo_t; typedef struct cfe_xiocb_s { cfe_xuint_t xiocb_fcode; /* IOCB function code */ cfe_xint_t xiocb_status; /* return status */ cfe_xint_t xiocb_handle; /* file/device handle */ cfe_xuint_t xiocb_flags; /* flags for this IOCB */ cfe_xuint_t xiocb_psize; /* size of parameter list */ union { xiocb_buffer_t xiocb_buffer; /* buffer parameters */ xiocb_inpstat_t xiocb_inpstat; /* input status parameters */ xiocb_envbuf_t xiocb_envbuf; /* environment function parameters */ xiocb_cpuctl_t xiocb_cpuctl; /* CPU control parameters */ xiocb_time_t xiocb_time; /* timer parameters */ xiocb_meminfo_t xiocb_meminfo; /* memory arena info parameters */ xiocb_fwinfo_t xiocb_fwinfo; /* firmware information */ xiocb_exitstat_t xiocb_exitstat; /* Exit Status */ } plist; } cfe_xiocb_t; #endif /* CFE_API_INT_H */ Index: user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_error.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_error.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_error.h (revision 303642) @@ -1,104 +1,103 @@ -/* $NetBSD: cfe_error.h,v 1.2 2003/02/07 17:38:48 cgd Exp $ */ -/* from: SiByte Id: cfe_error.h,v 1.2 2002/07/09 19:37:52 cgd Exp $ */ +/* from: Broadcom Id: cfe_error.h,v 1.3 2003/02/07 17:27:56 cgd Exp $ */ /*- * Copyright 2000, 2001, 2002 * Broadcom Corporation. All rights reserved. * * This software is furnished under license and may be used and copied only * in accordance with the following terms and conditions. Subject to these * conditions, you may download, copy, install, use, modify and distribute * modified or unmodified copies of this software in source and/or binary * form. No title or ownership is transferred hereby. * * 1) Any source code used, modified or distributed must reproduce and * retain this copyright notice and list of conditions as they appear in * the source file. * * 2) No right is granted to use any trade name, trademark, or logo of * Broadcom Corporation. The "Broadcom Corporation" name may not be * used to endorse or promote products derived from this software * without the prior written permission of Broadcom Corporation. * * 3) THIS SOFTWARE IS PROVIDED "AS-IS" AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING BUT NOT LIMITED TO, ANY IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR * NON-INFRINGEMENT ARE DISCLAIMED. 
IN NO EVENT SHALL BROADCOM BE LIABLE * FOR ANY DAMAGES WHATSOEVER, AND IN PARTICULAR, BROADCOM SHALL NOT BE * LIABLE FOR DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE), EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* ********************************************************************* * * Broadcom Common Firmware Environment (CFE) * * Error codes File: cfe_error.h * * CFE's global error code list is here. * * Author: Mitch Lichtenberg * ********************************************************************* */ #define CFE_OK 0 #define CFE_ERR -1 /* generic error */ #define CFE_ERR_INV_COMMAND -2 #define CFE_ERR_EOF -3 #define CFE_ERR_IOERR -4 #define CFE_ERR_NOMEM -5 #define CFE_ERR_DEVNOTFOUND -6 #define CFE_ERR_DEVOPEN -7 #define CFE_ERR_INV_PARAM -8 #define CFE_ERR_ENVNOTFOUND -9 #define CFE_ERR_ENVREADONLY -10 #define CFE_ERR_NOTELF -11 #define CFE_ERR_NOT32BIT -12 #define CFE_ERR_WRONGENDIAN -13 #define CFE_ERR_BADELFVERS -14 #define CFE_ERR_NOTMIPS -15 #define CFE_ERR_BADELFFMT -16 #define CFE_ERR_BADADDR -17 #define CFE_ERR_FILENOTFOUND -18 #define CFE_ERR_UNSUPPORTED -19 #define CFE_ERR_HOSTUNKNOWN -20 #define CFE_ERR_TIMEOUT -21 #define CFE_ERR_PROTOCOLERR -22 #define CFE_ERR_NETDOWN -23 #define CFE_ERR_NONAMESERVER -24 #define CFE_ERR_NOHANDLES -25 #define CFE_ERR_ALREADYBOUND -26 #define CFE_ERR_CANNOTSET -27 #define CFE_ERR_NOMORE -28 #define CFE_ERR_BADFILESYS -29 #define CFE_ERR_FSNOTAVAIL -30 #define CFE_ERR_INVBOOTBLOCK -31 #define CFE_ERR_WRONGDEVTYPE -32 #define CFE_ERR_BBCHECKSUM -33 #define CFE_ERR_BOOTPROGCHKSUM -34 #define CFE_ERR_LDRNOTAVAIL -35 #define CFE_ERR_NOTREADY -36 #define CFE_ERR_GETMEM -37 #define CFE_ERR_SETMEM -38 #define CFE_ERR_NOTCONN -39 #define CFE_ERR_ADDRINUSE -40 Index: user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_ioctl.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_ioctl.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_ioctl.h (revision 303642) @@ -1,75 +1,166 @@ -/* $NetBSD: cfe_ioctl.h,v 1.2 2003/02/07 17:52:08 cgd Exp $ */ - /*- - * Copyright 2000, 2001 + * Copyright 2000, 2001, 2002, 2003 * Broadcom Corporation. All rights reserved. * - * This software is furnished under license and may be used and copied only - * in accordance with the following terms and conditions. Subject to these - * conditions, you may download, copy, install, use, modify and distribute - * modified or unmodified copies of this software in source and/or binary - * form. No title or ownership is transferred hereby. + * This software is furnished under license and may be used and + * copied only in accordance with the following terms and + * conditions. Subject to these conditions, you may download, + * copy, install, use, modify and distribute modified or unmodified + * copies of this software in source and/or binary form. No title + * or ownership is transferred hereby. * * 1) Any source code used, modified or distributed must reproduce and * retain this copyright notice and list of conditions as they appear in * the source file. * * 2) No right is granted to use any trade name, trademark, or logo of * Broadcom Corporation. 
The "Broadcom Corporation" name may not be * used to endorse or promote products derived from this software * without the prior written permission of Broadcom Corporation. * * 3) THIS SOFTWARE IS PROVIDED "AS-IS" AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING BUT NOT LIMITED TO, ANY IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL BROADCOM BE LIABLE * FOR ANY DAMAGES WHATSOEVER, AND IN PARTICULAR, BROADCOM SHALL NOT BE * LIABLE FOR DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE), EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* ********************************************************************* * Broadcom Common Firmware Environment (CFE) * * IOCTL definitions File: cfe_ioctl.h * * IOCTL function numbers and I/O data structures. * - * Author: Mitch Lichtenberg (mpl@broadcom.com) + * Author: Mitch Lichtenberg * ********************************************************************* */ /* ********************************************************************* * NVFAM and FLASH stuff ********************************************************************* */ #define IOCTL_NVRAM_GETINFO 1 /* return nvram_info_t */ #define IOCTL_NVRAM_ERASE 2 /* erase sector containing nvram_info_t area */ #define IOCTL_FLASH_ERASE_SECTOR 3 /* erase an arbitrary sector */ -#define IOCTL_FLASH_ERASE_ALL 4 /* Erase the entire flash */ +#define IOCTL_FLASH_ERASE_ALL 4 /* Erase the entire flash */ +#define IOCTL_FLASH_WRITE_ALL 5 /* write entire flash */ +#define IOCTL_FLASH_GETINFO 6 /* get flash device info */ +#define IOCTL_FLASH_GETSECTORS 7 /* get sector information */ +#define IOCTL_FLASH_ERASE_RANGE 8 /* erase range of bytes */ +#define IOCTL_NVRAM_UNLOCK 9 /* allow r/w beyond logical end of device */ +#define IOCTL_FLASH_PROTECT_RANGE 10 /* Protect a group of sectors */ +#define IOCTL_FLASH_UNPROTECT_RANGE 11 /* unprotect a group of sectors */ +#define IOCTL_FLASH_DATA_WIDTH_MODE 12 /* switch flash and gen bus to support 8 or 16-bit mode I/Os */ +#define IOCTL_FLASH_BURST_MODE 13 /* configure gen bus for burst mode */ +typedef struct flash_range_s { + unsigned int range_base; + unsigned int range_length; +} flash_range_t; + +typedef struct flash_info_s { + unsigned long long flash_base; /* flash physical base address */ + unsigned int flash_size; /* available device size in bytes */ + unsigned int flash_type; /* type, from FLASH_TYPE below */ + unsigned int flash_flags; /* Various flags (FLASH_FLAG_xxx) */ +} flash_info_t; + +typedef struct flash_sector_s { + int flash_sector_idx; + int flash_sector_status; + unsigned int flash_sector_offset; + unsigned int flash_sector_size; +} flash_sector_t; + +#define FLASH_SECTOR_OK 0 +#define FLASH_SECTOR_INVALID -1 + +#define FLASH_TYPE_UNKNOWN 0 /* not sure what kind of flash */ +#define FLASH_TYPE_SRAM 1 /* not flash: it's SRAM */ +#define FLASH_TYPE_ROM 2 /* not flash: it's ROM */ +#define FLASH_TYPE_FLASH 3 /* it's flash memory of some sort */ + +#define FLASH_FLAG_NOERASE 1 /* Byte-range writes supported, + Erasing is not necessary */ + typedef struct nvram_info_s { - int nvram_offset; /* offset of environment area */ - int nvram_size; /* size of environment area */ 
- int nvram_eraseflg; /* true if we need to erase first */ + int nvram_offset; /* offset of environment area */ + int nvram_size; /* size of environment area */ + int nvram_eraseflg; /* true if we need to erase first */ } nvram_info_t; /* ********************************************************************* * Ethernet stuff ********************************************************************* */ -#define IOCTL_ETHER_GETHWADDR 1 +#define IOCTL_ETHER_GETHWADDR 1 /* Get hardware address (6bytes) */ +#define IOCTL_ETHER_SETHWADDR 2 /* Set hardware address (6bytes) */ +#define IOCTL_ETHER_GETSPEED 3 /* Get Speed and Media (int) */ +#define IOCTL_ETHER_SETSPEED 4 /* Set Speed and Media (int) */ +#define IOCTL_ETHER_GETLINK 5 /* get link status (int) */ +#define IOCTL_ETHER_GETLOOPBACK 7 /* get loopback state */ +#define IOCTL_ETHER_SETLOOPBACK 8 /* set loopback state */ +#define IOCTL_ETHER_SETPACKETFIFO 9 /* set packet fifo mode (int) */ +#define IOCTL_ETHER_SETSTROBESIG 10 /* set strobe signal (int) */ +#define ETHER_LOOPBACK_OFF 0 /* no loopback */ +#define ETHER_LOOPBACK_INT 1 /* Internal loopback */ +#define ETHER_LOOPBACK_EXT 2 /* External loopback (through PHY) */ + +#define ETHER_SPEED_AUTO 0 /* Auto detect */ +#define ETHER_SPEED_UNKNOWN 0 /* Speed not known (on link status) */ +#define ETHER_SPEED_10HDX 1 /* 10MB hdx and fdx */ +#define ETHER_SPEED_10FDX 2 +#define ETHER_SPEED_100HDX 3 /* 100MB hdx and fdx */ +#define ETHER_SPEED_100FDX 4 +#define ETHER_SPEED_1000HDX 5 /* 1000MB hdx and fdx */ +#define ETHER_SPEED_1000FDX 6 + +#define ETHER_FIFO_8 0 /* 8-bit packet fifo mode */ +#define ETHER_FIFO_16 1 /* 16-bit packet fifo mode */ +#define ETHER_ETHER 2 /* Standard ethernet mode */ + +#define ETHER_STROBE_GMII 0 /* GMII style strobe signal */ +#define ETHER_STROBE_ENCODED 1 /* Encoded */ +#define ETHER_STROBE_SOP 2 /* SOP flagged. Only in 8-bit mode*/ +#define ETHER_STROBE_EOP 3 /* EOP flagged. Only in 8-bit mode*/ + /* ********************************************************************* + * Serial Ports + ********************************************************************* */ + +#define IOCTL_SERIAL_SETSPEED 1 /* get baud rate (int) */ +#define IOCTL_SERIAL_GETSPEED 2 /* set baud rate (int) */ +#define IOCTL_SERIAL_SETFLOW 3 /* Set Flow Control */ +#define IOCTL_SERIAL_GETFLOW 4 /* Get Flow Control */ + +#define SERIAL_FLOW_NONE 0 /* no flow control */ +#define SERIAL_FLOW_SOFTWARE 1 /* software flow control (not impl) */ +#define SERIAL_FLOW_HARDWARE 2 /* hardware flow control */ + +/* ********************************************************************* * Block device stuff ********************************************************************* */ -#define IOCTL_BLOCK_GETBLOCKSIZE 1 -#define IOCTL_BLOCK_GETTOTALBLOCKS 2 +#define IOCTL_BLOCK_GETBLOCKSIZE 1 /* get block size (int) */ +#define IOCTL_BLOCK_GETTOTALBLOCKS 2 /* get total bocks (long long) */ +#define IOCTL_BLOCK_GETDEVTYPE 3 /* get device type (struct) */ + +typedef struct blockdev_info_s { + unsigned long long blkdev_totalblocks; + unsigned int blkdev_blocksize; + unsigned int blkdev_devtype; +} blockdev_info_t; + +#define BLOCK_DEVTYPE_DISK 0 +#define BLOCK_DEVTYPE_CDROM 1 Index: user/alc/PQ_LAUNDRY/sys/dev/cxgbe/t4_main.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/cxgbe/t4_main.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/cxgbe/t4_main.c (revision 303642) @@ -1,9552 +1,9530 @@ /*- * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. 
* Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #endif #if defined(__i386__) || defined(__amd64__) #include #include #endif #ifdef DDB #include #include #endif #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "t4_ioctl.h" #include "t4_l2t.h" #include "t4_mp_ring.h" #include "t4_if.h" /* T4 bus driver interface */ static int t4_probe(device_t); static int t4_attach(device_t); static int t4_detach(device_t); static int t4_ready(device_t); static int t4_read_port_unit(device_t, int, int *); static device_method_t t4_methods[] = { DEVMETHOD(device_probe, t4_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD(t4_is_main_ready, t4_ready), DEVMETHOD(t4_read_port_unit, t4_read_port_unit), DEVMETHOD_END }; static driver_t t4_driver = { "t4nex", t4_methods, sizeof(struct adapter) }; /* T4 port (cxgbe) interface */ static int cxgbe_probe(device_t); static int cxgbe_attach(device_t); static int cxgbe_detach(device_t); static device_method_t cxgbe_methods[] = { DEVMETHOD(device_probe, cxgbe_probe), DEVMETHOD(device_attach, cxgbe_attach), DEVMETHOD(device_detach, cxgbe_detach), { 0, 0 } }; static driver_t cxgbe_driver = { "cxgbe", cxgbe_methods, sizeof(struct port_info) }; /* T4 VI (vcxgbe) interface */ static int vcxgbe_probe(device_t); static int vcxgbe_attach(device_t); static int vcxgbe_detach(device_t); static device_method_t vcxgbe_methods[] = { DEVMETHOD(device_probe, vcxgbe_probe), DEVMETHOD(device_attach, vcxgbe_attach), DEVMETHOD(device_detach, vcxgbe_detach), { 0, 0 } }; static driver_t vcxgbe_driver = { "vcxgbe", vcxgbe_methods, sizeof(struct vi_info) }; static d_ioctl_t t4_ioctl; -static d_open_t t4_open; -static d_close_t t4_close; static struct cdevsw t4_cdevsw = { .d_version = D_VERSION, - .d_flags = 0, - .d_open = t4_open, - .d_close = t4_close, .d_ioctl = t4_ioctl, .d_name = "t4nex", }; /* T5 bus driver 
interface */ static int t5_probe(device_t); static device_method_t t5_methods[] = { DEVMETHOD(device_probe, t5_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD(t4_is_main_ready, t4_ready), DEVMETHOD(t4_read_port_unit, t4_read_port_unit), DEVMETHOD_END }; static driver_t t5_driver = { "t5nex", t5_methods, sizeof(struct adapter) }; /* T5 port (cxl) interface */ static driver_t cxl_driver = { "cxl", cxgbe_methods, sizeof(struct port_info) }; /* T5 VI (vcxl) interface */ static driver_t vcxl_driver = { "vcxl", vcxgbe_methods, sizeof(struct vi_info) }; -static struct cdevsw t5_cdevsw = { - .d_version = D_VERSION, - .d_flags = 0, - .d_open = t4_open, - .d_close = t4_close, - .d_ioctl = t4_ioctl, - .d_name = "t5nex", -}; - /* ifnet + media interface */ static void cxgbe_init(void *); static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t); static int cxgbe_transmit(struct ifnet *, struct mbuf *); static void cxgbe_qflush(struct ifnet *); static int cxgbe_media_change(struct ifnet *); static void cxgbe_media_status(struct ifnet *, struct ifmediareq *); MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services"); /* * Correct lock order when you need to acquire multiple locks is t4_list_lock, * then ADAPTER_LOCK, then t4_uld_list_lock. */ static struct sx t4_list_lock; SLIST_HEAD(, adapter) t4_list; #ifdef TCP_OFFLOAD static struct sx t4_uld_list_lock; SLIST_HEAD(, uld_info) t4_uld_list; #endif /* * Tunables. See tweak_tunables() too. * * Each tunable is set to a default value here if it's known at compile-time. * Otherwise it is set to -1 as an indication to tweak_tunables() that it should * provide a reasonable default when the driver is loaded. * * Tunables applicable to both T4 and T5 are under hw.cxgbe. Those specific to * T5 are under hw.cxl. */ /* * Number of queues for tx and rx, 10G and 1G, NIC and offload. */ #define NTXQ_10G 16 static int t4_ntxq10g = -1; TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq10g); #define NRXQ_10G 8 static int t4_nrxq10g = -1; TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq10g); #define NTXQ_1G 4 static int t4_ntxq1g = -1; TUNABLE_INT("hw.cxgbe.ntxq1g", &t4_ntxq1g); #define NRXQ_1G 2 static int t4_nrxq1g = -1; TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g); #define NTXQ_VI 1 static int t4_ntxq_vi = -1; TUNABLE_INT("hw.cxgbe.ntxq_vi", &t4_ntxq_vi); #define NRXQ_VI 1 static int t4_nrxq_vi = -1; TUNABLE_INT("hw.cxgbe.nrxq_vi", &t4_nrxq_vi); static int t4_rsrv_noflowq = 0; TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq); #ifdef TCP_OFFLOAD #define NOFLDTXQ_10G 8 static int t4_nofldtxq10g = -1; TUNABLE_INT("hw.cxgbe.nofldtxq10g", &t4_nofldtxq10g); #define NOFLDRXQ_10G 2 static int t4_nofldrxq10g = -1; TUNABLE_INT("hw.cxgbe.nofldrxq10g", &t4_nofldrxq10g); #define NOFLDTXQ_1G 2 static int t4_nofldtxq1g = -1; TUNABLE_INT("hw.cxgbe.nofldtxq1g", &t4_nofldtxq1g); #define NOFLDRXQ_1G 1 static int t4_nofldrxq1g = -1; TUNABLE_INT("hw.cxgbe.nofldrxq1g", &t4_nofldrxq1g); #define NOFLDTXQ_VI 1 static int t4_nofldtxq_vi = -1; TUNABLE_INT("hw.cxgbe.nofldtxq_vi", &t4_nofldtxq_vi); #define NOFLDRXQ_VI 1 static int t4_nofldrxq_vi = -1; TUNABLE_INT("hw.cxgbe.nofldrxq_vi", &t4_nofldrxq_vi); #endif #ifdef DEV_NETMAP #define NNMTXQ_VI 2 static int t4_nnmtxq_vi = -1; TUNABLE_INT("hw.cxgbe.nnmtxq_vi", &t4_nnmtxq_vi); #define NNMRXQ_VI 2 static int t4_nnmrxq_vi = -1; TUNABLE_INT("hw.cxgbe.nnmrxq_vi", &t4_nnmrxq_vi); #endif /* * Holdoff parameters for 10G and 1G ports. 
*/ #define TMR_IDX_10G 1 static int t4_tmr_idx_10g = TMR_IDX_10G; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx_10g); #define PKTC_IDX_10G (-1) static int t4_pktc_idx_10g = PKTC_IDX_10G; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx_10g); #define TMR_IDX_1G 1 static int t4_tmr_idx_1g = TMR_IDX_1G; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &t4_tmr_idx_1g); #define PKTC_IDX_1G (-1) static int t4_pktc_idx_1g = PKTC_IDX_1G; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &t4_pktc_idx_1g); /* * Size (# of entries) of each tx and rx queue. */ static unsigned int t4_qsize_txq = TX_EQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq); static unsigned int t4_qsize_rxq = RX_IQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq); /* * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively). */ static int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX; TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types); /* * Configuration file. */ #define DEFAULT_CF "default" #define FLASH_CF "flash" #define UWIRE_CF "uwire" #define FPGA_CF "fpga" static char t4_cfg_file[32] = DEFAULT_CF; TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file)); /* * PAUSE settings (bit 0, 1 = rx_pause, tx_pause respectively). * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them. * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water * mark or when signalled to do so, 0 to never emit PAUSE. */ static int t4_pause_settings = PAUSE_TX | PAUSE_RX; TUNABLE_INT("hw.cxgbe.pause_settings", &t4_pause_settings); /* * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed, * encouraged respectively). */ static unsigned int t4_fw_install = 1; TUNABLE_INT("hw.cxgbe.fw_install", &t4_fw_install); /* * ASIC features that will be used. Disable the ones you don't want so that the * chip resources aren't wasted on features that will not be used. */ static int t4_nbmcaps_allowed = 0; TUNABLE_INT("hw.cxgbe.nbmcaps_allowed", &t4_nbmcaps_allowed); static int t4_linkcaps_allowed = 0; /* No DCBX, PPP, etc. by default */ TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed); static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS | FW_CAPS_CONFIG_SWITCH_EGRESS; TUNABLE_INT("hw.cxgbe.switchcaps_allowed", &t4_switchcaps_allowed); static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC; TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed); static int t4_toecaps_allowed = -1; TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed); static int t4_rdmacaps_allowed = -1; TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed); static int t4_tlscaps_allowed = 0; TUNABLE_INT("hw.cxgbe.tlscaps_allowed", &t4_tlscaps_allowed); static int t4_iscsicaps_allowed = -1; TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed); static int t4_fcoecaps_allowed = 0; TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed); static int t5_write_combine = 0; TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine); static int t4_num_vis = 1; TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis); /* Functions used by extra VIs to obtain unique MAC addresses for each VI. 
*/ static int vi_mac_funcs[] = { FW_VI_FUNC_OFLD, FW_VI_FUNC_IWARP, FW_VI_FUNC_OPENISCSI, FW_VI_FUNC_OPENFCOE, FW_VI_FUNC_FOISCSI, FW_VI_FUNC_FOFCOE, }; struct intrs_and_queues { uint16_t intr_type; /* INTx, MSI, or MSI-X */ uint16_t nirq; /* Total # of vectors */ uint16_t intr_flags_10g;/* Interrupt flags for each 10G port */ uint16_t intr_flags_1g; /* Interrupt flags for each 1G port */ uint16_t ntxq10g; /* # of NIC txq's for each 10G port */ uint16_t nrxq10g; /* # of NIC rxq's for each 10G port */ uint16_t ntxq1g; /* # of NIC txq's for each 1G port */ uint16_t nrxq1g; /* # of NIC rxq's for each 1G port */ uint16_t rsrv_noflowq; /* Flag whether to reserve queue 0 */ uint16_t nofldtxq10g; /* # of TOE txq's for each 10G port */ uint16_t nofldrxq10g; /* # of TOE rxq's for each 10G port */ uint16_t nofldtxq1g; /* # of TOE txq's for each 1G port */ uint16_t nofldrxq1g; /* # of TOE rxq's for each 1G port */ /* The vcxgbe/vcxl interfaces use these and not the ones above. */ uint16_t ntxq_vi; /* # of NIC txq's */ uint16_t nrxq_vi; /* # of NIC rxq's */ uint16_t nofldtxq_vi; /* # of TOE txq's */ uint16_t nofldrxq_vi; /* # of TOE rxq's */ uint16_t nnmtxq_vi; /* # of netmap txq's */ uint16_t nnmrxq_vi; /* # of netmap rxq's */ }; struct filter_entry { uint32_t valid:1; /* filter allocated and valid */ uint32_t locked:1; /* filter is administratively locked */ uint32_t pending:1; /* filter action is pending firmware reply */ uint32_t smtidx:8; /* Source MAC Table index for smac */ struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ struct t4_filter_specification fs; }; static int map_bars_0_and_4(struct adapter *); static int map_bar_2(struct adapter *); static void setup_memwin(struct adapter *); static void position_memwin(struct adapter *, int, uint32_t); static int rw_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int, int); static inline int read_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int); static inline int write_via_memwin(struct adapter *, int, uint32_t, const uint32_t *, int); static int validate_mem_range(struct adapter *, uint32_t, int); static int fwmtype_to_hwmtype(int); static int validate_mt_off_len(struct adapter *, int, uint32_t, int, uint32_t *); static int fixup_devlog_params(struct adapter *); static int cfg_itype_and_nqueues(struct adapter *, int, int, int, struct intrs_and_queues *); static int prep_firmware(struct adapter *); static int partition_resources(struct adapter *, const struct firmware *, const char *); static int get_params__pre_init(struct adapter *); static int get_params__post_init(struct adapter *); static int set_params__post_init(struct adapter *); static void t4_set_desc(struct adapter *); static void build_medialist(struct port_info *, struct ifmedia *); static int cxgbe_init_synchronized(struct vi_info *); static int cxgbe_uninit_synchronized(struct vi_info *); static int setup_intr_handlers(struct adapter *); static void quiesce_txq(struct adapter *, struct sge_txq *); static void quiesce_wrq(struct adapter *, struct sge_wrq *); static void quiesce_iq(struct adapter *, struct sge_iq *); static void quiesce_fl(struct adapter *, struct sge_fl *); static int t4_alloc_irq(struct adapter *, struct irq *, int rid, driver_intr_t *, void *, char *); static int t4_free_irq(struct adapter *, struct irq *); static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *); static void vi_refresh_stats(struct adapter *, struct vi_info *); static void cxgbe_refresh_stats(struct adapter *, struct port_info *); static void 
cxgbe_tick(void *); static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t); static void t4_sysctls(struct adapter *); static void cxgbe_sysctls(struct port_info *); static int sysctl_int_array(SYSCTL_HANDLER_ARGS); static int sysctl_bitfield(SYSCTL_HANDLER_ARGS); static int sysctl_btphy(SYSCTL_HANDLER_ARGS); static int sysctl_noflowq(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS); static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS); static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS); static int sysctl_temperature(SYSCTL_HANDLER_ARGS); #ifdef SBUF_DRAIN static int sysctl_cctrl(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS); static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS); static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_devlog(SYSCTL_HANDLER_ARGS); static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS); static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS); static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS); static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS); static int sysctl_meminfo(SYSCTL_HANDLER_ARGS); static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS); static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS); static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS); static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS); static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tids(SYSCTL_HANDLER_ARGS); static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS); static int sysctl_tp_la(SYSCTL_HANDLER_ARGS); static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS); static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS); static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tc_params(SYSCTL_HANDLER_ARGS); #endif #ifdef TCP_OFFLOAD static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS); static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS); static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS); #endif static uint32_t fconf_iconf_to_mode(uint32_t, uint32_t); static uint32_t mode_to_fconf(uint32_t); static uint32_t mode_to_iconf(uint32_t); static int check_fspec_against_fconf_iconf(struct adapter *, struct t4_filter_specification *); static int get_filter_mode(struct adapter *, uint32_t *); static int set_filter_mode(struct adapter *, uint32_t); static inline uint64_t get_filter_hits(struct adapter *, uint32_t); static int get_filter(struct adapter *, struct t4_filter *); static int set_filter(struct adapter *, struct t4_filter *); static int del_filter(struct adapter *, struct t4_filter *); static void clear_filter(struct filter_entry *); static int set_filter_wr(struct adapter *, int); static int del_filter_wr(struct adapter *, int); static int set_tcb_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); static int get_sge_context(struct adapter *, struct t4_sge_context *); static int load_fw(struct adapter *, struct t4_data *); static int read_card_mem(struct adapter *, int, struct t4_mem_range *); static int read_i2c(struct adapter *, struct t4_i2c_data *); static int set_sched_class(struct adapter *, struct t4_sched_params *); static int 
set_sched_queue(struct adapter *, struct t4_sched_queue *); #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *, int); #endif static int mod_event(module_t, int, void *); static int notify_siblings(device_t, int); struct { uint16_t device; char *desc; } t4_pciids[] = { {0xa000, "Chelsio Terminator 4 FPGA"}, {0x4400, "Chelsio T440-dbg"}, {0x4401, "Chelsio T420-CR"}, {0x4402, "Chelsio T422-CR"}, {0x4403, "Chelsio T440-CR"}, {0x4404, "Chelsio T420-BCH"}, {0x4405, "Chelsio T440-BCH"}, {0x4406, "Chelsio T440-CH"}, {0x4407, "Chelsio T420-SO"}, {0x4408, "Chelsio T420-CX"}, {0x4409, "Chelsio T420-BT"}, {0x440a, "Chelsio T404-BT"}, {0x440e, "Chelsio T440-LP-CR"}, }, t5_pciids[] = { {0xb000, "Chelsio Terminator 5 FPGA"}, {0x5400, "Chelsio T580-dbg"}, {0x5401, "Chelsio T520-CR"}, /* 2 x 10G */ {0x5402, "Chelsio T522-CR"}, /* 2 x 10G, 2 X 1G */ {0x5403, "Chelsio T540-CR"}, /* 4 x 10G */ {0x5407, "Chelsio T520-SO"}, /* 2 x 10G, nomem */ {0x5409, "Chelsio T520-BT"}, /* 2 x 10GBaseT */ {0x540a, "Chelsio T504-BT"}, /* 4 x 1G */ {0x540d, "Chelsio T580-CR"}, /* 2 x 40G */ {0x540e, "Chelsio T540-LP-CR"}, /* 4 x 10G */ {0x5410, "Chelsio T580-LP-CR"}, /* 2 x 40G */ {0x5411, "Chelsio T520-LL-CR"}, /* 2 x 10G */ {0x5412, "Chelsio T560-CR"}, /* 1 x 40G, 2 x 10G */ {0x5414, "Chelsio T580-LP-SO-CR"}, /* 2 x 40G, nomem */ {0x5415, "Chelsio T502-BT"}, /* 2 x 1G */ #ifdef notyet {0x5404, "Chelsio T520-BCH"}, {0x5405, "Chelsio T540-BCH"}, {0x5406, "Chelsio T540-CH"}, {0x5408, "Chelsio T520-CX"}, {0x540b, "Chelsio B520-SR"}, {0x540c, "Chelsio B504-BT"}, {0x540f, "Chelsio Amsterdam"}, {0x5413, "Chelsio T580-CHR"}, #endif }; #ifdef TCP_OFFLOAD /* * service_iq() has an iq and needs the fl. Offset of fl from the iq should be * exactly the same for both rxq and ofld_rxq. */ CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq)); CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl)); #endif CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE); static int t4_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xa000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t4_pciids); i++) { if (d == t4_pciids[i].device) { device_set_desc(dev, t4_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int t5_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xb000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t5_pciids); i++) { if (d == t5_pciids[i].device) { device_set_desc(dev, t5_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static void t5_attribute_workaround(device_t dev) { device_t root_port; uint32_t v; /* * The T5 chips do not properly echo the No Snoop and Relaxed * Ordering attributes when replying to a TLP from a Root * Port. As a workaround, find the parent Root Port and * disable No Snoop and Relaxed Ordering. Note that this * affects all devices under this root port. 
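 * (Concretely, the pcie_adjust_config() call below clears
 * PCIEM_CTL_RELAXED_ORD_ENABLE and PCIEM_CTL_NOSNOOP_ENABLE in the root
 * port's PCIe device control register, and the change is reported only if
 * either bit was previously set.)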
*/ root_port = pci_find_pcie_root_port(dev); if (root_port == NULL) { device_printf(dev, "Unable to find parent root port\n"); return; } v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL, PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2); if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) != 0) device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n", device_get_nameunit(root_port)); } static int t4_attach(device_t dev) { struct adapter *sc; int rc = 0, i, j, n10g, n1g, rqidx, tqidx; + struct make_dev_args mda; struct intrs_and_queues iaq; struct sge *s; uint8_t *buf; #ifdef TCP_OFFLOAD int ofld_rqidx, ofld_tqidx; #endif #ifdef DEV_NETMAP int nm_rqidx, nm_tqidx; #endif int num_vis; sc = device_get_softc(dev); sc->dev = dev; TUNABLE_INT_FETCH("hw.cxgbe.debug_flags", &sc->debug_flags); if ((pci_get_device(dev) & 0xff00) == 0x5400) t5_attribute_workaround(dev); pci_enable_busmaster(dev); if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) { uint32_t v; pci_set_max_read_req(dev, 4096); v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2); v |= PCIEM_CTL_RELAXED_ORD_ENABLE; pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2); sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5); } sc->traceq = -1; mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF); snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer", device_get_nameunit(dev)); snprintf(sc->lockname, sizeof(sc->lockname), "%s", device_get_nameunit(dev)); mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF); sx_xlock(&t4_list_lock); SLIST_INSERT_HEAD(&t4_list, sc, link); sx_xunlock(&t4_list_lock); mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF); TAILQ_INIT(&sc->sfl); callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0); mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF); rc = map_bars_0_and_4(sc); if (rc != 0) goto done; /* error message displayed already */ /* * This is the real PF# to which we're attaching. Works from within PCI * passthrough environments too, where pci_get_function() could return a * different PF# depending on the passthrough configuration. We need to * use the real PF# in all our communication with the firmware. */ sc->pf = G_SOURCEPF(t4_read_reg(sc, A_PL_WHOAMI)); sc->mbox = sc->pf; memset(sc->chan_map, 0xff, sizeof(sc->chan_map)); /* Prepare the adapter for operation. */ buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_prep_adapter(sc, buf); free(buf, M_CXGBE); if (rc != 0) { device_printf(dev, "failed to prepare adapter: %d.\n", rc); goto done; } /* * Do this really early, with the memory windows set up even before the * character device. The userland tool's register i/o and mem read * will work even in "recovery mode". */ setup_memwin(sc); if (t4_init_devlog_params(sc, 0) == 0) fixup_devlog_params(sc); - sc->cdev = make_dev(is_t4(sc) ? &t4_cdevsw : &t5_cdevsw, - device_get_unit(dev), UID_ROOT, GID_WHEEL, 0600, "%s", - device_get_nameunit(dev)); - if (sc->cdev == NULL) - device_printf(dev, "failed to create nexus char device.\n"); - else - sc->cdev->si_drv1 = sc; + make_dev_args_init(&mda); + mda.mda_devsw = &t4_cdevsw; + mda.mda_uid = UID_ROOT; + mda.mda_gid = GID_WHEEL; + mda.mda_mode = 0600; + mda.mda_si_drv1 = sc; + rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev)); + if (rc != 0) + device_printf(dev, "failed to create nexus char device: %d.\n", + rc); /* Go no further if recovery mode has been requested. 
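 * (Recovery mode is requested with the hw.cxgbe.sos loader tunable; any
 * non-zero value, e.g. hw.cxgbe.sos=1 in loader.conf(5), stops the attach at
 * this point, leaving only the nexus char device and memory windows set up.)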
*/ if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) { device_printf(dev, "recovery mode.\n"); goto done; } #if defined(__i386__) if ((cpu_feature & CPUID_CX8) == 0) { device_printf(dev, "64 bit atomics not available.\n"); rc = ENOTSUP; goto done; } #endif /* Prepare the firmware for operation */ rc = prep_firmware(sc); if (rc != 0) goto done; /* error message displayed already */ rc = get_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = set_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = map_bar_2(sc); if (rc != 0) goto done; /* error message displayed already */ rc = t4_create_dma_tag(sc); if (rc != 0) goto done; /* error message displayed already */ /* * Number of VIs to create per-port. The first VI is the "main" regular * VI for the port. The rest are additional virtual interfaces on the * same physical port. Note that the main VI does not have native * netmap support but the extra VIs do. * * Limit the number of VIs per port to the number of available * MAC addresses per port. */ if (t4_num_vis >= 1) num_vis = t4_num_vis; else num_vis = 1; if (num_vis > nitems(vi_mac_funcs)) { num_vis = nitems(vi_mac_funcs); device_printf(dev, "Number of VIs limited to %d\n", num_vis); } /* * First pass over all the ports - allocate VIs and initialize some * basic parameters like mac address, port type, etc. We also figure * out whether a port is 10G or 1G and use that information when * calculating how many interrupts to attempt to allocate. */ n10g = n1g = 0; for_each_port(sc, i) { struct port_info *pi; pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK); sc->port[i] = pi; /* These must be set before t4_port_init */ pi->adapter = sc; pi->port_id = i; /* * XXX: vi[0] is special so we can't delay this allocation until * pi->nvi's final value is known. */ pi->vi = malloc(sizeof(struct vi_info) * num_vis, M_CXGBE, M_ZERO | M_WAITOK); /* * Allocate the "main" VI and initialize parameters * like mac addr. */ rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i); if (rc != 0) { device_printf(dev, "unable to initialize port %d: %d\n", i, rc); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); sc->port[i] = NULL; goto done; } pi->link_cfg.requested_fc &= ~(PAUSE_TX | PAUSE_RX); pi->link_cfg.requested_fc |= t4_pause_settings; pi->link_cfg.fc &= ~(PAUSE_TX | PAUSE_RX); pi->link_cfg.fc |= t4_pause_settings; rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, &pi->link_cfg); if (rc != 0) { device_printf(dev, "port %d l1cfg failed: %d\n", i, rc); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); sc->port[i] = NULL; goto done; } snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d", device_get_nameunit(dev), i); mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF); sc->chan_map[pi->tx_chan] = i; pi->tc = malloc(sizeof(struct tx_sched_class) * sc->chip_params->nsched_cls, M_CXGBE, M_ZERO | M_WAITOK); if (is_10G_port(pi) || is_40G_port(pi)) { n10g++; } else { n1g++; } pi->linkdnrc = -1; pi->dev = device_add_child(dev, is_t4(sc) ? "cxgbe" : "cxl", -1); if (pi->dev == NULL) { device_printf(dev, "failed to add device for port %d.\n", i); rc = ENXIO; goto done; } pi->vi[0].dev = pi->dev; device_set_softc(pi->dev, pi); } /* * Interrupt type, # of interrupts, # of rx/tx queues, etc. 
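 * (Which types are tried is governed by the hw.cxgbe.interrupt_types bitmask
 * declared above; e.g. a value of 2 restricts the driver to MSI.)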
*/ rc = cfg_itype_and_nqueues(sc, n10g, n1g, num_vis, &iaq); if (rc != 0) goto done; /* error message displayed already */ if (iaq.nrxq_vi + iaq.nofldrxq_vi + iaq.nnmrxq_vi == 0) num_vis = 1; sc->intr_type = iaq.intr_type; sc->intr_count = iaq.nirq; s = &sc->sge; s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g; s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g; if (num_vis > 1) { s->nrxq += (n10g + n1g) * (num_vis - 1) * iaq.nrxq_vi; s->ntxq += (n10g + n1g) * (num_vis - 1) * iaq.ntxq_vi; } s->neq = s->ntxq + s->nrxq; /* the free list in an rxq is an eq */ s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */ s->niq = s->nrxq + 1; /* 1 extra for firmware event queue */ #ifdef TCP_OFFLOAD if (is_offload(sc)) { s->nofldrxq = n10g * iaq.nofldrxq10g + n1g * iaq.nofldrxq1g; s->nofldtxq = n10g * iaq.nofldtxq10g + n1g * iaq.nofldtxq1g; if (num_vis > 1) { s->nofldrxq += (n10g + n1g) * (num_vis - 1) * iaq.nofldrxq_vi; s->nofldtxq += (n10g + n1g) * (num_vis - 1) * iaq.nofldtxq_vi; } s->neq += s->nofldtxq + s->nofldrxq; s->niq += s->nofldrxq; s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); } #endif #ifdef DEV_NETMAP if (num_vis > 1) { s->nnmrxq = (n10g + n1g) * (num_vis - 1) * iaq.nnmrxq_vi; s->nnmtxq = (n10g + n1g) * (num_vis - 1) * iaq.nnmtxq_vi; } s->neq += s->nnmtxq + s->nnmrxq; s->niq += s->nnmrxq; s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq), M_CXGBE, M_ZERO | M_WAITOK); #endif s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE, M_ZERO | M_WAITOK); s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE, M_ZERO | M_WAITOK); s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE, M_ZERO | M_WAITOK); sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE, M_ZERO | M_WAITOK); t4_init_l2t(sc, M_WAITOK); /* * Second pass over the ports. This time we know the number of rx and * tx queues that each port should get. */ rqidx = tqidx = 0; #ifdef TCP_OFFLOAD ofld_rqidx = ofld_tqidx = 0; #endif #ifdef DEV_NETMAP nm_rqidx = nm_tqidx = 0; #endif for_each_port(sc, i) { struct port_info *pi = sc->port[i]; struct vi_info *vi; if (pi == NULL) continue; pi->nvi = num_vis; for_each_vi(pi, j, vi) { vi->pi = pi; vi->qsize_rxq = t4_qsize_rxq; vi->qsize_txq = t4_qsize_txq; vi->first_rxq = rqidx; vi->first_txq = tqidx; if (is_10G_port(pi) || is_40G_port(pi)) { vi->tmr_idx = t4_tmr_idx_10g; vi->pktc_idx = t4_pktc_idx_10g; vi->flags |= iaq.intr_flags_10g & INTR_RXQ; vi->nrxq = j == 0 ? iaq.nrxq10g : iaq.nrxq_vi; vi->ntxq = j == 0 ? iaq.ntxq10g : iaq.ntxq_vi; } else { vi->tmr_idx = t4_tmr_idx_1g; vi->pktc_idx = t4_pktc_idx_1g; vi->flags |= iaq.intr_flags_1g & INTR_RXQ; vi->nrxq = j == 0 ? iaq.nrxq1g : iaq.nrxq_vi; vi->ntxq = j == 0 ? iaq.ntxq1g : iaq.ntxq_vi; } rqidx += vi->nrxq; tqidx += vi->ntxq; if (j == 0 && vi->ntxq > 1) vi->rsrv_noflowq = iaq.rsrv_noflowq ? 1 : 0; else vi->rsrv_noflowq = 0; #ifdef TCP_OFFLOAD vi->first_ofld_rxq = ofld_rqidx; vi->first_ofld_txq = ofld_tqidx; if (is_10G_port(pi) || is_40G_port(pi)) { vi->flags |= iaq.intr_flags_10g & INTR_OFLD_RXQ; vi->nofldrxq = j == 0 ? iaq.nofldrxq10g : iaq.nofldrxq_vi; vi->nofldtxq = j == 0 ? 
iaq.nofldtxq10g : iaq.nofldtxq_vi; } else { vi->flags |= iaq.intr_flags_1g & INTR_OFLD_RXQ; vi->nofldrxq = j == 0 ? iaq.nofldrxq1g : iaq.nofldrxq_vi; vi->nofldtxq = j == 0 ? iaq.nofldtxq1g : iaq.nofldtxq_vi; } ofld_rqidx += vi->nofldrxq; ofld_tqidx += vi->nofldtxq; #endif #ifdef DEV_NETMAP if (j > 0) { vi->first_nm_rxq = nm_rqidx; vi->first_nm_txq = nm_tqidx; vi->nnmrxq = iaq.nnmrxq_vi; vi->nnmtxq = iaq.nnmtxq_vi; nm_rqidx += vi->nnmrxq; nm_tqidx += vi->nnmtxq; } #endif } } rc = setup_intr_handlers(sc); if (rc != 0) { device_printf(dev, "failed to setup interrupt handlers: %d\n", rc); goto done; } rc = bus_generic_attach(dev); if (rc != 0) { device_printf(dev, "failed to attach all child ports: %d\n", rc); goto done; } device_printf(dev, "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n", sc->params.pci.speed, sc->params.pci.width, sc->params.nports, sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" : (sc->intr_type == INTR_MSI ? "MSI" : "INTx"), sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq); t4_set_desc(sc); notify_siblings(dev, 0); done: if (rc != 0 && sc->cdev) { /* cdev was created and so cxgbetool works; recover that way. */ device_printf(dev, "error during attach, adapter is now in recovery mode.\n"); rc = 0; } if (rc != 0) t4_detach(dev); else t4_sysctls(sc); return (rc); } static int t4_ready(device_t dev) { struct adapter *sc; sc = device_get_softc(dev); if (sc->flags & FW_OK) return (0); return (ENXIO); } static int t4_read_port_unit(device_t dev, int port, int *unit) { struct adapter *sc; struct port_info *pi; sc = device_get_softc(dev); if (port < 0 || port >= MAX_NPORTS) return (EINVAL); pi = sc->port[port]; if (pi == NULL || pi->dev == NULL) return (ENXIO); *unit = device_get_unit(pi->dev); return (0); } static int notify_siblings(device_t dev, int detaching) { device_t sibling; int error, i; error = 0; for (i = 0; i < PCI_FUNCMAX; i++) { if (i == pci_get_function(dev)) continue; sibling = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev), i); if (sibling == NULL || !device_is_attached(sibling)) continue; if (detaching) error = T4_DETACH_CHILD(sibling); else (void)T4_ATTACH_CHILD(sibling); if (error) break; } return (error); } /* * Idempotent */ static int t4_detach(device_t dev) { struct adapter *sc; struct port_info *pi; int i, rc; sc = device_get_softc(dev); rc = notify_siblings(dev, 1); if (rc) { device_printf(dev, "failed to detach sibling devices: %d\n", rc); return (rc); } if (sc->flags & FULL_INIT_DONE) t4_intr_disable(sc); if (sc->cdev) { destroy_dev(sc->cdev); sc->cdev = NULL; } rc = bus_generic_detach(dev); if (rc) { device_printf(dev, "failed to detach child devices: %d\n", rc); return (rc); } for (i = 0; i < sc->intr_count; i++) t4_free_irq(sc, &sc->irq[i]); for (i = 0; i < MAX_NPORTS; i++) { pi = sc->port[i]; if (pi) { t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid); if (pi->dev) device_delete_child(dev, pi->dev); mtx_destroy(&pi->pi_lock); free(pi->vi, M_CXGBE); free(pi->tc, M_CXGBE); free(pi, M_CXGBE); } } if (sc->flags & FULL_INIT_DONE) adapter_full_uninit(sc); if (sc->flags & FW_OK) t4_fw_bye(sc, sc->mbox); if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX) pci_release_msi(dev); if (sc->regs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid, sc->regs_res); if (sc->udbs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid, sc->udbs_res); if (sc->msix_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid, sc->msix_res); if (sc->l2t) t4_free_l2t(sc->l2t); #ifdef TCP_OFFLOAD 
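	/* Release the offload queue arrays that t4_attach() allocated. */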
free(sc->sge.ofld_rxq, M_CXGBE); free(sc->sge.ofld_txq, M_CXGBE); #endif #ifdef DEV_NETMAP free(sc->sge.nm_rxq, M_CXGBE); free(sc->sge.nm_txq, M_CXGBE); #endif free(sc->irq, M_CXGBE); free(sc->sge.rxq, M_CXGBE); free(sc->sge.txq, M_CXGBE); free(sc->sge.ctrlq, M_CXGBE); free(sc->sge.iqmap, M_CXGBE); free(sc->sge.eqmap, M_CXGBE); free(sc->tids.ftid_tab, M_CXGBE); t4_destroy_dma_tag(sc); if (mtx_initialized(&sc->sc_lock)) { sx_xlock(&t4_list_lock); SLIST_REMOVE(&t4_list, sc, adapter, link); sx_xunlock(&t4_list_lock); mtx_destroy(&sc->sc_lock); } callout_drain(&sc->sfl_callout); if (mtx_initialized(&sc->tids.ftid_lock)) mtx_destroy(&sc->tids.ftid_lock); if (mtx_initialized(&sc->sfl_lock)) mtx_destroy(&sc->sfl_lock); if (mtx_initialized(&sc->ifp_lock)) mtx_destroy(&sc->ifp_lock); if (mtx_initialized(&sc->reg_lock)) mtx_destroy(&sc->reg_lock); for (i = 0; i < NUM_MEMWIN; i++) { struct memwin *mw = &sc->memwin[i]; if (rw_initialized(&mw->mw_lock)) rw_destroy(&mw->mw_lock); } bzero(sc, sizeof(*sc)); return (0); } static int cxgbe_probe(device_t dev) { char buf[128]; struct port_info *pi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d", pi->port_id); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } #define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \ IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \ IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS) #define T4_CAP_ENABLE (T4_CAP) static int cxgbe_vi_attach(device_t dev, struct vi_info *vi) { struct ifnet *ifp; struct sbuf *sb; vi->xact_addr_filt = -1; callout_init(&vi->tick, 1); /* Allocate an ifnet and set it up */ ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Cannot allocate ifnet\n"); return (ENOMEM); } vi->ifp = ifp; ifp->if_softc = vi; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = cxgbe_init; ifp->if_ioctl = cxgbe_ioctl; ifp->if_transmit = cxgbe_transmit; ifp->if_qflush = cxgbe_qflush; ifp->if_get_counter = cxgbe_get_counter; ifp->if_capabilities = T4_CAP; #ifdef TCP_OFFLOAD if (vi->nofldrxq != 0) ifp->if_capabilities |= IFCAP_TOE; #endif ifp->if_capenable = T4_CAP_ENABLE; ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO | CSUM_UDP_IPV6 | CSUM_TCP_IPV6; ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS; ifp->if_hw_tsomaxsegsize = 65536; /* Initialize ifmedia for this VI */ ifmedia_init(&vi->media, IFM_IMASK, cxgbe_media_change, cxgbe_media_status); build_medialist(vi->pi, &vi->media); vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp, EVENTHANDLER_PRI_ANY); ether_ifattach(ifp, vi->hw_addr); #ifdef DEV_NETMAP if (vi->nnmrxq != 0) cxgbe_nm_attach(vi); #endif sb = sbuf_new_auto(); sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq); #ifdef TCP_OFFLOAD if (ifp->if_capabilities & IFCAP_TOE) sbuf_printf(sb, "; %d txq, %d rxq (TOE)", vi->nofldtxq, vi->nofldrxq); #endif #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) sbuf_printf(sb, "; %d txq, %d rxq (netmap)", vi->nnmtxq, vi->nnmrxq); #endif sbuf_finish(sb); device_printf(dev, "%s\n", sbuf_data(sb)); sbuf_delete(sb); vi_sysctls(vi); return (0); } static int cxgbe_attach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct vi_info *vi; int i, rc; callout_init_mtx(&pi->tick, &pi->pi_lock, 0); rc = cxgbe_vi_attach(dev, &pi->vi[0]); if (rc) return (rc); for_each_vi(pi, i, vi) { if (i == 0) 
continue; vi->dev = device_add_child(dev, is_t4(pi->adapter) ? "vcxgbe" : "vcxl", -1); if (vi->dev == NULL) { device_printf(dev, "failed to add VI %d\n", i); continue; } device_set_softc(vi->dev, vi); } cxgbe_sysctls(pi); bus_generic_attach(dev); return (0); } static void cxgbe_vi_detach(struct vi_info *vi) { struct ifnet *ifp = vi->ifp; ether_ifdetach(ifp); if (vi->vlan_c) EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c); /* Let detach proceed even if these fail. */ #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) cxgbe_nm_detach(vi); #endif cxgbe_uninit_synchronized(vi); callout_drain(&vi->tick); vi_full_uninit(vi); ifmedia_removeall(&vi->media); if_free(vi->ifp); vi->ifp = NULL; } static int cxgbe_detach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct adapter *sc = pi->adapter; int rc; /* Detach the extra VIs first. */ rc = bus_generic_detach(dev); if (rc) return (rc); device_delete_children(dev); doom_vi(sc, &pi->vi[0]); if (pi->flags & HAS_TRACEQ) { sc->traceq = -1; /* cloner should not create ifnet */ t4_tracer_port_detach(sc); } cxgbe_vi_detach(&pi->vi[0]); callout_drain(&pi->tick); end_synchronized_op(sc, 0); return (0); } static void cxgbe_init(void *arg) { struct vi_info *vi = arg; struct adapter *sc = vi->pi->adapter; if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0) return; cxgbe_init_synchronized(vi); end_synchronized_op(sc, 0); } static int cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) { int rc = 0, mtu, flags, can_sleep; struct vi_info *vi = ifp->if_softc; struct adapter *sc = vi->pi->adapter; struct ifreq *ifr = (struct ifreq *)data; uint32_t mask; switch (cmd) { case SIOCSIFMTU: mtu = ifr->ifr_mtu; if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) return (EINVAL); rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu"); if (rc) return (rc); ifp->if_mtu = mtu; if (vi->flags & VI_INIT_DONE) { t4_update_fl_bufsize(ifp); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MTU); } end_synchronized_op(sc, 0); break; case SIOCSIFFLAGS: can_sleep = 0; redo_sifflags: rc = begin_synchronized_op(sc, vi, can_sleep ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4flg"); if (rc) return (rc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { flags = vi->if_flags; if ((ifp->if_flags ^ flags) & (IFF_PROMISC | IFF_ALLMULTI)) { if (can_sleep == 1) { end_synchronized_op(sc, 0); can_sleep = 0; goto redo_sifflags; } rc = update_mac_settings(ifp, XGMAC_PROMISC | XGMAC_ALLMULTI); } } else { if (can_sleep == 0) { end_synchronized_op(sc, LOCK_HELD); can_sleep = 1; goto redo_sifflags; } rc = cxgbe_init_synchronized(vi); } vi->if_flags = ifp->if_flags; } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (can_sleep == 0) { end_synchronized_op(sc, LOCK_HELD); can_sleep = 1; goto redo_sifflags; } rc = cxgbe_uninit_synchronized(vi); } end_synchronized_op(sc, can_sleep ? 
0 : LOCK_HELD); break; case SIOCADDMULTI: case SIOCDELMULTI: /* these two are called with a mutex held :-( */ rc = begin_synchronized_op(sc, vi, HOLD_LOCK, "t4multi"); if (rc) return (rc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MCADDRS); end_synchronized_op(sc, LOCK_HELD); break; case SIOCSIFCAP: rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap"); if (rc) return (rc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO4; if_printf(ifp, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & ifp->if_capenable && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO6; if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; /* * Note that we leave CSUM_TSO alone (it is always set). The * kernel takes both IFCAP_TSOx and CSUM_TSO into account before * sending a TSO request our way, so it's sufficient to toggle * IFCAP_TSOx only. */ if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & ifp->if_capenable) && !(IFCAP_TXCSUM & ifp->if_capenable)) { if_printf(ifp, "enable txcsum first.\n"); rc = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO4; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & ifp->if_capenable) && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { if_printf(ifp, "enable txcsum6 first.\n"); rc = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO6; } if (mask & IFCAP_LRO) { #if defined(INET) || defined(INET6) int i; struct sge_rxq *rxq; ifp->if_capenable ^= IFCAP_LRO; for_each_rxq(vi, i, rxq) { if (ifp->if_capenable & IFCAP_LRO) rxq->iq.flags |= IQ_LRO_ENABLED; else rxq->iq.flags &= ~IQ_LRO_ENABLED; } #endif } #ifdef TCP_OFFLOAD if (mask & IFCAP_TOE) { int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE; rc = toe_capability(vi, enable); if (rc != 0) goto fail; ifp->if_capenable ^= mask; } #endif if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_VLANEX); } if (mask & IFCAP_VLAN_MTU) { ifp->if_capenable ^= IFCAP_VLAN_MTU; /* Need to find out how to disable auto-mtu-inflation */ } if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWCSUM) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); #endif fail: end_synchronized_op(sc, 0); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: ifmedia_ioctl(ifp, ifr, &vi->media, cmd); break; case SIOCGI2C: { struct ifi2creq i2c; rc = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); if (rc != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { rc = EPERM; break; } if (i2c.len > sizeof(i2c.data)) { rc = EINVAL; break; } rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, vi->pi->port_id, i2c.dev_addr, i2c.offset, i2c.len, &i2c.data[0]); end_synchronized_op(sc, 0); if (rc == 0) rc = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); break; } default: rc = ether_ioctl(ifp, cmd, data); } return (rc); } static int cxgbe_transmit(struct ifnet *ifp, struct mbuf *m) { struct vi_info *vi = ifp->if_softc; struct 
port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_txq *txq; void *items[1]; int rc; M_ASSERTPKTHDR(m); MPASS(m->m_nextpkt == NULL); /* not quite ready for this yet */ if (__predict_false(pi->link_cfg.link_ok == 0)) { m_freem(m); return (ENETDOWN); } rc = parse_pkt(&m); if (__predict_false(rc != 0)) { MPASS(m == NULL); /* was freed already */ atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */ return (rc); } /* Select a txq. */ txq = &sc->sge.txq[vi->first_txq]; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) + vi->rsrv_noflowq); items[0] = m; rc = mp_ring_enqueue(txq->r, items, 1, 4096); if (__predict_false(rc != 0)) m_freem(m); return (rc); } static void cxgbe_qflush(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; struct sge_txq *txq; int i; /* queues do not exist if !VI_INIT_DONE. */ if (vi->flags & VI_INIT_DONE) { for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags &= ~EQ_ENABLED; TXQ_UNLOCK(txq); while (!mp_ring_is_idle(txq->r)) { mp_ring_check_drainage(txq->r, 0); pause("qflush", 1); } } } if_qflush(ifp); } static uint64_t vi_get_counter(struct ifnet *ifp, ift_counter c) { struct vi_info *vi = ifp->if_softc; struct fw_vi_stats_vf *s = &vi->stats; vi_refresh_stats(vi->pi->adapter, vi); switch (c) { case IFCOUNTER_IPACKETS: return (s->rx_bcast_frames + s->rx_mcast_frames + s->rx_ucast_frames); case IFCOUNTER_IERRORS: return (s->rx_err_frames); case IFCOUNTER_OPACKETS: return (s->tx_bcast_frames + s->tx_mcast_frames + s->tx_ucast_frames + s->tx_offload_frames); case IFCOUNTER_OERRORS: return (s->tx_drop_frames); case IFCOUNTER_IBYTES: return (s->rx_bcast_bytes + s->rx_mcast_bytes + s->rx_ucast_bytes); case IFCOUNTER_OBYTES: return (s->tx_bcast_bytes + s->tx_mcast_bytes + s->tx_ucast_bytes + s->tx_offload_bytes); case IFCOUNTER_IMCASTS: return (s->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (s->tx_mcast_frames); case IFCOUNTER_OQDROPS: { uint64_t drops; drops = 0; if (vi->flags & VI_INIT_DONE) { int i; struct sge_txq *txq; for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); } return (drops); } default: return (if_get_counter_default(ifp, c)); } } uint64_t cxgbe_get_counter(struct ifnet *ifp, ift_counter c) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct port_stats *s = &pi->stats; if (pi->nvi > 1) return (vi_get_counter(ifp, c)); cxgbe_refresh_stats(sc, pi); switch (c) { case IFCOUNTER_IPACKETS: return (s->rx_frames); case IFCOUNTER_IERRORS: return (s->rx_jabber + s->rx_runt + s->rx_too_long + s->rx_fcs_err + s->rx_len_err); case IFCOUNTER_OPACKETS: return (s->tx_frames); case IFCOUNTER_OERRORS: return (s->tx_error_frames); case IFCOUNTER_IBYTES: return (s->rx_octets); case IFCOUNTER_OBYTES: return (s->tx_octets); case IFCOUNTER_IMCASTS: return (s->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (s->tx_mcast_frames); case IFCOUNTER_IQDROPS: return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 + s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 + s->rx_trunc3 + pi->tnl_cong_drops); case IFCOUNTER_OQDROPS: { uint64_t drops; drops = s->tx_drop; if (vi->flags & VI_INIT_DONE) { int i; struct sge_txq *txq; for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); } return (drops); } default: return (if_get_counter_default(ifp, c)); } } static int cxgbe_media_change(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; device_printf(vi->dev, "%s unimplemented.\n", __func__); return 
(EOPNOTSUPP); } static void cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct ifmedia_entry *cur; int speed = pi->link_cfg.speed; cur = vi->media.ifm_cur; ifmr->ifm_status = IFM_AVALID; if (!pi->link_cfg.link_ok) return; ifmr->ifm_status |= IFM_ACTIVE; /* active and current will differ iff current media is autoselect. */ if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO) return; ifmr->ifm_active = IFM_ETHER | IFM_FDX; if (speed == 10000) ifmr->ifm_active |= IFM_10G_T; else if (speed == 1000) ifmr->ifm_active |= IFM_1000_T; else if (speed == 100) ifmr->ifm_active |= IFM_100_TX; else if (speed == 10) ifmr->ifm_active |= IFM_10_T; else KASSERT(0, ("%s: link up but speed unknown (%u)", __func__, speed)); } static int vcxgbe_probe(device_t dev) { char buf[128]; struct vi_info *vi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id, vi - vi->pi->vi); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } static int vcxgbe_attach(device_t dev) { struct vi_info *vi; struct port_info *pi; struct adapter *sc; int func, index, rc; u32 param, val; vi = device_get_softc(dev); pi = vi->pi; sc = pi->adapter; index = vi - pi->vi; KASSERT(index < nitems(vi_mac_funcs), ("%s: VI %s doesn't have a MAC func", __func__, device_get_nameunit(dev))); func = vi_mac_funcs[index]; rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1, vi->hw_addr, &vi->rss_size, func, 0); if (rc < 0) { device_printf(dev, "Failed to allocate virtual interface " "for port %d: %d\n", pi->port_id, -rc); return (-rc); } vi->viid = rc; param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) | V_FW_PARAMS_PARAM_YZ(vi->viid); rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc) vi->rss_base = 0xffff; else { /* MPASS((val >> 16) == rss_size); */ vi->rss_base = val & 0xffff; } rc = cxgbe_vi_attach(dev, vi); if (rc) { t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); return (rc); } return (0); } static int vcxgbe_detach(device_t dev) { struct vi_info *vi; struct adapter *sc; vi = device_get_softc(dev); sc = vi->pi->adapter; doom_vi(sc, vi); cxgbe_vi_detach(vi); t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); end_synchronized_op(sc, 0); return (0); } void t4_fatal_err(struct adapter *sc) { t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0); t4_intr_disable(sc); log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n", device_get_nameunit(sc->dev)); } static int map_bars_0_and_4(struct adapter *sc) { sc->regs_rid = PCIR_BAR(0); sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->regs_rid, RF_ACTIVE); if (sc->regs_res == NULL) { device_printf(sc->dev, "cannot map registers.\n"); return (ENXIO); } sc->bt = rman_get_bustag(sc->regs_res); sc->bh = rman_get_bushandle(sc->regs_res); sc->mmio_len = rman_get_size(sc->regs_res); setbit(&sc->doorbells, DOORBELL_KDB); sc->msix_rid = PCIR_BAR(4); sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->msix_rid, RF_ACTIVE); if (sc->msix_res == NULL) { device_printf(sc->dev, "cannot map MSI-X BAR.\n"); return (ENXIO); } return (0); } static int map_bar_2(struct adapter *sc) { /* * T4: only iWARP driver uses the userspace doorbells. There is no need * to map it if RDMA is disabled. 
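 * (So on T4 the BAR is left unmapped when sc->rdmacaps ends up zero, which is
 * the case, for example, when RDMA was disabled via the
 * hw.cxgbe.rdmacaps_allowed tunable; T5 maps it unconditionally.)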
*/ if (is_t4(sc) && sc->rdmacaps == 0) return (0); sc->udbs_rid = PCIR_BAR(2); sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->udbs_rid, RF_ACTIVE); if (sc->udbs_res == NULL) { device_printf(sc->dev, "cannot map doorbell BAR.\n"); return (ENXIO); } sc->udbs_base = rman_get_virtual(sc->udbs_res); if (is_t5(sc)) { setbit(&sc->doorbells, DOORBELL_UDB); #if defined(__i386__) || defined(__amd64__) if (t5_write_combine) { int rc; /* * Enable write combining on BAR2. This is the * userspace doorbell BAR and is split into 128B * (UDBS_SEG_SIZE) doorbell regions, each associated * with an egress queue. The first 64B has the doorbell * and the second 64B can be used to submit a tx work * request with an implicit doorbell. */ rc = pmap_change_attr((vm_offset_t)sc->udbs_base, rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING); if (rc == 0) { clrbit(&sc->doorbells, DOORBELL_UDB); setbit(&sc->doorbells, DOORBELL_WCWR); setbit(&sc->doorbells, DOORBELL_UDBWC); } else { device_printf(sc->dev, "couldn't enable write combining: %d\n", rc); } t4_write_reg(sc, A_SGE_STAT_CFG, V_STATSOURCE_T5(7) | V_STATMODE(0)); } #endif } return (0); } struct memwin_init { uint32_t base; uint32_t aperture; }; static const struct memwin_init t4_memwin[NUM_MEMWIN] = { { MEMWIN0_BASE, MEMWIN0_APERTURE }, { MEMWIN1_BASE, MEMWIN1_APERTURE }, { MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 } }; static const struct memwin_init t5_memwin[NUM_MEMWIN] = { { MEMWIN0_BASE, MEMWIN0_APERTURE }, { MEMWIN1_BASE, MEMWIN1_APERTURE }, { MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 }, }; static void setup_memwin(struct adapter *sc) { const struct memwin_init *mw_init; struct memwin *mw; int i; uint32_t bar0; if (is_t4(sc)) { /* * Read low 32b of bar0 indirectly via the hardware backdoor * mechanism. Works from within PCI passthrough environments * too, where rman_get_start() can return a different value. We * need to program the T4 memory window decoders with the actual * addresses that will be coming across the PCIe link. */ bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0)); bar0 &= (uint32_t) PCIM_BAR_MEM_BASE; mw_init = &t4_memwin[0]; } else { /* T5+ use the relative offset inside the PCIe BAR */ bar0 = 0; mw_init = &t5_memwin[0]; } for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) { rw_init(&mw->mw_lock, "memory window access"); mw->mw_base = mw_init->base; mw->mw_aperture = mw_init->aperture; mw->mw_curpos = 0; t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i), (mw->mw_base + bar0) | V_BIR(0) | V_WINDOW(ilog2(mw->mw_aperture) - 10)); rw_wlock(&mw->mw_lock); position_memwin(sc, i, 0); rw_wunlock(&mw->mw_lock); } /* flush */ t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2)); } /* * Positions the memory window at the given address in the card's address space. * There are some alignment requirements and the actual position may be at an * address prior to the requested address. mw->mw_curpos always has the actual * position of the window. 
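 * (Alignment example, per the masks below: a request for address 0x1234
 * positions a T4 window at 0x1230 (16B alignment) and a T5+ window at 0x1200
 * (128B alignment).)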
*/ static void position_memwin(struct adapter *sc, int idx, uint32_t addr) { struct memwin *mw; uint32_t pf; uint32_t reg; MPASS(idx >= 0 && idx < NUM_MEMWIN); mw = &sc->memwin[idx]; rw_assert(&mw->mw_lock, RA_WLOCKED); if (is_t4(sc)) { pf = 0; mw->mw_curpos = addr & ~0xf; /* start must be 16B aligned */ } else { pf = V_PFNUM(sc->pf); mw->mw_curpos = addr & ~0x7f; /* start must be 128B aligned */ } reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx); t4_write_reg(sc, reg, mw->mw_curpos | pf); t4_read_reg(sc, reg); /* flush */ } static int rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val, int len, int rw) { struct memwin *mw; uint32_t mw_end, v; MPASS(idx >= 0 && idx < NUM_MEMWIN); /* Memory can only be accessed in naturally aligned 4 byte units */ if (addr & 3 || len & 3 || len <= 0) return (EINVAL); mw = &sc->memwin[idx]; while (len > 0) { rw_rlock(&mw->mw_lock); mw_end = mw->mw_curpos + mw->mw_aperture; if (addr >= mw_end || addr < mw->mw_curpos) { /* Will need to reposition the window */ if (!rw_try_upgrade(&mw->mw_lock)) { rw_runlock(&mw->mw_lock); rw_wlock(&mw->mw_lock); } rw_assert(&mw->mw_lock, RA_WLOCKED); position_memwin(sc, idx, addr); rw_downgrade(&mw->mw_lock); mw_end = mw->mw_curpos + mw->mw_aperture; } rw_assert(&mw->mw_lock, RA_RLOCKED); while (addr < mw_end && len > 0) { if (rw == 0) { v = t4_read_reg(sc, mw->mw_base + addr - mw->mw_curpos); *val++ = le32toh(v); } else { v = *val++; t4_write_reg(sc, mw->mw_base + addr - mw->mw_curpos, htole32(v)); } addr += 4; len -= 4; } rw_runlock(&mw->mw_lock); } return (0); } static inline int read_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val, int len) { return (rw_via_memwin(sc, idx, addr, val, len, 0)); } static inline int write_via_memwin(struct adapter *sc, int idx, uint32_t addr, const uint32_t *val, int len) { return (rw_via_memwin(sc, idx, addr, (void *)(uintptr_t)val, len, 1)); } static int t4_range_cmp(const void *a, const void *b) { return ((const struct t4_range *)a)->start - ((const struct t4_range *)b)->start; } /* * Verify that the memory range specified by the addr/len pair is valid within * the card's address space. 
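 * (Both addr and len must be multiples of 4 and len must be positive; an
 * unaligned request such as addr = 0x1002 is rejected with EINVAL before any
 * range checks are made.)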
*/ static int validate_mem_range(struct adapter *sc, uint32_t addr, int len) { struct t4_range mem_ranges[4], *r, *next; uint32_t em, addr_len; int i, n, remaining; /* Memory can only be accessed in naturally aligned 4 byte units */ if (addr & 3 || len & 3 || len <= 0) return (EINVAL); /* Enabled memories */ em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); r = &mem_ranges[0]; n = 0; bzero(r, sizeof(mem_ranges)); if (em & F_EDRAM0_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR); r->size = G_EDRAM0_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EDRAM0_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (em & F_EDRAM1_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR); r->size = G_EDRAM1_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EDRAM1_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (em & F_EXT_MEM_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); r->size = G_EXT_MEM_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EXT_MEM_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); r->size = G_EXT_MEM1_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EXT_MEM1_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } MPASS(n <= nitems(mem_ranges)); if (n > 1) { /* Sort and merge the ranges. */ qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp); /* Start from index 0 and examine the next n - 1 entries. */ r = &mem_ranges[0]; for (remaining = n - 1; remaining > 0; remaining--, r++) { MPASS(r->size > 0); /* r is a valid entry. */ next = r + 1; MPASS(next->size > 0); /* and so is the next one. */ while (r->start + r->size >= next->start) { /* Merge the next one into the current entry. */ r->size = max(r->start + r->size, next->start + next->size) - r->start; n--; /* One fewer entry in total. */ if (--remaining == 0) goto done; /* short circuit */ next++; } if (next != r + 1) { /* * Some entries were merged into r and next * points to the first valid entry that couldn't * be merged. */ MPASS(next->size > 0); /* must be valid */ memcpy(r + 1, next, remaining * sizeof(*r)); #ifdef INVARIANTS /* * This so that the foo->size assertion in the * next iteration of the loop do the right * thing for entries that were pulled up and are * no longer valid. */ MPASS(n < nitems(mem_ranges)); bzero(&mem_ranges[n], (nitems(mem_ranges) - n) * sizeof(struct t4_range)); #endif } } done: /* Done merging the ranges. */ MPASS(n > 0); r = &mem_ranges[0]; for (i = 0; i < n; i++, r++) { if (addr >= r->start && addr + len <= r->start + r->size) return (0); } } return (EFAULT); } static int fwmtype_to_hwmtype(int mtype) { switch (mtype) { case FW_MEMTYPE_EDC0: return (MEM_EDC0); case FW_MEMTYPE_EDC1: return (MEM_EDC1); case FW_MEMTYPE_EXTMEM: return (MEM_MC0); case FW_MEMTYPE_EXTMEM1: return (MEM_MC1); default: panic("%s: cannot translate fw mtype %d.", __func__, mtype); } } /* * Verify that the memory range specified by the memtype/offset/len pair is * valid and lies entirely within the memtype specified. The global address of * the start of the range is returned in addr. 
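 * (The base parsed out of the corresponding BAR register is in 1MB units, so
 * the << 20 below converts it to a byte address; a base field of 1, for
 * example, corresponds to byte address 0x100000.)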
*/ static int validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, int len, uint32_t *addr) { uint32_t em, addr_len, maddr; /* Memory can only be accessed in naturally aligned 4 byte units */ if (off & 3 || len & 3 || len == 0) return (EINVAL); em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); switch (fwmtype_to_hwmtype(mtype)) { case MEM_EDC0: if (!(em & F_EDRAM0_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR); maddr = G_EDRAM0_BASE(addr_len) << 20; break; case MEM_EDC1: if (!(em & F_EDRAM1_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR); maddr = G_EDRAM1_BASE(addr_len) << 20; break; case MEM_MC: if (!(em & F_EXT_MEM_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); maddr = G_EXT_MEM_BASE(addr_len) << 20; break; case MEM_MC1: if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); maddr = G_EXT_MEM1_BASE(addr_len) << 20; break; default: return (EINVAL); } *addr = maddr + off; /* global address */ return (validate_mem_range(sc, *addr, len)); } static int fixup_devlog_params(struct adapter *sc) { struct devlog_params *dparams = &sc->params.devlog; int rc; rc = validate_mt_off_len(sc, dparams->memtype, dparams->start, dparams->size, &dparams->addr); return (rc); } static int cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, int num_vis, struct intrs_and_queues *iaq) { int rc, itype, navail, nrxq10g, nrxq1g, n; int nofldrxq10g = 0, nofldrxq1g = 0; bzero(iaq, sizeof(*iaq)); iaq->ntxq10g = t4_ntxq10g; iaq->ntxq1g = t4_ntxq1g; iaq->ntxq_vi = t4_ntxq_vi; iaq->nrxq10g = nrxq10g = t4_nrxq10g; iaq->nrxq1g = nrxq1g = t4_nrxq1g; iaq->nrxq_vi = t4_nrxq_vi; iaq->rsrv_noflowq = t4_rsrv_noflowq; #ifdef TCP_OFFLOAD if (is_offload(sc)) { iaq->nofldtxq10g = t4_nofldtxq10g; iaq->nofldtxq1g = t4_nofldtxq1g; iaq->nofldtxq_vi = t4_nofldtxq_vi; iaq->nofldrxq10g = nofldrxq10g = t4_nofldrxq10g; iaq->nofldrxq1g = nofldrxq1g = t4_nofldrxq1g; iaq->nofldrxq_vi = t4_nofldrxq_vi; } #endif #ifdef DEV_NETMAP iaq->nnmtxq_vi = t4_nnmtxq_vi; iaq->nnmrxq_vi = t4_nnmrxq_vi; #endif for (itype = INTR_MSIX; itype; itype >>= 1) { if ((itype & t4_intr_types) == 0) continue; /* not allowed */ if (itype == INTR_MSIX) navail = pci_msix_count(sc->dev); else if (itype == INTR_MSI) navail = pci_msi_count(sc->dev); else navail = 1; restart: if (navail == 0) continue; iaq->intr_type = itype; iaq->intr_flags_10g = 0; iaq->intr_flags_1g = 0; /* * Best option: an interrupt vector for errors, one for the * firmware event queue, and one for every rxq (NIC and TOE) of * every VI. The VIs that support netmap use the same * interrupts for the NIC rx queues and the netmap rx queues * because only one set of queues is active at a time. */ iaq->nirq = T4_EXTRA_INTR; iaq->nirq += n10g * (nrxq10g + nofldrxq10g); iaq->nirq += n1g * (nrxq1g + nofldrxq1g); iaq->nirq += (n10g + n1g) * (num_vis - 1) * max(iaq->nrxq_vi, iaq->nnmrxq_vi); /* See comment above. 
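 * (Worked example, assuming the compile-time defaults above with two 10G
 * ports, TOE enabled and num_vis = 1: nrxq10g = 8 and nofldrxq10g = 2 give
 * nirq = T4_EXTRA_INTR + 2 * (8 + 2).)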
*/ iaq->nirq += (n10g + n1g) * (num_vis - 1) * iaq->nofldrxq_vi; if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { iaq->intr_flags_10g = INTR_ALL; iaq->intr_flags_1g = INTR_ALL; goto allocate; } /* Disable the VIs (and netmap) if there aren't enough intrs */ if (num_vis > 1) { device_printf(sc->dev, "virtual interfaces disabled " "because num_vis=%u with current settings " "(nrxq10g=%u, nrxq1g=%u, nofldrxq10g=%u, " "nofldrxq1g=%u, nrxq_vi=%u nofldrxq_vi=%u, " "nnmrxq_vi=%u) would need %u interrupts but " "only %u are available.\n", num_vis, nrxq10g, nrxq1g, nofldrxq10g, nofldrxq1g, iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi, iaq->nirq, navail); num_vis = 1; iaq->ntxq_vi = iaq->nrxq_vi = 0; iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0; iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0; goto restart; } /* * Second best option: a vector for errors, one for the firmware * event queue, and vectors for either all the NIC rx queues or * all the TOE rx queues. The queues that don't get vectors * will forward their interrupts to those that do. */ iaq->nirq = T4_EXTRA_INTR; if (nrxq10g >= nofldrxq10g) { iaq->intr_flags_10g = INTR_RXQ; iaq->nirq += n10g * nrxq10g; } else { iaq->intr_flags_10g = INTR_OFLD_RXQ; iaq->nirq += n10g * nofldrxq10g; } if (nrxq1g >= nofldrxq1g) { iaq->intr_flags_1g = INTR_RXQ; iaq->nirq += n1g * nrxq1g; } else { iaq->intr_flags_1g = INTR_OFLD_RXQ; iaq->nirq += n1g * nofldrxq1g; } if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) goto allocate; /* * Next best option: an interrupt vector for errors, one for the * firmware event queue, and at least one per main-VI. At this * point we know we'll have to downsize nrxq and/or nofldrxq to * fit what's available to us. */ iaq->nirq = T4_EXTRA_INTR; iaq->nirq += n10g + n1g; if (iaq->nirq <= navail) { int leftover = navail - iaq->nirq; if (n10g > 0) { int target = max(nrxq10g, nofldrxq10g); iaq->intr_flags_10g = nrxq10g >= nofldrxq10g ? INTR_RXQ : INTR_OFLD_RXQ; n = 1; while (n < target && leftover >= n10g) { leftover -= n10g; iaq->nirq += n10g; n++; } iaq->nrxq10g = min(n, nrxq10g); #ifdef TCP_OFFLOAD iaq->nofldrxq10g = min(n, nofldrxq10g); #endif } if (n1g > 0) { int target = max(nrxq1g, nofldrxq1g); iaq->intr_flags_1g = nrxq1g >= nofldrxq1g ? INTR_RXQ : INTR_OFLD_RXQ; n = 1; while (n < target && leftover >= n1g) { leftover -= n1g; iaq->nirq += n1g; n++; } iaq->nrxq1g = min(n, nrxq1g); #ifdef TCP_OFFLOAD iaq->nofldrxq1g = min(n, nofldrxq1g); #endif } if (itype != INTR_MSI || powerof2(iaq->nirq)) goto allocate; } /* * Least desirable option: one interrupt vector for everything. */ iaq->nirq = iaq->nrxq10g = iaq->nrxq1g = 1; iaq->intr_flags_10g = iaq->intr_flags_1g = 0; #ifdef TCP_OFFLOAD if (is_offload(sc)) iaq->nofldrxq10g = iaq->nofldrxq1g = 1; #endif allocate: navail = iaq->nirq; rc = 0; if (itype == INTR_MSIX) rc = pci_alloc_msix(sc->dev, &navail); else if (itype == INTR_MSI) rc = pci_alloc_msi(sc->dev, &navail); if (rc == 0) { if (navail == iaq->nirq) return (0); /* * Didn't get the number requested. Use whatever number * the kernel is willing to allocate (it's in navail). */ device_printf(sc->dev, "fewer vectors than requested, " "type=%d, req=%d, rcvd=%d; will downshift req.\n", itype, iaq->nirq, navail); pci_release_msi(sc->dev); goto restart; } device_printf(sc->dev, "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n", itype, rc, iaq->nirq, navail); } device_printf(sc->dev, "failed to find a usable interrupt type. 
" "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types, pci_msix_count(sc->dev), pci_msi_count(sc->dev)); return (ENXIO); } #define FW_VERSION(chip) ( \ V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \ V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \ V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \ V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD)) #define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf) struct fw_info { uint8_t chip; char *kld_name; char *fw_mod_name; struct fw_hdr fw_hdr; /* XXX: waste of space, need a sparse struct */ } fw_info[] = { { .chip = CHELSIO_T4, .kld_name = "t4fw_cfg", .fw_mod_name = "t4fw", .fw_hdr = { .chip = FW_HDR_CHIP_T4, .fw_ver = htobe32_const(FW_VERSION(T4)), .intfver_nic = FW_INTFVER(T4, NIC), .intfver_vnic = FW_INTFVER(T4, VNIC), .intfver_ofld = FW_INTFVER(T4, OFLD), .intfver_ri = FW_INTFVER(T4, RI), .intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T4, ISCSI), .intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU), .intfver_fcoe = FW_INTFVER(T4, FCOE), }, }, { .chip = CHELSIO_T5, .kld_name = "t5fw_cfg", .fw_mod_name = "t5fw", .fw_hdr = { .chip = FW_HDR_CHIP_T5, .fw_ver = htobe32_const(FW_VERSION(T5)), .intfver_nic = FW_INTFVER(T5, NIC), .intfver_vnic = FW_INTFVER(T5, VNIC), .intfver_ofld = FW_INTFVER(T5, OFLD), .intfver_ri = FW_INTFVER(T5, RI), .intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T5, ISCSI), .intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU), .intfver_fcoe = FW_INTFVER(T5, FCOE), }, } }; static struct fw_info * find_fw_info(int chip) { int i; for (i = 0; i < nitems(fw_info); i++) { if (fw_info[i].chip == chip) return (&fw_info[i]); } return (NULL); } /* * Is the given firmware API compatible with the one the driver was compiled * with? */ static int fw_compatible(const struct fw_hdr *hdr1, const struct fw_hdr *hdr2) { /* short circuit if it's the exact same firmware version */ if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver) return (1); /* * XXX: Is this too conservative? Perhaps I should limit this to the * features that are supported in the driver. */ #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x) if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) && SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) && SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe)) return (1); #undef SAME_INTF return (0); } /* * The firmware in the KLD is usable, but should it be installed? This routine * explains itself in detail if it indicates the KLD firmware should be * installed. 
*/ static int should_install_kld_fw(struct adapter *sc, int card_fw_usable, int k, int c) { const char *reason; if (!card_fw_usable) { reason = "incompatible or unusable"; goto install; } if (k > c) { reason = "older than the version bundled with this driver"; goto install; } if (t4_fw_install == 2 && k != c) { reason = "different than the version bundled with this driver"; goto install; } return (0); install: if (t4_fw_install == 0) { device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "but the driver is prohibited from installing a different " "firmware on the card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason); return (0); } device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "installing firmware %u.%u.%u.%u on card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason, G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); return (1); } /* * Establish contact with the firmware and determine if we are the master driver * or not, and whether we are responsible for chip initialization. */ static int prep_firmware(struct adapter *sc) { const struct firmware *fw = NULL, *default_cfg; int rc, pf, card_fw_usable, kld_fw_usable, need_fw_reset = 1; enum dev_state state; struct fw_info *fw_info; struct fw_hdr *card_fw; /* fw on the card */ const struct fw_hdr *kld_fw; /* fw in the KLD */ const struct fw_hdr *drv_fw; /* fw header the driver was compiled against */ /* Contact firmware. */ rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state); if (rc < 0 || state == DEV_STATE_ERR) { rc = -rc; device_printf(sc->dev, "failed to connect to the firmware: %d, %d.\n", rc, state); return (rc); } pf = rc; if (pf == sc->mbox) sc->flags |= MASTER_PF; else if (state == DEV_STATE_UNINIT) { /* * We didn't get to be the master so we definitely won't be * configuring the chip. It's a bug if someone else hasn't * configured it already. */ device_printf(sc->dev, "couldn't be master(%d), " "device not already initialized either(%d).\n", rc, state); return (EDOOFUS); } /* This is the firmware whose headers the driver was compiled against */ fw_info = find_fw_info(chip_id(sc)); if (fw_info == NULL) { device_printf(sc->dev, "unable to look up firmware information for chip %d.\n", chip_id(sc)); return (EINVAL); } drv_fw = &fw_info->fw_hdr; /* * The firmware KLD contains many modules. The KLD name is also the * name of the module that contains the default config file. */ default_cfg = firmware_get(fw_info->kld_name); /* Read the header of the firmware on the card */ card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_read_flash(sc, FLASH_FW_START, sizeof (*card_fw) / sizeof (uint32_t), (uint32_t *)card_fw, 1); if (rc == 0) card_fw_usable = fw_compatible(drv_fw, (const void*)card_fw); else { device_printf(sc->dev, "Unable to read card's firmware header: %d\n", rc); card_fw_usable = 0; } /* This is the firmware in the KLD */ fw = firmware_get(fw_info->fw_mod_name); if (fw != NULL) { kld_fw = (const void *)fw->data; kld_fw_usable = fw_compatible(drv_fw, kld_fw); } else { kld_fw = NULL; kld_fw_usable = 0; } if (card_fw_usable && card_fw->fw_ver == drv_fw->fw_ver && (!kld_fw_usable || kld_fw->fw_ver == drv_fw->fw_ver)) { /* * Common case: the firmware on the card is an exact match and * the KLD is an exact match too, or the KLD is * absent/incompatible. 
Note that t4_fw_install = 2 is ignored * here -- use cxgbetool loadfw if you want to reinstall the * same firmware as the one on the card. */ } else if (kld_fw_usable && state == DEV_STATE_UNINIT && should_install_kld_fw(sc, card_fw_usable, be32toh(kld_fw->fw_ver), be32toh(card_fw->fw_ver))) { rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0); if (rc != 0) { device_printf(sc->dev, "failed to install firmware: %d\n", rc); goto done; } /* Installed successfully, update the cached header too. */ memcpy(card_fw, kld_fw, sizeof(*card_fw)); card_fw_usable = 1; need_fw_reset = 0; /* already reset as part of load_fw */ } if (!card_fw_usable) { uint32_t d, c, k; d = ntohl(drv_fw->fw_ver); c = ntohl(card_fw->fw_ver); k = kld_fw ? ntohl(kld_fw->fw_ver) : 0; device_printf(sc->dev, "Cannot find a usable firmware: " "fw_install %d, chip state %d, " "driver compiled with %d.%d.%d.%d, " "card has %d.%d.%d.%d, KLD has %d.%d.%d.%d\n", t4_fw_install, state, G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d), G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d), G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); rc = EINVAL; goto done; } /* We're using whatever's on the card and it's known to be good. */ sc->params.fw_vers = ntohl(card_fw->fw_ver); snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers), G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers)); t4_get_tp_version(sc, &sc->params.tp_vers); snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers), G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers), G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers), G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers)); if (t4_get_exprom_version(sc, &sc->params.exprom_vers) != 0) sc->params.exprom_vers = 0; else { snprintf(sc->exprom_version, sizeof(sc->exprom_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.exprom_vers), G_FW_HDR_FW_VER_MINOR(sc->params.exprom_vers), G_FW_HDR_FW_VER_MICRO(sc->params.exprom_vers), G_FW_HDR_FW_VER_BUILD(sc->params.exprom_vers)); } /* Reset device */ if (need_fw_reset && (rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST)) != 0) { device_printf(sc->dev, "firmware reset failed: %d.\n", rc); if (rc != ETIMEDOUT && rc != EIO) t4_fw_bye(sc, sc->mbox); goto done; } sc->flags |= FW_OK; rc = get_params__pre_init(sc); if (rc != 0) goto done; /* error message displayed already */ /* Partition adapter resources as specified in the config file. 
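* Only the master PF does this, and only while the chip is still uninitialized; any other PF just records which PF's configuration is already in effect.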
*/ if (state == DEV_STATE_UNINIT) { KASSERT(sc->flags & MASTER_PF, ("%s: trying to change chip settings when not master.", __func__)); rc = partition_resources(sc, default_cfg, fw_info->kld_name); if (rc != 0) goto done; /* error message displayed already */ t4_tweak_chip_settings(sc); /* get basic stuff going */ rc = -t4_fw_initialize(sc, sc->mbox); if (rc != 0) { device_printf(sc->dev, "fw init failed: %d.\n", rc); goto done; } } else { snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", pf); sc->cfcsum = 0; } done: free(card_fw, M_CXGBE); if (fw != NULL) firmware_put(fw, FIRMWARE_UNLOAD); if (default_cfg != NULL) firmware_put(default_cfg, FIRMWARE_UNLOAD); return (rc); } #define FW_PARAM_DEV(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param)) #define FW_PARAM_PFVF(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)) /* * Partition chip resources for use between various PFs, VFs, etc. */ static int partition_resources(struct adapter *sc, const struct firmware *default_cfg, const char *name_prefix) { const struct firmware *cfg = NULL; int rc = 0; struct fw_caps_config_cmd caps; uint32_t mtype, moff, finicsum, cfcsum; /* * Figure out what configuration file to use. Pick the default config * file for the card if the user hasn't specified one explicitly. */ snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", t4_cfg_file); if (strncmp(t4_cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) { /* Card specific overrides go here. */ if (pci_get_device(sc->dev) == 0x440a) snprintf(sc->cfg_file, sizeof(sc->cfg_file), UWIRE_CF); if (is_fpga(sc)) snprintf(sc->cfg_file, sizeof(sc->cfg_file), FPGA_CF); } /* * We need to load another module if the profile is anything except * "default" or "flash". */ if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) != 0 && strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { char s[32]; snprintf(s, sizeof(s), "%s_%s", name_prefix, sc->cfg_file); cfg = firmware_get(s); if (cfg == NULL) { if (default_cfg != NULL) { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the default config file instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", DEFAULT_CF); } else { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the config file on the card's flash " "instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } } } if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) == 0 && default_cfg == NULL) { device_printf(sc->dev, "default config file not available, will use the config " "file on the card's flash instead.\n"); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } if (strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { u_int cflen; const uint32_t *cfdata; uint32_t param, val, addr; KASSERT(cfg != NULL || default_cfg != NULL, ("%s: no config to upload", __func__)); /* * Ask the firmware where it wants us to upload the config file. */ param = FW_PARAM_DEV(CF); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val); if (rc != 0) { /* No support for config file? Shouldn't happen. */ device_printf(sc->dev, "failed to query config file location: %d.\n", rc); goto done; } mtype = G_FW_PARAMS_PARAM_Y(val); moff = G_FW_PARAMS_PARAM_Z(val) << 16; /* * XXX: sheer laziness. 
We deliberately added 4 bytes of * useless stuffing/comments at the end of the config file so * it's ok to simply throw away the last remaining bytes when * the config file is not an exact multiple of 4. This also * helps with the validate_mt_off_len check. */ if (cfg != NULL) { cflen = cfg->datasize & ~3; cfdata = cfg->data; } else { cflen = default_cfg->datasize & ~3; cfdata = default_cfg->data; } if (cflen > FLASH_CFG_MAX_SIZE) { device_printf(sc->dev, "config file too long (%d, max allowed is %d). " "Will try to use the config on the card, if any.\n", cflen, FLASH_CFG_MAX_SIZE); goto use_config_on_flash; } rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr); if (rc != 0) { device_printf(sc->dev, "%s: addr (%d/0x%x) or len %d is not valid: %d. " "Will try to use the config on the card, if any.\n", __func__, mtype, moff, cflen, rc); goto use_config_on_flash; } write_via_memwin(sc, 2, addr, cfdata, cflen); } else { use_config_on_flash: mtype = FW_MEMTYPE_FLASH; moff = t4_flash_cfg_addr(sc); } bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID | V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) | V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) | FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to pre-process config file: %d " "(mtype %d, moff 0x%x).\n", rc, mtype, moff); goto done; } finicsum = be32toh(caps.finicsum); cfcsum = be32toh(caps.cfcsum); if (finicsum != cfcsum) { device_printf(sc->dev, "WARNING: config file checksum mismatch: %08x %08x\n", finicsum, cfcsum); } sc->cfcsum = cfcsum; #define LIMIT_CAPS(x) do { \ caps.x &= htobe16(t4_##x##_allowed); \ } while (0) /* * Let the firmware know what features will (not) be used so it can tune * things accordingly. */ LIMIT_CAPS(nbmcaps); LIMIT_CAPS(linkcaps); LIMIT_CAPS(switchcaps); LIMIT_CAPS(niccaps); LIMIT_CAPS(toecaps); LIMIT_CAPS(rdmacaps); LIMIT_CAPS(tlscaps); LIMIT_CAPS(iscsicaps); LIMIT_CAPS(fcoecaps); #undef LIMIT_CAPS caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL); if (rc != 0) { device_printf(sc->dev, "failed to process config file: %d.\n", rc); } done: if (cfg != NULL) firmware_put(cfg, FIRMWARE_UNLOAD); return (rc); } /* * Retrieve parameters that are needed (or nice to have) very early. */ static int get_params__pre_init(struct adapter *sc) { int rc; uint32_t param[2], val[2]; param[0] = FW_PARAM_DEV(PORTVEC); param[1] = FW_PARAM_DEV(CCLK); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (pre_init): %d.\n", rc); return (rc); } sc->params.portvec = val[0]; sc->params.nports = bitcount32(val[0]); sc->params.vpd.cclk = val[1]; /* Read device log parameters. */ rc = -t4_init_devlog_params(sc, 1); if (rc == 0) fixup_devlog_params(sc); else { device_printf(sc->dev, "failed to get devlog parameters: %d.\n", rc); rc = 0; /* devlog isn't critical for device operation */ } return (rc); } /* * Retrieve various parameters that are of interest to the driver. The device * has been initialized by the firmware at this point. 
*/ static int get_params__post_init(struct adapter *sc) { int rc; uint32_t param[7], val[7]; struct fw_caps_config_cmd caps; param[0] = FW_PARAM_PFVF(IQFLINT_START); param[1] = FW_PARAM_PFVF(EQ_START); param[2] = FW_PARAM_PFVF(FILTER_START); param[3] = FW_PARAM_PFVF(FILTER_END); param[4] = FW_PARAM_PFVF(L2T_START); param[5] = FW_PARAM_PFVF(L2T_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (post_init): %d.\n", rc); return (rc); } sc->sge.iq_start = val[0]; sc->sge.eq_start = val[1]; sc->tids.ftid_base = val[2]; sc->tids.nftids = val[3] - val[2] + 1; sc->params.ftid_min = val[2]; sc->params.ftid_max = val[3]; sc->vres.l2t.start = val[4]; sc->vres.l2t.size = val[5] - val[4] + 1; KASSERT(sc->vres.l2t.size <= L2T_SIZE, ("%s: L2 table size (%u) larger than expected (%u)", __func__, sc->vres.l2t.size, L2T_SIZE)); /* get capabilites */ bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to get card capabilities: %d.\n", rc); return (rc); } #define READ_CAPS(x) do { \ sc->x = htobe16(caps.x); \ } while (0) READ_CAPS(nbmcaps); READ_CAPS(linkcaps); READ_CAPS(switchcaps); READ_CAPS(niccaps); READ_CAPS(toecaps); READ_CAPS(rdmacaps); READ_CAPS(tlscaps); READ_CAPS(iscsicaps); READ_CAPS(fcoecaps); if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) { param[0] = FW_PARAM_PFVF(ETHOFLD_START); param[1] = FW_PARAM_PFVF(ETHOFLD_END); param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query NIC parameters: %d.\n", rc); return (rc); } sc->tids.etid_base = val[0]; sc->params.etid_min = val[0]; sc->tids.netids = val[1] - val[0] + 1; sc->params.netids = sc->tids.netids; sc->params.eo_wr_cred = val[2]; sc->params.ethoffload = 1; } if (sc->toecaps) { /* query offload-related parameters */ param[0] = FW_PARAM_DEV(NTID); param[1] = FW_PARAM_PFVF(SERVER_START); param[2] = FW_PARAM_PFVF(SERVER_END); param[3] = FW_PARAM_PFVF(TDDP_START); param[4] = FW_PARAM_PFVF(TDDP_END); param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query TOE parameters: %d.\n", rc); return (rc); } sc->tids.ntids = val[0]; sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS); sc->tids.stid_base = val[1]; sc->tids.nstids = val[2] - val[1] + 1; sc->vres.ddp.start = val[3]; sc->vres.ddp.size = val[4] - val[3] + 1; sc->params.ofldq_wr_cred = val[5]; sc->params.offload = 1; } if (sc->rdmacaps) { param[0] = FW_PARAM_PFVF(STAG_START); param[1] = FW_PARAM_PFVF(STAG_END); param[2] = FW_PARAM_PFVF(RQ_START); param[3] = FW_PARAM_PFVF(RQ_END); param[4] = FW_PARAM_PFVF(PBL_START); param[5] = FW_PARAM_PFVF(PBL_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(1): %d.\n", rc); return (rc); } sc->vres.stag.start = val[0]; sc->vres.stag.size = val[1] - val[0] + 1; sc->vres.rq.start = val[2]; sc->vres.rq.size = val[3] - val[2] + 1; sc->vres.pbl.start = val[4]; sc->vres.pbl.size = val[5] - val[4] + 1; param[0] = FW_PARAM_PFVF(SQRQ_START); param[1] = FW_PARAM_PFVF(SQRQ_END); param[2] = FW_PARAM_PFVF(CQ_START); param[3] = FW_PARAM_PFVF(CQ_END); param[4] = 
FW_PARAM_PFVF(OCQ_START); param[5] = FW_PARAM_PFVF(OCQ_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(2): %d.\n", rc); return (rc); } sc->vres.qp.start = val[0]; sc->vres.qp.size = val[1] - val[0] + 1; sc->vres.cq.start = val[2]; sc->vres.cq.size = val[3] - val[2] + 1; sc->vres.ocq.start = val[4]; sc->vres.ocq.size = val[5] - val[4] + 1; } if (sc->iscsicaps) { param[0] = FW_PARAM_PFVF(ISCSI_START); param[1] = FW_PARAM_PFVF(ISCSI_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query iSCSI parameters: %d.\n", rc); return (rc); } sc->vres.iscsi.start = val[0]; sc->vres.iscsi.size = val[1] - val[0] + 1; } /* * We've got the params we wanted to query via the firmware. Now grab * some others directly from the chip. */ rc = t4_read_chip_settings(sc); return (rc); } static int set_params__post_init(struct adapter *sc) { uint32_t param, val; /* ask for encapsulated CPLs */ param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP); val = 1; (void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val); return (0); } #undef FW_PARAM_PFVF #undef FW_PARAM_DEV static void t4_set_desc(struct adapter *sc) { char buf[128]; struct adapter_params *p = &sc->params; snprintf(buf, sizeof(buf), "Chelsio %s %sNIC (rev %d), S/N:%s, " "P/N:%s, E/C:%s", p->vpd.id, is_offload(sc) ? "R" : "", chip_rev(sc), p->vpd.sn, p->vpd.pn, p->vpd.ec); device_set_desc_copy(sc->dev, buf); } static void build_medialist(struct port_info *pi, struct ifmedia *media) { int m; PORT_LOCK(pi); ifmedia_removeall(media); m = IFM_ETHER | IFM_FDX; switch(pi->port_type) { case FW_PORT_TYPE_BT_XFI: case FW_PORT_TYPE_BT_XAUI: ifmedia_add(media, m | IFM_10G_T, 0, NULL); /* fall through */ case FW_PORT_TYPE_BT_SGMII: ifmedia_add(media, m | IFM_1000_T, 0, NULL); ifmedia_add(media, m | IFM_100_TX, 0, NULL); ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(media, IFM_ETHER | IFM_AUTO); break; case FW_PORT_TYPE_CX4: ifmedia_add(media, m | IFM_10G_CX4, 0, NULL); ifmedia_set(media, m | IFM_10G_CX4); break; case FW_PORT_TYPE_QSFP_10G: case FW_PORT_TYPE_SFP: case FW_PORT_TYPE_FIBER_XFI: case FW_PORT_TYPE_FIBER_XAUI: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_10G_LR, 0, NULL); ifmedia_set(media, m | IFM_10G_LR); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_10G_SR, 0, NULL); ifmedia_set(media, m | IFM_10G_SR); break; case FW_PORT_MOD_TYPE_LRM: ifmedia_add(media, m | IFM_10G_LRM, 0, NULL); ifmedia_set(media, m | IFM_10G_LRM); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_10G_TWINAX, 0, NULL); ifmedia_set(media, m | IFM_10G_TWINAX); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; case FW_PORT_MOD_TYPE_NA: case FW_PORT_MOD_TYPE_ER: default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; case FW_PORT_TYPE_QSFP: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_40G_LR4, 0, NULL); ifmedia_set(media, m | IFM_40G_LR4); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_40G_SR4, 0, NULL); ifmedia_set(media, m | IFM_40G_SR4); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_40G_CR4, 
0, NULL); ifmedia_set(media, m | IFM_40G_CR4); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } PORT_UNLOCK(pi); } #define FW_MAC_EXACT_CHUNK 7 /* * Program the port's XGMAC based on parameters in ifnet. The caller also * indicates which parameters should be programmed (the rest are left alone). */ int update_mac_settings(struct ifnet *ifp, int flags) { int rc = 0; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1; ASSERT_SYNCHRONIZED_OP(sc); KASSERT(flags, ("%s: not told what to update.", __func__)); if (flags & XGMAC_MTU) mtu = ifp->if_mtu; if (flags & XGMAC_PROMISC) promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0; if (flags & XGMAC_ALLMULTI) allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0; if (flags & XGMAC_VLANEX) vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0; if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) { rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc, allmulti, 1, vlanex, false); if (rc) { if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags, rc); return (rc); } } if (flags & XGMAC_UCADDR) { uint8_t ucaddr[ETHER_ADDR_LEN]; bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr)); rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt, ucaddr, true, true); if (rc < 0) { rc = -rc; if_printf(ifp, "change_mac failed: %d\n", rc); return (rc); } else { vi->xact_addr_filt = rc; rc = 0; } } if (flags & XGMAC_MCADDRS) { const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK]; int del = 1; uint64_t hash = 0; struct ifmultiaddr *ifma; int i = 0, j; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mcaddr[i] = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); MPASS(ETHER_IS_MULTICAST(mcaddr[i])); i++; if (i == FW_MAC_EXACT_CHUNK) { rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } del = 0; i = 0; } } if (i > 0) { rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } } rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0); if (rc != 0) if_printf(ifp, "failed to set mc address hash: %d", rc); mcfail: if_maddr_runlock(ifp); } return (rc); } /* * {begin|end}_synchronized_op must be called from the same thread. */ int begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags, char *wmesg) { int rc, pri; #ifdef WITNESS /* the caller thinks it's ok to sleep, but is it really? 
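* (under WITNESS this warns if the thread holds any non-sleepable lock, Giant excepted, at this point)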
*/ if (flags & SLEEP_OK) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "begin_synchronized_op"); #endif if (INTR_OK) pri = PCATCH; else pri = 0; ADAPTER_LOCK(sc); for (;;) { if (vi && IS_DOOMED(vi)) { rc = ENXIO; goto done; } if (!IS_BUSY(sc)) { rc = 0; break; } if (!(flags & SLEEP_OK)) { rc = EBUSY; goto done; } if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) { rc = EINTR; goto done; } } KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = wmesg; sc->last_op_thr = curthread; sc->last_op_flags = flags; #endif done: if (!(flags & HOLD_LOCK) || rc) ADAPTER_UNLOCK(sc); return (rc); } /* * Tell if_ioctl and if_init that the VI is going away. This is * special variant of begin_synchronized_op and must be paired with a * call to end_synchronized_op. */ void doom_vi(struct adapter *sc, struct vi_info *vi) { ADAPTER_LOCK(sc); SET_DOOMED(vi); wakeup(&sc->flags); while (IS_BUSY(sc)) mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = "t4detach"; sc->last_op_thr = curthread; sc->last_op_flags = 0; #endif ADAPTER_UNLOCK(sc); } /* * {begin|end}_synchronized_op must be called from the same thread. */ void end_synchronized_op(struct adapter *sc, int flags) { if (flags & LOCK_HELD) ADAPTER_LOCK_ASSERT_OWNED(sc); else ADAPTER_LOCK(sc); KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); wakeup(&sc->flags); ADAPTER_UNLOCK(sc); } static int cxgbe_init_synchronized(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; int rc = 0, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return (0); /* already running */ if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_full_init(sc)) != 0)) return (rc); /* error message displayed already */ if (!(vi->flags & VI_INIT_DONE) && ((rc = vi_full_init(vi)) != 0)) return (rc); /* error message displayed already */ rc = update_mac_settings(ifp, XGMAC_ALL); if (rc) goto done; /* error message displayed already */ rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true); if (rc != 0) { if_printf(ifp, "enable_vi failed: %d\n", rc); goto done; } /* * Can't fail from this point onwards. Review cxgbe_uninit_synchronized * if this changes. */ for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags |= EQ_ENABLED; TXQ_UNLOCK(txq); } /* * The first iq of the first port to come up is used for tracing. */ if (sc->traceq < 0 && IS_MAIN_VI(vi)) { sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id; t4_write_reg(sc, is_t4(sc) ? A_MPS_TRC_RSS_CONTROL : A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) | V_QUEUENUMBER(sc->traceq)); pi->flags |= HAS_TRACEQ; } /* all ok */ PORT_LOCK(pi); ifp->if_drv_flags |= IFF_DRV_RUNNING; pi->up_vis++; if (pi->nvi > 1) callout_reset(&vi->tick, hz, vi_tick, vi); else callout_reset(&pi->tick, hz, cxgbe_tick, pi); PORT_UNLOCK(pi); done: if (rc != 0) cxgbe_uninit_synchronized(vi); return (rc); } /* * Idempotent. */ static int cxgbe_uninit_synchronized(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; int rc, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); if (!(vi->flags & VI_INIT_DONE)) { KASSERT(!(ifp->if_drv_flags & IFF_DRV_RUNNING), ("uninited VI is running")); return (0); } /* * Disable the VI so that all its data in either direction is discarded * by the MPS. 
Leave everything else (the queues, interrupts, and 1Hz * tick) intact as the TP can deliver negative advice or data that it's * holding in its RAM (for an offloaded connection) even after the VI is * disabled. */ rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false); if (rc) { if_printf(ifp, "disable_vi failed: %d\n", rc); return (rc); } for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags &= ~EQ_ENABLED; TXQ_UNLOCK(txq); } PORT_LOCK(pi); if (pi->nvi == 1) callout_stop(&pi->tick); else callout_stop(&vi->tick); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { PORT_UNLOCK(pi); return (0); } ifp->if_drv_flags &= ~IFF_DRV_RUNNING; pi->up_vis--; if (pi->up_vis > 0) { PORT_UNLOCK(pi); return (0); } PORT_UNLOCK(pi); pi->link_cfg.link_ok = 0; pi->link_cfg.speed = 0; pi->linkdnrc = -1; t4_os_link_changed(sc, pi->port_id, 0, -1); return (0); } /* * It is ok for this function to fail midway and return right away. t4_detach * will walk the entire sc->irq list and clean up whatever is valid. */ static int setup_intr_handlers(struct adapter *sc) { int rc, rid, p, q, v; char s[8]; struct irq *irq; struct port_info *pi; struct vi_info *vi; struct sge *sge = &sc->sge; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif #ifdef DEV_NETMAP struct sge_nm_rxq *nm_rxq; #endif #ifdef RSS int nbuckets = rss_getnumbuckets(); #endif /* * Setup interrupts. */ irq = &sc->irq[0]; rid = sc->intr_type == INTR_INTX ? 0 : 1; if (sc->intr_count == 1) return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all")); /* Multiple interrupts. */ KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports, ("%s: too few intr.", __func__)); /* The first one is always error intr */ rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err"); if (rc != 0) return (rc); irq++; rid++; /* The second one is always the firmware event queue */ rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt"); if (rc != 0) return (rc); irq++; rid++; for_each_port(sc, p) { pi = sc->port[p]; for_each_vi(pi, v, vi) { vi->first_intr = rid - 1; if (vi->nnmrxq > 0) { int n = max(vi->nrxq, vi->nnmrxq); MPASS(vi->flags & INTR_RXQ); rxq = &sge->rxq[vi->first_rxq]; #ifdef DEV_NETMAP nm_rxq = &sge->nm_rxq[vi->first_nm_rxq]; #endif for (q = 0; q < n; q++) { snprintf(s, sizeof(s), "%x%c%x", p, 'a' + v, q); if (q < vi->nrxq) irq->rxq = rxq++; #ifdef DEV_NETMAP if (q < vi->nnmrxq) irq->nm_rxq = nm_rxq++; #endif rc = t4_alloc_irq(sc, irq, rid, t4_vi_intr, irq, s); if (rc != 0) return (rc); irq++; rid++; vi->nintr++; } } else if (vi->flags & INTR_RXQ) { for_each_rxq(vi, q, rxq) { snprintf(s, sizeof(s), "%x%c%x", p, 'a' + v, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, rxq, s); if (rc != 0) return (rc); #ifdef RSS bus_bind_intr(sc->dev, irq->res, rss_getcpu(q % nbuckets)); #endif irq++; rid++; vi->nintr++; } } #ifdef TCP_OFFLOAD if (vi->flags & INTR_OFLD_RXQ) { for_each_ofld_rxq(vi, q, ofld_rxq) { snprintf(s, sizeof(s), "%x%c%x", p, 'A' + v, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, ofld_rxq, s); if (rc != 0) return (rc); irq++; rid++; vi->nintr++; } } #endif } } MPASS(irq == &sc->irq[sc->intr_count]); return (0); } int adapter_full_init(struct adapter *sc) { int rc, i; ASSERT_SYNCHRONIZED_OP(sc); ADAPTER_LOCK_ASSERT_NOTOWNED(sc); KASSERT((sc->flags & FULL_INIT_DONE) == 0, ("%s: FULL_INIT_DONE already", __func__)); /* * queues that belong to the adapter (not any particular port). 
*/ rc = t4_setup_adapter_queues(sc); if (rc != 0) goto done; for (i = 0; i < nitems(sc->tq); i++) { sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->tq[i]); if (sc->tq[i] == NULL) { device_printf(sc->dev, "failed to allocate task queue %d\n", i); rc = ENOMEM; goto done; } taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d", device_get_nameunit(sc->dev), i); } t4_intr_enable(sc); sc->flags |= FULL_INIT_DONE; done: if (rc != 0) adapter_full_uninit(sc); return (rc); } int adapter_full_uninit(struct adapter *sc) { int i; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); t4_teardown_adapter_queues(sc); for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) { taskqueue_free(sc->tq[i]); sc->tq[i] = NULL; } sc->flags &= ~FULL_INIT_DONE; return (0); } #ifdef RSS #define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \ RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \ RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \ RSS_HASHTYPE_RSS_UDP_IPV6) /* Translates kernel hash types to hardware. */ static int hashconfig_to_hashen(int hashconfig) { int hashen = 0; if (hashconfig & RSS_HASHTYPE_RSS_IPV4) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_IPV6) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) { hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; } if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) { hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; } if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; return (hashen); } /* Translates hardware hash types to kernel. */ static int hashen_to_hashconfig(int hashen) { int hashconfig = 0; if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) { /* * If UDP hashing was enabled it must have been enabled for * either IPv4 or IPv6 (inclusive or). Enabling UDP without * enabling any 4-tuple hash is nonsense configuration. */ MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)); if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6; } if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN) hashconfig |= RSS_HASHTYPE_RSS_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN) hashconfig |= RSS_HASHTYPE_RSS_IPV6; return (hashconfig); } #endif int vi_full_init(struct vi_info *vi) { struct adapter *sc = vi->pi->adapter; struct ifnet *ifp = vi->ifp; uint16_t *rss; struct sge_rxq *rxq; int rc, i, j, hashen; #ifdef RSS int nbuckets = rss_getnumbuckets(); int hashconfig = rss_gethashconfig(); int extra; uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; #endif ASSERT_SYNCHRONIZED_OP(sc); KASSERT((vi->flags & VI_INIT_DONE) == 0, ("%s: VI_INIT_DONE already", __func__)); sysctl_ctx_init(&vi->ctx); vi->flags |= VI_SYSCTL_CTX; /* * Allocate tx/rx/fl queues for this VI. */ rc = t4_setup_vi_queues(vi); if (rc != 0) goto done; /* error message displayed already */ /* * Setup RSS for this VI. Save a copy of the RSS table for later use. 
*/ if (vi->nrxq > vi->rss_size) { if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); " "some queues will never receive traffic.\n", vi->nrxq, vi->rss_size); } else if (vi->rss_size % vi->nrxq) { if_printf(ifp, "nrxq (%d), hw RSS table size (%d); " "expect uneven traffic distribution.\n", vi->nrxq, vi->rss_size); } #ifdef RSS MPASS(RSS_KEYSIZE == 40); if (vi->nrxq != nbuckets) { if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);" "performance will be impacted.\n", vi->nrxq, nbuckets); } rss_getkey((void *)&raw_rss_key[0]); for (i = 0; i < nitems(rss_key); i++) { rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]); } t4_write_rss_key(sc, &rss_key[0], -1); #endif rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK); for (i = 0; i < vi->rss_size;) { #ifdef RSS j = rss_get_indirection_to_bucket(i); j %= vi->nrxq; rxq = &sc->sge.rxq[vi->first_rxq + j]; rss[i++] = rxq->iq.abs_id; #else for_each_rxq(vi, j, rxq) { rss[i++] = rxq->iq.abs_id; if (i == vi->rss_size) break; } #endif } rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss, vi->rss_size); if (rc != 0) { if_printf(ifp, "rss_config failed: %d\n", rc); goto done; } #ifdef RSS hashen = hashconfig_to_hashen(hashconfig); /* * We may have had to enable some hashes even though the global config * wants them disabled. This is a potential problem that must be * reported to the user. */ extra = hashen_to_hashconfig(hashen) ^ hashconfig; /* * If we consider only the supported hash types, then the enabled hashes * are a superset of the requested hashes. In other words, there cannot * be any supported hash that was requested but not enabled, but there * can be hashes that were not requested but had to be enabled. */ extra &= SUPPORTED_RSS_HASHTYPES; MPASS((extra & hashconfig) == 0); if (extra) { if_printf(ifp, "global RSS config (0x%x) cannot be accommodated.\n", hashconfig); } if (extra & RSS_HASHTYPE_RSS_IPV4) if_printf(ifp, "IPv4 2-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_TCP_IPV4) if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_IPV6) if_printf(ifp, "IPv6 2-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_TCP_IPV6) if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_UDP_IPV4) if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_UDP_IPV6) if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n"); #else hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN; #endif rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, hashen, rss[0]); if (rc != 0) { if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc); goto done; } vi->rss = rss; vi->flags |= VI_INIT_DONE; done: if (rc != 0) vi_full_uninit(vi); return (rc); } /* * Idempotent. */ int vi_full_uninit(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int i; struct sge_rxq *rxq; struct sge_txq *txq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ofld_txq; #endif if (vi->flags & VI_INIT_DONE) { /* Need to quiesce queues. */ /* XXX: Only for the first VI? 
*/ if (IS_MAIN_VI(vi)) quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]); for_each_txq(vi, i, txq) { quiesce_txq(sc, txq); } #ifdef TCP_OFFLOAD for_each_ofld_txq(vi, i, ofld_txq) { quiesce_wrq(sc, ofld_txq); } #endif for_each_rxq(vi, i, rxq) { quiesce_iq(sc, &rxq->iq); quiesce_fl(sc, &rxq->fl); } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { quiesce_iq(sc, &ofld_rxq->iq); quiesce_fl(sc, &ofld_rxq->fl); } #endif free(vi->rss, M_CXGBE); free(vi->nm_rss, M_CXGBE); } t4_teardown_vi_queues(vi); vi->flags &= ~VI_INIT_DONE; return (0); } static void quiesce_txq(struct adapter *sc, struct sge_txq *txq) { struct sge_eq *eq = &txq->eq; struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; (void) sc; /* unused */ #ifdef INVARIANTS TXQ_LOCK(txq); MPASS((eq->flags & EQ_ENABLED) == 0); TXQ_UNLOCK(txq); #endif /* Wait for the mp_ring to empty. */ while (!mp_ring_is_idle(txq->r)) { mp_ring_check_drainage(txq->r, 0); pause("rquiesce", 1); } /* Then wait for the hardware to finish. */ while (spg->cidx != htobe16(eq->pidx)) pause("equiesce", 1); /* Finally, wait for the driver to reclaim all descriptors. */ while (eq->cidx != eq->pidx) pause("dquiesce", 1); } static void quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq) { /* XXXTX */ } static void quiesce_iq(struct adapter *sc, struct sge_iq *iq) { (void) sc; /* unused */ /* Synchronize with the interrupt handler */ while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED)) pause("iqfree", 1); } static void quiesce_fl(struct adapter *sc, struct sge_fl *fl) { mtx_lock(&sc->sfl_lock); FL_LOCK(fl); fl->flags |= FL_DOOMED; FL_UNLOCK(fl); callout_stop(&sc->sfl_callout); mtx_unlock(&sc->sfl_lock); KASSERT((fl->flags & FL_STARVING) == 0, ("%s: still starving", __func__)); } static int t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid, driver_intr_t *handler, void *arg, char *name) { int rc; irq->rid = rid; irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid, RF_SHAREABLE | RF_ACTIVE); if (irq->res == NULL) { device_printf(sc->dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET, NULL, handler, arg, &irq->tag); if (rc != 0) { device_printf(sc->dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name, rc); } else if (name) bus_describe_intr(sc->dev, irq->res, irq->tag, name); return (rc); } static int t4_free_irq(struct adapter *sc, struct irq *irq) { if (irq->tag) bus_teardown_intr(sc->dev, irq->res, irq->tag); if (irq->res) bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res); bzero(irq, sizeof(*irq)); return (0); } static void get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf) { regs->version = chip_id(sc) | chip_rev(sc) << 10; t4_get_regs(sc, buf, regs->len); } #define A_PL_INDIR_CMD 0x1f8 #define S_PL_AUTOINC 31 #define M_PL_AUTOINC 0x1U #define V_PL_AUTOINC(x) ((x) << S_PL_AUTOINC) #define G_PL_AUTOINC(x) (((x) >> S_PL_AUTOINC) & M_PL_AUTOINC) #define S_PL_VFID 20 #define M_PL_VFID 0xffU #define V_PL_VFID(x) ((x) << S_PL_VFID) #define G_PL_VFID(x) (((x) >> S_PL_VFID) & M_PL_VFID) #define S_PL_ADDR 0 #define M_PL_ADDR 0xfffffU #define V_PL_ADDR(x) ((x) << S_PL_ADDR) #define G_PL_ADDR(x) (((x) >> S_PL_ADDR) & M_PL_ADDR) #define A_PL_INDIR_DATA 0x1fc static uint64_t read_vf_stat(struct adapter *sc, unsigned int viid, int reg) { u32 stats[2]; mtx_assert(&sc->reg_lock, MA_OWNED); t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(reg))); 
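/* PL_AUTOINC is set, so consecutive reads of A_PL_INDIR_DATA return the low and then the high 32 bits of the 64-bit statistic. */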
stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA); stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA); return (((uint64_t)stats[1]) << 32 | stats[0]); } static void t4_get_vi_stats(struct adapter *sc, unsigned int viid, struct fw_vi_stats_vf *stats) { #define GET_STAT(name) \ read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L) stats->tx_bcast_bytes = GET_STAT(TX_VF_BCAST_BYTES); stats->tx_bcast_frames = GET_STAT(TX_VF_BCAST_FRAMES); stats->tx_mcast_bytes = GET_STAT(TX_VF_MCAST_BYTES); stats->tx_mcast_frames = GET_STAT(TX_VF_MCAST_FRAMES); stats->tx_ucast_bytes = GET_STAT(TX_VF_UCAST_BYTES); stats->tx_ucast_frames = GET_STAT(TX_VF_UCAST_FRAMES); stats->tx_drop_frames = GET_STAT(TX_VF_DROP_FRAMES); stats->tx_offload_bytes = GET_STAT(TX_VF_OFFLOAD_BYTES); stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES); stats->rx_bcast_bytes = GET_STAT(RX_VF_BCAST_BYTES); stats->rx_bcast_frames = GET_STAT(RX_VF_BCAST_FRAMES); stats->rx_mcast_bytes = GET_STAT(RX_VF_MCAST_BYTES); stats->rx_mcast_frames = GET_STAT(RX_VF_MCAST_FRAMES); stats->rx_ucast_bytes = GET_STAT(RX_VF_UCAST_BYTES); stats->rx_ucast_frames = GET_STAT(RX_VF_UCAST_FRAMES); stats->rx_err_frames = GET_STAT(RX_VF_ERR_FRAMES); #undef GET_STAT } static void t4_clr_vi_stats(struct adapter *sc, unsigned int viid) { int reg; t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L))); for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L; reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4) t4_write_reg(sc, A_PL_INDIR_DATA, 0); } static void vi_refresh_stats(struct adapter *sc, struct vi_info *vi) { struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ if (!(vi->flags & VI_INIT_DONE)) return; getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &vi->last_refreshed, <)) return; mtx_lock(&sc->reg_lock); t4_get_vi_stats(sc, vi->viid, &vi->stats); getmicrotime(&vi->last_refreshed); mtx_unlock(&sc->reg_lock); } static void cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi) { int i; u_int v, tnl_cong_drops; struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &pi->last_refreshed, <)) return; tnl_cong_drops = 0; t4_get_port_stats(sc, pi->tx_chan, &pi->stats); for (i = 0; i < sc->chip_params->nchan; i++) { if (pi->rx_chan_map & (1 << i)) { mtx_lock(&sc->reg_lock); t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1, A_TP_MIB_TNL_CNG_DROP_0 + i); mtx_unlock(&sc->reg_lock); tnl_cong_drops += v; } } pi->tnl_cong_drops = tnl_cong_drops; getmicrotime(&pi->last_refreshed); } static void cxgbe_tick(void *arg) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; PORT_LOCK_ASSERT_OWNED(pi); cxgbe_refresh_stats(sc, pi); callout_schedule(&pi->tick, hz); } void vi_tick(void *arg) { struct vi_info *vi = arg; struct adapter *sc = vi->pi->adapter; vi_refresh_stats(sc, vi); callout_schedule(&vi->tick, hz); } static void cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid) { struct ifnet *vlan; if (arg != ifp || ifp->if_type != IFT_ETHER) return; vlan = VLAN_DEVAT(ifp, vid); VLAN_SETCOOKIE(vlan, ifp); } /* * Should match fw_caps_config_ enums in t4fw_interface.h */ static char *caps_decoder[] = { "\20\001IPMI\002NCSI", /* 0: NBM */ "\20\001PPP\002QFC\003DCBX", /* 1: link */ "\20\001INGRESS\002EGRESS", /* 2: switch */ "\20\001NIC\002VM\003IDS\004UM\005UM_ISGL" /* 3: NIC */ "\006HASHFILTER\007ETHOFLD", "\20\001TOE", /* 4: TOE */ "\20\001RDDP\002RDMAC", /* 
5: RDMA */ "\20\001INITIATOR_PDU\002TARGET_PDU" /* 6: iSCSI */ "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD" "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD" "\007T10DIF" "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD", "\20\00KEYS", /* 7: TLS */ "\20\001INITIATOR\002TARGET\003CTRL_OFLD" /* 8: FCoE */ "\004PO_INITIATOR\005PO_TARGET", }; static void t4_sysctls(struct adapter *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children, *c0; static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"}; ctx = device_get_sysctl_ctx(sc->dev); /* * dev.t4nex.X. */ oid = device_get_sysctl_tree(sc->dev); c0 = children = SYSCTL_CHILDREN(oid); sc->sc_do_rxcopy = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW, &sc->sc_do_rxcopy, 1, "Do RX copy of small frames"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL, sc->params.nports, "# of ports"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD, NULL, chip_rev(sc), "chip hardware revision"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version", CTLFLAG_RD, sc->tp_version, 0, "TP microcode version"); if (sc->params.exprom_vers != 0) { SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "exprom_version", CTLFLAG_RD, sc->exprom_version, 0, "expansion ROM version"); } SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", CTLFLAG_RD, sc->fw_version, 0, "firmware version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf", CTLFLAG_RD, sc->cfg_file, 0, "configuration file"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL, sc->cfcsum, "config file checksum"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells", CTLTYPE_STRING | CTLFLAG_RD, doorbells, sc->doorbells, sysctl_bitfield, "A", "available doorbells"); #define SYSCTL_CAP(name, n, text) \ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \ CTLTYPE_STRING | CTLFLAG_RD, caps_decoder[n], sc->name, \ sysctl_bitfield, "A", "available " text "capabilities") SYSCTL_CAP(nbmcaps, 0, "NBM"); SYSCTL_CAP(linkcaps, 1, "link"); SYSCTL_CAP(switchcaps, 2, "switch"); SYSCTL_CAP(niccaps, 3, "NIC"); SYSCTL_CAP(toecaps, 4, "TCP offload"); SYSCTL_CAP(rdmacaps, 5, "RDMA"); SYSCTL_CAP(iscsicaps, 6, "iSCSI"); SYSCTL_CAP(tlscaps, 7, "TLS"); SYSCTL_CAP(fcoecaps, 8, "FCoE"); #undef SYSCTL_CAP SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL, sc->params.vpd.cclk, "core clock frequency (in KHz)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers", CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.timer_val, sizeof(sc->params.sge.timer_val), sysctl_int_array, "A", "interrupt holdoff timer values (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts", CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.counter_val, sizeof(sc->params.sge.counter_val), sysctl_int_array, "A", "interrupt holdoff packet counter values"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD, NULL, sc->tids.nftids, "number of filters"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, sc, 0, sysctl_temperature, "I", "chip temperature (in Celsius)"); t4_sge_sysctls(sc, ctx, children); sc->lro_timeout = 100; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW, &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "debug_flags", CTLFLAG_RW, &sc->debug_flags, 0, "flags to enable runtime debugging"); #ifdef SBUF_DRAIN /* * dev.t4nex.X.misc. Marked CTLFLAG_SKIP to avoid information overload. 
*/ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc", CTLFLAG_RD | CTLFLAG_SKIP, NULL, "logs and miscellaneous information"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cctrl, "A", "congestion control"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp", CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0", CTLTYPE_STRING | CTLFLAG_RD, sc, 3, sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1", CTLTYPE_STRING | CTLFLAG_RD, sc, 4, sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5, sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, chip_id(sc) <= CHELSIO_T5 ? sysctl_cim_la : sysctl_cim_la_t6, "A", "CIM logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ma_la, "A", "CIM MA logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2", CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3", CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge", CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)"); if (chip_id(sc) > CHELSIO_T4) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_pif_la, "A", "CIM PIF logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_qcfg, "A", "CIM queue configuration"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cpl_stats, "A", "CPL statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ddp_stats, "A", "non-TCP DDP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_devlog, "A", "firmware's device log"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_fcoe_stats, "A", "FCoE statistics"); SYSCTL_ADD_PROC(ctx, children, 
OID_AUTO, "hw_sched", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_hw_sched, "A", "hardware scheduler "); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_l2t, "A", "hardware L2 table"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_lb_stats, "A", "loopback statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_meminfo, "A", "memory regions"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6, "A", "MPS TCAM entries"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_path_mtus, "A", "path MTUs"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_pm_stats, "A", "PM statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_rdma_stats, "A", "RDMA statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tcp_stats, "A", "TCP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tids, "A", "TID information"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_err_stats, "A", "TP error statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask", CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tp_la_mask, "I", "TP logic analyzer event capture mask"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_la, "A", "TP logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tx_rate, "A", "Tx rate"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ulprx_la, "A", "ULPRX logic analyzer"); if (is_t5(sc)) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_wcwr_stats, "A", "write combined work requests"); } #endif #ifdef TCP_OFFLOAD if (is_offload(sc)) { /* * dev.t4nex.X.toe. 
*/ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD, NULL, "TOE parameters"); children = SYSCTL_CHILDREN(oid); sc->tt.sndbuf = 256 * 1024; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW, &sc->tt.sndbuf, 0, "max hardware send buffer size"); sc->tt.ddp = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW, &sc->tt.ddp, 0, "DDP allowed"); sc->tt.rx_coalesce = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce", CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing"); sc->tt.tx_align = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align", CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload"); sc->tt.tx_zcopy = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy", CTLFLAG_RW, &sc->tt.tx_zcopy, 0, "Enable zero-copy aio_write(2)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A", "TP timer tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_tp_tick, "A", "TCP timestamp tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_tp_tick, "A", "DACK tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer", CTLTYPE_UINT | CTLFLAG_RD, sc, 0, sysctl_tp_dack_timer, "IU", "DACK timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MIN, sysctl_tp_timer, "LU", "Retransmit min (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MAX, sysctl_tp_timer, "LU", "Retransmit max (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MIN, sysctl_tp_timer, "LU", "Persist timer min (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MAX, sysctl_tp_timer, "LU", "Persist timer max (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_IDLE, sysctl_tp_timer, "LU", "Keepidle idle timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_intvl", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_INTVL, sysctl_tp_timer, "LU", "Keepidle interval (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_INIT_SRTT, sysctl_tp_timer, "LU", "Initial SRTT (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_FINWAIT2_TIMER, sysctl_tp_timer, "LU", "FINWAIT2 timer (us)"); } #endif } void vi_sysctls(struct vi_info *vi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children; ctx = device_get_sysctl_ctx(vi->dev); /* * dev.v?(cxgbe|cxl).X. 
*/ oid = device_get_sysctl_tree(vi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL, vi->viid, "VI identifier"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD, &vi->nrxq, 0, "# of rx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD, &vi->ntxq, 0, "# of tx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD, &vi->first_rxq, 0, "index of first rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD, &vi->first_txq, 0, "index of first tx queue"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_size", CTLFLAG_RD, NULL, vi->rss_size, "size of RSS indirection table"); if (IS_MAIN_VI(vi)) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU", "Reserve queue 0 for non-flowid packets"); } #ifdef TCP_OFFLOAD if (vi->nofldrxq != 0) { SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD, &vi->nofldrxq, 0, "# of rx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD, &vi->nofldtxq, 0, "# of tx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq", CTLFLAG_RD, &vi->first_ofld_rxq, 0, "index of first TOE rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq", CTLFLAG_RD, &vi->first_ofld_txq, 0, "index of first TOE tx queue"); } #endif #ifdef DEV_NETMAP if (vi->nnmrxq != 0) { SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD, &vi->nnmrxq, 0, "# of netmap rx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD, &vi->nnmtxq, 0, "# of netmap tx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq", CTLFLAG_RD, &vi->first_nm_rxq, 0, "index of first netmap rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq", CTLFLAG_RD, &vi->first_nm_txq, 0, "index of first netmap tx queue"); } #endif SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I", "holdoff timer index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I", "holdoff packet counter index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I", "rx queue size"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I", "tx queue size"); } static void cxgbe_sysctls(struct port_info *pi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children, *children2; struct adapter *sc = pi->adapter; int i; char name[16]; ctx = device_get_sysctl_ctx(pi->dev); /* * dev.cxgbe.X. 
*/ oid = device_get_sysctl_tree(pi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING | CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down"); if (pi->port_type == FW_PORT_TYPE_BT_XAUI) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I", "PHY temperature (in Celsius)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version", CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I", "PHY firmware version"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings", CTLTYPE_STRING | CTLFLAG_RW, pi, PAUSE_TX, sysctl_pause_settings, "A", "PAUSE settings (bit 0 = rx_pause, bit 1 = tx_pause)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL, port_top_speed(pi), "max speed (in Gbps)"); /* * dev.(cxgbe|cxl).X.tc. */ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc", CTLFLAG_RD, NULL, "Tx scheduler traffic classes"); for (i = 0; i < sc->chip_params->nsched_cls; i++) { struct tx_sched_class *tc = &pi->tc[i]; snprintf(name, sizeof(name), "%d", i); children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL, "traffic class")); SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "flags", CTLFLAG_RD, &tc->flags, 0, "flags"); SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount", CTLFLAG_RD, &tc->refcount, 0, "references to this class"); #ifdef SBUF_DRAIN SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params", CTLTYPE_STRING | CTLFLAG_RD, sc, (pi->port_id << 16) | i, sysctl_tc_params, "A", "traffic class parameters"); #endif } /* * dev.cxgbe.X.stats. */ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD, NULL, "port statistics"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD, &pi->tx_parse_error, 0, "# of tx packets with invalid length or # of segments"); #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \ SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \ CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \ sysctl_handle_t4_reg64, "QU", desc) SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_64", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, 
"tx_frames_1024_1518", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L)); SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L)); SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err", "# of frames received with bad FCS", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_len_err", "# of frames received with length error", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_64", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023", "# of rx frames in this range", 
PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L)); #undef SYSCTL_ADD_T4_REG64 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \ &pi->stats.name, desc) /* We get these from port_stats and they may be stale by up to 1s */ SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0, "# drops due to buffer-group 0 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1, "# drops due to buffer-group 1 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2, "# drops due to buffer-group 2 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3, "# drops due to buffer-group 3 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc0, "# of buffer-group 0 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc1, "# of buffer-group 1 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc2, "# of buffer-group 2 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc3, "# of buffer-group 3 truncated packets"); #undef SYSCTL_ADD_T4_PORTSTAT } static int sysctl_int_array(SYSCTL_HANDLER_ARGS) { int rc, *i, space = 0; struct sbuf sb; sbuf_new_for_sysctl(&sb, NULL, 64, req); for (i = arg1; arg2; arg2 -= sizeof(int), i++) { if (space) sbuf_printf(&sb, " "); sbuf_printf(&sb, "%d", *i); space = 1; } rc = sbuf_finish(&sb); sbuf_delete(&sb); return (rc); } static int sysctl_bitfield(SYSCTL_HANDLER_ARGS) { int rc; struct sbuf *sb; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", (int)arg2, (char *)arg1); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_btphy(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; int op = arg2; struct adapter *sc = pi->adapter; u_int v; int rc; rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt"); if (rc) return (rc); /* XXX: magic numbers */ rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 
0x20 : 0xc820, &v); end_synchronized_op(sc, 0); if (rc) return (rc); if (op == 0) v /= 256; rc = sysctl_handle_int(oidp, &v, 0, req); return (rc); } static int sysctl_noflowq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; int rc, val; val = vi->rsrv_noflowq; rc = sysctl_handle_int(oidp, &val, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if ((val >= 1) && (vi->ntxq > 1)) vi->rsrv_noflowq = 1; else vi->rsrv_noflowq = 0; return (rc); } static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc, i; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif uint8_t v; idx = vi->tmr_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < 0 || idx >= SGE_NTIMERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4tmr"); if (rc) return (rc); v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1); for_each_rxq(vi, i, rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&rxq->iq.intr_params, v); #else rxq->iq.intr_params = v; #endif } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&ofld_rxq->iq.intr_params, v); #else ofld_rxq->iq.intr_params = v; #endif } #endif vi->tmr_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (0); } static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc; idx = vi->pktc_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < -1 || idx >= SGE_NCOUNTERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4pktc"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->pktc_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int qsize, rc; qsize = vi->qsize_rxq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || (qsize & 7)) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4rxqs"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->qsize_rxq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int qsize, rc; qsize = vi->qsize_txq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || qsize > 65536) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4txqs"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->qsize_txq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc; if (req->newptr == NULL) { struct sbuf *sb; static char *bits = "\20\1PAUSE_RX\2PAUSE_TX"; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", lc->fc & (PAUSE_TX 
| PAUSE_RX), bits); rc = sbuf_finish(sb); sbuf_delete(sb); } else { char s[2]; int n; s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX)); s[1] = 0; rc = sysctl_handle_string(oidp, s, sizeof(s), req); if (rc != 0) return(rc); if (s[1] != 0) return (EINVAL); if (s[0] < '0' || s[0] > '9') return (EINVAL); /* not a number */ n = s[0] - '0'; if (n & ~(PAUSE_TX | PAUSE_RX)) return (EINVAL); /* some other bit is set too */ rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4PAUSE"); if (rc) return (rc); if ((lc->requested_fc & (PAUSE_TX | PAUSE_RX)) != n) { int link_ok = lc->link_ok; lc->requested_fc &= ~(PAUSE_TX | PAUSE_RX); lc->requested_fc |= n; rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); lc->link_ok = link_ok; /* restore */ } end_synchronized_op(sc, 0); } return (rc); } static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int reg = arg2; uint64_t val; val = t4_read_reg64(sc, reg); return (sysctl_handle_64(oidp, &val, 0, req)); } static int sysctl_temperature(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int rc, t; uint32_t param, val; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp"); if (rc) return (rc); param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) | V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); end_synchronized_op(sc, 0); if (rc) return (rc); /* unknown is returned as 0 but we display -1 in that case */ t = val == 0 ? -1 : val; rc = sysctl_handle_int(oidp, &t, 0, req); return (rc); } #ifdef SBUF_DRAIN static int sysctl_cctrl(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint16_t incr[NMTUS][NCCTRL_WIN]; static const char *dec_fac[] = { "0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875", "0.9375" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); t4_read_cong_tbl(sc, incr); for (i = 0; i < NCCTRL_WIN; ++i) { sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i, incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i], incr[5][i], incr[6][i], incr[7][i]); sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n", incr[8][i], incr[9][i], incr[10][i], incr[11][i], incr[12][i], incr[13][i], incr[14][i], incr[15][i], sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = { "TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI", /* ibq's */ "ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI", /* obq's */ "SGE0-RX", "SGE1-RX" /* additional obq's (T5 onwards) */ }; static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n, qid = arg2; uint32_t *buf, *p; char *qtype; u_int cim_num_obq = sc->chip_params->cim_num_obq; KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq, ("%s: bad qid %d\n", __func__, qid)); if (qid < CIM_NUM_IBQ) { /* inbound queue */ qtype = "IBQ"; n = 4 * CIM_IBQ_SIZE; buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = t4_read_cim_ibq(sc, qid, buf, n); } else { /* outbound queue */ qtype = "OBQ"; qid -= CIM_NUM_IBQ; n = 4 * cim_num_obq * CIM_OBQ_SIZE; buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = t4_read_cim_obq(sc, qid, buf, n); } if (rc < 0) { rc = -rc; goto done; } n = rc * sizeof(uint32_t); /* rc has # of words actually read */ rc = sysctl_wire_old_buffer(req, 0); 
if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) { rc = ENOMEM; goto done; } sbuf_printf(sb, "%s%d %s", qtype , qid, qname[arg2]); for (i = 0, p = buf; i < n; i += 16, p += 4) sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1], p[2], p[3]); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int cfg; struct sbuf *sb; uint32_t *buf, *p; int rc; MPASS(chip_id(sc) <= CHELSIO_T5); rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_cim_read_la(sc, buf, NULL); if (rc != 0) goto done; sbuf_printf(sb, "Status Data PC%s", cfg & F_UPDBGLACAPTPCONLY ? "" : " LS0Stat LS0Addr LS0Data"); for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) { if (cfg & F_UPDBGLACAPTPCONLY) { sbuf_printf(sb, "\n %02x %08x %08x", p[5] & 0xff, p[6], p[7]); sbuf_printf(sb, "\n %02x %02x%06x %02x%06x", (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8, p[4] & 0xff, p[5] >> 8); sbuf_printf(sb, "\n %02x %x%07x %x%07x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4); } else { sbuf_printf(sb, "\n %02x %x%07x %x%07x %08x %08x " "%08x%08x%08x%08x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5], p[6], p[7]); } } rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int cfg; struct sbuf *sb; uint32_t *buf, *p; int rc; MPASS(chip_id(sc) > CHELSIO_T5); rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_cim_read_la(sc, buf, NULL); if (rc != 0) goto done; sbuf_printf(sb, "Status Inst Data PC%s", cfg & F_UPDBGLACAPTPCONLY ? 
"" : " LS0Stat LS0Addr LS0Data LS1Stat LS1Addr LS1Data"); for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) { if (cfg & F_UPDBGLACAPTPCONLY) { sbuf_printf(sb, "\n %02x %08x %08x %08x", p[3] & 0xff, p[2], p[1], p[0]); sbuf_printf(sb, "\n %02x %02x%06x %02x%06x %02x%06x", (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8, p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8); sbuf_printf(sb, "\n %02x %04x%04x %04x%04x %04x%04x", (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16, p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff, p[6] >> 16); } else { sbuf_printf(sb, "\n %02x %04x%04x %04x%04x %04x%04x " "%08x %08x %08x %08x %08x %08x", (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16, p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff, p[6] >> 16, p[2], p[1], p[0], p[5], p[4], p[3]); } } rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE); p = buf; for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCnt ID Tag UE Data RDY VLD"); for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%3u %2u %x %u %08x%08x %u %u", (p[2] >> 10) & 0xff, (p[2] >> 7) & 7, (p[2] >> 3) & 0xf, (p[2] >> 2) & 1, (p[1] >> 2) | ((p[2] & 3) << 30), (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1, p[0] & 1); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL); p = buf; sbuf_printf(sb, "Cntl ID DataBE Addr Data"); for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %04x %08x %08x%08x%08x%08x", (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff, p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCntl ID Data"); for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %08x%08x%08x%08x", (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t thres[CIM_NUM_IBQ]; uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr; uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat; u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq; cim_num_obq = sc->chip_params->cim_num_obq; if (is_t4(sc)) { ibq_rdaddr = A_UP_IBQ_0_RDADDR; obq_rdaddr = A_UP_OBQ_0_REALADDR; } else { ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR; obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR; } nq = CIM_NUM_IBQ + cim_num_obq; rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat); if (rc == 0) rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr); if (rc != 0) return (rc); t4_read_cimq_cfg(sc, base, size, 
thres); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Queue Base Size Thres RdPtr WrPtr SOP EOP Avail"); for (i = 0; i < CIM_NUM_IBQ; i++, p += 4) sbuf_printf(sb, "\n%7s %5x %5u %5u %6x %4x %4u %4u %5u", qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]), G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); for ( ; i < nq; i++, p += 4, wr += 2) sbuf_printf(sb, "\n%7s %5x %5u %12x %4x %4u %4u %5u", qname[i], base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff, wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_cpl_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_cpl_stats(sc, &stats); mtx_unlock(&sc->reg_lock); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3"); sbuf_printf(sb, "\nCPL requests: %10u %10u %10u %10u", stats.req[0], stats.req[1], stats.req[2], stats.req[3]); sbuf_printf(sb, "\nCPL responses: %10u %10u %10u %10u", stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]); } else { sbuf_printf(sb, " channel 0 channel 1"); sbuf_printf(sb, "\nCPL requests: %10u %10u", stats.req[0], stats.req[1]); sbuf_printf(sb, "\nCPL responses: %10u %10u", stats.rsp[0], stats.rsp[1]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_usm_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_usm_stats(sc, &stats); sbuf_printf(sb, "Frames: %u\n", stats.frames); sbuf_printf(sb, "Octets: %ju\n", stats.octets); sbuf_printf(sb, "Drops: %u", stats.drops); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static const char * const devlog_level_strings[] = { [FW_DEVLOG_LEVEL_EMERG] = "EMERG", [FW_DEVLOG_LEVEL_CRIT] = "CRIT", [FW_DEVLOG_LEVEL_ERR] = "ERR", [FW_DEVLOG_LEVEL_NOTICE] = "NOTICE", [FW_DEVLOG_LEVEL_INFO] = "INFO", [FW_DEVLOG_LEVEL_DEBUG] = "DEBUG" }; static const char * const devlog_facility_strings[] = { [FW_DEVLOG_FACILITY_CORE] = "CORE", [FW_DEVLOG_FACILITY_CF] = "CF", [FW_DEVLOG_FACILITY_SCHED] = "SCHED", [FW_DEVLOG_FACILITY_TIMER] = "TIMER", [FW_DEVLOG_FACILITY_RES] = "RES", [FW_DEVLOG_FACILITY_HW] = "HW", [FW_DEVLOG_FACILITY_FLR] = "FLR", [FW_DEVLOG_FACILITY_DMAQ] = "DMAQ", [FW_DEVLOG_FACILITY_PHY] = "PHY", [FW_DEVLOG_FACILITY_MAC] = "MAC", [FW_DEVLOG_FACILITY_PORT] = "PORT", [FW_DEVLOG_FACILITY_VI] = "VI", [FW_DEVLOG_FACILITY_FILTER] = "FILTER", [FW_DEVLOG_FACILITY_ACL] = "ACL", [FW_DEVLOG_FACILITY_TM] = "TM", [FW_DEVLOG_FACILITY_QFC] = "QFC", [FW_DEVLOG_FACILITY_DCB] = "DCB", [FW_DEVLOG_FACILITY_ETH] = "ETH", [FW_DEVLOG_FACILITY_OFLD] = "OFLD", [FW_DEVLOG_FACILITY_RI] = "RI", [FW_DEVLOG_FACILITY_ISCSI] = "ISCSI", [FW_DEVLOG_FACILITY_FCOE] = "FCOE", [FW_DEVLOG_FACILITY_FOISCSI] = "FOISCSI", [FW_DEVLOG_FACILITY_FOFCOE] = "FOFCOE", [FW_DEVLOG_FACILITY_CHNET] = "CHNET", }; static int sysctl_devlog(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e *buf, *e; int i, j, 
rc, nentries, first = 0; struct sbuf *sb; uint64_t ftstamp = UINT64_MAX; if (dparams->addr == 0) return (ENXIO); buf = malloc(dparams->size, M_CXGBE, M_NOWAIT); if (buf == NULL) return (ENOMEM); rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size); if (rc != 0) goto done; nentries = dparams->size / sizeof(struct fw_devlog_e); for (i = 0; i < nentries; i++) { e = &buf[i]; if (e->timestamp == 0) break; /* end */ e->timestamp = be64toh(e->timestamp); e->seqno = be32toh(e->seqno); for (j = 0; j < 8; j++) e->params[j] = be32toh(e->params[j]); if (e->timestamp < ftstamp) { ftstamp = e->timestamp; first = i; } } if (buf[first].timestamp == 0) goto done; /* nothing in the log */ rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) { rc = ENOMEM; goto done; } sbuf_printf(sb, "%10s %15s %8s %8s %s\n", "Seq#", "Tstamp", "Level", "Facility", "Message"); i = first; do { e = &buf[i]; if (e->timestamp == 0) break; /* end */ sbuf_printf(sb, "%10d %15ju %8s %8s ", e->seqno, e->timestamp, (e->level < nitems(devlog_level_strings) ? devlog_level_strings[e->level] : "UNKNOWN"), (e->facility < nitems(devlog_facility_strings) ? devlog_facility_strings[e->facility] : "UNKNOWN")); sbuf_printf(sb, e->fmt, e->params[0], e->params[1], e->params[2], e->params[3], e->params[4], e->params[5], e->params[6], e->params[7]); if (++i == nentries) i = 0; } while (i != first); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_fcoe_stats stats[MAX_NCHAN]; int i, nchan = sc->chip_params->nchan; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); for (i = 0; i < nchan; i++) t4_get_fcoe_stats(sc, i, &stats[i]); if (nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3"); sbuf_printf(sb, "\noctetsDDP: %16ju %16ju %16ju %16ju", stats[0].octets_ddp, stats[1].octets_ddp, stats[2].octets_ddp, stats[3].octets_ddp); sbuf_printf(sb, "\nframesDDP: %16u %16u %16u %16u", stats[0].frames_ddp, stats[1].frames_ddp, stats[2].frames_ddp, stats[3].frames_ddp); sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u", stats[0].frames_drop, stats[1].frames_drop, stats[2].frames_drop, stats[3].frames_drop); } else { sbuf_printf(sb, " channel 0 channel 1"); sbuf_printf(sb, "\noctetsDDP: %16ju %16ju", stats[0].octets_ddp, stats[1].octets_ddp); sbuf_printf(sb, "\nframesDDP: %16u %16u", stats[0].frames_ddp, stats[1].frames_ddp); sbuf_printf(sb, "\nframesDrop: %16u %16u", stats[0].frames_drop, stats[1].frames_drop); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; unsigned int map, kbps, ipg, mode; unsigned int pace_tab[NTX_SCHED]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP); mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG)); t4_read_pace_tbl(sc, pace_tab); sbuf_printf(sb, "Scheduler Mode Channel Rate (Kbps) " "Class IPG (0.1 ns) Flow IPG (us)"); for (i = 0; i < NTX_SCHED; ++i, map >>= 2) { t4_get_tx_sched(sc, i, &kbps, &ipg); sbuf_printf(sb, "\n %u %-5s %u ", i, (mode & (1 << i)) ? 
"flow" : "class", map & 3); if (kbps) sbuf_printf(sb, "%9u ", kbps); else sbuf_printf(sb, " disabled "); if (ipg) sbuf_printf(sb, "%13u ", ipg); else sbuf_printf(sb, " disabled "); if (pace_tab[i]) sbuf_printf(sb, "%10u", pace_tab[i]); else sbuf_printf(sb, " disabled"); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, j; uint64_t *p0, *p1; struct lb_port_stats s[2]; static const char *stat_name[] = { "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:", "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:", "Frames128To255:", "Frames256To511:", "Frames512To1023:", "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:", "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:", "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:", "BG2FramesTrunc:", "BG3FramesTrunc:" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); memset(s, 0, sizeof(s)); for (i = 0; i < sc->chip_params->nchan; i += 2) { t4_get_lb_stats(sc, i, &s[0]); t4_get_lb_stats(sc, i + 1, &s[1]); p0 = &s[0].octets; p1 = &s[1].octets; sbuf_printf(sb, "%s Loopback %u" " Loopback %u", i == 0 ? "" : "\n", i, i + 1); for (j = 0; j < nitems(stat_name); j++) sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j], *p0++, *p1++); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS) { int rc = 0; struct port_info *pi = arg1; struct sbuf *sb; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 64, req); if (sb == NULL) return (ENOMEM); if (pi->linkdnrc < 0) sbuf_printf(sb, "n/a"); else sbuf_printf(sb, "%s", t4_link_down_rc_str(pi->linkdnrc)); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } struct mem_desc { unsigned int base; unsigned int limit; unsigned int idx; }; static int mem_desc_cmp(const void *a, const void *b) { return ((const struct mem_desc *)a)->base - ((const struct mem_desc *)b)->base; } static void mem_region_show(struct sbuf *sb, const char *name, unsigned int from, unsigned int to) { unsigned int size; if (from == to) return; size = to - from + 1; if (size == 0) return; /* XXX: need humanize_number(3) in libkern for a more readable 'size' */ sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size); } static int sysctl_meminfo(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n; uint32_t lo, hi, used, alloc; static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"}; static const char *region[] = { "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:", "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:", "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:", "TDDP region:", "TPT region:", "STAG region:", "RQ region:", "RQUDP region:", "PBL region:", "TXPBL region:", "DBVFIFO region:", "ULPRX state:", "ULPTX state:", "On-chip queues:" }; struct mem_desc avail[4]; struct mem_desc mem[nitems(region) + 3]; /* up to 3 holes */ struct mem_desc *md = mem; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); for (i = 0; i < nitems(mem); i++) { mem[i].limit = 0; mem[i].idx = i; } /* Find and sort the populated memory ranges */ i = 0; lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); if (lo & F_EDRAM0_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM0_BAR); avail[i].base = 
G_EDRAM0_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20); avail[i].idx = 0; i++; } if (lo & F_EDRAM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM1_BAR); avail[i].base = G_EDRAM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20); avail[i].idx = 1; i++; } if (lo & F_EXT_MEM_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); avail[i].base = G_EXT_MEM_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM_SIZE(hi) << 20); avail[i].idx = is_t5(sc) ? 3 : 2; /* Call it MC0 for T5 */ i++; } if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); avail[i].base = G_EXT_MEM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM1_SIZE(hi) << 20); avail[i].idx = 4; i++; } if (!i) /* no memory available */ return 0; qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp); (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR); (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE); /* the next few have explicit upper bounds */ md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) * G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE)); md++; md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) * G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE)); md++; if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { if (chip_id(sc) <= CHELSIO_T5) md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE); else md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR); md->limit = 0; } else { md->base = 0; md->idx = nitems(region); /* hide it */ } md++; #define ulp_region(reg) \ md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\ (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT) ulp_region(RX_ISCSI); ulp_region(RX_TDDP); ulp_region(TX_TPT); ulp_region(RX_STAG); ulp_region(RX_RQ); ulp_region(RX_RQUDP); ulp_region(RX_PBL); ulp_region(TX_PBL); #undef ulp_region md->base = 0; md->idx = nitems(region); if (!is_t4(sc)) { uint32_t size = 0; uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2); uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE); if (is_t5(sc)) { if (sge_ctrl & F_VFIFO_ENABLE) size = G_DBVFIFO_SIZE(fifo_size); } else size = G_T6_DBVFIFO_SIZE(fifo_size); if (size) { md->base = G_BASEADDR(t4_read_reg(sc, A_SGE_DBVFIFO_BADDR)); md->limit = md->base + (size << 2) - 1; } } md++; md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE); md->limit = 0; md++; md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE); md->limit = 0; md++; md->base = sc->vres.ocq.start; if (sc->vres.ocq.size) md->limit = md->base + sc->vres.ocq.size - 1; else md->idx = nitems(region); /* hide it */ md++; /* add any address-space holes, there can be up to 3 */ for (n = 0; n < i - 1; n++) if (avail[n].limit < avail[n + 1].base) (md++)->base = avail[n].limit; if (avail[n].limit) (md++)->base = avail[n].limit; n = md - mem; qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp); for (lo = 0; lo < i; lo++) mem_region_show(sb, memory[avail[lo].idx], avail[lo].base, avail[lo].limit - 1); sbuf_printf(sb, "\n"); for (i = 0; i < n; i++) { if (mem[i].idx >= nitems(region)) continue; 
/* skip holes */ if (!mem[i].limit) mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0; mem_region_show(sb, region[mem[i].idx], mem[i].base, mem[i].limit); } sbuf_printf(sb, "\n"); lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP RAM:", lo, hi); lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP Extmem2:", lo, hi); lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE); sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n", G_PMRXMAXPAGE(lo), t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10, (lo & F_PMRXNUMCHN) ? 2 : 1); lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE); hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE); sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n", G_PMTXMAXPAGE(lo), hi >= (1 << 20) ? (hi >> 20) : (hi >> 10), hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo)); sbuf_printf(sb, "%u p-structs\n", t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT)); for (i = 0; i < 4; i++) { if (chip_id(sc) > CHELSIO_T5) lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4); else lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4); if (is_t5(sc)) { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } else { used = G_USED(lo); alloc = G_ALLOC(lo); } /* For T6 these are MAC buffer groups */ sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated", i, used, alloc); } for (i = 0; i < sc->chip_params->nchan; i++) { if (chip_id(sc) > CHELSIO_T5) lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4); else lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4); if (is_t5(sc)) { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } else { used = G_USED(lo); alloc = G_ALLOC(lo); } /* For T6 these are MAC buffer groups */ sbuf_printf(sb, "\nLoopback %d using %u pages out of %u allocated", i, used, alloc); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static inline void tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask) { *mask = x | y; y = htobe64(y); memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN); } static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; MPASS(chip_id(sc) <= CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Idx Ethernet address Mask Vld Ports PF" " VF Replication P0 P1 P2 P3 ML"); for (i = 0; i < sc->chip_params->mps_tcam_size; i++) { uint64_t tcamx, tcamy, mask; uint32_t cls_lo, cls_hi; uint8_t addr[ETHER_ADDR_LEN]; tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i)); tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i)); if (tcamx & tcamy) continue; tcamxy2valmask(tcamx, tcamy, addr, &mask); cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i)); cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i)); sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx" " %c %#x%4u%4d", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask, (cls_lo & F_SRAM_VLD) ? 'Y' : 'N', G_PORTMAP(cls_hi), G_PF(cls_lo), (cls_lo & F_VF_VALID) ? 
G_VF(cls_lo) : -1); if (cls_lo & F_REPLICATE) { struct fw_ldst_cmd ldst_cmd; memset(&ldst_cmd, 0, sizeof(ldst_cmd)); ldst_cmd.op_to_addrspace = htobe32(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS)); ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd)); ldst_cmd.u.mps.rplc.fid_idx = htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) | V_FW_LDST_CMD_IDX(i)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mps"); if (rc) break; rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd, sizeof(ldst_cmd), &ldst_cmd); end_synchronized_op(sc, 0); if (rc != 0) { sbuf_printf(sb, "%36d", rc); rc = 0; } else { sbuf_printf(sb, " %08x %08x %08x %08x", be32toh(ldst_cmd.u.mps.rplc.rplc127_96), be32toh(ldst_cmd.u.mps.rplc.rplc95_64), be32toh(ldst_cmd.u.mps.rplc.rplc63_32), be32toh(ldst_cmd.u.mps.rplc.rplc31_0)); } } else sbuf_printf(sb, "%36s", ""); sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo), G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo), G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf); } if (rc) (void) sbuf_finish(sb); else rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; MPASS(chip_id(sc) > CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Idx Ethernet address Mask VNI Mask" " IVLAN Vld DIP_Hit Lookup Port Vld Ports PF VF" " Replication" " P0 P1 P2 P3 ML\n"); for (i = 0; i < sc->chip_params->mps_tcam_size; i++) { uint8_t dip_hit, vlan_vld, lookup_type, port_num; uint16_t ivlan; uint64_t tcamx, tcamy, val, mask; uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy; uint8_t addr[ETHER_ADDR_LEN]; ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0); if (i < 256) ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0); else ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1); t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl); val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1); tcamy = G_DMACH(val) << 32; tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1); data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1); lookup_type = G_DATALKPTYPE(data2); port_num = G_DATAPORTNUM(data2); if (lookup_type && lookup_type != M_DATALKPTYPE) { /* Inner header VNI */ vniy = ((data2 & F_DATAVIDH2) << 23) | (G_DATAVIDH1(data2) << 16) | G_VIDL(val); dip_hit = data2 & F_DATADIPHIT; vlan_vld = 0; } else { vniy = 0; dip_hit = 0; vlan_vld = data2 & F_DATAVIDH2; ivlan = G_VIDL(val); } ctl |= V_CTLXYBITSEL(1); t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl); val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1); tcamx = G_DMACH(val) << 32; tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1); data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1); if (lookup_type && lookup_type != M_DATALKPTYPE) { /* Inner header VNI mask */ vnix = ((data2 & F_DATAVIDH2) << 23) | (G_DATAVIDH1(data2) << 16) | G_VIDL(val); } else vnix = 0; if (tcamx & tcamy) continue; tcamxy2valmask(tcamx, tcamy, addr, &mask); cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i)); cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i)); if (lookup_type && lookup_type != M_DATALKPTYPE) { sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x " "%012jx %06x %06x - - %3c" " 'I' %4x %3c %#x%4u%4d", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N', port_num, cls_lo & F_T6_SRAM_VLD ? 
'Y' : 'N', G_PORTMAP(cls_hi), G_T6_PF(cls_lo), cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1); } else { sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x " "%012jx - - ", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask); if (vlan_vld) sbuf_printf(sb, "%4u Y ", ivlan); else sbuf_printf(sb, " - N "); sbuf_printf(sb, "- %3c %4x %3c %#x%4u%4d", lookup_type ? 'I' : 'O', port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N', G_PORTMAP(cls_hi), G_T6_PF(cls_lo), cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1); } if (cls_lo & F_T6_REPLICATE) { struct fw_ldst_cmd ldst_cmd; memset(&ldst_cmd, 0, sizeof(ldst_cmd)); ldst_cmd.op_to_addrspace = htobe32(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS)); ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd)); ldst_cmd.u.mps.rplc.fid_idx = htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) | V_FW_LDST_CMD_IDX(i)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t6mps"); if (rc) break; rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd, sizeof(ldst_cmd), &ldst_cmd); end_synchronized_op(sc, 0); if (rc != 0) { sbuf_printf(sb, "%72d", rc); rc = 0; } else { sbuf_printf(sb, " %08x %08x %08x %08x" " %08x %08x %08x %08x", be32toh(ldst_cmd.u.mps.rplc.rplc255_224), be32toh(ldst_cmd.u.mps.rplc.rplc223_192), be32toh(ldst_cmd.u.mps.rplc.rplc191_160), be32toh(ldst_cmd.u.mps.rplc.rplc159_128), be32toh(ldst_cmd.u.mps.rplc.rplc127_96), be32toh(ldst_cmd.u.mps.rplc.rplc95_64), be32toh(ldst_cmd.u.mps.rplc.rplc63_32), be32toh(ldst_cmd.u.mps.rplc.rplc31_0)); } } else sbuf_printf(sb, "%72s", ""); sbuf_printf(sb, "%4u%3u%3u%3u %#x", G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo), G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo), (cls_lo >> S_T6_MULTILISTEN0) & 0xf); } if (rc) (void) sbuf_finish(sb); else rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; uint16_t mtus[NMTUS]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_read_mtu_tbl(sc, mtus, NULL); sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u", mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6], mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13], mtus[14], mtus[15]); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS]; uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS]; static const char *tx_stats[MAX_PM_NSTATS] = { "Read:", "Write bypass:", "Write mem:", "Bypass + mem:", "Tx FIFO wait", NULL, "Tx latency" }; static const char *rx_stats[MAX_PM_NSTATS] = { "Read:", "Write bypass:", "Write mem:", "Flush:", " Rx FIFO wait", NULL, "Rx latency" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_pmtx_get_stats(sc, tx_cnt, tx_cyc); t4_pmrx_get_stats(sc, rx_cnt, rx_cyc); sbuf_printf(sb, " Tx pcmds Tx bytes"); for (i = 0; i < 4; i++) { sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); } sbuf_printf(sb, "\n Rx pcmds Rx bytes"); for (i = 0; i < 4; i++) { sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); } if (chip_id(sc) > CHELSIO_T5) { sbuf_printf(sb, "\n Total wait Total occupancy"); sbuf_printf(sb, "\n%-13s %10u 
%20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); i += 2; MPASS(i < nitems(tx_stats)); sbuf_printf(sb, "\n Reads Total wait"); sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_rdma_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_rdma_stats(sc, &stats); mtx_unlock(&sc->reg_lock); sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod); sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_tcp_stats v4, v6; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_tcp_stats(sc, &v4, &v6); mtx_unlock(&sc->reg_lock); sbuf_printf(sb, " IP IPv6\n"); sbuf_printf(sb, "OutRsts: %20u %20u\n", v4.tcp_out_rsts, v6.tcp_out_rsts); sbuf_printf(sb, "InSegs: %20ju %20ju\n", v4.tcp_in_segs, v6.tcp_in_segs); sbuf_printf(sb, "OutSegs: %20ju %20ju\n", v4.tcp_out_segs, v6.tcp_out_segs); sbuf_printf(sb, "RetransSegs: %20ju %20ju", v4.tcp_retrans_segs, v6.tcp_retrans_segs); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tids(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tid_info *t = &sc->tids; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); if (t->natids) { sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1, t->atids_in_use); } if (t->ntids) { if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { uint32_t b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4; if (b) { sbuf_printf(sb, "TID range: 0-%u, %u-%u", b - 1, t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4, t->ntids - 1); } else { sbuf_printf(sb, "TID range: %u-%u", t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4, t->ntids - 1); } } else sbuf_printf(sb, "TID range: 0-%u", t->ntids - 1); sbuf_printf(sb, ", in use: %u\n", atomic_load_acq_int(&t->tids_in_use)); } if (t->nstids) { sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base, t->stid_base + t->nstids - 1, t->stids_in_use); } if (t->nftids) { sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base, t->ftid_base + t->nftids - 1); } if (t->netids) { sbuf_printf(sb, "ETID range: %u-%u\n", t->etid_base, t->etid_base + t->netids - 1); } sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users", t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4), t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6)); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_err_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_err_stats(sc, &stats); mtx_unlock(&sc->reg_lock); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3\n"); 
sbuf_printf(sb, "macInErrs: %10u %10u %10u %10u\n", stats.mac_in_errs[0], stats.mac_in_errs[1], stats.mac_in_errs[2], stats.mac_in_errs[3]); sbuf_printf(sb, "hdrInErrs: %10u %10u %10u %10u\n", stats.hdr_in_errs[0], stats.hdr_in_errs[1], stats.hdr_in_errs[2], stats.hdr_in_errs[3]); sbuf_printf(sb, "tcpInErrs: %10u %10u %10u %10u\n", stats.tcp_in_errs[0], stats.tcp_in_errs[1], stats.tcp_in_errs[2], stats.tcp_in_errs[3]); sbuf_printf(sb, "tcp6InErrs: %10u %10u %10u %10u\n", stats.tcp6_in_errs[0], stats.tcp6_in_errs[1], stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]); sbuf_printf(sb, "tnlCongDrops: %10u %10u %10u %10u\n", stats.tnl_cong_drops[0], stats.tnl_cong_drops[1], stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]); sbuf_printf(sb, "tnlTxDrops: %10u %10u %10u %10u\n", stats.tnl_tx_drops[0], stats.tnl_tx_drops[1], stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]); sbuf_printf(sb, "ofldVlanDrops: %10u %10u %10u %10u\n", stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1], stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]); sbuf_printf(sb, "ofldChanDrops: %10u %10u %10u %10u\n\n", stats.ofld_chan_drops[0], stats.ofld_chan_drops[1], stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]); } else { sbuf_printf(sb, " channel 0 channel 1\n"); sbuf_printf(sb, "macInErrs: %10u %10u\n", stats.mac_in_errs[0], stats.mac_in_errs[1]); sbuf_printf(sb, "hdrInErrs: %10u %10u\n", stats.hdr_in_errs[0], stats.hdr_in_errs[1]); sbuf_printf(sb, "tcpInErrs: %10u %10u\n", stats.tcp_in_errs[0], stats.tcp_in_errs[1]); sbuf_printf(sb, "tcp6InErrs: %10u %10u\n", stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]); sbuf_printf(sb, "tnlCongDrops: %10u %10u\n", stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]); sbuf_printf(sb, "tnlTxDrops: %10u %10u\n", stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]); sbuf_printf(sb, "ofldVlanDrops: %10u %10u\n", stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]); sbuf_printf(sb, "ofldChanDrops: %10u %10u\n\n", stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]); } sbuf_printf(sb, "ofldNoNeigh: %u\nofldCongDefer: %u", stats.ofld_no_neigh, stats.ofld_cong_defer); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct tp_params *tpp = &sc->params.tp; u_int mask; int rc; mask = tpp->la_mask >> 16; rc = sysctl_handle_int(oidp, &mask, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (mask > 0xffff) return (EINVAL); tpp->la_mask = mask << 16; t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask); return (0); } struct field_desc { const char *name; u_int start; u_int width; }; static void field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f) { char buf[32]; int line_size = 0; while (f->name) { uint64_t mask = (1ULL << f->width) - 1; int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name, ((uintmax_t)v >> f->start) & mask); if (line_size + len >= 79) { line_size = 8; sbuf_printf(sb, "\n "); } sbuf_printf(sb, "%s ", buf); line_size += len + 1; f++; } sbuf_printf(sb, "\n"); } static const struct field_desc tp_la0[] = { { "RcfOpCodeOut", 60, 4 }, { "State", 56, 4 }, { "WcfState", 52, 4 }, { "RcfOpcSrcOut", 50, 2 }, { "CRxError", 49, 1 }, { "ERxError", 48, 1 }, { "SanityFailed", 47, 1 }, { "SpuriousMsg", 46, 1 }, { "FlushInputMsg", 45, 1 }, { "FlushInputCpl", 44, 1 }, { "RssUpBit", 43, 1 }, { "RssFilterHit", 42, 1 }, { "Tid", 32, 10 }, { "InitTcb", 31, 1 }, { "LineNumber", 24, 7 }, { "Emsg", 23, 1 }, { "EdataOut", 22, 1 }, { "Cmsg", 21, 1 }, { "CdataOut", 20, 1 }, { "EreadPdu", 19, 1 }, 
{ "CreadPdu", 18, 1 }, { "TunnelPkt", 17, 1 }, { "RcfPeerFin", 16, 1 }, { "RcfReasonOut", 12, 4 }, { "TxCchannel", 10, 2 }, { "RcfTxChannel", 8, 2 }, { "RxEchannel", 6, 2 }, { "RcfRxChannel", 5, 1 }, { "RcfDataOutSrdy", 4, 1 }, { "RxDvld", 3, 1 }, { "RxOoDvld", 2, 1 }, { "RxCongestion", 1, 1 }, { "TxCongestion", 0, 1 }, { NULL } }; static const struct field_desc tp_la1[] = { { "CplCmdIn", 56, 8 }, { "CplCmdOut", 48, 8 }, { "ESynOut", 47, 1 }, { "EAckOut", 46, 1 }, { "EFinOut", 45, 1 }, { "ERstOut", 44, 1 }, { "SynIn", 43, 1 }, { "AckIn", 42, 1 }, { "FinIn", 41, 1 }, { "RstIn", 40, 1 }, { "DataIn", 39, 1 }, { "DataInVld", 38, 1 }, { "PadIn", 37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { "ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static const struct field_desc tp_la2[] = { { "CplCmdIn", 56, 8 }, { "MpsVfVld", 55, 1 }, { "MpsPf", 52, 3 }, { "MpsVf", 44, 8 }, { "SynIn", 43, 1 }, { "AckIn", 42, 1 }, { "FinIn", 41, 1 }, { "RstIn", 40, 1 }, { "DataIn", 39, 1 }, { "DataInVld", 38, 1 }, { "PadIn", 37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { "ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static void tp_la_show(struct sbuf *sb, uint64_t *p, int idx) { field_desc_show(sb, *p, tp_la0); } static void tp_la_show2(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], tp_la0); } static void tp_la_show3(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? 
tp_la2 : tp_la1); } static int sysctl_tp_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint64_t *buf, *p; int rc; u_int i, inc; void (*show_func)(struct sbuf *, uint64_t *, int); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK); t4_tp_read_la(sc, buf, NULL); p = buf; switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) { case 2: inc = 2; show_func = tp_la_show2; break; case 3: inc = 2; show_func = tp_la_show3; break; default: inc = 1; show_func = tp_la_show; } for (i = 0; i < TPLA_SIZE / inc; i++, p += inc) (*show_func)(sb, p, i); rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; u64 nrate[MAX_NCHAN], orate[MAX_NCHAN]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_chan_txrate(sc, nrate, orate); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3\n"); sbuf_printf(sb, "NIC B/s: %10ju %10ju %10ju %10ju\n", nrate[0], nrate[1], nrate[2], nrate[3]); sbuf_printf(sb, "Offload B/s: %10ju %10ju %10ju %10ju", orate[0], orate[1], orate[2], orate[3]); } else { sbuf_printf(sb, " channel 0 channel 1\n"); sbuf_printf(sb, "NIC B/s: %10ju %10ju\n", nrate[0], nrate[1]); sbuf_printf(sb, "Offload B/s: %10ju %10ju", orate[0], orate[1]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint32_t *buf, *p; int rc, i; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_ulprx_read_la(sc, buf); p = buf; sbuf_printf(sb, " Pcmd Type Message" " Data"); for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) { sbuf_printf(sb, "\n%08x%08x %4x %08x %08x%08x%08x%08x", p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, v; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); v = t4_read_reg(sc, A_SGE_STAT_CFG); if (G_STATSOURCE_T5(v) == 7) { if (G_STATMODE(v) == 0) { sbuf_printf(sb, "total %d, incomplete %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } else if (G_STATMODE(v) == 1) { sbuf_printf(sb, "total %d, data overflow %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tc_params(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct tx_sched_class *tc; struct t4_sched_class_params p; struct sbuf *sb; int i, rc, port_id, flags, mbps, gbps; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); port_id = arg2 >> 16; MPASS(port_id < sc->params.nports); MPASS(sc->port[port_id] != NULL); i = arg2 & 0xffff; MPASS(i < sc->chip_params->nsched_cls); tc = &sc->port[port_id]->tc[i]; rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | 
SLEEP_OK | INTR_OK, "t4tc_p"); if (rc) goto done; flags = tc->flags; p = tc->params; end_synchronized_op(sc, LOCK_HELD); if ((flags & TX_SC_OK) == 0) { sbuf_printf(sb, "none"); goto done; } if (p.level == SCHED_CLASS_LEVEL_CL_WRR) { sbuf_printf(sb, "cl-wrr weight %u", p.weight); goto done; } else if (p.level == SCHED_CLASS_LEVEL_CL_RL) sbuf_printf(sb, "cl-rl"); else if (p.level == SCHED_CLASS_LEVEL_CH_RL) sbuf_printf(sb, "ch-rl"); else { rc = ENXIO; goto done; } if (p.ratemode == SCHED_CLASS_RATEMODE_REL) { /* XXX: top speed or actual link speed? */ gbps = port_top_speed(sc->port[port_id]); sbuf_printf(sb, " %u%% of %uGbps", p.maxrate, gbps); } else if (p.ratemode == SCHED_CLASS_RATEMODE_ABS) { switch (p.rateunit) { case SCHED_CLASS_RATEUNIT_BITS: mbps = p.maxrate / 1000; gbps = p.maxrate / 1000000; if (p.maxrate == gbps * 1000000) sbuf_printf(sb, " %uGbps", gbps); else if (p.maxrate == mbps * 1000) sbuf_printf(sb, " %uMbps", mbps); else sbuf_printf(sb, " %uKbps", p.maxrate); break; case SCHED_CLASS_RATEUNIT_PKTS: sbuf_printf(sb, " %upps", p.maxrate); break; default: rc = ENXIO; goto done; } } switch (p.mode) { case SCHED_CLASS_MODE_CLASS: sbuf_printf(sb, " aggregate"); break; case SCHED_CLASS_MODE_FLOW: sbuf_printf(sb, " per-flow"); break; default: rc = ENXIO; goto done; } done: if (rc == 0) rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } #endif #ifdef TCP_OFFLOAD static void unit_conv(char *buf, size_t len, u_int val, u_int factor) { u_int rem = val % factor; if (rem == 0) snprintf(buf, len, "%u", val / factor); else { while (rem % 10 == 0) rem /= 10; snprintf(buf, len, "%u.%u", val / factor, rem); } } static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; char buf[16]; u_int res, re; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); switch (arg2) { case 0: /* timer_tick */ re = G_TIMERRESOLUTION(res); break; case 1: /* TCP timestamp tick */ re = G_TIMESTAMPRESOLUTION(res); break; case 2: /* DACK tick */ re = G_DELAYEDACKRESOLUTION(res); break; default: return (EDOOFUS); } unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000); return (sysctl_handle_string(oidp, buf, sizeof(buf), req)); } static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int res, dack_re, v; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); dack_re = G_DELAYEDACKRESOLUTION(res); v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER); return (sysctl_handle_int(oidp, &v, 0, req)); } static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int reg = arg2; u_int tre; u_long tp_tick_us, v; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX || reg == A_TP_PERS_MIN || reg == A_TP_PERS_MAX || reg == A_TP_KEEP_IDLE || A_TP_KEEP_INTVL || reg == A_TP_INIT_SRTT || reg == A_TP_FINWAIT2_TIMER); tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION)); tp_tick_us = (cclk_ps << tre) / 1000000; if (reg == A_TP_INIT_SRTT) v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg)); else v = tp_tick_us * t4_read_reg(sc, reg); return (sysctl_handle_long(oidp, &v, 0, req)); } #endif static uint32_t fconf_iconf_to_mode(uint32_t fconf, uint32_t iconf) { uint32_t mode; mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR | T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT; if (fconf & F_FRAGMENTATION) mode |= T4_FILTER_IP_FRAGMENT; if (fconf & F_MPSHITTYPE) mode |= T4_FILTER_MPS_HIT_TYPE; if 
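The sysctl_tp_tick() and sysctl_tp_timer() handlers above convert TP timer registers into wall-clock units: the core-clock period in picoseconds is 10^9 / cclk (cclk being reported in kHz), and one TP timer tick is that period shifted left by the programmed resolution. A hedged worked example of the same arithmetic follows; the clock rate, resolution, and register value are made up and do not describe real hardware:

#include <stdio.h>

int
main(void)
{
	unsigned cclk_khz = 250000;			/* hypothetical 250 MHz core clock */
	unsigned cclk_ps = 1000000000 / cclk_khz;	/* 4000 ps per core-clock cycle */
	unsigned tre = 12;				/* hypothetical TIMERRESOLUTION field */
	unsigned long tp_tick_us =
	    ((unsigned long)cclk_ps << tre) / 1000000;	/* 16384000 ps ~= 16 us */
	unsigned reg_val = 3;				/* hypothetical A_TP_RXT_MIN contents */

	printf("tick = %lu us, rxt_min = %lu us\n",
	    tp_tick_us, tp_tick_us * reg_val);
	return (0);
}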
(fconf & F_MACMATCH) mode |= T4_FILTER_MAC_IDX; if (fconf & F_ETHERTYPE) mode |= T4_FILTER_ETH_TYPE; if (fconf & F_PROTOCOL) mode |= T4_FILTER_IP_PROTO; if (fconf & F_TOS) mode |= T4_FILTER_IP_TOS; if (fconf & F_VLAN) mode |= T4_FILTER_VLAN; if (fconf & F_VNIC_ID) { mode |= T4_FILTER_VNIC; if (iconf & F_VNIC) mode |= T4_FILTER_IC_VNIC; } if (fconf & F_PORT) mode |= T4_FILTER_PORT; if (fconf & F_FCOE) mode |= T4_FILTER_FCoE; return (mode); } static uint32_t mode_to_fconf(uint32_t mode) { uint32_t fconf = 0; if (mode & T4_FILTER_IP_FRAGMENT) fconf |= F_FRAGMENTATION; if (mode & T4_FILTER_MPS_HIT_TYPE) fconf |= F_MPSHITTYPE; if (mode & T4_FILTER_MAC_IDX) fconf |= F_MACMATCH; if (mode & T4_FILTER_ETH_TYPE) fconf |= F_ETHERTYPE; if (mode & T4_FILTER_IP_PROTO) fconf |= F_PROTOCOL; if (mode & T4_FILTER_IP_TOS) fconf |= F_TOS; if (mode & T4_FILTER_VLAN) fconf |= F_VLAN; if (mode & T4_FILTER_VNIC) fconf |= F_VNIC_ID; if (mode & T4_FILTER_PORT) fconf |= F_PORT; if (mode & T4_FILTER_FCoE) fconf |= F_FCOE; return (fconf); } static uint32_t mode_to_iconf(uint32_t mode) { if (mode & T4_FILTER_IC_VNIC) return (F_VNIC); return (0); } static int check_fspec_against_fconf_iconf(struct adapter *sc, struct t4_filter_specification *fs) { struct tp_params *tpp = &sc->params.tp; uint32_t fconf = 0; if (fs->val.frag || fs->mask.frag) fconf |= F_FRAGMENTATION; if (fs->val.matchtype || fs->mask.matchtype) fconf |= F_MPSHITTYPE; if (fs->val.macidx || fs->mask.macidx) fconf |= F_MACMATCH; if (fs->val.ethtype || fs->mask.ethtype) fconf |= F_ETHERTYPE; if (fs->val.proto || fs->mask.proto) fconf |= F_PROTOCOL; if (fs->val.tos || fs->mask.tos) fconf |= F_TOS; if (fs->val.vlan_vld || fs->mask.vlan_vld) fconf |= F_VLAN; if (fs->val.ovlan_vld || fs->mask.ovlan_vld) { fconf |= F_VNIC_ID; if (tpp->ingress_config & F_VNIC) return (EINVAL); } if (fs->val.pfvf_vld || fs->mask.pfvf_vld) { fconf |= F_VNIC_ID; if ((tpp->ingress_config & F_VNIC) == 0) return (EINVAL); } if (fs->val.iport || fs->mask.iport) fconf |= F_PORT; if (fs->val.fcoe || fs->mask.fcoe) fconf |= F_FCOE; if ((tpp->vlan_pri_map | fconf) != tpp->vlan_pri_map) return (E2BIG); return (0); } static int get_filter_mode(struct adapter *sc, uint32_t *mode) { struct tp_params *tpp = &sc->params.tp; /* * We trust the cached values of the relevant TP registers. This means * things work reliably only if writes to those registers are always via * t4_set_filter_mode. */ *mode = fconf_iconf_to_mode(tpp->vlan_pri_map, tpp->ingress_config); return (0); } static int set_filter_mode(struct adapter *sc, uint32_t mode) { struct tp_params *tpp = &sc->params.tp; uint32_t fconf, iconf; int rc; iconf = mode_to_iconf(mode); if ((iconf ^ tpp->ingress_config) & F_VNIC) { /* * For now we just complain if A_TP_INGRESS_CONFIG is not * already set to the correct value for the requested filter * mode. It's not clear if it's safe to write to this register * on the fly. (And we trust the cached value of the register). 
*/ return (EBUSY); } fconf = mode_to_fconf(mode); rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4setfm"); if (rc) return (rc); if (sc->tids.ftids_in_use > 0) { rc = EBUSY; goto done; } #ifdef TCP_OFFLOAD if (uld_active(sc, ULD_TOM)) { rc = EBUSY; goto done; } #endif rc = -t4_set_filter_mode(sc, fconf); done: end_synchronized_op(sc, LOCK_HELD); return (rc); } static inline uint64_t get_filter_hits(struct adapter *sc, uint32_t fid) { uint32_t tcb_addr; tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + (fid + sc->tids.ftid_base) * TCB_SIZE; if (is_t4(sc)) { uint64_t hits; read_via_memwin(sc, 0, tcb_addr + 16, (uint32_t *)&hits, 8); return (be64toh(hits)); } else { uint32_t hits; read_via_memwin(sc, 0, tcb_addr + 24, &hits, 4); return (be32toh(hits)); } } static int get_filter(struct adapter *sc, struct t4_filter *t) { int i, rc, nfilters = sc->tids.nftids; struct filter_entry *f; rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4getf"); if (rc) return (rc); if (sc->tids.ftids_in_use == 0 || sc->tids.ftid_tab == NULL || t->idx >= nfilters) { t->idx = 0xffffffff; goto done; } f = &sc->tids.ftid_tab[t->idx]; for (i = t->idx; i < nfilters; i++, f++) { if (f->valid) { t->idx = i; t->l2tidx = f->l2t ? f->l2t->idx : 0; t->smtidx = f->smtidx; if (f->fs.hitcnts) t->hits = get_filter_hits(sc, t->idx); else t->hits = UINT64_MAX; t->fs = f->fs; goto done; } } t->idx = 0xffffffff; done: end_synchronized_op(sc, LOCK_HELD); return (0); } static int set_filter(struct adapter *sc, struct t4_filter *t) { unsigned int nfilters, nports; struct filter_entry *f; int i, rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf"); if (rc) return (rc); nfilters = sc->tids.nftids; nports = sc->params.nports; if (nfilters == 0) { rc = ENOTSUP; goto done; } if (t->idx >= nfilters) { rc = EINVAL; goto done; } /* Validate against the global filter mode and ingress config */ rc = check_fspec_against_fconf_iconf(sc, &t->fs); if (rc != 0) goto done; if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) { rc = EINVAL; goto done; } if (t->fs.val.iport >= nports) { rc = EINVAL; goto done; } /* Can't specify an iq if not steering to it */ if (!t->fs.dirsteer && t->fs.iq) { rc = EINVAL; goto done; } /* IPv6 filter idx must be 4 aligned */ if (t->fs.type == 1 && ((t->idx & 0x3) || t->idx + 4 >= nfilters)) { rc = EINVAL; goto done; } if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_full_init(sc)) != 0)) goto done; if (sc->tids.ftid_tab == NULL) { KASSERT(sc->tids.ftids_in_use == 0, ("%s: no memory allocated but filters_in_use > 0", __func__)); sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) * nfilters, M_CXGBE, M_NOWAIT | M_ZERO); if (sc->tids.ftid_tab == NULL) { rc = ENOMEM; goto done; } mtx_init(&sc->tids.ftid_lock, "T4 filters", 0, MTX_DEF); } for (i = 0; i < 4; i++) { f = &sc->tids.ftid_tab[t->idx + i]; if (f->pending || f->valid) { rc = EBUSY; goto done; } if (f->locked) { rc = EPERM; goto done; } if (t->fs.type == 0) break; } f = &sc->tids.ftid_tab[t->idx]; f->fs = t->fs; rc = set_filter_wr(sc, t->idx); done: end_synchronized_op(sc, 0); if (rc == 0) { mtx_lock(&sc->tids.ftid_lock); for (;;) { if (f->pending == 0) { rc = f->valid ? 
0 : EIO; break; } if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, PCATCH, "t4setfw", 0)) { rc = EINPROGRESS; break; } } mtx_unlock(&sc->tids.ftid_lock); } return (rc); } static int del_filter(struct adapter *sc, struct t4_filter *t) { unsigned int nfilters; struct filter_entry *f; int rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4delf"); if (rc) return (rc); nfilters = sc->tids.nftids; if (nfilters == 0) { rc = ENOTSUP; goto done; } if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 || t->idx >= nfilters) { rc = EINVAL; goto done; } if (!(sc->flags & FULL_INIT_DONE)) { rc = EAGAIN; goto done; } f = &sc->tids.ftid_tab[t->idx]; if (f->pending) { rc = EBUSY; goto done; } if (f->locked) { rc = EPERM; goto done; } if (f->valid) { t->fs = f->fs; /* extra info for the caller */ rc = del_filter_wr(sc, t->idx); } done: end_synchronized_op(sc, 0); if (rc == 0) { mtx_lock(&sc->tids.ftid_lock); for (;;) { if (f->pending == 0) { rc = f->valid ? EIO : 0; break; } if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, PCATCH, "t4delfw", 0)) { rc = EINPROGRESS; break; } } mtx_unlock(&sc->tids.ftid_lock); } return (rc); } static void clear_filter(struct filter_entry *f) { if (f->l2t) t4_l2t_release(f->l2t); bzero(f, sizeof (*f)); } static int set_filter_wr(struct adapter *sc, int fidx) { struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; unsigned int ftid, vnic_vld, vnic_vld_mask; struct wrq_cookie cookie; ASSERT_SYNCHRONIZED_OP(sc); if (f->fs.newdmac || f->fs.newvlan) { /* This filter needs an L2T entry; allocate one. */ f->l2t = t4_l2t_alloc_switching(sc->l2t); if (f->l2t == NULL) return (EAGAIN); if (t4_l2t_set_switching(sc, f->l2t, f->fs.vlan, f->fs.eport, f->fs.dmac)) { t4_l2t_release(f->l2t); f->l2t = NULL; return (ENOMEM); } } /* Already validated against fconf, iconf */ MPASS((f->fs.val.pfvf_vld & f->fs.val.ovlan_vld) == 0); MPASS((f->fs.mask.pfvf_vld & f->fs.mask.ovlan_vld) == 0); if (f->fs.val.pfvf_vld || f->fs.val.ovlan_vld) vnic_vld = 1; else vnic_vld = 0; if (f->fs.mask.pfvf_vld || f->fs.mask.ovlan_vld) vnic_vld_mask = 1; else vnic_vld_mask = 0; ftid = sc->tids.ftid_base + fidx; fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie); if (fwr == NULL) return (ENOMEM); bzero(fwr, sizeof(*fwr)); fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR)); fwr->len16_pkd = htobe32(FW_LEN16(*fwr)); fwr->tid_to_iq = htobe32(V_FW_FILTER_WR_TID(ftid) | V_FW_FILTER_WR_RQTYPE(f->fs.type) | V_FW_FILTER_WR_NOREPLY(0) | V_FW_FILTER_WR_IQ(f->fs.iq)); fwr->del_filter_to_l2tix = htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) | V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) | V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) | V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) | V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) | V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) | V_FW_FILTER_WR_DMAC(f->fs.newdmac) | V_FW_FILTER_WR_SMAC(f->fs.newsmac) | V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) | V_FW_FILTER_WR_TXCHAN(f->fs.eport) | V_FW_FILTER_WR_PRIO(f->fs.prio) | V_FW_FILTER_WR_L2TIX(f->l2t ? 
f->l2t->idx : 0)); fwr->ethtype = htobe16(f->fs.val.ethtype); fwr->ethtypem = htobe16(f->fs.mask.ethtype); fwr->frag_to_ovlan_vldm = (V_FW_FILTER_WR_FRAG(f->fs.val.frag) | V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) | V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLD(vnic_vld) | V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLDM(vnic_vld_mask)); fwr->smac_sel = 0; fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) | V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id)); fwr->maci_to_matchtypem = htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) | V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) | V_FW_FILTER_WR_PORT(f->fs.val.iport) | V_FW_FILTER_WR_PORTM(f->fs.mask.iport) | V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) | V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype)); fwr->ptcl = f->fs.val.proto; fwr->ptclm = f->fs.mask.proto; fwr->ttyp = f->fs.val.tos; fwr->ttypm = f->fs.mask.tos; fwr->ivlan = htobe16(f->fs.val.vlan); fwr->ivlanm = htobe16(f->fs.mask.vlan); fwr->ovlan = htobe16(f->fs.val.vnic); fwr->ovlanm = htobe16(f->fs.mask.vnic); bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip)); bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm)); bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip)); bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm)); fwr->lp = htobe16(f->fs.val.dport); fwr->lpm = htobe16(f->fs.mask.dport); fwr->fp = htobe16(f->fs.val.sport); fwr->fpm = htobe16(f->fs.mask.sport); if (f->fs.newsmac) bcopy(f->fs.smac, fwr->sma, sizeof (fwr->sma)); f->pending = 1; sc->tids.ftids_in_use++; commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie); return (0); } static int del_filter_wr(struct adapter *sc, int fidx) { struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; unsigned int ftid; struct wrq_cookie cookie; ftid = sc->tids.ftid_base + fidx; fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie); if (fwr == NULL) return (ENOMEM); bzero(fwr, sizeof (*fwr)); t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id); f->pending = 1; commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie); return (0); } int t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1); unsigned int idx = GET_TID(rpl); unsigned int rc; struct filter_entry *f; KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, rss->opcode)); MPASS(iq == &sc->sge.fwq); MPASS(is_ftid(sc, idx)); idx -= sc->tids.ftid_base; f = &sc->tids.ftid_tab[idx]; rc = G_COOKIE(rpl->cookie); mtx_lock(&sc->tids.ftid_lock); if (rc == FW_FILTER_WR_FLT_ADDED) { KASSERT(f->pending, ("%s: filter[%u] isn't pending.", __func__, idx)); f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff; f->pending = 0; /* asynchronous setup completed */ f->valid = 1; } else { if (rc != FW_FILTER_WR_FLT_DELETED) { /* Add or delete failed, display an error */ log(LOG_ERR, "filter %u setup failed with error %u\n", idx, rc); } clear_filter(f); sc->tids.ftids_in_use--; } wakeup(&sc->tids.ftid_tab); mtx_unlock(&sc->tids.ftid_lock); return (0); } static int set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { MPASS(iq->set_tcb_rpl != NULL); return (iq->set_tcb_rpl(iq, rss, m)); } static int l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { MPASS(iq->l2t_write_rpl != NULL); return (iq->l2t_write_rpl(iq, rss, m)); } static int get_sge_context(struct adapter *sc, struct t4_sge_context 
*cntxt) { int rc; if (cntxt->cid > M_CTXTQID) return (EINVAL); if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS && cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM) return (EINVAL); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt"); if (rc) return (rc); if (sc->flags & FW_OK) { rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); if (rc == 0) goto done; } /* * Read via firmware failed or wasn't even attempted. Read directly via * the backdoor. */ rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); done: end_synchronized_op(sc, 0); return (rc); } static int load_fw(struct adapter *sc, struct t4_data *fw) { int rc; uint8_t *fw_data; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw"); if (rc) return (rc); if (sc->flags & FULL_INIT_DONE) { rc = EBUSY; goto done; } fw_data = malloc(fw->len, M_CXGBE, M_WAITOK); if (fw_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(fw->data, fw_data, fw->len); if (rc == 0) rc = -t4_load_fw(sc, fw_data, fw->len); free(fw_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } #define MAX_READ_BUF_SIZE (128 * 1024) static int read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr) { uint32_t addr, remaining, n; uint32_t *buf; int rc; uint8_t *dst; rc = validate_mem_range(sc, mr->addr, mr->len); if (rc != 0) return (rc); buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK); addr = mr->addr; remaining = mr->len; dst = (void *)mr->data; while (remaining) { n = min(remaining, MAX_READ_BUF_SIZE); read_via_memwin(sc, 2, addr, buf, n); rc = copyout(buf, dst, n); if (rc != 0) break; dst += n; remaining -= n; addr += n; } free(buf, M_CXGBE); return (rc); } #undef MAX_READ_BUF_SIZE static int read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd) { int rc; if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports) return (EINVAL); if (i2cd->len > sizeof(i2cd->data)) return (EFBIG); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr, i2cd->offset, i2cd->len, &i2cd->data[0]); end_synchronized_op(sc, 0); return (rc); } static int in_range(int val, int lo, int hi) { return (val < 0 || (val <= hi && val >= lo)); } static int set_sched_class_config(struct adapter *sc, int minmax) { int rc; if (minmax < 0) return (EINVAL); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sscc"); if (rc) return (rc); rc = -t4_sched_config(sc, FW_SCHED_TYPE_PKTSCHED, minmax, 1); end_synchronized_op(sc, 0); return (rc); } static int set_sched_class_params(struct adapter *sc, struct t4_sched_class_params *p, int sleep_ok) { int rc, top_speed, fw_level, fw_mode, fw_rateunit, fw_ratemode; struct port_info *pi; struct tx_sched_class *tc; if (p->level == SCHED_CLASS_LEVEL_CL_RL) fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL; else if (p->level == SCHED_CLASS_LEVEL_CL_WRR) fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR; else if (p->level == SCHED_CLASS_LEVEL_CH_RL) fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL; else return (EINVAL); if (p->mode == SCHED_CLASS_MODE_CLASS) fw_mode = FW_SCHED_PARAMS_MODE_CLASS; else if (p->mode == SCHED_CLASS_MODE_FLOW) fw_mode = FW_SCHED_PARAMS_MODE_FLOW; else return (EINVAL); if (p->rateunit == SCHED_CLASS_RATEUNIT_BITS) fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE; else if (p->rateunit == SCHED_CLASS_RATEUNIT_PKTS) fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE; else return (EINVAL); if (p->ratemode == SCHED_CLASS_RATEMODE_REL) fw_ratemode = 
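The in_range() helper above treats a negative value as "not specified", so unset scheduler parameters pass the range check and are defaulted or rejected later in set_sched_class_params(). A minimal sketch of that convention, with invented bounds:

#include <stdio.h>

/* Same semantics as the driver's in_range(): negative means "unset". */
static int
in_range_ex(int val, int lo, int hi)
{
	return (val < 0 || (val <= hi && val >= lo));
}

int
main(void)
{
	printf("unset (-1):    %d\n", in_range_ex(-1, 0, 100));		/* 1: passes, handled later */
	printf("in range (42): %d\n", in_range_ex(42, 0, 100));		/* 1 */
	printf("too big (200): %d\n", in_range_ex(200, 0, 100));	/* 0 -> ERANGE */
	return (0);
}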
FW_SCHED_PARAMS_RATE_REL; else if (p->ratemode == SCHED_CLASS_RATEMODE_ABS) fw_ratemode = FW_SCHED_PARAMS_RATE_ABS; else return (EINVAL); /* Vet our parameters ... */ if (!in_range(p->channel, 0, sc->chip_params->nchan - 1)) return (ERANGE); pi = sc->port[sc->chan_map[p->channel]]; if (pi == NULL) return (ENXIO); MPASS(pi->tx_chan == p->channel); top_speed = port_top_speed(pi) * 1000000; /* Gbps -> Kbps */ if (!in_range(p->cl, 0, sc->chip_params->nsched_cls) || !in_range(p->minrate, 0, top_speed) || !in_range(p->maxrate, 0, top_speed) || !in_range(p->weight, 0, 100)) return (ERANGE); /* * Translate any unset parameters into the firmware's * nomenclature and/or fail the call if the parameters * are required ... */ if (p->rateunit < 0 || p->ratemode < 0 || p->channel < 0 || p->cl < 0) return (EINVAL); if (p->minrate < 0) p->minrate = 0; if (p->maxrate < 0) { if (p->level == SCHED_CLASS_LEVEL_CL_RL || p->level == SCHED_CLASS_LEVEL_CH_RL) return (EINVAL); else p->maxrate = 0; } if (p->weight < 0) { if (p->level == SCHED_CLASS_LEVEL_CL_WRR) return (EINVAL); else p->weight = 0; } if (p->pktsize < 0) { if (p->level == SCHED_CLASS_LEVEL_CL_RL || p->level == SCHED_CLASS_LEVEL_CH_RL) return (EINVAL); else p->pktsize = 0; } rc = begin_synchronized_op(sc, NULL, sleep_ok ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4sscp"); if (rc) return (rc); tc = &pi->tc[p->cl]; tc->params = *p; rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level, fw_mode, fw_rateunit, fw_ratemode, p->channel, p->cl, p->minrate, p->maxrate, p->weight, p->pktsize, sleep_ok); if (rc == 0) tc->flags |= TX_SC_OK; else { /* * Unknown state at this point, see tc->params for what was * attempted. */ tc->flags &= ~TX_SC_OK; } end_synchronized_op(sc, sleep_ok ? 0 : LOCK_HELD); return (rc); } static int set_sched_class(struct adapter *sc, struct t4_sched_params *p) { if (p->type != SCHED_CLASS_TYPE_PACKET) return (EINVAL); if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG) return (set_sched_class_config(sc, p->u.config.minmax)); if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS) return (set_sched_class_params(sc, &p->u.params, 1)); return (EINVAL); } static int set_sched_queue(struct adapter *sc, struct t4_sched_queue *p) { struct port_info *pi = NULL; struct vi_info *vi; struct sge_txq *txq; uint32_t fw_mnem, fw_queue, fw_class; int i, rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsq"); if (rc) return (rc); if (p->port >= sc->params.nports) { rc = EINVAL; goto done; } /* XXX: Only supported for the main VI. */ pi = sc->port[p->port]; vi = &pi->vi[0]; if (!(vi->flags & VI_INIT_DONE)) { /* tx queues not set up yet */ rc = EAGAIN; goto done; } if (!in_range(p->queue, 0, vi->ntxq - 1) || !in_range(p->cl, 0, sc->chip_params->nsched_cls - 1)) { rc = EINVAL; goto done; } /* * Create a template for the FW_PARAMS_CMD mnemonic and value (TX * Scheduling Class in this case). */ fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH)); fw_class = p->cl < 0 ? 0xffffffff : p->cl; /* * If op.queue is non-negative, then we're only changing the scheduling * on a single specified TX queue. */ if (p->queue >= 0) { txq = &sc->sge.txq[vi->first_txq + p->queue]; fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id)); rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue, &fw_class); goto done; } /* * Change the scheduling on all the TX queues for the * interface. 
*/ for_each_txq(vi, i, txq) { fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id)); rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue, &fw_class); if (rc) goto done; } rc = 0; done: end_synchronized_op(sc, 0); return (rc); } int t4_os_find_pci_capability(struct adapter *sc, int cap) { int i; return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0); } int t4_os_pci_save_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_save(dev, dinfo, 0); return (0); } int t4_os_pci_restore_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_restore(dev, dinfo); return (0); } void t4_os_portmod_changed(const struct adapter *sc, int idx) { struct port_info *pi = sc->port[idx]; struct vi_info *vi; struct ifnet *ifp; int v; static const char *mod_str[] = { NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM" }; for_each_vi(pi, v, vi) { build_medialist(pi, &vi->media); } ifp = pi->vi[0].ifp; if (pi->mod_type == FW_PORT_MOD_TYPE_NONE) if_printf(ifp, "transceiver unplugged.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN) if_printf(ifp, "unknown transceiver inserted.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED) if_printf(ifp, "unsupported transceiver inserted.\n"); else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) { if_printf(ifp, "%s transceiver inserted.\n", mod_str[pi->mod_type]); } else { if_printf(ifp, "transceiver (type %d) inserted.\n", pi->mod_type); } } void t4_os_link_changed(struct adapter *sc, int idx, int link_stat, int reason) { struct port_info *pi = sc->port[idx]; struct vi_info *vi; struct ifnet *ifp; int v; if (link_stat) pi->linkdnrc = -1; else { if (reason >= 0) pi->linkdnrc = reason; } for_each_vi(pi, v, vi) { ifp = vi->ifp; if (ifp == NULL) continue; if (link_stat) { ifp->if_baudrate = IF_Mbps(pi->link_cfg.speed); if_link_state_change(ifp, LINK_STATE_UP); } else { if_link_state_change(ifp, LINK_STATE_DOWN); } } } void t4_iterate(void (*func)(struct adapter *, void *), void *arg) { struct adapter *sc; sx_slock(&t4_list_lock); SLIST_FOREACH(sc, &t4_list, link) { /* * func should not make any assumptions about what state sc is * in - the only guarantee is that sc->sc_lock is a valid lock. 
*/ func(sc, arg); } sx_sunlock(&t4_list_lock); } static int -t4_open(struct cdev *dev, int flags, int type, struct thread *td) -{ - return (0); -} - -static int -t4_close(struct cdev *dev, int flags, int type, struct thread *td) -{ - return (0); -} - -static int t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, struct thread *td) { int rc; struct adapter *sc = dev->si_drv1; rc = priv_check(td, PRIV_DRIVER); if (rc != 0) return (rc); switch (cmd) { case CHELSIO_T4_GETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) edata->val = t4_read_reg(sc, edata->addr); else if (edata->size == 8) edata->val = t4_read_reg64(sc, edata->addr); else return (EINVAL); break; } case CHELSIO_T4_SETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) { if (edata->val & 0xffffffff00000000) return (EINVAL); t4_write_reg(sc, edata->addr, (uint32_t) edata->val); } else if (edata->size == 8) t4_write_reg64(sc, edata->addr, edata->val); else return (EINVAL); break; } case CHELSIO_T4_REGDUMP: { struct t4_regdump *regs = (struct t4_regdump *)data; int reglen = is_t4(sc) ? T4_REGDUMP_SIZE : T5_REGDUMP_SIZE; uint8_t *buf; if (regs->len < reglen) { regs->len = reglen; /* hint to the caller */ return (ENOBUFS); } regs->len = reglen; buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO); get_regs(sc, regs, buf); rc = copyout(buf, regs->data, reglen); free(buf, M_CXGBE); break; } case CHELSIO_T4_GET_FILTER_MODE: rc = get_filter_mode(sc, (uint32_t *)data); break; case CHELSIO_T4_SET_FILTER_MODE: rc = set_filter_mode(sc, *(uint32_t *)data); break; case CHELSIO_T4_GET_FILTER: rc = get_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_SET_FILTER: rc = set_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_DEL_FILTER: rc = del_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_GET_SGE_CONTEXT: rc = get_sge_context(sc, (struct t4_sge_context *)data); break; case CHELSIO_T4_LOAD_FW: rc = load_fw(sc, (struct t4_data *)data); break; case CHELSIO_T4_GET_MEM: rc = read_card_mem(sc, 2, (struct t4_mem_range *)data); break; case CHELSIO_T4_GET_I2C: rc = read_i2c(sc, (struct t4_i2c_data *)data); break; case CHELSIO_T4_CLEAR_STATS: { int i, v; u_int port_id = *(uint32_t *)data; struct port_info *pi; struct vi_info *vi; if (port_id >= sc->params.nports) return (EINVAL); pi = sc->port[port_id]; /* MAC stats */ t4_clr_port_stats(sc, pi->tx_chan); pi->tx_parse_error = 0; mtx_lock(&sc->reg_lock); for_each_vi(pi, v, vi) { if (vi->flags & VI_INIT_DONE) t4_clr_vi_stats(sc, vi->viid); } mtx_unlock(&sc->reg_lock); /* * Since this command accepts a port, clear stats for * all VIs on this port. 
*/ for_each_vi(pi, v, vi) { if (vi->flags & VI_INIT_DONE) { struct sge_rxq *rxq; struct sge_txq *txq; struct sge_wrq *wrq; for_each_rxq(vi, i, rxq) { #if defined(INET) || defined(INET6) rxq->lro.lro_queued = 0; rxq->lro.lro_flushed = 0; #endif rxq->rxcsum = 0; rxq->vlan_extraction = 0; } for_each_txq(vi, i, txq) { txq->txcsum = 0; txq->tso_wrs = 0; txq->vlan_insertion = 0; txq->imm_wrs = 0; txq->sgl_wrs = 0; txq->txpkt_wrs = 0; txq->txpkts0_wrs = 0; txq->txpkts1_wrs = 0; txq->txpkts0_pkts = 0; txq->txpkts1_pkts = 0; mp_ring_reset_stats(txq->r); } #ifdef TCP_OFFLOAD /* nothing to clear for each ofld_rxq */ for_each_ofld_txq(vi, i, wrq) { wrq->tx_wrs_direct = 0; wrq->tx_wrs_copied = 0; } #endif if (IS_MAIN_VI(vi)) { wrq = &sc->sge.ctrlq[pi->port_id]; wrq->tx_wrs_direct = 0; wrq->tx_wrs_copied = 0; } } } break; } case CHELSIO_T4_SCHED_CLASS: rc = set_sched_class(sc, (struct t4_sched_params *)data); break; case CHELSIO_T4_SCHED_QUEUE: rc = set_sched_queue(sc, (struct t4_sched_queue *)data); break; case CHELSIO_T4_GET_TRACER: rc = t4_get_tracer(sc, (struct t4_tracer *)data); break; case CHELSIO_T4_SET_TRACER: rc = t4_set_tracer(sc, (struct t4_tracer *)data); break; default: - rc = EINVAL; + rc = ENOTTY; } return (rc); } void t4_db_full(struct adapter *sc) { CXGBE_UNIMPLEMENTED(__func__); } void t4_db_dropped(struct adapter *sc) { CXGBE_UNIMPLEMENTED(__func__); } #ifdef TCP_OFFLOAD void t4_iscsi_init(struct adapter *sc, u_int tag_mask, const u_int *pgsz_order) { t4_write_reg(sc, A_ULP_RX_ISCSI_TAGMASK, tag_mask); t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, V_HPZ0(pgsz_order[0]) | V_HPZ1(pgsz_order[1]) | V_HPZ2(pgsz_order[2]) | V_HPZ3(pgsz_order[3])); } static int toe_capability(struct vi_info *vi, int enable) { int rc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; ASSERT_SYNCHRONIZED_OP(sc); if (!is_offload(sc)) return (ENODEV); if (enable) { if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) { /* TOE is already enabled. */ return (0); } /* * We need the port's queues around so that we're able to send * and receive CPLs to/from the TOE even if the ifnet for this * port has never been UP'd administratively. */ if (!(vi->flags & VI_INIT_DONE)) { rc = vi_full_init(vi); if (rc) return (rc); } if (!(pi->vi[0].flags & VI_INIT_DONE)) { rc = vi_full_init(&pi->vi[0]); if (rc) return (rc); } if (isset(&sc->offload_map, pi->port_id)) { /* TOE is enabled on another VI of this port. */ pi->uld_vis++; return (0); } if (!uld_active(sc, ULD_TOM)) { rc = t4_activate_uld(sc, ULD_TOM); if (rc == EAGAIN) { log(LOG_WARNING, "You must kldload t4_tom.ko before trying " "to enable TOE on a cxgbe interface.\n"); } if (rc != 0) return (rc); KASSERT(sc->tom_softc != NULL, ("%s: TOM activated but softc NULL", __func__)); KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM activated but flag not set", __func__)); } /* Activate iWARP and iSCSI too, if the modules are loaded. */ if (!uld_active(sc, ULD_IWARP)) (void) t4_activate_uld(sc, ULD_IWARP); if (!uld_active(sc, ULD_ISCSI)) (void) t4_activate_uld(sc, ULD_ISCSI); pi->uld_vis++; setbit(&sc->offload_map, pi->port_id); } else { pi->uld_vis--; if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0) return (0); KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM never initialized?", __func__)); clrbit(&sc->offload_map, pi->port_id); } return (0); } /* * Add an upper layer driver to the global list. 
*/ int t4_register_uld(struct uld_info *ui) { int rc = 0; struct uld_info *u; sx_xlock(&t4_uld_list_lock); SLIST_FOREACH(u, &t4_uld_list, link) { if (u->uld_id == ui->uld_id) { rc = EEXIST; goto done; } } SLIST_INSERT_HEAD(&t4_uld_list, ui, link); ui->refcount = 0; done: sx_xunlock(&t4_uld_list_lock); return (rc); } int t4_unregister_uld(struct uld_info *ui) { int rc = EINVAL; struct uld_info *u; sx_xlock(&t4_uld_list_lock); SLIST_FOREACH(u, &t4_uld_list, link) { if (u == ui) { if (ui->refcount > 0) { rc = EBUSY; goto done; } SLIST_REMOVE(&t4_uld_list, ui, uld_info, link); rc = 0; goto done; } } done: sx_xunlock(&t4_uld_list_lock); return (rc); } int t4_activate_uld(struct adapter *sc, int id) { int rc; struct uld_info *ui; ASSERT_SYNCHRONIZED_OP(sc); if (id < 0 || id > ULD_MAX) return (EINVAL); rc = EAGAIN; /* kldoad the module with this ULD and try again. */ sx_slock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { if (ui->uld_id == id) { if (!(sc->flags & FULL_INIT_DONE)) { rc = adapter_full_init(sc); if (rc != 0) break; } rc = ui->activate(sc); if (rc == 0) { setbit(&sc->active_ulds, id); ui->refcount++; } break; } } sx_sunlock(&t4_uld_list_lock); return (rc); } int t4_deactivate_uld(struct adapter *sc, int id) { int rc; struct uld_info *ui; ASSERT_SYNCHRONIZED_OP(sc); if (id < 0 || id > ULD_MAX) return (EINVAL); rc = ENXIO; sx_slock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { if (ui->uld_id == id) { rc = ui->deactivate(sc); if (rc == 0) { clrbit(&sc->active_ulds, id); ui->refcount--; } break; } } sx_sunlock(&t4_uld_list_lock); return (rc); } int uld_active(struct adapter *sc, int uld_id) { MPASS(uld_id >= 0 && uld_id <= ULD_MAX); return (isset(&sc->active_ulds, uld_id)); } #endif /* * Come up with reasonable defaults for some of the tunables, provided they're * not set by the user (in which case we'll use the values as is). */ static void tweak_tunables(void) { int nc = mp_ncpus; /* our snapshot of the number of CPUs */ if (t4_ntxq10g < 1) { #ifdef RSS t4_ntxq10g = rss_getnumbuckets(); #else t4_ntxq10g = min(nc, NTXQ_10G); #endif } if (t4_ntxq1g < 1) { #ifdef RSS /* XXX: way too many for 1GbE? */ t4_ntxq1g = rss_getnumbuckets(); #else t4_ntxq1g = min(nc, NTXQ_1G); #endif } if (t4_ntxq_vi < 1) t4_ntxq_vi = min(nc, NTXQ_VI); if (t4_nrxq10g < 1) { #ifdef RSS t4_nrxq10g = rss_getnumbuckets(); #else t4_nrxq10g = min(nc, NRXQ_10G); #endif } if (t4_nrxq1g < 1) { #ifdef RSS /* XXX: way too many for 1GbE? 
*/ t4_nrxq1g = rss_getnumbuckets(); #else t4_nrxq1g = min(nc, NRXQ_1G); #endif } if (t4_nrxq_vi < 1) t4_nrxq_vi = min(nc, NRXQ_VI); #ifdef TCP_OFFLOAD if (t4_nofldtxq10g < 1) t4_nofldtxq10g = min(nc, NOFLDTXQ_10G); if (t4_nofldtxq1g < 1) t4_nofldtxq1g = min(nc, NOFLDTXQ_1G); if (t4_nofldtxq_vi < 1) t4_nofldtxq_vi = min(nc, NOFLDTXQ_VI); if (t4_nofldrxq10g < 1) t4_nofldrxq10g = min(nc, NOFLDRXQ_10G); if (t4_nofldrxq1g < 1) t4_nofldrxq1g = min(nc, NOFLDRXQ_1G); if (t4_nofldrxq_vi < 1) t4_nofldrxq_vi = min(nc, NOFLDRXQ_VI); if (t4_toecaps_allowed == -1) t4_toecaps_allowed = FW_CAPS_CONFIG_TOE; if (t4_rdmacaps_allowed == -1) { t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP | FW_CAPS_CONFIG_RDMA_RDMAC; } if (t4_iscsicaps_allowed == -1) { t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU | FW_CAPS_CONFIG_ISCSI_TARGET_PDU | FW_CAPS_CONFIG_ISCSI_T10DIF; } #else if (t4_toecaps_allowed == -1) t4_toecaps_allowed = 0; if (t4_rdmacaps_allowed == -1) t4_rdmacaps_allowed = 0; if (t4_iscsicaps_allowed == -1) t4_iscsicaps_allowed = 0; #endif #ifdef DEV_NETMAP if (t4_nnmtxq_vi < 1) t4_nnmtxq_vi = min(nc, NNMTXQ_VI); if (t4_nnmrxq_vi < 1) t4_nnmrxq_vi = min(nc, NNMRXQ_VI); #endif if (t4_tmr_idx_10g < 0 || t4_tmr_idx_10g >= SGE_NTIMERS) t4_tmr_idx_10g = TMR_IDX_10G; if (t4_pktc_idx_10g < -1 || t4_pktc_idx_10g >= SGE_NCOUNTERS) t4_pktc_idx_10g = PKTC_IDX_10G; if (t4_tmr_idx_1g < 0 || t4_tmr_idx_1g >= SGE_NTIMERS) t4_tmr_idx_1g = TMR_IDX_1G; if (t4_pktc_idx_1g < -1 || t4_pktc_idx_1g >= SGE_NCOUNTERS) t4_pktc_idx_1g = PKTC_IDX_1G; if (t4_qsize_txq < 128) t4_qsize_txq = 128; if (t4_qsize_rxq < 128) t4_qsize_rxq = 128; while (t4_qsize_rxq & 7) t4_qsize_rxq++; t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX; } #ifdef DDB static void t4_dump_tcb(struct adapter *sc, int tid) { uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos; reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2); save = t4_read_reg(sc, reg); base = sc->memwin[2].mw_base; /* Dump TCB for the tid */ tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE); tcb_addr += tid * TCB_SIZE; if (is_t4(sc)) { pf = 0; win_pos = tcb_addr & ~0xf; /* start must be 16B aligned */ } else { pf = V_PFNUM(sc->pf); win_pos = tcb_addr & ~0x7f; /* start must be 128B aligned */ } t4_write_reg(sc, reg, win_pos | pf); t4_read_reg(sc, reg); off = tcb_addr - win_pos; for (i = 0; i < 4; i++) { uint32_t buf[8]; for (j = 0; j < 8; j++, off += 4) buf[j] = htonl(t4_read_reg(sc, base + off)); db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); } t4_write_reg(sc, reg, save); t4_read_reg(sc, reg); } static void t4_dump_devlog(struct adapter *sc) { struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e e; int i, first, j, m, nentries, rc; uint64_t ftstamp = UINT64_MAX; if (dparams->start == 0) { db_printf("devlog params not valid\n"); return; } nentries = dparams->size / sizeof(struct fw_devlog_e); m = fwmtype_to_hwmtype(dparams->memtype); /* Find the first entry. 
*/ first = -1; for (i = 0; i < nentries && !db_pager_quit; i++) { rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e), sizeof(e), (void *)&e); if (rc != 0) break; if (e.timestamp == 0) break; e.timestamp = be64toh(e.timestamp); if (e.timestamp < ftstamp) { ftstamp = e.timestamp; first = i; } } if (first == -1) return; i = first; do { rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e), sizeof(e), (void *)&e); if (rc != 0) return; if (e.timestamp == 0) return; e.timestamp = be64toh(e.timestamp); e.seqno = be32toh(e.seqno); for (j = 0; j < 8; j++) e.params[j] = be32toh(e.params[j]); db_printf("%10d %15ju %8s %8s ", e.seqno, e.timestamp, (e.level < nitems(devlog_level_strings) ? devlog_level_strings[e.level] : "UNKNOWN"), (e.facility < nitems(devlog_facility_strings) ? devlog_facility_strings[e.facility] : "UNKNOWN")); db_printf(e.fmt, e.params[0], e.params[1], e.params[2], e.params[3], e.params[4], e.params[5], e.params[6], e.params[7]); if (++i == nentries) i = 0; } while (i != first && !db_pager_quit); } static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table); _DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table); DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL) { device_t dev; int t; bool valid; valid = false; t = db_read_token(); if (t == tIDENT) { dev = device_lookup_by_name(db_tok_string); valid = true; } db_skip_to_eol(); if (!valid) { db_printf("usage: show t4 devlog \n"); return; } if (dev == NULL) { db_printf("device not found\n"); return; } t4_dump_devlog(device_get_softc(dev)); } DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL) { device_t dev; int radix, tid, t; bool valid; valid = false; radix = db_radix; db_radix = 10; t = db_read_token(); if (t == tIDENT) { dev = device_lookup_by_name(db_tok_string); t = db_read_token(); if (t == tNUMBER) { tid = db_tok_number; valid = true; } } db_radix = radix; db_skip_to_eol(); if (!valid) { db_printf("usage: show t4 tcb \n"); return; } if (dev == NULL) { db_printf("device not found\n"); return; } if (tid < 0) { db_printf("invalid tid\n"); return; } t4_dump_tcb(device_get_softc(dev), tid); } #endif static struct sx mlu; /* mod load unload */ SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload"); static int mod_event(module_t mod, int cmd, void *arg) { int rc = 0; static int loaded = 0; switch (cmd) { case MOD_LOAD: sx_xlock(&mlu); if (loaded++ == 0) { t4_sge_modload(); t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl); t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl); t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt); t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt); sx_init(&t4_list_lock, "T4/T5 adapters"); SLIST_INIT(&t4_list); #ifdef TCP_OFFLOAD sx_init(&t4_uld_list_lock, "T4/T5 ULDs"); SLIST_INIT(&t4_uld_list); #endif t4_tracer_modload(); tweak_tunables(); } sx_xunlock(&mlu); break; case MOD_UNLOAD: sx_xlock(&mlu); if (--loaded == 0) { int tries; sx_slock(&t4_list_lock); if (!SLIST_EMPTY(&t4_list)) { rc = EBUSY; sx_sunlock(&t4_list_lock); goto done_unload; } #ifdef TCP_OFFLOAD sx_slock(&t4_uld_list_lock); if (!SLIST_EMPTY(&t4_uld_list)) { rc = EBUSY; sx_sunlock(&t4_uld_list_lock); sx_sunlock(&t4_list_lock); goto done_unload; } #endif tries = 0; while (tries++ < 5 && t4_sge_extfree_refs() != 0) { uprintf("%ju clusters with custom free routine " "still is use.\n", t4_sge_extfree_refs()); pause("t4unload", 2 * hz); } #ifdef TCP_OFFLOAD sx_sunlock(&t4_uld_list_lock); #endif sx_sunlock(&t4_list_lock); if (t4_sge_extfree_refs() == 0) { t4_tracer_modunload(); #ifdef 
TCP_OFFLOAD sx_destroy(&t4_uld_list_lock); #endif sx_destroy(&t4_list_lock); t4_sge_modunload(); loaded = 0; } else { rc = EBUSY; loaded++; /* undo earlier decrement */ } } done_unload: sx_xunlock(&mlu); break; } return (rc); } static devclass_t t4_devclass, t5_devclass; static devclass_t cxgbe_devclass, cxl_devclass; static devclass_t vcxgbe_devclass, vcxl_devclass; DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0); MODULE_VERSION(t4nex, 1); MODULE_DEPEND(t4nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t4nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0); MODULE_VERSION(t5nex, 1); MODULE_DEPEND(t5nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t5nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0); MODULE_VERSION(cxgbe, 1); DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0); MODULE_VERSION(cxl, 1); DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0); MODULE_VERSION(vcxgbe, 1); DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0); MODULE_VERSION(vcxl, 1); Index: user/alc/PQ_LAUNDRY/sys/dev/e1000/if_em.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/e1000/if_em.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/e1000/if_em.c (revision 303642) @@ -1,6235 +1,6236 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
******************************************************************************/ /*$FreeBSD$*/ #include "opt_em.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #ifdef DDB #include #include #endif #if __FreeBSD_version >= 800000 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000_api.h" #include "e1000_82571.h" #include "if_em.h" /********************************************************************* * Driver version: *********************************************************************/ char em_driver_version[] = "7.6.1-k"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static em_vendor_info_t em_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection */ { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, 
E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3, PCI_ANY_ID, PCI_ANY_ID, 0}, /* required last entry */ { 0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings for all supported NICs. 
*********************************************************************/ static char *em_strings[] = { "Intel(R) PRO/1000 Network Connection" }; /********************************************************************* * Function prototypes *********************************************************************/ static int em_probe(device_t); static int em_attach(device_t); static int em_detach(device_t); static int em_shutdown(device_t); static int em_suspend(device_t); static int em_resume(device_t); #ifdef EM_MULTIQUEUE static int em_mq_start(if_t, struct mbuf *); static int em_mq_start_locked(if_t, struct tx_ring *); static void em_qflush(if_t); #else static void em_start(if_t); static void em_start_locked(if_t, struct tx_ring *); #endif static int em_ioctl(if_t, u_long, caddr_t); static uint64_t em_get_counter(if_t, ift_counter); static void em_init(void *); static void em_init_locked(struct adapter *); static void em_stop(void *); static void em_media_status(if_t, struct ifmediareq *); static int em_media_change(if_t); static void em_identify_hardware(struct adapter *); static int em_allocate_pci_resources(struct adapter *); static int em_allocate_legacy(struct adapter *); static int em_allocate_msix(struct adapter *); static int em_allocate_queues(struct adapter *); static int em_setup_msix(struct adapter *); static void em_free_pci_resources(struct adapter *); static void em_local_timer(void *); static void em_reset(struct adapter *); static int em_setup_interface(device_t, struct adapter *); static void em_flush_desc_rings(struct adapter *); static void em_setup_transmit_structures(struct adapter *); static void em_initialize_transmit_unit(struct adapter *); static int em_allocate_transmit_buffers(struct tx_ring *); static void em_free_transmit_structures(struct adapter *); static void em_free_transmit_buffers(struct tx_ring *); static int em_setup_receive_structures(struct adapter *); static int em_allocate_receive_buffers(struct rx_ring *); static void em_initialize_receive_unit(struct adapter *); static void em_free_receive_structures(struct adapter *); static void em_free_receive_buffers(struct rx_ring *); static void em_enable_intr(struct adapter *); static void em_disable_intr(struct adapter *); static void em_update_stats_counters(struct adapter *); static void em_add_hw_stats(struct adapter *adapter); static void em_txeof(struct tx_ring *); static bool em_rxeof(struct rx_ring *, int, int *); #ifndef __NO_STRICT_ALIGNMENT static int em_fixup_rx(struct rx_ring *); #endif static void em_setup_rxdesc(union e1000_rx_desc_extended *, const struct em_rxbuffer *rxbuf); static void em_receive_checksum(uint32_t status, struct mbuf *); static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int, struct ip *, u32 *, u32 *); static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *, struct tcphdr *, u32 *, u32 *); static void em_set_promisc(struct adapter *); static void em_disable_promisc(struct adapter *); static void em_set_multi(struct adapter *); static void em_update_link_status(struct adapter *); static void em_refresh_mbufs(struct rx_ring *, int); static void em_register_vlan(void *, if_t, u16); static void em_unregister_vlan(void *, if_t, u16); static void em_setup_vlan_hw_support(struct adapter *); static int em_xmit(struct tx_ring *, struct mbuf **); static int em_dma_malloc(struct adapter *, bus_size_t, struct em_dma_alloc *, int); static void em_dma_free(struct adapter *, struct em_dma_alloc *); static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); 
static void em_print_nvm_info(struct adapter *); static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS); static void em_print_debug_info(struct adapter *); static int em_is_valid_ether_addr(u8 *); static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS); static void em_add_int_delay_sysctl(struct adapter *, const char *, const char *, struct em_int_delay_info *, int, int); /* Management and WOL Support */ static void em_init_manageability(struct adapter *); static void em_release_manageability(struct adapter *); static void em_get_hw_control(struct adapter *); static void em_release_hw_control(struct adapter *); static void em_get_wakeup(device_t); static void em_enable_wakeup(device_t); static int em_enable_phy_wakeup(struct adapter *); static void em_led_func(void *, int); static void em_disable_aspm(struct adapter *); static int em_irq_fast(void *); /* MSIX handlers */ static void em_msix_tx(void *); static void em_msix_rx(void *); static void em_msix_link(void *); static void em_handle_tx(void *context, int pending); static void em_handle_rx(void *context, int pending); static void em_handle_link(void *context, int pending); #ifdef EM_MULTIQUEUE static void em_enable_vectors_82574(struct adapter *); #endif static void em_set_sysctl_value(struct adapter *, const char *, const char *, int *, int); static int em_set_flowcntl(SYSCTL_HANDLER_ARGS); static int em_sysctl_eee(SYSCTL_HANDLER_ARGS); static __inline void em_rx_discard(struct rx_ring *, int); #ifdef DEVICE_POLLING static poll_handler_t em_poll; #endif /* POLLING */ /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t em_methods[] = { /* Device interface */ DEVMETHOD(device_probe, em_probe), DEVMETHOD(device_attach, em_attach), DEVMETHOD(device_detach, em_detach), DEVMETHOD(device_shutdown, em_shutdown), DEVMETHOD(device_suspend, em_suspend), DEVMETHOD(device_resume, em_resume), DEVMETHOD_END }; static driver_t em_driver = { "em", em_methods, sizeof(struct adapter), }; devclass_t em_devclass; DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0); MODULE_DEPEND(em, pci, 1, 1, 1); MODULE_DEPEND(em, ether, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(em, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ /********************************************************************* * Tunable default values. 
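 *
 * A rough guide to the values below: EM_TICKS_TO_USECS() and
 * EM_USECS_TO_TICKS() convert between microseconds and the 1.024 us
 * delay-timer units implied by the 1024/1000 scaling, rounding to the
 * nearest unit.  DEFAULT_ITR appears to assume the throttle register
 * counts 256 ns intervals, so with MAX_INTS_PER_SEC = 8000:
 *
 *	DEFAULT_ITR = 1000000000 / (8000 * 256) = 488
 *
 * which is one interrupt roughly every 488 * 256 ns, about 125 us,
 * or close to 8000 interrupts per second.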
*********************************************************************/ #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) #define M_TSO_LEN 66 #define MAX_INTS_PER_SEC 8000 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) /* Allow common code without TSO */ #ifndef CSUM_TSO #define CSUM_TSO 0 #endif #define TSO_WORKAROUND 4 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters"); static int em_disable_crc_stripping = 0; SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN, &em_disable_crc_stripping, 0, "Disable CRC Stripping"); static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt, 0, "Default transmit interrupt delay in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt, 0, "Default receive interrupt delay in usecs"); static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN, &em_tx_abs_int_delay_dflt, 0, "Default transmit interrupt delay limit in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN, &em_rx_abs_int_delay_dflt, 0, "Default receive interrupt delay limit in usecs"); static int em_rxd = EM_DEFAULT_RXD; static int em_txd = EM_DEFAULT_TXD; SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0, "Number of receive descriptors per queue"); SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0, "Number of transmit descriptors per queue"); static int em_smart_pwr_down = FALSE; SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down, 0, "Set to true to leave smart power down enabled on newer adapters"); /* Controls whether promiscuous also shows bad packets */ static int em_debug_sbp = FALSE; SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0, "Show bad packets in promiscuous mode"); static int em_enable_msix = TRUE; SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0, "Enable MSI-X interrupts"); #ifdef EM_MULTIQUEUE static int em_num_queues = 1; SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0, "82574 only: Number of queues to configure, 0 indicates autoconfigure"); #endif /* ** Global variable to store last used CPU when binding queues ** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a ** queue is bound to a cpu. */ static int em_last_bind_cpu = -1; /* How many packets rxeof tries to clean at a time */ static int em_rx_process_limit = 100; SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &em_rx_process_limit, 0, "Maximum number of received packets to process " "at a time, -1 means unlimited"); /* Energy efficient ethernet - default to OFF */ static int eee_setting = 1; SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0, "Enable Energy Efficient Ethernet"); /* Global used in WOL setup with multiport cards */ static int global_quad_port_a = 0; #ifdef DEV_NETMAP /* see ixgbe.c for details */ #include #endif /* DEV_NETMAP */ /********************************************************************* * Device identification routine * * em_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. 
* * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int em_probe(device_t dev) { char adapter_name[60]; uint16_t pci_vendor_id = 0; uint16_t pci_device_id = 0; uint16_t pci_subvendor_id = 0; uint16_t pci_subdevice_id = 0; em_vendor_info_t *ent; INIT_DEBUGOUT("em_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != EM_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = em_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == PCI_ANY_ID)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == PCI_ANY_ID))) { sprintf(adapter_name, "%s %s", em_strings[ent->index], em_driver_version); device_set_desc_copy(dev, adapter_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int em_attach(device_t dev) { struct adapter *adapter; struct e1000_hw *hw; int error = 0; INIT_DEBUGOUT("em_attach: begin"); if (resource_disabled("em", device_get_unit(dev))) { device_printf(dev, "Disabled by device hint\n"); return (ENXIO); } adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; hw = &adapter->hw; EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_nvm_info, "I", "NVM Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_debug_info, "I", "Debug Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_set_flowcntl, "I", "Flow Control"); callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); /* Determine hardware and mac info */ em_identify_hardware(adapter); /* Setup PCI resources */ if (em_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* ** For ICH8 and family we need to ** map the flash memory, and this ** must happen after the MAC is ** identified */ if ((hw->mac.type == e1000_ich8lan) || (hw->mac.type == e1000_ich9lan) || (hw->mac.type == e1000_ich10lan) || (hw->mac.type == e1000_pchlan) || (hw->mac.type == e1000_pch2lan) || (hw->mac.type == e1000_pch_lpt)) { int rid = EM_BAR_TYPE_FLASH; adapter->flash = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->flash == NULL) { device_printf(dev, "Mapping of Flash failed\n"); error = ENXIO; goto err_pci; } /* This is used in the shared code */ hw->flash_address = (u8 *)adapter->flash; adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash); adapter->osdep.flash_bus_space_handle = rman_get_bushandle(adapter->flash); } /* ** In the new SPT device flash is not a ** separate BAR, 
rather it is also in BAR0, ** so use the same tag and an offset handle for the ** FLASH read/write macros in the shared code. */ else if (hw->mac.type == e1000_pch_spt) { adapter->osdep.flash_bus_space_tag = adapter->osdep.mem_bus_space_tag; adapter->osdep.flash_bus_space_handle = adapter->osdep.mem_bus_space_handle + E1000_FLASH_BASE_ADDR; } /* Do Shared Code initialization */ error = e1000_setup_init_funcs(hw, TRUE); if (error) { device_printf(dev, "Setup of Shared code failed, error %d\n", error); error = ENXIO; goto err_pci; } /* * Setup MSI/X or MSI if PCI Express */ adapter->msix = em_setup_msix(adapter); e1000_get_bus_info(hw); /* Set up some sysctls for the tunable interrupt delays */ em_add_int_delay_sysctl(adapter, "rx_int_delay", "receive interrupt delay in usecs", &adapter->rx_int_delay, E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_int_delay", "transmit interrupt delay in usecs", &adapter->tx_int_delay, E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "rx_abs_int_delay", "receive interrupt delay limit in usecs", &adapter->rx_abs_int_delay, E1000_REGISTER(hw, E1000_RADV), em_rx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_abs_int_delay", "transmit interrupt delay limit in usecs", &adapter->tx_abs_int_delay, E1000_REGISTER(hw, E1000_TADV), em_tx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "itr", "interrupt delay limit in usecs/4", &adapter->tx_itr, E1000_REGISTER(hw, E1000_ITR), DEFAULT_ITR); /* Sysctl for limiting the amount of work done in the taskqueue */ em_set_sysctl_value(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, em_rx_process_limit); /* * Validate number of transmit and receive descriptors. It * must not exceed hardware maximum, and must be multiple * of E1000_DBA_ALIGN. */ if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 || (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) { device_printf(dev, "Using %d TX descriptors instead of %d!\n", EM_DEFAULT_TXD, em_txd); adapter->num_tx_desc = EM_DEFAULT_TXD; } else adapter->num_tx_desc = em_txd; if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 || (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) { device_printf(dev, "Using %d RX descriptors instead of %d!\n", EM_DEFAULT_RXD, em_rxd); adapter->num_rx_desc = EM_DEFAULT_RXD; } else adapter->num_rx_desc = em_rxd; hw->mac.autoneg = DO_AUTO_NEG; hw->phy.autoneg_wait_to_complete = FALSE; hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; /* Copper options */ if (hw->phy.media_type == e1000_media_type_copper) { hw->phy.mdix = AUTO_ALL_MODES; hw->phy.disable_polarity_correction = FALSE; hw->phy.ms_type = EM_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ adapter->hw.mac.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; /* * This controls when hardware reports transmit completion * status. */ hw->mac.report_tx_early = 1; /* ** Get queue/ring memory */ if (em_allocate_queues(adapter)) { error = ENOMEM; goto err_pci; } /* Allocate multicast array memory. 
*/ adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Check SOL/IDER usage */ if (e1000_check_reset_block(hw)) device_printf(dev, "PHY reset is blocked" " due to SOL/IDER session.\n"); /* Sysctl for setting Energy Efficient Ethernet */ hw->dev_spec.ich8lan.eee_disable = eee_setting; SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_eee, "I", "Disable Energy Efficient Ethernet"); /* ** Start from a known state, this is ** important in reading the nvm and ** mac from that. */ e1000_reset_hw(hw); /* Make sure we have a good EEPROM before we read from it */ if (e1000_validate_nvm_checksum(hw) < 0) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time its a real issue. */ if (e1000_validate_nvm_checksum(hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } if (!em_is_valid_ether_addr(hw->mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } /* Disable ULP support */ e1000_disable_ulp_lpt_lp(hw, TRUE); /* ** Do interrupt configuration */ if (adapter->msix > 1) /* Do MSIX */ error = em_allocate_msix(adapter); else /* MSI or Legacy */ error = em_allocate_legacy(adapter); if (error) goto err_late; /* * Get Wake-on-Lan and Management info for later use */ em_get_wakeup(dev); /* Setup OS specific network interface */ if (em_setup_interface(dev, adapter) != 0) goto err_late; em_reset(adapter); /* Initialize statistics */ em_update_stats_counters(adapter); hw->mac.get_link_status = 1; em_update_link_status(adapter); /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); em_add_hw_stats(adapter); /* Non-AMT based hardware can now take control from firmware */ if (adapter->has_manage && !adapter->has_amt) em_get_hw_control(adapter); /* Tell the stack that the interface is not active */ if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); adapter->led_dev = led_create(em_led_func, adapter, device_get_nameunit(dev)); #ifdef DEV_NETMAP em_netmap_attach(adapter); #endif /* DEV_NETMAP */ INIT_DEBUGOUT("em_attach: end"); return (0); err_late: em_free_transmit_structures(adapter); em_free_receive_structures(adapter); em_release_hw_control(adapter); if (adapter->ifp != (void *)NULL) if_free(adapter->ifp); err_pci: em_free_pci_resources(adapter); free(adapter->mta, M_DEVBUF); EM_CORE_LOCK_DESTROY(adapter); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. 
* * return 0 on success, positive on failure *********************************************************************/ static int em_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); if_t ifp = adapter->ifp; INIT_DEBUGOUT("em_detach: begin"); /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } #ifdef DEVICE_POLLING if (if_getcapenable(ifp) & IFCAP_POLLING) ether_poll_deregister(ifp); #endif if (adapter->led_dev != NULL) led_destroy(adapter->led_dev); EM_CORE_LOCK(adapter); adapter->in_detach = 1; em_stop(adapter); EM_CORE_UNLOCK(adapter); EM_CORE_LOCK_DESTROY(adapter); e1000_phy_hw_reset(&adapter->hw); em_release_manageability(adapter); em_release_hw_control(adapter); /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); ether_ifdetach(adapter->ifp); callout_drain(&adapter->timer); #ifdef DEV_NETMAP netmap_detach(ifp); #endif /* DEV_NETMAP */ em_free_pci_resources(adapter); bus_generic_detach(dev); if_free(ifp); em_free_transmit_structures(adapter); em_free_receive_structures(adapter); em_release_hw_control(adapter); free(adapter->mta, M_DEVBUF); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int em_shutdown(device_t dev) { return em_suspend(dev); } /* * Suspend/resume device methods. */ static int em_suspend(device_t dev) { struct adapter *adapter = device_get_softc(dev); EM_CORE_LOCK(adapter); em_release_manageability(adapter); em_release_hw_control(adapter); em_enable_wakeup(dev); EM_CORE_UNLOCK(adapter); return bus_generic_suspend(dev); } static int em_resume(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct tx_ring *txr = adapter->tx_rings; if_t ifp = adapter->ifp; EM_CORE_LOCK(adapter); if (adapter->hw.mac.type == e1000_pch2lan) e1000_resume_workarounds_pchlan(&adapter->hw); em_init_locked(adapter); em_init_manageability(adapter); if ((if_getflags(ifp) & IFF_UP) && (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); } } EM_CORE_UNLOCK(adapter); return bus_generic_resume(dev); } #ifndef EM_MULTIQUEUE static void em_start_locked(if_t ifp, struct tx_ring *txr) { struct adapter *adapter = if_getsoftc(ifp); struct mbuf *m_head; EM_TX_LOCK_ASSERT(txr); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; if (!adapter->link_active) return; while (!if_sendq_empty(ifp)) { /* Call cleanup if number of TX descriptors low */ if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD) em_txeof(txr); if (txr->tx_avail < EM_MAX_SCATTER) { if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0); break; } m_head = if_dequeue(ifp); if (m_head == NULL) break; /* * Encapsulation can modify our pointer, and or make it * NULL on failure. In that event, we can't requeue. 
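 *
 * Concretely, in the branch below: if em_xmit() failed and cleared
 * m_head, the chain was already freed and we just stop; if m_head is
 * still valid it is pushed back with if_sendq_prepend() and we stop,
 * leaving the packet to be retried on the next start call.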
*/ if (em_xmit(txr, &m_head)) { if (m_head == NULL) break; if_sendq_prepend(ifp, m_head); break; } /* Mark the queue as having work */ if (txr->busy == EM_TX_IDLE) txr->busy = EM_TX_BUSY; /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); } return; } static void em_start(if_t ifp) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { EM_TX_LOCK(txr); em_start_locked(ifp, txr); EM_TX_UNLOCK(txr); } return; } #else /* EM_MULTIQUEUE */ /********************************************************************* * Multiqueue Transmit routines * * em_mq_start is called by the stack to initiate a transmit. * however, if busy the driver can queue the request rather * than do an immediate send. It is this that is an advantage * in this driver, rather than also having multiple tx queues. **********************************************************************/ /* ** Multiqueue capable stack interface */ static int em_mq_start(if_t ifp, struct mbuf *m) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; unsigned int i, error; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % adapter->num_queues; else i = curcpu % adapter->num_queues; txr = &adapter->tx_rings[i]; error = drbr_enqueue(ifp, txr->br, m); if (error) return (error); if (EM_TX_TRYLOCK(txr)) { em_mq_start_locked(ifp, txr); EM_TX_UNLOCK(txr); } else taskqueue_enqueue(txr->tq, &txr->tx_task); return (0); } static int em_mq_start_locked(if_t ifp, struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct mbuf *next; int err = 0, enq = 0; EM_TX_LOCK_ASSERT(txr); if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) || adapter->link_active == 0) { return (ENETDOWN); } /* Process the queue */ while ((next = drbr_peek(ifp, txr->br)) != NULL) { if ((err = em_xmit(txr, &next)) != 0) { if (next == NULL) { /* It was freed, move forward */ drbr_advance(ifp, txr->br); } else { /* * Still have one left, it may not be * the same since the transmit function * may have changed it. */ drbr_putback(ifp, txr->br, next); } break; } drbr_advance(ifp, txr->br); enq++; if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len); if (next->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); ETHER_BPF_MTAP(ifp, next); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) break; } /* Mark the queue as having work */ if ((enq > 0) && (txr->busy == EM_TX_IDLE)) txr->busy = EM_TX_BUSY; if (txr->tx_avail < EM_MAX_SCATTER) em_txeof(txr); if (txr->tx_avail < EM_MAX_SCATTER) { if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0); } return (err); } /* ** Flush all ring buffers */ static void em_qflush(if_t ifp) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; struct mbuf *m; for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) m_freem(m); EM_TX_UNLOCK(txr); } if_qflush(ifp); } #endif /* EM_MULTIQUEUE */ /********************************************************************* * Ioctl entry point * * em_ioctl is called when the user wants to configure the * interface. 
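 *
 * Two cases below are worth noting: SIOCSIFADDR avoids a full
 * reinit when it can, since em_init() forces a link renegotiation,
 * and SIOCSIFMTU clamps the MTU against a per-MAC maximum frame
 * size (ICH8 parts, for example, take no jumbo frames at all).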
* * return 0 on success, positive on failure **********************************************************************/ static int em_ioctl(if_t ifp, u_long command, caddr_t data) { struct adapter *adapter = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = FALSE; int error = 0; if (adapter->in_detach) return (error); switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { if_setflagbits(ifp,IFF_UP,0); if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING)) em_init(adapter); #ifdef INET if (!(if_getflags(ifp) & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: { int max_frame_size; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); EM_CORE_LOCK(adapter); switch (adapter->hw.mac.type) { case e1000_82571: case e1000_82572: case e1000_ich9lan: case e1000_ich10lan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_82574: case e1000_82583: case e1000_80003es2lan: /* 9K Jumbo Frame size */ max_frame_size = 9234; break; case e1000_pchlan: max_frame_size = 4096; break; /* Adapters that do not support jumbo frames */ case e1000_ich8lan: max_frame_size = ETHER_MAX_LEN; break; default: max_frame_size = MAX_JUMBO_FRAME_SIZE; } if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { EM_CORE_UNLOCK(adapter); error = EINVAL; break; } if_setmtu(ifp, ifr->ifr_mtu); adapter->hw.mac.max_frame_size = if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) em_init_locked(adapter); EM_CORE_UNLOCK(adapter); break; } case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl rcv'd:\ SIOCSIFFLAGS (Set Interface Flags)"); EM_CORE_LOCK(adapter); if (if_getflags(ifp) & IFF_UP) { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if ((if_getflags(ifp) ^ adapter->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { em_disable_promisc(adapter); em_set_promisc(adapter); } } else em_init_locked(adapter); } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) em_stop(adapter); adapter->if_flags = if_getflags(ifp); EM_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { EM_CORE_LOCK(adapter); em_disable_intr(adapter); em_set_multi(adapter); #ifdef DEVICE_POLLING if (!(if_getcapenable(ifp) & IFCAP_POLLING)) #endif em_enable_intr(adapter); EM_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: /* Check SOL/IDER usage */ EM_CORE_LOCK(adapter); if (e1000_check_reset_block(&adapter->hw)) { EM_CORE_UNLOCK(adapter); device_printf(adapter->dev, "Media change is" " blocked due to SOL/IDER session.\n"); break; } EM_CORE_UNLOCK(adapter); /* falls thru */ case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl rcv'd: \ SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { int mask, reinit; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); reinit = 0; mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(em_poll, ifp); if (error) return (error); EM_CORE_LOCK(adapter); em_disable_intr(adapter); 
if_setcapenablebit(ifp, IFCAP_POLLING, 0); EM_CORE_UNLOCK(adapter); } else { error = ether_poll_deregister(ifp); /* Enable interrupt even in error case */ EM_CORE_LOCK(adapter); em_enable_intr(adapter); if_setcapenablebit(ifp, 0, IFCAP_POLLING); EM_CORE_UNLOCK(adapter); } } #endif if (mask & IFCAP_HWCSUM) { if_togglecapenable(ifp,IFCAP_HWCSUM); reinit = 1; } if (mask & IFCAP_TSO4) { if_togglecapenable(ifp,IFCAP_TSO4); reinit = 1; } if (mask & IFCAP_VLAN_HWTAGGING) { if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING); reinit = 1; } if (mask & IFCAP_VLAN_HWFILTER) { if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER); reinit = 1; } if (mask & IFCAP_VLAN_HWTSO) { if_togglecapenable(ifp, IFCAP_VLAN_HWTSO); reinit = 1; } if ((mask & IFCAP_WOL) && (if_getcapabilities(ifp) & IFCAP_WOL) != 0) { if (mask & IFCAP_WOL_MCAST) if_togglecapenable(ifp, IFCAP_WOL_MCAST); if (mask & IFCAP_WOL_MAGIC) if_togglecapenable(ifp, IFCAP_WOL_MAGIC); } if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING)) em_init(adapter); if_vlancap(ifp); break; } default: error = ether_ioctl(ifp, command, data); break; } return (error); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void em_init_locked(struct adapter *adapter) { if_t ifp = adapter->ifp; device_t dev = adapter->dev; INIT_DEBUGOUT("em_init: begin"); EM_CORE_LOCK_ASSERT(adapter); em_disable_intr(adapter); callout_stop(&adapter->timer); /* Get the latest mac address, User can use a LAA */ bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* * With the 82571 adapter, RAR[0] may be overwritten * when the other port is reset, we make a duplicate * in RAR[14] for that eventuality, this assures * the interface continues to function. */ if (adapter->hw.mac.type == e1000_82571) { e1000_set_laa_state_82571(&adapter->hw, TRUE); e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, E1000_RAR_ENTRIES - 1); } /* Initialize the hardware */ em_reset(adapter); em_update_link_status(adapter); /* Setup VLAN support, basic and offload if available */ E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); /* Set hardware offload abilities */ if_clearhwassist(ifp); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0); /* ** There have proven to be problems with TSO when not ** at full gigabit speed, so disable the assist automatically ** when at lower speeds. 
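**
** In practice "disable" just means CSUM_TSO is only added to the
** interface hwassist bits when the link is at SPEED_1000, as the
** check below shows; nothing is torn down at lower speeds.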
-jfv */ if (if_getcapenable(ifp) & IFCAP_TSO4) { if (adapter->link_speed == SPEED_1000) if_sethwassistbits(ifp, CSUM_TSO, 0); } /* Configure for OS presence */ em_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ em_setup_transmit_structures(adapter); em_initialize_transmit_unit(adapter); /* Setup Multicast table */ em_set_multi(adapter); /* ** Figure out the desired mbuf ** pool for doing jumbos */ if (adapter->hw.mac.max_frame_size <= 2048) adapter->rx_mbuf_sz = MCLBYTES; else if (adapter->hw.mac.max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else adapter->rx_mbuf_sz = MJUM9BYTES; /* Prepare receive descriptors and buffers */ if (em_setup_receive_structures(adapter)) { device_printf(dev, "Could not setup receive structures\n"); em_stop(adapter); return; } em_initialize_receive_unit(adapter); /* Use real VLAN Filter support? */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) { if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) /* Use real VLAN Filter support */ em_setup_vlan_hw_support(adapter); else { u32 ctrl; ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= E1000_CTRL_VME; E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); } } /* Don't lose promiscuous settings */ em_set_promisc(adapter); /* Set the interface as ACTIVE */ if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); callout_reset(&adapter->timer, hz, em_local_timer, adapter); e1000_clear_hw_cntrs_base_generic(&adapter->hw); /* MSI/X configuration for 82574 */ if (adapter->hw.mac.type == e1000_82574) { int tmp; tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp); /* Set the IVAR - interrupt vector routing. */ E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); } #ifdef DEVICE_POLLING /* * Only enable interrupts if we are not polling, make sure * they are off otherwise. 
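 *
 * The layout below is a little subtle: the C "else" sits just before
 * the #endif, so with DEVICE_POLLING compiled out only the
 * em_enable_intr() call remains and interrupts are always re-enabled
 * at the end of init.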
*/ if (if_getcapenable(ifp) & IFCAP_POLLING) em_disable_intr(adapter); else #endif /* DEVICE_POLLING */ em_enable_intr(adapter); /* AMT based hardware can now take control from firmware */ if (adapter->has_manage && adapter->has_amt) em_get_hw_control(adapter); } static void em_init(void *arg) { struct adapter *adapter = arg; EM_CORE_LOCK(adapter); em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } #ifdef DEVICE_POLLING /********************************************************************* * * Legacy polling routine: note this only works with single queue * *********************************************************************/ static int em_poll(if_t ifp, enum poll_cmd cmd, int count) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; u32 reg_icr; int rx_done; EM_CORE_LOCK(adapter); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { EM_CORE_UNLOCK(adapter); return (0); } if (cmd == POLL_AND_CHECK_STATUS) { reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { callout_stop(&adapter->timer); adapter->hw.mac.get_link_status = 1; em_update_link_status(adapter); callout_reset(&adapter->timer, hz, em_local_timer, adapter); } } EM_CORE_UNLOCK(adapter); em_rxeof(rxr, count, &rx_done); EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); return (rx_done); } #endif /* DEVICE_POLLING */ /********************************************************************* * * Fast Legacy/MSI Combined Interrupt Service routine * *********************************************************************/ static int em_irq_fast(void *arg) { struct adapter *adapter = arg; if_t ifp; u32 reg_icr; ifp = adapter->ifp; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; /* * Starting with the 82571 chip, bit 31 should be used to * determine whether the interrupt belongs to us. 
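 *
 * E1000_ICR_INT_ASSERTED is that bit; on a shared legacy interrupt
 * line a clear bit means another device raised the IRQ, so the
 * filter returns FILTER_STRAY without touching the adapter.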
*/ if (adapter->hw.mac.type >= e1000_82571 && (reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; em_disable_intr(adapter); taskqueue_enqueue(adapter->tq, &adapter->que_task); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; taskqueue_enqueue(taskqueue_fast, &adapter->link_task); } if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; return FILTER_HANDLED; } /* Combined RX/TX handler, used by Legacy and MSI */ static void em_handle_que(void *context, int pending) { struct adapter *adapter = context; if_t ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL); EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); if (more) { taskqueue_enqueue(adapter->tq, &adapter->que_task); return; } } em_enable_intr(adapter); return; } /********************************************************************* * * MSIX Interrupt Service Routines * **********************************************************************/ static void em_msix_tx(void *arg) { struct tx_ring *txr = arg; struct adapter *adapter = txr->adapter; if_t ifp = adapter->ifp; ++txr->tx_irq; EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); EM_TX_UNLOCK(txr); return; } /********************************************************************* * * MSIX RX Interrupt Service routine * **********************************************************************/ static void em_msix_rx(void *arg) { struct rx_ring *rxr = arg; struct adapter *adapter = rxr->adapter; bool more; ++rxr->rx_irq; if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)) return; more = em_rxeof(rxr, adapter->rx_process_limit, NULL); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); else { /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); } return; } /********************************************************************* * * MSIX Link Fast Interrupt Service routine * **********************************************************************/ static void em_msix_link(void *arg) { struct adapter *adapter = arg; u32 reg_icr; ++adapter->link_irq; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; em_handle_link(adapter, 0); } else E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); /* ** Because we must read the ICR for this interrupt ** it may clear other causes using autoclear, for ** this reason we simply create a soft interrupt ** for all these vectors. 
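**
** Writing the queue bits (adapter->ims) back into E1000_ICS below
** re-raises any RX/TX causes that the ICR read auto-cleared, so
** their MSI-X vectors still fire and no completion work is lost.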
*/ if (reg_icr) { E1000_WRITE_REG(&adapter->hw, E1000_ICS, adapter->ims); } return; } static void em_handle_rx(void *context, int pending) { struct rx_ring *rxr = context; struct adapter *adapter = rxr->adapter; bool more; more = em_rxeof(rxr, adapter->rx_process_limit, NULL); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); else { /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); } } static void em_handle_tx(void *context, int pending) { struct tx_ring *txr = context; struct adapter *adapter = txr->adapter; if_t ifp = adapter->ifp; EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); EM_TX_UNLOCK(txr); } static void em_handle_link(void *context, int pending) { struct adapter *adapter = context; struct tx_ring *txr = adapter->tx_rings; if_t ifp = adapter->ifp; if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) return; EM_CORE_LOCK(adapter); callout_stop(&adapter->timer); em_update_link_status(adapter); callout_reset(&adapter->timer, hz, em_local_timer, adapter); E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); if (adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); } } EM_CORE_UNLOCK(adapter); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void em_media_status(if_t ifp, struct ifmediareq *ifmr) { struct adapter *adapter = if_getsoftc(ifp); u_char fiber_type = IFM_1000_SX; INIT_DEBUGOUT("em_media_status: begin"); EM_CORE_LOCK(adapter); em_update_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { EM_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { ifmr->ifm_active |= fiber_type | IFM_FDX; } else { switch (adapter->link_speed) { case 10: ifmr->ifm_active |= IFM_10_T; break; case 100: ifmr->ifm_active |= IFM_100_TX; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; } if (adapter->link_duplex == FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } EM_CORE_UNLOCK(adapter); } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. 
* **********************************************************************/ static int em_media_change(if_t ifp) { struct adapter *adapter = if_getsoftc(ifp); struct ifmedia *ifm = &adapter->media; INIT_DEBUGOUT("em_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); EM_CORE_LOCK(adapter); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; break; case IFM_1000_LX: case IFM_1000_SX: case IFM_1000_T: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case IFM_100_TX: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; break; case IFM_10_T: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; break; default: device_printf(adapter->dev, "Unsupported media type\n"); } em_init_locked(adapter); EM_CORE_UNLOCK(adapter); return (0); } /********************************************************************* * * This routine maps the mbufs to tx descriptors. * * return 0 on success, positive on failure **********************************************************************/ static int em_xmit(struct tx_ring *txr, struct mbuf **m_headp) { struct adapter *adapter = txr->adapter; bus_dma_segment_t segs[EM_MAX_SCATTER]; bus_dmamap_t map; struct em_txbuffer *tx_buffer, *tx_buffer_mapped; struct e1000_tx_desc *ctxd = NULL; struct mbuf *m_head; struct ether_header *eh; struct ip *ip = NULL; struct tcphdr *tp = NULL; u32 txd_upper = 0, txd_lower = 0; int ip_off, poff; int nsegs, i, j, first, last = 0; int error; bool do_tso, tso_desc, remap = TRUE; m_head = *m_headp; do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO); tso_desc = FALSE; ip_off = poff = 0; /* * Intel recommends entire IP/TCP header length reside in a single * buffer. If multiple descriptors are used to describe the IP and * TCP header, each descriptor should describe one or more * complete headers; descriptors referencing only parts of headers * are not supported. If all layer headers are not coalesced into * a single buffer, each buffer should not cross a 4KB boundary, * or be larger than the maximum read request size. * Controller also requires modifing IP/TCP header to make TSO work * so we firstly get a writable mbuf chain then coalesce ethernet/ * IP/TCP header into a single buffer to meet the requirement of * controller. This also simplifies IP/TCP/UDP checksum offloading * which also has similar restrictions. */ if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { if (do_tso || (m_head->m_next != NULL && m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) { if (M_WRITABLE(*m_headp) == 0) { m_head = m_dup(*m_headp, M_NOWAIT); m_freem(*m_headp); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } *m_headp = m_head; } } /* * XXX * Assume IPv4, we don't have TSO/checksum offload support * for IPv6 yet. 
*/ ip_off = sizeof(struct ether_header); if (m_head->m_len < ip_off) { m_head = m_pullup(m_head, ip_off); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } eh = mtod(m_head, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { ip_off = sizeof(struct ether_vlan_header); if (m_head->m_len < ip_off) { m_head = m_pullup(m_head, ip_off); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } } if (m_head->m_len < ip_off + sizeof(struct ip)) { m_head = m_pullup(m_head, ip_off + sizeof(struct ip)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } ip = (struct ip *)(mtod(m_head, char *) + ip_off); poff = ip_off + (ip->ip_hl << 2); if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) { if (m_head->m_len < poff + sizeof(struct tcphdr)) { m_head = m_pullup(m_head, poff + sizeof(struct tcphdr)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } tp = (struct tcphdr *)(mtod(m_head, char *) + poff); /* * TSO workaround: * pull 4 more bytes of data into it. */ if (m_head->m_len < poff + (tp->th_off << 2)) { m_head = m_pullup(m_head, poff + (tp->th_off << 2) + TSO_WORKAROUND); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } ip = (struct ip *)(mtod(m_head, char *) + ip_off); tp = (struct tcphdr *)(mtod(m_head, char *) + poff); if (do_tso) { ip->ip_len = htons(m_head->m_pkthdr.tso_segsz + (ip->ip_hl << 2) + (tp->th_off << 2)); ip->ip_sum = 0; /* * The pseudo TCP checksum does not include TCP * payload length so driver should recompute * the checksum here what hardware expect to * see. This is adherence of Microsoft's Large * Send specification. */ tp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { if (m_head->m_len < poff + sizeof(struct udphdr)) { m_head = m_pullup(m_head, poff + sizeof(struct udphdr)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } ip = (struct ip *)(mtod(m_head, char *) + ip_off); } *m_headp = m_head; } /* * Map the packet for DMA * * Capture the first descriptor index, * this descriptor will have the index * of the EOP which is the only one that * now gets a DONE bit writeback. */ first = txr->next_avail_desc; tx_buffer = &txr->tx_buffers[first]; tx_buffer_mapped = tx_buffer; map = tx_buffer->map; retry: error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); /* * There are two types of errors we can (try) to handle: * - EFBIG means the mbuf chain was too long and bus_dma ran * out of segments. Defragment the mbuf chain and try again. * - ENOMEM means bus_dma could not obtain enough bounce buffers * at this point in time. Defer sending and try again later. * All other errors, in particular EINVAL, are fatal and prevent the * mbuf chain from ever going through. Drop it and report error. */ if (error == EFBIG && remap) { struct mbuf *m; m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER); if (m == NULL) { adapter->mbuf_defrag_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); } *m_headp = m; /* Try it again, but only once */ remap = FALSE; goto retry; } else if (error != 0) { adapter->no_tx_dma_setup++; m_freem(*m_headp); *m_headp = NULL; return (error); } /* * TSO Hardware workaround, if this packet is not * TSO, and is only a single descriptor long, and * it follows a TSO burst, then we need to add a * sentinel descriptor to prevent premature writeback. 
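 *
 * The sentinel itself is built further down: when tso_desc is set
 * and the final segment is longer than 8 bytes, that segment is
 * split so a small TSO_WORKAROUND-byte trailing descriptor ends up
 * carrying the EOP/RS bits instead of the large one.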
*/ if ((!do_tso) && (txr->tx_tso == TRUE)) { if (nsegs == 1) tso_desc = TRUE; txr->tx_tso = FALSE; } if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) { txr->no_desc_avail++; bus_dmamap_unload(txr->txtag, map); return (ENOBUFS); } m_head = *m_headp; /* Do hardware assists */ if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { em_tso_setup(txr, m_head, ip_off, ip, tp, &txd_upper, &txd_lower); /* we need to make a final sentinel transmit desc */ tso_desc = TRUE; } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) em_transmit_checksum_setup(txr, m_head, ip_off, ip, &txd_upper, &txd_lower); if (m_head->m_flags & M_VLANTAG) { /* Set the vlan id. */ txd_upper |= htole16(if_getvtag(m_head)) << 16; /* Tell hardware to add tag */ txd_lower |= htole32(E1000_TXD_CMD_VLE); } i = txr->next_avail_desc; /* Set up our transmit descriptors */ for (j = 0; j < nsegs; j++) { bus_size_t seg_len; bus_addr_t seg_addr; tx_buffer = &txr->tx_buffers[i]; ctxd = &txr->tx_base[i]; seg_addr = segs[j].ds_addr; seg_len = segs[j].ds_len; /* ** TSO Workaround: ** If this is the last descriptor, we want to ** split it so we have a small final sentinel */ if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) { seg_len -= TSO_WORKAROUND; ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); if (++i == adapter->num_tx_desc) i = 0; /* Now make the sentinel */ txr->tx_avail--; ctxd = &txr->tx_base[i]; tx_buffer = &txr->tx_buffers[i]; ctxd->buffer_addr = htole64(seg_addr + seg_len); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | TSO_WORKAROUND); ctxd->upper.data = htole32(txd_upper); last = i; if (++i == adapter->num_tx_desc) i = 0; } else { ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); last = i; if (++i == adapter->num_tx_desc) i = 0; } tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; } txr->next_avail_desc = i; txr->tx_avail -= nsegs; tx_buffer->m_head = m_head; /* ** Here we swap the map so the last descriptor, ** which gets the completion interrupt has the ** real map, and the first descriptor gets the ** unused map from this descriptor. */ tx_buffer_mapped->map = tx_buffer->map; tx_buffer->map = map; bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); /* * Last Descriptor of Packet * needs End Of Packet (EOP) * and Report Status (RS) */ ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); /* * Keep track in the first buffer which * descriptor will be written back */ tx_buffer = &txr->tx_buffers[first]; tx_buffer->next_eop = last; /* * Advance the Transmit Descriptor Tail (TDT), this tells the E1000 * that this frame is available to transmit. 
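 *
 * The ring is synced with BUS_DMASYNC_PREWRITE first, so the
 * hardware never sees the new tail value before the descriptors
 * themselves are visible in memory.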
*/ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); return (0); } static void em_set_promisc(struct adapter *adapter) { if_t ifp = adapter->ifp; u32 reg_rctl; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (if_getflags(ifp) & IFF_PROMISC) { reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); /* Turn this on if you want to see bad packets */ if (em_debug_sbp) reg_rctl |= E1000_RCTL_SBP; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else if (if_getflags(ifp) & IFF_ALLMULTI) { reg_rctl |= E1000_RCTL_MPE; reg_rctl &= ~E1000_RCTL_UPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } } static void em_disable_promisc(struct adapter *adapter) { if_t ifp = adapter->ifp; u32 reg_rctl; int mcnt = 0; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= (~E1000_RCTL_UPE); if (if_getflags(ifp) & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES); /* Don't disable if in MAX groups */ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg_rctl &= (~E1000_RCTL_MPE); reg_rctl &= (~E1000_RCTL_SBP); E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void em_set_multi(struct adapter *adapter) { if_t ifp = adapter->ifp; u32 reg_rctl = 0; u8 *mta; /* Multicast array memory */ int mcnt = 0; IOCTL_DEBUGOUT("em_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_clear_mwi(&adapter->hw); reg_rctl |= E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); } if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES); if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= ~E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_set_mwi(&adapter->hw); } } /********************************************************************* * Timer routine * * This routine checks for link status and updates statistics. 
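 *
 * It also drives the transmit watchdog: once a ring's busy counter
 * reaches EM_TX_MAXTRIES the ring is marked EM_TX_HUNG, and a ring
 * found already hung on the next tick causes the interface to be
 * reset via em_init_locked().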
* **********************************************************************/ static void em_local_timer(void *arg) { struct adapter *adapter = arg; if_t ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; u32 trigger = 0; EM_CORE_LOCK_ASSERT(adapter); em_update_link_status(adapter); em_update_stats_counters(adapter); /* Reset LAA into RAR[0] on 82571 */ if ((adapter->hw.mac.type == e1000_82571) && e1000_get_laa_state_82571(&adapter->hw)) e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* Mask to use in the irq trigger */ if (adapter->msix_mem) { for (int i = 0; i < adapter->num_queues; i++, rxr++) trigger |= rxr->ims; rxr = adapter->rx_rings; } else trigger = E1000_ICS_RXDMT0; /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. */ for (int i = 0; i < adapter->num_queues; i++, txr++) { if (txr->busy == EM_TX_HUNG) goto hung; if (txr->busy >= EM_TX_MAXTRIES) txr->busy = EM_TX_HUNG; /* Schedule a TX tasklet if needed */ if (txr->tx_avail <= EM_MAX_SCATTER) taskqueue_enqueue(txr->tq, &txr->tx_task); } callout_reset(&adapter->timer, hz, em_local_timer, adapter); #ifndef DEVICE_POLLING /* Trigger an RX interrupt to guarantee mbuf refresh */ E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger); #endif return; hung: /* Looks like we're hung */ device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n", txr->me); em_print_debug_info(adapter); if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); adapter->watchdog_events++; em_init_locked(adapter); } static void em_update_link_status(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; if_t ifp = adapter->ifp; device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; u32 link_check = 0; /* Get the cached link value or read phy for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { if (hw->mac.type == e1000_pch_spt) msec_delay(50); /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; if (link_check) /* ESB2 fix */ e1000_cfg_on_link_up(hw); } else link_check = TRUE; break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; default: case e1000_media_type_unknown: break; } /* Now check for a transition */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(hw, &adapter->link_speed, &adapter->link_duplex); /* Check if we must disable SPEED_MODE bit on PCI-E */ if ((adapter->link_speed != SPEED_1000) && ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572))) { int tarc0; tarc0 = E1000_READ_REG(hw, E1000_TARC(0)); tarc0 &= ~TARC_SPEED_MODE_BIT; E1000_WRITE_REG(hw, E1000_TARC(0), tarc0); } if (bootverbose) device_printf(dev, "Link is up %d Mbps %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ? 
"Full Duplex" : "Half Duplex")); adapter->link_active = 1; adapter->smartspeed = 0; if_setbaudrate(ifp, adapter->link_speed * 1000000); if_link_state_change(ifp, LINK_STATE_UP); } else if (!link_check && (adapter->link_active == 1)) { if_setbaudrate(ifp, 0); adapter->link_speed = 0; adapter->link_duplex = 0; if (bootverbose) device_printf(dev, "Link is Down\n"); adapter->link_active = 0; /* Link down, disable hang detection */ for (int i = 0; i < adapter->num_queues; i++, txr++) txr->busy = EM_TX_IDLE; if_link_state_change(ifp, LINK_STATE_DOWN); } } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * * This routine should always be called with BOTH the CORE * and TX locks. **********************************************************************/ static void em_stop(void *arg) { struct adapter *adapter = arg; if_t ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; EM_CORE_LOCK_ASSERT(adapter); INIT_DEBUGOUT("em_stop: begin"); em_disable_intr(adapter); callout_stop(&adapter->timer); /* Tell the stack that the interface is no longer active */ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); /* Disarm Hang Detection. */ for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); txr->busy = EM_TX_IDLE; EM_TX_UNLOCK(txr); } /* I219 needs some special flushing to avoid hangs */ if (adapter->hw.mac.type == e1000_pch_spt) em_flush_desc_rings(adapter); e1000_reset_hw(&adapter->hw); E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. 
* **********************************************************************/ static void em_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; /* Make sure our PCI config space has the necessary stuff set */ pci_enable_busmaster(dev); adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Do Shared Code Init and Setup */ if (e1000_set_mac_type(&adapter->hw)) { device_printf(dev, "Setup init failure\n"); return; } } static int em_allocate_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; int rid; rid = PCIR_BAR(0); adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->memory == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->memory); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->memory); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; adapter->hw.back = &adapter->osdep; return (0); } /********************************************************************* * * Setup the Legacy or MSI Interrupt handler * **********************************************************************/ int em_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; int error, rid = 0; /* Manually turn off all interrupts */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); if (adapter->msix == 1) /* using MSI */ rid = 1; /* We allocate a single interrupt resource */ adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "interrupt\n"); return (ENXIO); } /* * Allocate a fast interrupt and the associated * deferred processing contexts. */ TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter); adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); /* Use a TX only tasklet for local timer */ TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, taskqueue_thread_enqueue, &txr->tq); taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq", device_get_nameunit(adapter->dev)); TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter); if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET, em_irq_fast, NULL, adapter, &adapter->tag)) != 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); taskqueue_free(adapter->tq); adapter->tq = NULL; return (error); } return (0); } /********************************************************************* * * Setup the MSIX Interrupt handlers * This is not really Multiqueue, rather * its just separate interrupt vectors * for TX, RX, and Link. 
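 *
 * Vectors are handed out RX rings first, then TX rings, then the
 * link interrupt, and each queue vector is bound to the next CPU
 * tracked in em_last_bind_cpu.  With EM_MULTIQUEUE and two queues
 * this is the five-vector layout em_setup_msix() asks for.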
* **********************************************************************/ int em_allocate_msix(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; int error, rid, vector = 0; int cpu_id = 0; /* Make sure all interrupts are disabled */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); /* First set up ring resources */ for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) { /* RX ring */ rid = vector + 1; rxr->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (rxr->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "RX MSIX Interrupt %d\n", i); return (ENXIO); } if ((error = bus_setup_intr(dev, rxr->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx, rxr, &rxr->tag)) != 0) { device_printf(dev, "Failed to register RX handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i); #endif rxr->msix = vector; if (em_last_bind_cpu < 0) em_last_bind_cpu = CPU_FIRST(); cpu_id = em_last_bind_cpu; bus_bind_intr(dev, rxr->res, cpu_id); TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr); rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT, taskqueue_thread_enqueue, &rxr->tq); taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)", device_get_nameunit(adapter->dev), cpu_id); /* ** Set the bit to enable interrupt ** in E1000_IMS -- bits 20 and 21 ** are for RX0 and RX1, note this has ** NOTHING to do with the MSIX vector */ rxr->ims = 1 << (20 + i); adapter->ims |= rxr->ims; adapter->ivars |= (8 | rxr->msix) << (i * 4); em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); } for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) { /* TX ring */ rid = vector + 1; txr->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (txr->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "TX MSIX Interrupt %d\n", i); return (ENXIO); } if ((error = bus_setup_intr(dev, txr->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx, txr, &txr->tag)) != 0) { device_printf(dev, "Failed to register TX handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i); #endif txr->msix = vector; if (em_last_bind_cpu < 0) em_last_bind_cpu = CPU_FIRST(); cpu_id = em_last_bind_cpu; bus_bind_intr(dev, txr->res, cpu_id); TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, taskqueue_thread_enqueue, &txr->tq); taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)", device_get_nameunit(adapter->dev), cpu_id); /* ** Set the bit to enable interrupt ** in E1000_IMS -- bits 22 and 23 ** are for TX0 and TX1, note this has ** NOTHING to do with the MSIX vector */ txr->ims = 1 << (22 + i); adapter->ims |= txr->ims; adapter->ivars |= (8 | txr->msix) << (8 + (i * 4)); em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); } /* Link interrupt */ rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!adapter->res) { device_printf(dev,"Unable to allocate " "bus resource: Link interrupt [%d]\n", rid); return (ENXIO); } /* Set the link handler function */ error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_link, adapter, &adapter->tag); if (error) { adapter->res = NULL; device_printf(dev, "Failed to register LINK handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); #endif 
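	/*
	 * Added note: adapter->ivars, assembled above and here, packs one 4-bit
	 * entry per interrupt cause for the 82574 IVAR register -- the OR'ed-in
	 * "8" marks the entry valid and the low bits carry the MSI-X vector:
	 * RX queues in nibbles 0-1, TX queues in nibbles 2-3, the link cause in
	 * bits 16-19, with bit 31 also set.  The assembled value is programmed
	 * into the hardware elsewhere in the driver; see the 82574 datasheet
	 * for the authoritative field definitions.
	 */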
adapter->linkvec = vector; adapter->ivars |= (8 | vector) << 16; adapter->ivars |= 0x80000000; return (0); } static void em_free_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr; struct rx_ring *rxr; int rid; /* ** Release all the queue interrupt resources: */ for (int i = 0; i < adapter->num_queues; i++) { txr = &adapter->tx_rings[i]; /* an early abort? */ if (txr == NULL) break; rid = txr->msix +1; if (txr->tag != NULL) { bus_teardown_intr(dev, txr->res, txr->tag); txr->tag = NULL; } if (txr->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, txr->res); rxr = &adapter->rx_rings[i]; /* an early abort? */ if (rxr == NULL) break; rid = rxr->msix +1; if (rxr->tag != NULL) { bus_teardown_intr(dev, rxr->res, rxr->tag); rxr->tag = NULL; } if (rxr->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, rxr->res); } if (adapter->linkvec) /* we are doing MSIX */ rid = adapter->linkvec + 1; else (adapter->msix != 0) ? (rid = 1):(rid = 0); if (adapter->tag != NULL) { bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); if (adapter->msix) pci_release_msi(dev); if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); if (adapter->memory != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->memory); if (adapter->flash != NULL) bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH, adapter->flash); } /* * Setup MSI or MSI/X */ static int em_setup_msix(struct adapter *adapter) { device_t dev = adapter->dev; int val; /* Nearly always going to use one queue */ adapter->num_queues = 1; /* ** Try using MSI-X for Hartwell adapters */ if ((adapter->hw.mac.type == e1000_82574) && (em_enable_msix == TRUE)) { #ifdef EM_MULTIQUEUE adapter->num_queues = (em_num_queues == 1) ? 1 : 2; if (adapter->num_queues > 1) em_enable_vectors_82574(adapter); #endif /* Map the MSIX BAR */ int rid = PCIR_BAR(EM_MSIX_BAR); adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->msix_mem == NULL) { /* May not be enabled */ device_printf(adapter->dev, "Unable to map MSIX table \n"); goto msi; } val = pci_msix_count(dev); #ifdef EM_MULTIQUEUE /* We need 5 vectors in the multiqueue case */ if (adapter->num_queues > 1 ) { if (val >= 5) val = 5; else { adapter->num_queues = 1; device_printf(adapter->dev, "Insufficient MSIX vectors for >1 queue, " "using single queue...\n"); goto msix_one; } } else { msix_one: #endif if (val >= 3) val = 3; else { device_printf(adapter->dev, "Insufficient MSIX vectors, using MSI\n"); goto msi; } #ifdef EM_MULTIQUEUE } #endif if ((pci_alloc_msix(dev, &val) == 0)) { device_printf(adapter->dev, "Using MSIX interrupts " "with %d vectors\n", val); return (val); } /* ** If MSIX alloc failed or provided us with ** less than needed, free and fall through to MSI */ pci_release_msi(dev); } msi: if (adapter->msix_mem != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); adapter->msix_mem = NULL; } val = 1; if (pci_alloc_msi(dev, &val) == 0) { device_printf(adapter->dev, "Using an MSI interrupt\n"); return (val); } /* Should only happen due to manual configuration */ device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n"); return (0); } /* ** The 3 following flush routines are used as a workaround in the ** I219 client parts and only for them. 
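**
** Added summary: em_flush_desc_rings() below drives the sequence -- it first
** sets the DISABLE_MULR_FIX bit in FEXTNVM11, then reads the descriptor ring
** status word from PCI config space (PCICFG_DESC_RING_STATUS); only when
** FLUSH_DESC_REQUIRED is set and TDLEN(0) is non-zero does it flush the TX
** ring, rechecking afterwards to decide whether the RX ring needs flushing
** as well.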
** ** em_flush_tx_ring - remove all descriptors from the tx_ring ** ** We want to clear all pending descriptors from the TX ring. ** zeroing happens when the HW reads the regs. We assign the ring itself as ** the data of the next descriptor. We don't care about the data we are about ** to reset the HW. */ static void em_flush_tx_ring(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct tx_ring *txr = adapter->tx_rings; struct e1000_tx_desc *txd; u32 tctl, txd_lower = E1000_TXD_CMD_IFCS; u16 size = 512; tctl = E1000_READ_REG(hw, E1000_TCTL); E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN); txd = &txr->tx_base[txr->next_avail_desc++]; if (txr->next_avail_desc == adapter->num_tx_desc) txr->next_avail_desc = 0; /* Just use the ring as a dummy buffer addr */ txd->buffer_addr = txr->txdma.dma_paddr; txd->lower.data = htole32(txd_lower | size); txd->upper.data = 0; /* flush descriptors to memory before notifying the HW */ wmb(); E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc); mb(); usec_delay(250); } /* ** em_flush_rx_ring - remove all descriptors from the rx_ring ** ** Mark all descriptors in the RX ring as consumed and disable the rx ring */ static void em_flush_rx_ring(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 rctl, rxdctl; rctl = E1000_READ_REG(hw, E1000_RCTL); E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); E1000_WRITE_FLUSH(hw); usec_delay(150); rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); /* zero the lower 14 bits (prefetch and host thresholds) */ rxdctl &= 0xffffc000; /* * update thresholds: prefetch threshold to 31, host threshold to 1 * and make sure the granularity is "descriptors" and not "cache lines" */ rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl); /* momentarily enable the RX ring for the changes to take effect */ E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN); E1000_WRITE_FLUSH(hw); usec_delay(150); E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); } /* ** em_flush_desc_rings - remove all descriptors from the descriptor rings ** ** In i219, the descriptor rings must be emptied before resetting the HW ** or before changing the device state to D3 during runtime (runtime PM). ** ** Failure to do this will cause the HW to enter a unit hang state which can ** only be released by PCI reset on the device ** */ static void em_flush_desc_rings(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; device_t dev = adapter->dev; u16 hang_state; u32 fext_nvm11, tdlen; /* First, disable MULR fix in FEXTNVM11 */ fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11); fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX; E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11); /* do nothing if we're not in faulty state, or if the queue is empty */ tdlen = E1000_READ_REG(hw, E1000_TDLEN(0)); hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2); if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen) return; em_flush_tx_ring(adapter); /* recheck, maybe the fault is caused by the rx ring */ hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2); if (hang_state & FLUSH_DESC_REQUIRED) em_flush_rx_ring(adapter); } /********************************************************************* * * Initialize the hardware to a configuration * as specified by the adapter structure. 
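 *  Worked example for the flow-control setup below (illustrative numbers):
 *  with a 32K receive packet buffer and a standard ~1518-byte max frame,
 *  rx_buffer_size = 32 * 1024 = 32768, so the high water mark becomes
 *  32768 - roundup2(1518, 1024) = 30720 bytes and the low water mark
 *  30720 - 1500 = 29220 bytes.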
* **********************************************************************/ static void em_reset(struct adapter *adapter) { device_t dev = adapter->dev; if_t ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u16 rx_buffer_size; u32 pba; INIT_DEBUGOUT("em_reset: begin"); /* Set up smart power down as default off on newer adapters. */ if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 || hw->mac.type == e1000_82572)) { u16 phy_tmp = 0; /* Speed up time to link by disabling smart power down. */ e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp); phy_tmp &= ~IGP02E1000_PM_SPD; e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp); } /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer * the remainder is used for the transmit buffer. */ switch (hw->mac.type) { /* Total Packet Buffer on these is 48K */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */ break; case e1000_82573: /* 82573: Total Packet Buffer is 32K */ pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */ break; case e1000_82574: case e1000_82583: pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */ break; case e1000_ich8lan: pba = E1000_PBA_8K; break; case e1000_ich9lan: case e1000_ich10lan: /* Boost Receive side for jumbo frames */ if (adapter->hw.mac.max_frame_size > 4096) pba = E1000_PBA_14K; else pba = E1000_PBA_10K; break; case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: pba = E1000_PBA_26K; break; default: if (adapter->hw.mac.max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ } E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. * - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. Here we use an arbitrary value of 1500 which will * restart after one full frame is pulled from the buffer. There * could be several smaller frames in the buffer and if so they will * not trigger the XON until their total number reduces the buffer * by 1500. * - The pause time is fairly large at 1000 x 512ns = 512 usec. */ rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 ); hw->fc.high_water = rx_buffer_size - roundup2(adapter->hw.mac.max_frame_size, 1024); hw->fc.low_water = hw->fc.high_water - 1500; if (adapter->fc) /* locally set flow control value? 
*/ hw->fc.requested_mode = adapter->fc; else hw->fc.requested_mode = e1000_fc_full; if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; else hw->fc.pause_time = EM_FC_PAUSE_TIME; hw->fc.send_xon = TRUE; /* Device specific overrides/settings */ switch (hw->mac.type) { case e1000_pchlan: /* Workaround: no TX flow ctrl for PCH */ hw->fc.requested_mode = e1000_fc_rx_pause; hw->fc.pause_time = 0xFFFF; /* override */ if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x3500; hw->fc.low_water = 0x1500; } else { hw->fc.high_water = 0x5000; hw->fc.low_water = 0x3000; } hw->fc.refresh_time = 0x1000; break; case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: hw->fc.high_water = 0x5C20; hw->fc.low_water = 0x5048; hw->fc.pause_time = 0x0650; hw->fc.refresh_time = 0x0400; /* Jumbos need adjusted PBA */ if (if_getmtu(ifp) > ETHERMTU) E1000_WRITE_REG(hw, E1000_PBA, 12); else E1000_WRITE_REG(hw, E1000_PBA, 26); break; case e1000_ich9lan: case e1000_ich10lan: if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x2800; hw->fc.low_water = hw->fc.high_water - 8; break; } /* else fall thru */ default: if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; break; } /* I219 needs some special flushing to avoid hangs */ if (hw->mac.type == e1000_pch_spt) em_flush_desc_rings(adapter); /* Issue a global reset */ e1000_reset_hw(hw); E1000_WRITE_REG(hw, E1000_WUC, 0); em_disable_aspm(adapter); /* and a re-init */ if (e1000_init_hw(hw) < 0) { device_printf(dev, "Hardware Initialization Failed\n"); return; } E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); return; } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int em_setup_interface(device_t dev, struct adapter *adapter) { if_t ifp; INIT_DEBUGOUT("em_setup_interface: begin"); ifp = adapter->ifp = if_gethandle(IFT_ETHER); if (ifp == 0) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setdev(ifp, dev); if_setinitfn(ifp, em_init); if_setsoftc(ifp, adapter); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); if_setioctlfn(ifp, em_ioctl); if_setgetcounterfn(ifp, em_get_counter); /* TSO parameters */ ifp->if_hw_tsomax = IP_MAXPACKET; /* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */ ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5; ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE; #ifdef EM_MULTIQUEUE /* Multiqueue stack interface */ if_settransmitfn(ifp, em_mq_start); if_setqflushfn(ifp, em_qflush); #else if_setstartfn(ifp, em_start); if_setsendqlen(ifp, adapter->num_tx_desc - 1); if_setsendqready(ifp); #endif ether_ifattach(ifp, adapter->hw.mac.addr); if_setcapabilities(ifp, 0); if_setcapenable(ifp, 0); if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4, 0); /* * Tell the upper layer(s) we * support full VLAN capability */ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU, 0); if_setcapenable(ifp, if_getcapabilities(ifp)); /* ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. 
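** (At runtime the capability can typically be toggled with
** "ifconfig em0 vlanhwfilter" / "ifconfig em0 -vlanhwfilter", assuming the
** standard ifconfig(8) capability keywords.)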
If ** using vlans directly on the em driver you can ** enable this and get full hardware tag filtering. */ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0); #ifdef DEVICE_POLLING if_setcapabilitiesbit(ifp, IFCAP_POLLING,0); #endif /* Enable only WOL MAGIC by default */ if (adapter->wol) { if_setcapabilitiesbit(ifp, IFCAP_WOL, 0); if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0); } /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, em_media_change, em_media_status); if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { u_char fiber_type = IFM_1000_SX; /* default type */ ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL); } else { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /* * Manage DMA'able memory. */ static void em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs[0].ds_addr; } static int em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma, int mapflags) { int error; error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ EM_DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->dma_tag); if (error) { device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n", __func__, error); goto fail_0; } error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); if (error) { device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, error); goto fail_2; } dma->dma_paddr = 0; error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (error || dma->dma_paddr == 0) { device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n", __func__, error); goto fail_3; } return (0); fail_3: bus_dmamap_unload(dma->dma_tag, dma->dma_map); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_tag = NULL; return (error); } static void em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma) { if (dma->dma_tag == NULL) return; if (dma->dma_paddr != 0) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); dma->dma_paddr = 0; } if (dma->dma_vaddr != NULL) { bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); dma->dma_vaddr = NULL; } bus_dma_tag_destroy(dma->dma_tag); dma->dma_tag = NULL; } /********************************************************************* * * Allocate 
memory for the transmit and receive rings, and then * the descriptors associated with each, called only once at attach. * **********************************************************************/ static int em_allocate_queues(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = NULL; struct rx_ring *rxr = NULL; int rsize, tsize, error = E1000_SUCCESS; int txconf = 0, rxconf = 0; /* Allocate the TX ring struct memory */ if (!(adapter->tx_rings = (struct tx_ring *) malloc(sizeof(struct tx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); error = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(adapter->rx_rings = (struct rx_ring *) malloc(sizeof(struct rx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); error = ENOMEM; goto rx_fail; } tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); /* * Now set up the TX queues, txconf is needed to handle the * possibility that things fail midcourse and we need to * undo memory gracefully */ for (int i = 0; i < adapter->num_queues; i++, txconf++) { /* Set up some basics */ txr = &adapter->tx_rings[i]; txr->adapter = adapter; txr->me = i; /* Initialize the TX lock */ snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate TX Descriptor memory\n"); error = ENOMEM; goto err_tx_desc; } txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr; bzero((void *)txr->tx_base, tsize); if (em_allocate_transmit_buffers(txr)) { device_printf(dev, "Critical Failure setting up transmit buffers\n"); error = ENOMEM; goto err_tx_desc; } #if __FreeBSD_version >= 800000 /* Allocate a buf ring */ txr->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &txr->tx_mtx); #endif } /* * Next the RX queues... */ rsize = roundup2(adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); for (int i = 0; i < adapter->num_queues; i++, rxconf++) { rxr = &adapter->rx_rings[i]; rxr->adapter = adapter; rxr->me = i; /* Initialize the RX lock */ snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); if (em_dma_malloc(adapter, rsize, &rxr->rxdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate RxDescriptor memory\n"); error = ENOMEM; goto err_rx_desc; } rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr; bzero((void *)rxr->rx_base, rsize); /* Allocate receive buffers for the ring*/ if (em_allocate_receive_buffers(rxr)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); error = ENOMEM; goto err_rx_desc; } } return (0); err_rx_desc: for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) em_dma_free(adapter, &rxr->rxdma); err_tx_desc: for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) em_dma_free(adapter, &txr->txdma); free(adapter->rx_rings, M_DEVBUF); rx_fail: #if __FreeBSD_version >= 800000 buf_ring_free(txr->br, M_DEVBUF); #endif free(adapter->tx_rings, M_DEVBUF); fail: return (error); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. 
This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int em_allocate_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; device_t dev = adapter->dev; struct em_txbuffer *txbuf; int error, i; /* * Setup DMA descriptor areas. */ if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ EM_TSO_SIZE, /* maxsize */ EM_MAX_SCATTER, /* nsegments */ PAGE_SIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->txtag))) { device_printf(dev,"Unable to allocate TX DMA tag\n"); goto fail; } if (!(txr->tx_buffers = (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) * adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); error = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); if (error != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } return 0; fail: /* We free all, it handles case where we are in the middle */ em_free_transmit_structures(adapter); return (error); } /********************************************************************* * * Initialize a transmit ring. * **********************************************************************/ static void em_setup_transmit_ring(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct em_txbuffer *txbuf; int i; #ifdef DEV_NETMAP struct netmap_slot *slot; struct netmap_adapter *na = netmap_getna(adapter->ifp); #endif /* DEV_NETMAP */ /* Clear the old descriptor contents */ EM_TX_LOCK(txr); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_TX, txr->me, 0); #endif /* DEV_NETMAP */ bzero((void *)txr->tx_base, (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc); /* Reset indices */ txr->next_avail_desc = 0; txr->next_to_clean = 0; /* Free any existing tx buffers. */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; } #ifdef DEV_NETMAP if (slot) { int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); uint64_t paddr; void *addr; addr = PNMB(na, slot + si, &paddr); txr->tx_base[i].buffer_addr = htole64(paddr); /* reload the map for netmap mode */ netmap_load_map(na, txr->txtag, txbuf->map, addr); } #endif /* DEV_NETMAP */ /* clear the watch index */ txbuf->next_eop = -1; } /* Set number of descriptors available */ txr->tx_avail = adapter->num_tx_desc; txr->busy = EM_TX_IDLE; /* Clear checksum offload context. */ txr->last_hw_offload = 0; txr->last_hw_ipcss = 0; txr->last_hw_ipcso = 0; txr->last_hw_tucss = 0; txr->last_hw_tucso = 0; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); EM_TX_UNLOCK(txr); } /********************************************************************* * * Initialize all transmit rings. 
* **********************************************************************/ static void em_setup_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) em_setup_transmit_ring(txr); return; } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void em_initialize_transmit_unit(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct e1000_hw *hw = &adapter->hw; u32 tctl, txdctl = 0, tarc, tipg = 0; INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); for (int i = 0; i < adapter->num_queues; i++, txr++) { u64 bus_addr = txr->txdma.dma_paddr; /* Base and Len of TX Ring */ E1000_WRITE_REG(hw, E1000_TDLEN(i), adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), (u32)bus_addr); /* Init the HEAD/TAIL indices */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)), E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); txr->busy = EM_TX_IDLE; txdctl = 0; /* clear txdctl */ txdctl |= 0x1f; /* PTHRESH */ txdctl |= 1 << 8; /* HTHRESH */ txdctl |= 1 << 16;/* WTHRESH */ txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */ txdctl |= E1000_TXDCTL_GRAN; txdctl |= 1 << 25; /* LWTHRESH */ E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } /* Set the default values for the Tx Inter Packet Gap timer */ switch (adapter->hw.mac.type) { case e1000_80003es2lan: tipg = DEFAULT_82543_TIPG_IPGR1; tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; default: if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) tipg = DEFAULT_82543_TIPG_IPGT_FIBER; else tipg = DEFAULT_82543_TIPG_IPGT_COPPER; tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; } E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); if(adapter->hw.mac.type >= e1000_82540) E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value); if ((adapter->hw.mac.type == e1000_82571) || (adapter->hw.mac.type == e1000_82572)) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_SPEED_MODE_BIT; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } else if (adapter->hw.mac.type == e1000_80003es2lan) { /* errata: program both queues to unweighted RR */ tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else if (adapter->hw.mac.type == e1000_82574) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_ERRATA_BIT; if ( adapter->num_queues > 1) { tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX); E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } adapter->txd_cmd = E1000_TXD_CMD_IFCS; if (adapter->tx_int_delay.value > 0) adapter->txd_cmd |= E1000_TXD_CMD_IDE; /* Program the Transmit Control Register */ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | 
E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); if (adapter->hw.mac.type >= e1000_82571) tctl |= E1000_TCTL_MULR; /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); if (hw->mac.type == e1000_pch_spt) { u32 reg; reg = E1000_READ_REG(hw, E1000_IOSFPC); reg |= E1000_RCTL_RDMTS_HEX; E1000_WRITE_REG(hw, E1000_IOSFPC, reg); reg = E1000_READ_REG(hw, E1000_TARC(0)); reg |= E1000_TARC0_CB_MULTIQ_3_REQ; E1000_WRITE_REG(hw, E1000_TARC(0), reg); } } /********************************************************************* * * Free all transmit rings. * **********************************************************************/ static void em_free_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); em_free_transmit_buffers(txr); em_dma_free(adapter, &txr->txdma); EM_TX_UNLOCK(txr); EM_TX_LOCK_DESTROY(txr); } free(adapter->tx_rings, M_DEVBUF); } /********************************************************************* * * Free transmit ring related data structures. * **********************************************************************/ static void em_free_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct em_txbuffer *txbuf; INIT_DEBUGOUT("free_transmit_ring: begin"); if (txr->tx_buffers == NULL) return; for (int i = 0; i < adapter->num_tx_desc; i++) { txbuf = &txr->tx_buffers[i]; if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; if (txbuf->map != NULL) { bus_dmamap_destroy(txr->txtag, txbuf->map); txbuf->map = NULL; } } else if (txbuf->map != NULL) { bus_dmamap_unload(txr->txtag, txbuf->map); bus_dmamap_destroy(txr->txtag, txbuf->map); txbuf->map = NULL; } } #if __FreeBSD_version >= 800000 if (txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF); #endif if (txr->tx_buffers != NULL) { free(txr->tx_buffers, M_DEVBUF); txr->tx_buffers = NULL; } if (txr->txtag != NULL) { bus_dma_tag_destroy(txr->txtag); txr->txtag = NULL; } return; } /********************************************************************* * The offload context is protocol specific (TCP/UDP) and thus * only needs to be set when the protocol changes. The occasion * of a context change can be a performance detriment, and * might be better just disabled. The reason arises in the way * in which the controller supports pipelined requests from the * Tx data DMA. Up to four requests can be pipelined, and they may * belong to the same packet or to multiple packets. However all * requests for one packet are issued before a request is issued * for a subsequent packet and if a request for the next packet * requires a context change, that request will be stalled * until the previous request completes. This means setting up * a new context effectively disables pipelined Tx data DMA which * in turn greatly slow down performance to send small sized * frames. 
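 * For that reason the code below caches the last programmed values
 * (last_hw_offload, last_hw_ipcss/ipcso, last_hw_tucss/tucso) and skips
 * emitting an identical context descriptor when nothing has changed --
 * except on the 82574 with more than one queue, where the hardware only
 * remembers a single context and a fresh one must be set up every time.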
**********************************************************************/ static void em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, struct ip *ip, u32 *txd_upper, u32 *txd_lower) { struct adapter *adapter = txr->adapter; struct e1000_context_desc *TXD = NULL; struct em_txbuffer *tx_buffer; int cur, hdr_len; u32 cmd = 0; u16 offload = 0; u8 ipcso, ipcss, tucso, tucss; ipcss = ipcso = tucss = tucso = 0; hdr_len = ip_off + (ip->ip_hl << 2); cur = txr->next_avail_desc; /* Setup of IP header checksum. */ if (mp->m_pkthdr.csum_flags & CSUM_IP) { *txd_upper |= E1000_TXD_POPTS_IXSM << 8; offload |= CSUM_IP; ipcss = ip_off; ipcso = ip_off + offsetof(struct ip, ip_sum); /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; TXD->lower_setup.ip_fields.ipcss = ipcss; TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len); TXD->lower_setup.ip_fields.ipcso = ipcso; cmd |= E1000_TXD_CMD_IP; } if (mp->m_pkthdr.csum_flags & CSUM_TCP) { *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; offload |= CSUM_TCP; tucss = hdr_len; tucso = hdr_len + offsetof(struct tcphdr, th_sum); /* * The 82574L can only remember the *last* context used * regardless of queue that it was use for. We cannot reuse * contexts on this hardware platform and must generate a new * context every time. 82574L hardware spec, section 7.2.6, * second note. */ if (adapter->num_queues < 2) { /* * Setting up new checksum offload context for every * frames takes a lot of processing time for hardware. * This also reduces performance a lot for small sized * frames so avoid it if driver can use previously * configured checksum offload context. */ if (txr->last_hw_offload == offload) { if (offload & CSUM_IP) { if (txr->last_hw_ipcss == ipcss && txr->last_hw_ipcso == ipcso && txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } else { if (txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } } txr->last_hw_offload = offload; txr->last_hw_tucss = tucss; txr->last_hw_tucso = tucso; } /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; TXD->upper_setup.tcp_fields.tucss = hdr_len; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = tucso; cmd |= E1000_TXD_CMD_TCP; } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) { *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; tucss = hdr_len; tucso = hdr_len + offsetof(struct udphdr, uh_sum); /* * The 82574L can only remember the *last* context used * regardless of queue that it was use for. We cannot reuse * contexts on this hardware platform and must generate a new * context every time. 82574L hardware spec, section 7.2.6, * second note. */ if (adapter->num_queues < 2) { /* * Setting up new checksum offload context for every * frames takes a lot of processing time for hardware. * This also reduces performance a lot for small sized * frames so avoid it if driver can use previously * configured checksum offload context. 
*/ if (txr->last_hw_offload == offload) { if (offload & CSUM_IP) { if (txr->last_hw_ipcss == ipcss && txr->last_hw_ipcso == ipcso && txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } else { if (txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } } txr->last_hw_offload = offload; txr->last_hw_tucss = tucss; txr->last_hw_tucso = tucso; } /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; TXD->upper_setup.tcp_fields.tucss = tucss; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = tucso; } if (offload & CSUM_IP) { txr->last_hw_ipcss = ipcss; txr->last_hw_ipcso = ipcso; } TXD->tcp_seg_setup.data = htole32(0); TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd); tx_buffer = &txr->tx_buffers[cur]; tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; if (++cur == adapter->num_tx_desc) cur = 0; txr->tx_avail--; txr->next_avail_desc = cur; } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) * **********************************************************************/ static void em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower) { struct adapter *adapter = txr->adapter; struct e1000_context_desc *TXD; struct em_txbuffer *tx_buffer; int cur, hdr_len; /* * In theory we can use the same TSO context if and only if * frame is the same type(IP/TCP) and the same MSS. However * checking whether a frame has the same IP/TCP structure is * hard thing so just ignore that and always restablish a * new TSO context. */ hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2); *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */ E1000_TXD_DTYP_D | /* Data descr type */ E1000_TXD_CMD_TSE); /* Do TSE on this packet */ /* IP and/or TCP header checksum calculation and insertion. */ *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8; cur = txr->next_avail_desc; tx_buffer = &txr->tx_buffers[cur]; TXD = (struct e1000_context_desc *) &txr->tx_base[cur]; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place put the checksum. */ TXD->lower_setup.ip_fields.ipcss = ip_off; TXD->lower_setup.ip_fields.ipcse = htole16(ip_off + (ip->ip_hl << 2) - 1); TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum); /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2); TXD->upper_setup.tcp_fields.tucse = 0; TXD->upper_setup.tcp_fields.tucso = ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum); /* * Payload size per packet w/o any headers. * Length of all headers up to payload. 
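 * For illustration (typical values, not taken from this code): on a plain
 * Ethernet TCP/IPv4 frame with no options ip_off is 14, so hdr_len =
 * 14 + 20 + 20 = 54, and the stack usually hands down tso_segsz = 1460 for
 * a 1500-byte MTU; the hardware then replicates those 54 header bytes in
 * front of each mss-sized chunk of payload it segments out.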
*/ TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz); TXD->tcp_seg_setup.fields.hdr_len = hdr_len; TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | /* Extended descr */ E1000_TXD_CMD_TSE | /* TSE context */ E1000_TXD_CMD_IP | /* Do IP csum */ E1000_TXD_CMD_TCP | /* Do TCP checksum */ (mp->m_pkthdr.len - (hdr_len))); /* Total len */ tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; if (++cur == adapter->num_tx_desc) cur = 0; txr->tx_avail--; txr->next_avail_desc = cur; txr->tx_tso = TRUE; } /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. * **********************************************************************/ static void em_txeof(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; int first, last, done, processed; struct em_txbuffer *tx_buffer; struct e1000_tx_desc *tx_desc, *eop_desc; if_t ifp = adapter->ifp; EM_TX_LOCK_ASSERT(txr); #ifdef DEV_NETMAP if (netmap_tx_irq(ifp, txr->me)) return; #endif /* DEV_NETMAP */ /* No work, make sure hang detection is disabled */ if (txr->tx_avail == adapter->num_tx_desc) { txr->busy = EM_TX_IDLE; return; } processed = 0; first = txr->next_to_clean; tx_desc = &txr->tx_base[first]; tx_buffer = &txr->tx_buffers[first]; last = tx_buffer->next_eop; eop_desc = &txr->tx_base[last]; /* * What this does is get the index of the * first descriptor AFTER the EOP of the * first packet, that way we can do the * simple comparison on the inner while loop. */ if (++last == adapter->num_tx_desc) last = 0; done = last; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) { /* We clean the range of the packet */ while (first != done) { tx_desc->upper.data = 0; tx_desc->lower.data = 0; tx_desc->buffer_addr = 0; ++txr->tx_avail; ++processed; if (tx_buffer->m_head) { bus_dmamap_sync(txr->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; } tx_buffer->next_eop = -1; if (++first == adapter->num_tx_desc) first = 0; tx_buffer = &txr->tx_buffers[first]; tx_desc = &txr->tx_base[first]; } if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* See if we can continue to the next packet */ last = tx_buffer->next_eop; if (last != -1) { eop_desc = &txr->tx_base[last]; /* Get new done point */ if (++last == adapter->num_tx_desc) last = 0; done = last; } else break; } bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); txr->next_to_clean = first; /* ** Hang detection: we know there's work outstanding ** or the entry return would have been taken, so no ** descriptor processed here indicates a potential hang. ** The local timer will examine this and do a reset if needed. */ if (processed == 0) { if (txr->busy != EM_TX_HUNG) ++txr->busy; } else /* At least one descriptor was cleaned */ txr->busy = EM_TX_BUSY; /* note this clears HUNG */ /* * If we have a minimum free, clear IFF_DRV_OACTIVE * to tell the stack that it is OK to send packets. * Notice that all writes of OACTIVE happen under the * TX lock which, with a single queue, guarantees * sanity. 
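 * The threshold used below is EM_MAX_SCATTER free descriptors, i.e. enough
 * room to accept at least one maximally fragmented frame before the stack
 * is told it may transmit again.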
*/ if (txr->tx_avail >= EM_MAX_SCATTER) { if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); } /* Disable hang detection if all clean */ if (txr->tx_avail == adapter->num_tx_desc) txr->busy = EM_TX_IDLE; } /********************************************************************* * * Refresh RX descriptor mbufs from system mbuf buffer pool. * **********************************************************************/ static void em_refresh_mbufs(struct rx_ring *rxr, int limit) { struct adapter *adapter = rxr->adapter; struct mbuf *m; bus_dma_segment_t segs; struct em_rxbuffer *rxbuf; int i, j, error, nsegs; bool cleaned = FALSE; i = j = rxr->next_to_refresh; /* ** Get one descriptor beyond ** our work mark to control ** the loop. */ if (++j == adapter->num_rx_desc) j = 0; while (j != limit) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head == NULL) { m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); /* ** If we have a temporary resource shortage ** that causes a failure, just abort refresh ** for now, we will return to this point when ** reinvoked from em_rxeof. */ if (m == NULL) goto update; } else m = rxbuf->m_head; m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz; m->m_flags |= M_PKTHDR; m->m_data = m->m_ext.ext_buf; /* Use bus_dma machinery to setup the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, m, &segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("Refresh mbufs: hdr dmamap load" " failure - %d\n", error); m_free(m); rxbuf->m_head = NULL; goto update; } rxbuf->m_head = m; rxbuf->paddr = segs.ds_addr; bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); em_setup_rxdesc(&rxr->rx_base[i], rxbuf); cleaned = TRUE; i = j; /* Next is precalulated for us */ rxr->next_to_refresh = i; /* Calculate next controlling index */ if (++j == adapter->num_rx_desc) j = 0; } update: /* ** Update the tail pointer only if, ** and as far as we have refreshed. */ if (cleaned) E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->next_to_refresh); return; } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. 
* **********************************************************************/ static int em_allocate_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; device_t dev = adapter->dev; struct em_rxbuffer *rxbuf; int error; rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) * adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); if (rxr->rx_buffers == NULL) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); return (ENOMEM); } error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &rxr->rxtag); if (error) { device_printf(dev, "%s: bus_dma_tag_create failed %d\n", __func__, error); goto fail; } rxbuf = rxr->rx_buffers; for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) { rxbuf = &rxr->rx_buffers[i]; error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map); if (error) { device_printf(dev, "%s: bus_dmamap_create failed: %d\n", __func__, error); goto fail; } } return (0); fail: em_free_receive_structures(adapter); return (error); } /********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int em_setup_receive_ring(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct em_rxbuffer *rxbuf; bus_dma_segment_t seg[1]; int rsize, nsegs, error = 0; #ifdef DEV_NETMAP struct netmap_slot *slot; struct netmap_adapter *na = netmap_getna(adapter->ifp); #endif /* Clear the ring contents */ EM_RX_LOCK(rxr); rsize = roundup2(adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_RX, rxr->me, 0); #endif /* ** Free current RX buffer structs and their mbufs */ for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); m_freem(rxbuf->m_head); rxbuf->m_head = NULL; /* mark as freed */ } } /* Now replenish the mbufs */ for (int j = 0; j != adapter->num_rx_desc; ++j) { rxbuf = &rxr->rx_buffers[j]; #ifdef DEV_NETMAP if (slot) { int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j); uint64_t paddr; void *addr; addr = PNMB(na, slot + si, &paddr); netmap_load_map(na, rxr->rxtag, rxbuf->map, addr); + rxbuf->paddr = paddr; em_setup_rxdesc(&rxr->rx_base[j], rxbuf); continue; } #endif /* DEV_NETMAP */ rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (rxbuf->m_head == NULL) { error = ENOBUFS; goto fail; } rxbuf->m_head->m_len = adapter->rx_mbuf_sz; rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */ rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, rxbuf->m_head, seg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(rxbuf->m_head); rxbuf->m_head = NULL; goto fail; } bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); rxbuf->paddr = seg[0].ds_addr; em_setup_rxdesc(&rxr->rx_base[j], rxbuf); } rxr->next_to_check = 0; rxr->next_to_refresh = 0; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); fail: EM_RX_UNLOCK(rxr); return (error); } 
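
/*
 * Note: the cluster size used to replenish the ring above (adapter->rx_mbuf_sz,
 * anywhere from MCLBYTES up to a 9k cluster given the MJUM9BYTES DMA tag) is
 * what em_initialize_receive_unit() below keys the RCTL buffer-size bits off
 * of (SZ_2048, SZ_4096|BSEX or SZ_8192|BSEX).
 */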
/********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int em_setup_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; int q; for (q = 0; q < adapter->num_queues; q++, rxr++) if (em_setup_receive_ring(rxr)) goto fail; return (0); fail: /* * Free RX buffers allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'q' failed, so its the terminus. */ for (int i = 0; i < q; ++i) { rxr = &adapter->rx_rings[i]; for (int n = 0; n < adapter->num_rx_desc; n++) { struct em_rxbuffer *rxbuf; rxbuf = &rxr->rx_buffers[n]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); m_freem(rxbuf->m_head); rxbuf->m_head = NULL; } } rxr->next_to_check = 0; rxr->next_to_refresh = 0; } return (ENOBUFS); } /********************************************************************* * * Free all receive rings. * **********************************************************************/ static void em_free_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; for (int i = 0; i < adapter->num_queues; i++, rxr++) { em_free_receive_buffers(rxr); /* Free the ring memory as well */ em_dma_free(adapter, &rxr->rxdma); EM_RX_LOCK_DESTROY(rxr); } free(adapter->rx_rings, M_DEVBUF); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void em_free_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct em_rxbuffer *rxbuf = NULL; INIT_DEBUGOUT("free_receive_buffers: begin"); if (rxr->rx_buffers != NULL) { for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->map != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); bus_dmamap_destroy(rxr->rxtag, rxbuf->map); } if (rxbuf->m_head != NULL) { m_freem(rxbuf->m_head); rxbuf->m_head = NULL; } } free(rxr->rx_buffers, M_DEVBUF); rxr->rx_buffers = NULL; rxr->next_to_check = 0; rxr->next_to_refresh = 0; } if (rxr->rxtag != NULL) { bus_dma_tag_destroy(rxr->rxtag); rxr->rxtag = NULL; } return; } /********************************************************************* * * Enable receive unit. 
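 *  As a worked example of the interrupt throttling programmed below: assuming
 *  the header's usual MAX_INTS_PER_SEC of 8000, DEFAULT_ITR = 1/(8000 * 256ns)
 *  ~= 488 ITR units, i.e. a minimum of roughly 125us between receive
 *  interrupts.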
* **********************************************************************/ static void em_initialize_receive_unit(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; if_t ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u32 rctl, rxcsum, rfctl; INIT_DEBUGOUT("em_initialize_receive_units: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(hw, E1000_RCTL); /* Do not disable if ever enabled on this hardware */ if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583)) E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Do not store bad packets */ rctl &= ~E1000_RCTL_SBP; /* Enable Long Packet receive */ if (if_getmtu(ifp) > ETHERMTU) rctl |= E1000_RCTL_LPE; else rctl &= ~E1000_RCTL_LPE; /* Strip the CRC */ if (!em_disable_crc_stripping) rctl |= E1000_RCTL_SECRC; E1000_WRITE_REG(&adapter->hw, E1000_RADV, adapter->rx_abs_int_delay.value); E1000_WRITE_REG(&adapter->hw, E1000_RDTR, adapter->rx_int_delay.value); /* * Set the interrupt throttling rate. Value is calculated * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */ E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); /* Use extended rx descriptor formats */ rfctl = E1000_READ_REG(hw, E1000_RFCTL); rfctl |= E1000_RFCTL_EXTEN; /* ** When using MSIX interrupts we need to throttle ** using the EITR register (82574 only) */ if (hw->mac.type == e1000_82574) { for (int i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_EITR_82574(i), DEFAULT_ITR); /* Disable accelerated acknowledge */ rfctl |= E1000_RFCTL_ACK_DIS; } E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); if (if_getcapenable(ifp) & IFCAP_RXCSUM) { #ifdef EM_MULTIQUEUE rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL | E1000_RXCSUM_PCSD; #else rxcsum |= E1000_RXCSUM_TUOFL; #endif } else rxcsum &= ~E1000_RXCSUM_TUOFL; E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); #ifdef EM_MULTIQUEUE #define RSSKEYLEN 10 if (adapter->num_queues > 1) { uint8_t rss_key[4 * RSSKEYLEN]; uint32_t reta = 0; int i; /* * Configure RSS key */ arc4rand(rss_key, sizeof(rss_key), 0); for (i = 0; i < RSSKEYLEN; ++i) { uint32_t rssrk = 0; rssrk = EM_RSSRK_VAL(rss_key, i); E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk); } /* * Configure RSS redirect table in following fashion: * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] */ for (i = 0; i < sizeof(reta); ++i) { uint32_t q; q = (i % adapter->num_queues) << 7; reta |= q << (8 * i); } for (i = 0; i < 32; ++i) { E1000_WRITE_REG(hw, E1000_RETA(i), reta); } E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | E1000_MRQC_RSS_FIELD_IPV4_TCP | E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | E1000_MRQC_RSS_FIELD_IPV6_EX | E1000_MRQC_RSS_FIELD_IPV6); } #endif /* ** XXX TEMPORARY WORKAROUND: on some systems with 82573 ** long latencies are observed, like Lenovo X60. This ** change eliminates the problem, but since having positive ** values in RDTR is a known source of problems on other ** platforms another solution is being sought. 
*/ if (hw->mac.type == e1000_82573) E1000_WRITE_REG(hw, E1000_RDTR, 0x20); for (int i = 0; i < adapter->num_queues; i++, rxr++) { /* Setup the Base and Length of the Rx Descriptor Ring */ u64 bus_addr = rxr->rxdma.dma_paddr; u32 rdt = adapter->num_rx_desc - 1; /* default */ E1000_WRITE_REG(hw, E1000_RDLEN(i), adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); /* Setup the Head and Tail Descriptor Pointers */ E1000_WRITE_REG(hw, E1000_RDH(i), 0); #ifdef DEV_NETMAP /* * an init() while a netmap client is active must * preserve the rx buffers passed to userspace. */ if (if_getcapenable(ifp) & IFCAP_NETMAP) { struct netmap_adapter *na = netmap_getna(adapter->ifp); rdt -= nm_kr_rxspace(&na->rx_rings[i]); } #endif /* DEV_NETMAP */ E1000_WRITE_REG(hw, E1000_RDT(i), rdt); } /* * Set PTHRESH for improved jumbo performance * According to 10.2.5.11 of Intel 82574 Datasheet, * RXDCTL(1) is written whenever RXDCTL(0) is written. * Only write to RXDCTL(1) if there is a need for different * settings. */ if (((adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_pch2lan) || (adapter->hw.mac.type == e1000_ich10lan)) && (if_getmtu(ifp) > ETHERMTU)) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); } else if (adapter->hw.mac.type == e1000_82574) { for (int i = 0; i < adapter->num_queues; i++) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= 0x20; /* PTHRESH */ rxdctl |= 4 << 8; /* HTHRESH */ rxdctl |= 4 << 16;/* WTHRESH */ rxdctl |= 1 << 24; /* Switch to granularity */ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } } if (adapter->hw.mac.type >= e1000_pch2lan) { if (if_getmtu(ifp) > ETHERMTU) e1000_lv_jumbo_workaround_ich8lan(hw, TRUE); else e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); } /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; if (adapter->rx_mbuf_sz == MCLBYTES) rctl |= E1000_RCTL_SZ_2048; else if (adapter->rx_mbuf_sz == MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; /* ensure we clear use DTYPE of 00 here */ rctl &= ~0x00000C00; /* Write out the settings */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); return; } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. 
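 *  (Callers outside this hunk typically pass the rx_process_limit budget from
 *  the interrupt/taskqueue path, while DEVICE_POLLING supplies its own count.)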
* * For polling we also now return the number of cleaned packets *********************************************************************/ static bool em_rxeof(struct rx_ring *rxr, int count, int *done) { struct adapter *adapter = rxr->adapter; if_t ifp = adapter->ifp; struct mbuf *mp, *sendmp; u32 status = 0; u16 len; int i, processed, rxdone = 0; bool eop; union e1000_rx_desc_extended *cur; EM_RX_LOCK(rxr); /* Sync the ring */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, rxr->me, &processed)) { EM_RX_UNLOCK(rxr); return (FALSE); } #endif /* DEV_NETMAP */ for (i = rxr->next_to_check, processed = 0; count != 0;) { if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) break; cur = &rxr->rx_base[i]; status = le32toh(cur->wb.upper.status_error); mp = sendmp = NULL; if ((status & E1000_RXD_STAT_DD) == 0) break; len = le16toh(cur->wb.upper.length); eop = (status & E1000_RXD_STAT_EOP) != 0; if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) || (rxr->discard == TRUE)) { adapter->dropped_pkts++; ++rxr->rx_discarded; if (!eop) /* Catch subsequent segs */ rxr->discard = TRUE; else rxr->discard = FALSE; em_rx_discard(rxr, i); goto next_desc; } bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map); /* Assign correct length to the current fragment */ mp = rxr->rx_buffers[i].m_head; mp->m_len = len; /* Trigger for refresh */ rxr->rx_buffers[i].m_head = NULL; /* First segment? */ if (rxr->fmp == NULL) { mp->m_pkthdr.len = len; rxr->fmp = rxr->lmp = mp; } else { /* Chain mbuf's together */ mp->m_flags &= ~M_PKTHDR; rxr->lmp->m_next = mp; rxr->lmp = mp; rxr->fmp->m_pkthdr.len += len; } if (eop) { --count; sendmp = rxr->fmp; if_setrcvif(sendmp, ifp); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); em_receive_checksum(status, sendmp); #ifndef __NO_STRICT_ALIGNMENT if (adapter->hw.mac.max_frame_size > (MCLBYTES - ETHER_ALIGN) && em_fixup_rx(rxr) != 0) goto skip; #endif if (status & E1000_RXD_STAT_VP) { if_setvtag(sendmp, le16toh(cur->wb.upper.vlan)); sendmp->m_flags |= M_VLANTAG; } #ifndef __NO_STRICT_ALIGNMENT skip: #endif rxr->fmp = rxr->lmp = NULL; } next_desc: /* Sync the ring */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* Zero out the receive descriptors status. */ cur->wb.upper.status_error &= htole32(~0xFF); ++rxdone; /* cumulative for POLL */ ++processed; /* Advance our pointers to the next descriptor. */ if (++i == adapter->num_rx_desc) i = 0; /* Send to the stack */ if (sendmp != NULL) { rxr->next_to_check = i; EM_RX_UNLOCK(rxr); if_input(ifp, sendmp); EM_RX_LOCK(rxr); i = rxr->next_to_check; } /* Only refresh mbufs every 8 descriptors */ if (processed == 8) { em_refresh_mbufs(rxr, i); processed = 0; } } /* Catch any remaining refresh work */ if (e1000_rx_unrefreshed(rxr)) em_refresh_mbufs(rxr, i); rxr->next_to_check = i; if (done != NULL) *done = rxdone; EM_RX_UNLOCK(rxr); return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE); } static __inline void em_rx_discard(struct rx_ring *rxr, int i) { struct em_rxbuffer *rbuf; rbuf = &rxr->rx_buffers[i]; bus_dmamap_unload(rxr->rxtag, rbuf->map); /* Free any previous pieces */ if (rxr->fmp != NULL) { rxr->fmp->m_flags |= M_PKTHDR; m_freem(rxr->fmp); rxr->fmp = NULL; rxr->lmp = NULL; } /* ** Free buffer and allow em_refresh_mbufs() ** to clean up and recharge buffer. 
*/ if (rbuf->m_head) { m_free(rbuf->m_head); rbuf->m_head = NULL; } return; } #ifndef __NO_STRICT_ALIGNMENT /* * When jumbo frames are enabled we should realign entire payload on * architectures with strict alignment. This is a serious design mistake of the 8254x * as it nullifies DMA operations. 8254x just allows RX buffer size to be * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its * payload. On architectures without strict alignment restrictions 8254x still * performs unaligned memory access which would reduce the performance too. * To avoid copying over an entire frame to align, we allocate a new mbuf and * copy ethernet header to the new mbuf. The new mbuf is prepended into the * existing mbuf chain. * * Be aware, best performance of the 8254x is achieved only when jumbo frame is * not used at all on architectures with strict alignment. */ static int em_fixup_rx(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct mbuf *m, *n; int error; error = 0; m = rxr->fmp; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; } else { MGETHDR(n, M_NOWAIT, MT_DATA); if (n != NULL) { bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; rxr->fmp = n; } else { adapter->dropped_pkts++; m_freem(rxr->fmp); rxr->fmp = NULL; error = ENOMEM; } } return (error); } #endif static void em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf) { rxd->read.buffer_addr = htole64(rxbuf->paddr); /* DD bits must be cleared */ rxd->wb.upper.status_error = 0; } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of the checksum so that the stack * doesn't spend time verifying the checksum.
* *********************************************************************/ static void em_receive_checksum(uint32_t status, struct mbuf *mp) { mp->m_pkthdr.csum_flags = 0; /* Ignore Checksum bit is set */ if (status & E1000_RXD_STAT_IXSM) return; /* If the IP checksum exists and there is no IP Checksum error */ if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) == E1000_RXD_STAT_IPCS) { mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); } /* TCP or UDP checksum */ if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) == E1000_RXD_STAT_TCPCS) { mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data = htons(0xffff); } if (status & E1000_RXD_STAT_UDPCS) { mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data = htons(0xffff); } } /* * This routine is run via an vlan * config EVENT */ static void em_register_vlan(void *arg, if_t ifp, u16 vtag) { struct adapter *adapter = if_getsoftc(ifp); u32 index, bit; if ((void*)adapter != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */ return; EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } /* * This routine is run via an vlan * unconfig EVENT */ static void em_unregister_vlan(void *arg, if_t ifp, u16 vtag) { struct adapter *adapter = if_getsoftc(ifp); u32 index, bit; if (adapter != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } static void em_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; /* ** We get here thru init_locked, meaning ** a soft reset, this has already cleared ** the VFTA and other state, so if there ** have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* ** A soft reset zero's out the VFTA, so ** we need to repopulate it now. */ for (int i = 0; i < EM_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, i, adapter->shadow_vfta[i]); reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Enable the Filter Table */ reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } static void em_enable_intr(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 ims_mask = IMS_ENABLE_MASK; if (hw->mac.type == e1000_82574) { E1000_WRITE_REG(hw, EM_EIAC, adapter->ims); ims_mask |= adapter->ims; } E1000_WRITE_REG(hw, E1000_IMS, ims_mask); } static void em_disable_intr(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; if (hw->mac.type == e1000_82574) E1000_WRITE_REG(hw, EM_EIAC, 0); E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... 
aka * to disable special hardware management features */ static void em_init_manageability(struct adapter *adapter) { /* A shared code workaround */ #define E1000_82542_MANC2H E1000_MANC2H if (adapter->has_manage) { int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; #define E1000_MNG2HOST_PORT_623 (1 << 5) #define E1000_MNG2HOST_PORT_664 (1 << 6) manc2h |= E1000_MNG2HOST_PORT_623; manc2h |= E1000_MNG2HOST_PORT_664; E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void em_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means * that the driver is loaded. For AMT version type f/w * this means that the network i/f is open. */ static void em_get_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm | E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); return; } /* * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is no longer loaded. For AMT versions of the * f/w this means that the network i/f is closed. */ static void em_release_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (!adapter->has_manage) return; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm & ~E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); return; } static int em_is_valid_ether_addr(u8 *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (FALSE); } return (TRUE); } /* ** Parse the interface capabilities with regard ** to both system management and wake-on-lan for ** later use. 
*/ static void em_get_wakeup(device_t dev) { struct adapter *adapter = device_get_softc(dev); u16 eeprom_data = 0, device_id, apme_mask; adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); apme_mask = EM_EEPROM_APME; switch (adapter->hw.mac.type) { case e1000_82573: case e1000_82583: adapter->has_amt = TRUE; /* Falls thru */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: if (adapter->hw.bus.func == 1) { e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: case e1000_pchlan: case e1000_pch2lan: apme_mask = E1000_WUC_APME; adapter->has_amt = TRUE; eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); break; default: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; } if (eeprom_data & apme_mask) adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); /* * We have the eeprom settings, now apply the special cases * where the eeprom may be wrong or the board won't support * wake on lan on a particular port */ device_id = pci_get_device(dev); switch (device_id) { case E1000_DEV_ID_82571EB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->wol = 0; break; case E1000_DEV_ID_82571EB_QUAD_COPPER: case E1000_DEV_ID_82571EB_QUAD_FIBER: case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; } return; } /* * Enable PCI Wake On Lan capability */ static void em_enable_wakeup(device_t dev) { struct adapter *adapter = device_get_softc(dev); if_t ifp = adapter->ifp; u32 pmc, ctrl, ctrl_ext, rctl; u16 status; if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0)) return; /* Advertise the wakeup capability */ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); if ((adapter->hw.mac.type == e1000_ich8lan) || (adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_ich10lan)) e1000_suspend_workarounds_ich8lan(&adapter->hw); /* Keep the laser running on Fiber adapters */ if (adapter->hw.phy.media_type == e1000_media_type_fiber || adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); } /* ** Determine type of Wakeup: note that wol ** is set with all bits on by default. 
*/ if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) adapter->wol &= ~E1000_WUFC_MAG; if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) adapter->wol &= ~E1000_WUFC_MC; else { rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); } if ((adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_pch2lan)) { if (em_enable_phy_wakeup(adapter)) return; } else { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); } if (adapter->hw.phy.type == e1000_phy_igp_3) e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); /* Request PME */ status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if (if_getcapenable(ifp) & IFCAP_WOL) status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); return; } /* ** WOL in the newer chipset interfaces (pchlan) ** require thing to be copied into the phy */ static int em_enable_phy_wakeup(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 mreg, ret = 0; u16 preg; /* copy MAC RARs to PHY RARs */ e1000_copy_rx_addrs_to_phy_ich8lan(hw); /* copy MAC MTA to PHY MTA */ for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); e1000_write_phy_reg(hw, BM_MTA(i) + 1, (u16)((mreg >> 16) & 0xFFFF)); } /* configure PHY Rx Control register */ e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); mreg = E1000_READ_REG(hw, E1000_RCTL); if (mreg & E1000_RCTL_UPE) preg |= BM_RCTL_UPE; if (mreg & E1000_RCTL_MPE) preg |= BM_RCTL_MPE; preg &= ~(BM_RCTL_MO_MASK); if (mreg & E1000_RCTL_MO_3) preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) << BM_RCTL_MO_SHIFT); if (mreg & E1000_RCTL_BAM) preg |= BM_RCTL_BAM; if (mreg & E1000_RCTL_PMCF) preg |= BM_RCTL_PMCF; mreg = E1000_READ_REG(hw, E1000_CTRL); if (mreg & E1000_CTRL_RFCE) preg |= BM_RCTL_RFCE; e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); /* enable PHY wakeup in MAC register */ E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN); E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); /* configure and enable PHY wakeup in PHY registers */ e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); /* activate PHY wakeup */ ret = hw->phy.ops.acquire(hw); if (ret) { printf("Could not acquire PHY\n"); return ret; } e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); if (ret) { printf("Could not read PHY page 769\n"); goto out; } preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); if (ret) printf("Could not set PHY Host Wakeup bit\n"); out: hw->phy.ops.release(hw); return ret; } static void em_led_func(void *arg, int onoff) { struct adapter *adapter = arg; EM_CORE_LOCK(adapter); if (onoff) { e1000_setup_led(&adapter->hw); e1000_led_on(&adapter->hw); } else { e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } EM_CORE_UNLOCK(adapter); } /* ** Disable the L0S and L1 LINK states */ static void em_disable_aspm(struct adapter *adapter) { int base, reg; u16 link_cap,link_ctrl; device_t dev = adapter->dev; switch (adapter->hw.mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: break; default: return; } if 
(pci_find_cap(dev, PCIY_EXPRESS, &base) != 0) return; reg = base + PCIER_LINK_CAP; link_cap = pci_read_config(dev, reg, 2); if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0) return; reg = base + PCIER_LINK_CTL; link_ctrl = pci_read_config(dev, reg, 2); link_ctrl &= ~PCIEM_LINK_CTL_ASPMC; pci_write_config(dev, reg, link_ctrl, 2); return; } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void em_update_stats_counters(struct adapter *adapter) { if(adapter->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); } adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC); adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. 
*/ /* Both registers clear on the read of the high dword */ adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); /* Interrupt Counts */ adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC); adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC); adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC); adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC); adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC); adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC); adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC); adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC); adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC); if (adapter->hw.mac.type >= e1000_82543) { adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, E1000_RXERRC); adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, E1000_TNCRS); adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, E1000_CEXTERR); adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, E1000_TSCTC); adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, E1000_TSCTFC); } } static uint64_t em_get_counter(if_t ifp, ift_counter cnt) { struct adapter *adapter; adapter = if_getsoftc(ifp); switch (cnt) { case IFCOUNTER_COLLISIONS: return (adapter->stats.colc); case IFCOUNTER_IERRORS: return (adapter->dropped_pkts + adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc + adapter->stats.mpc + adapter->stats.cexterr); case IFCOUNTER_OERRORS: return (adapter->stats.ecol + adapter->stats.latecol + adapter->watchdog_events); default: return (if_get_counter_default(ifp, cnt)); } } /* Export a single 32-bit register via a read-only sysctl. */ static int em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* * Add sysctl variables, one per statistic, to the system. 
*/ static void em_add_hw_stats(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = &adapter->stats; struct sysctl_oid *stat_node, *queue_node, *int_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSIX IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", CTLFLAG_RD, &adapter->mbuf_defrag_failed, "Defragmenting mbuf chain failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL, em_sysctl_reg_handler, "IU", "Device Control Register"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL, em_sysctl_reg_handler, "IU", "Receiver Control Register"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq", CTLFLAG_RD, &txr->tx_irq, "Queue MSI-X Transmit Interrupts"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txr->no_desc_avail, "Queue No Descriptor Available"); snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq", CTLFLAG_RD, &rxr->rx_irq, "Queue MSI-X Receive Interrupts"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, 
OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &adapter->stats.symerrs, "Symbol Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &adapter->stats.sec, "Sequence Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &adapter->stats.dc, "Defer Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &adapter->stats.mpc, "Missed Packets"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &adapter->stats.rnbc, "Receive No Buffers"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &adapter->stats.ruc, "Receive Undersize"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &adapter->stats.rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &adapter->stats.roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &adapter->stats.rjc, "Recevied Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &adapter->stats.rxerrc, "Receive Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &adapter->stats.crcerrs, "CRC errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &adapter->stats.algnerrc, "Alignment Errors"); /* On 82575 these are collision counts */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &adapter->stats.cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &adapter->stats.xonrxc, "XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &adapter->stats.xontxc, "XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &adapter->stats.xoffrxc, "XOFF Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &adapter->stats.xofftxc, "XOFF Transmitted"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &adapter->stats.tpr, "Total Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &adapter->stats.gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &adapter->stats.prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &adapter->stats.prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &adapter->stats.prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &adapter->stats.prc511, "256-511 byte frames 
received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &adapter->stats.prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &adapter->stats.gorc, "Good Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &adapter->stats.gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &adapter->stats.tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &adapter->stats.gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &adapter->stats.bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &adapter->stats.mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &adapter->stats.ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &adapter->stats.ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &adapter->stats.ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &adapter->stats.ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &adapter->stats.ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &adapter->stats.tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &adapter->stats.tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &adapter->stats.iac, "Interrupt Assertion Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", CTLFLAG_RD, &adapter->stats.icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &adapter->stats.icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &adapter->stats.ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &adapter->stats.ictxatc, "Interrupt Cause Tx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &adapter->stats.ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &adapter->stats.ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &adapter->stats.icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &adapter->stats.icrxoc, "Interrupt Cause Receiver Overrun Count"); } 
/********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. * **********************************************************************/ static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. */ if (result == 1) em_print_nvm_info(adapter); return (error); } static void em_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* Its a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { struct em_int_delay_info *info; struct adapter *adapter; u32 regval; int error, usecs, ticks; info = (struct em_int_delay_info *)arg1; usecs = info->value; error = sysctl_handle_int(oidp, &usecs, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); if (info->offset == E1000_ITR) /* units are 256ns here */ ticks *= 4; adapter = info->adapter; EM_CORE_LOCK(adapter); regval = E1000_READ_OFFSET(&adapter->hw, info->offset); regval = (regval & ~0xffff) | (ticks & 0xffff); /* Handle a few special cases. */ switch (info->offset) { case E1000_RDTR: break; case E1000_TIDV: if (ticks == 0) { adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; /* Don't write 0 into the TIDV register. */ regval++; } else adapter->txd_cmd |= E1000_TXD_CMD_IDE; break; } E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); EM_CORE_UNLOCK(adapter); return (0); } static void em_add_int_delay_sysctl(struct adapter *adapter, const char *name, const char *description, struct em_int_delay_info *info, int offset, int value) { info->adapter = adapter; info->offset = offset; info->value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, em_sysctl_int_delay, "I", description); } static void em_set_sysctl_value(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLFLAG_RW, limit, value, description); } /* ** Set flow control using sysctl: ** Flow control values: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ static int em_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error; static int input = 3; /* default is full */ struct adapter *adapter = (struct adapter *) arg1; error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (input == adapter->fc) /* no change? 
*/ return (error); switch (input) { case e1000_fc_rx_pause: case e1000_fc_tx_pause: case e1000_fc_full: case e1000_fc_none: adapter->hw.fc.requested_mode = input; adapter->fc = input; break; default: /* Do nothing */ return (error); } adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; e1000_force_mac_fc(&adapter->hw); return (error); } /* ** Manage Energy Efficient Ethernet: ** Control values: ** 0/1 - enabled/disabled */ static int em_sysctl_eee(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error, value; value = adapter->hw.dev_spec.ich8lan.eee_disable; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); EM_CORE_LOCK(adapter); adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0); em_init_locked(adapter); EM_CORE_UNLOCK(adapter); return (0); } static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { adapter = (struct adapter *)arg1; em_print_debug_info(adapter); } return (error); } /* ** This routine is meant to be fluid, add whatever is ** needed for debugging a problem. -jfv */ static void em_print_debug_info(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) printf("Interface is RUNNING "); else printf("Interface is NOT RUNNING\n"); if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE) printf("and INACTIVE\n"); else printf("and ACTIVE\n"); for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { device_printf(dev, "TX Queue %d ------\n", i); device_printf(dev, "hw tdh = %d, hw tdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_TDH(i)), E1000_READ_REG(&adapter->hw, E1000_TDT(i))); device_printf(dev, "Tx Queue Status = %d\n", txr->busy); device_printf(dev, "TX descriptors avail = %d\n", txr->tx_avail); device_printf(dev, "Tx Descriptors avail failure = %ld\n", txr->no_desc_avail); device_printf(dev, "RX Queue %d ------\n", i); device_printf(dev, "hw rdh = %d, hw rdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_RDH(i)), E1000_READ_REG(&adapter->hw, E1000_RDT(i))); device_printf(dev, "RX discarded packets = %ld\n", rxr->rx_discarded); device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check); device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh); } } #ifdef EM_MULTIQUEUE /* * 82574 only: * Write a new value to the EEPROM increasing the number of MSIX * vectors from 3 to 5, for proper multiqueue support. 
*/ static void em_enable_vectors_82574(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; device_t dev = adapter->dev; u16 edata; e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); printf("Current cap: %#06x\n", edata); if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) { device_printf(dev, "Writing to eeprom: increasing " "reported MSIX vectors from 3 to 5...\n"); edata &= ~(EM_NVM_MSIX_N_MASK); edata |= 4 << EM_NVM_MSIX_N_SHIFT; e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); e1000_update_nvm_checksum(hw); device_printf(dev, "Writing to eeprom: done\n"); } } #endif #ifdef DDB DB_COMMAND(em_reset_dev, em_ddb_reset_dev) { devclass_t dc; int max_em; dc = devclass_find("em"); max_em = devclass_get_maxunit(dc); for (int index = 0; index < (max_em - 1); index++) { device_t dev; dev = devclass_get_device(dc, index); if (device_get_driver(dev) == &em_driver) { struct adapter *adapter = device_get_softc(dev); EM_CORE_LOCK(adapter); em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } } } DB_COMMAND(em_dump_queue, em_ddb_dump_queue) { devclass_t dc; int max_em; dc = devclass_find("em"); max_em = devclass_get_maxunit(dc); for (int index = 0; index < (max_em - 1); index++) { device_t dev; dev = devclass_get_device(dc, index); if (device_get_driver(dev) == &em_driver) em_print_debug_info(device_get_softc(dev)); } } #endif Index: user/alc/PQ_LAUNDRY/sys/dev/gpio/gpioled.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/gpio/gpioled.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/gpio/gpioled.c (revision 303642) @@ -1,249 +1,254 @@ /*- * Copyright (c) 2009 Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #endif #include #include #include "gpiobus_if.h" /* * Only one pin for led */ #define GPIOLED_PIN 0 #define GPIOLED_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define GPIOLED_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) #define GPIOLED_LOCK_INIT(_sc) mtx_init(&(_sc)->sc_mtx, \ device_get_nameunit((_sc)->sc_dev), "gpioled", MTX_DEF) #define GPIOLED_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->sc_mtx) struct gpioled_softc { device_t sc_dev; device_t sc_busdev; struct mtx sc_mtx; struct cdev *sc_leddev; + int sc_invert; }; static void gpioled_control(void *, int); static int gpioled_probe(device_t); static int gpioled_attach(device_t); static int gpioled_detach(device_t); static void gpioled_control(void *priv, int onoff) { struct gpioled_softc *sc; sc = (struct gpioled_softc *)priv; GPIOLED_LOCK(sc); if (GPIOBUS_PIN_SETFLAGS(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN, GPIO_PIN_OUTPUT) == 0) { + if (sc->sc_invert) + onoff = !onoff; GPIOBUS_PIN_SET(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN, onoff ? GPIO_PIN_HIGH : GPIO_PIN_LOW); } GPIOLED_UNLOCK(sc); } #ifdef FDT static void gpioled_identify(driver_t *driver, device_t bus) { phandle_t child, leds, root; root = OF_finddevice("/"); if (root == 0) return; for (leds = OF_child(root); leds != 0; leds = OF_peer(leds)) { if (!fdt_is_compatible_strict(leds, "gpio-leds")) continue; /* Traverse the 'gpio-leds' node and add its children. */ for (child = OF_child(leds); child != 0; child = OF_peer(child)) { if (!OF_hasprop(child, "gpios")) continue; if (ofw_gpiobus_add_fdt_child(bus, driver->name, child) == NULL) continue; } } } #endif static int gpioled_probe(device_t dev) { #ifdef FDT int match; phandle_t node; char *compat; /* * We can match against our own node compatible string and also against * our parent node compatible string. The first is normally used to * describe leds on a gpiobus and the later when there is a common node * compatible with 'gpio-leds' which is used to concentrate all the * leds nodes on the dts. 
*/ match = 0; if (ofw_bus_is_compatible(dev, "gpioled")) match = 1; if (match == 0) { if ((node = ofw_bus_get_node(dev)) == -1) return (ENXIO); if ((node = OF_parent(node)) == -1) return (ENXIO); if (OF_getprop_alloc(node, "compatible", 1, (void **)&compat) == -1) return (ENXIO); if (strcasecmp(compat, "gpio-leds") == 0) match = 1; OF_prop_free(compat); } if (match == 0) return (ENXIO); #endif device_set_desc(dev, "GPIO led"); return (BUS_PROBE_DEFAULT); } static int gpioled_attach(device_t dev) { struct gpioled_softc *sc; int state; #ifdef FDT phandle_t node; char *default_state; char *name; #else const char *name; #endif sc = device_get_softc(dev); sc->sc_dev = dev; sc->sc_busdev = device_get_parent(dev); GPIOLED_LOCK_INIT(sc); state = 0; #ifdef FDT if ((node = ofw_bus_get_node(dev)) == -1) return (ENXIO); if (OF_getprop_alloc(node, "default-state", sizeof(char), (void **)&default_state) != -1) { if (strcasecmp(default_state, "on") == 0) state = 1; else if (strcasecmp(default_state, "off") == 0) state = 0; else if (strcasecmp(default_state, "keep") == 0) state = -1; else { device_printf(dev, "unknown value for default-state in FDT\n"); } OF_prop_free(default_state); } name = NULL; if (OF_getprop_alloc(node, "label", 1, (void **)&name) == -1) OF_getprop_alloc(node, "name", 1, (void **)&name); #else if (resource_string_value(device_get_name(dev), device_get_unit(dev), "name", &name)) name = NULL; + resource_int_value(device_get_name(dev), + device_get_unit(dev), "invert", &sc->sc_invert); #endif sc->sc_leddev = led_create_state(gpioled_control, sc, name ? name : device_get_nameunit(dev), state); #ifdef FDT if (name != NULL) OF_prop_free(name); #endif return (0); } static int gpioled_detach(device_t dev) { struct gpioled_softc *sc; sc = device_get_softc(dev); if (sc->sc_leddev) { led_destroy(sc->sc_leddev); sc->sc_leddev = NULL; } GPIOLED_LOCK_DESTROY(sc); return (0); } static devclass_t gpioled_devclass; static device_method_t gpioled_methods[] = { /* Device interface */ #ifdef FDT DEVMETHOD(device_identify, gpioled_identify), #endif DEVMETHOD(device_probe, gpioled_probe), DEVMETHOD(device_attach, gpioled_attach), DEVMETHOD(device_detach, gpioled_detach), DEVMETHOD_END }; static driver_t gpioled_driver = { "gpioled", gpioled_methods, sizeof(struct gpioled_softc), }; DRIVER_MODULE(gpioled, gpiobus, gpioled_driver, gpioled_devclass, 0, 0); MODULE_DEPEND(gpioled, gpiobus, 1, 1, 1); Index: user/alc/PQ_LAUNDRY/sys/dev/hyperv/include/vmbus.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/hyperv/include/vmbus.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/hyperv/include/vmbus.h (revision 303642) @@ -1,162 +1,159 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMBUS_H_ #define _VMBUS_H_ #include #include /* * VMBUS version is 32 bit, upper 16 bit for major_number and lower * 16 bit for minor_number. * * 0.13 -- Windows Server 2008 * 1.1 -- Windows 7 * 2.4 -- Windows 8 * 3.0 -- Windows 8.1 */ #define VMBUS_VERSION_WS2008 ((0 << 16) | (13)) #define VMBUS_VERSION_WIN7 ((1 << 16) | (1)) #define VMBUS_VERSION_WIN8 ((2 << 16) | (4)) #define VMBUS_VERSION_WIN8_1 ((3 << 16) | (0)) #define VMBUS_VERSION_MAJOR(ver) (((uint32_t)(ver)) >> 16) #define VMBUS_VERSION_MINOR(ver) (((uint32_t)(ver)) & 0xffff) /* * GPA stuffs. */ struct vmbus_gpa_range { uint32_t gpa_len; uint32_t gpa_ofs; uint64_t gpa_page[0]; } __packed; /* This is actually vmbus_gpa_range.gpa_page[1] */ struct vmbus_gpa { uint32_t gpa_len; uint32_t gpa_ofs; uint64_t gpa_page; } __packed; #define VMBUS_CHANPKT_SIZE_SHIFT 3 #define VMBUS_CHANPKT_GETLEN(pktlen) \ (((int)(pktlen)) << VMBUS_CHANPKT_SIZE_SHIFT) struct vmbus_chanpkt_hdr { uint16_t cph_type; /* VMBUS_CHANPKT_TYPE_ */ uint16_t cph_hlen; /* header len, in 8 bytes */ uint16_t cph_tlen; /* total len, in 8 bytes */ uint16_t cph_flags; /* VMBUS_CHANPKT_FLAG_ */ uint64_t cph_xactid; } __packed; #define VMBUS_CHANPKT_TYPE_INBAND 0x0006 #define VMBUS_CHANPKT_TYPE_RXBUF 0x0007 #define VMBUS_CHANPKT_TYPE_GPA 0x0009 #define VMBUS_CHANPKT_TYPE_COMP 0x000b #define VMBUS_CHANPKT_FLAG_RC 0x0001 /* report completion */ #define VMBUS_CHANPKT_CONST_DATA(pkt) \ (const void *)((const uint8_t *)(pkt) + \ VMBUS_CHANPKT_GETLEN((pkt)->cph_hlen)) struct vmbus_rxbuf_desc { uint32_t rb_len; uint32_t rb_ofs; } __packed; struct vmbus_chanpkt_rxbuf { struct vmbus_chanpkt_hdr cp_hdr; uint16_t cp_rxbuf_id; uint16_t cp_rsvd; uint32_t cp_rxbuf_cnt; struct vmbus_rxbuf_desc cp_rxbuf[]; } __packed; -#define VMBUS_CHAN_SGLIST_MAX 32 -#define VMBUS_CHAN_PRPLIST_MAX 32 - struct vmbus_channel; struct hyperv_guid; typedef void (*vmbus_chan_callback_t)(struct vmbus_channel *, void *); static __inline struct vmbus_channel * vmbus_get_channel(device_t dev) { return device_get_ivars(dev); } int vmbus_chan_open(struct vmbus_channel *chan, int txbr_size, int rxbr_size, const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg); void vmbus_chan_close(struct vmbus_channel *chan); int vmbus_chan_gpadl_connect(struct vmbus_channel *chan, bus_addr_t paddr, int size, uint32_t *gpadl); int vmbus_chan_gpadl_disconnect(struct vmbus_channel *chan, uint32_t gpadl); void vmbus_chan_cpu_set(struct vmbus_channel *chan, int cpu); void vmbus_chan_cpu_rr(struct vmbus_channel *chan); struct vmbus_channel * vmbus_chan_cpu2chan(struct vmbus_channel *chan, int cpu); void vmbus_chan_set_readbatch(struct vmbus_channel *chan, bool on); struct vmbus_channel ** vmbus_subchan_get(struct vmbus_channel *pri_chan, int subchan_cnt); void vmbus_subchan_rel(struct vmbus_channel **subchan, int subchan_cnt); void vmbus_subchan_drain(struct vmbus_channel *pri_chan); int vmbus_chan_recv(struct vmbus_channel *chan, void *data, int *dlen, uint64_t *xactid); int vmbus_chan_recv_pkt(struct 
vmbus_channel *chan, struct vmbus_chanpkt_hdr *pkt, int *pktlen); int vmbus_chan_send(struct vmbus_channel *chan, uint16_t type, uint16_t flags, void *data, int dlen, uint64_t xactid); int vmbus_chan_send_sglist(struct vmbus_channel *chan, struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid); int vmbus_chan_send_prplist(struct vmbus_channel *chan, struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen, uint64_t xactid); uint32_t vmbus_chan_id(const struct vmbus_channel *chan); uint32_t vmbus_chan_subidx(const struct vmbus_channel *chan); bool vmbus_chan_is_primary(const struct vmbus_channel *chan); const struct hyperv_guid * vmbus_chan_guid_inst(const struct vmbus_channel *chan); #endif /* !_VMBUS_H_ */ Index: user/alc/PQ_LAUNDRY/sys/dev/hyperv/netvsc/hv_net_vsc.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 303642) @@ -1,1283 +1,1284 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ /* * HyperV vmbus (virtual machine bus) network VSC (virtual services client) * header file * * (Updated from unencumbered NvspProtocol.h) */ #ifndef __HV_NET_VSC_H__ #define __HV_NET_VSC_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define HN_USE_TXDESC_BUFRING MALLOC_DECLARE(M_NETVSC); #define NVSP_INVALID_PROTOCOL_VERSION (0xFFFFFFFF) #define NVSP_PROTOCOL_VERSION_1 2 #define NVSP_PROTOCOL_VERSION_2 0x30002 #define NVSP_PROTOCOL_VERSION_4 0x40000 #define NVSP_PROTOCOL_VERSION_5 0x50000 #define NVSP_MIN_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_1) #define NVSP_MAX_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_2) #define NVSP_PROTOCOL_VERSION_CURRENT NVSP_PROTOCOL_VERSION_2 #define VERSION_4_OFFLOAD_SIZE 22 #define NVSP_OPERATIONAL_STATUS_OK (0x00000000) #define NVSP_OPERATIONAL_STATUS_DEGRADED (0x00000001) #define NVSP_OPERATIONAL_STATUS_NONRECOVERABLE (0x00000002) #define NVSP_OPERATIONAL_STATUS_NO_CONTACT (0x00000003) #define NVSP_OPERATIONAL_STATUS_LOST_COMMUNICATION (0x00000004) /* * Maximun number of transfer pages (packets) the VSP will use on a receive */ #define NVSP_MAX_PACKETS_PER_RECEIVE 375 /* vRSS stuff */ #define RNDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88 #define RNDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89 #define RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2 #define RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2 struct rndis_obj_header { uint8_t type; uint8_t rev; uint16_t size; } __packed; /* rndis_recv_scale_cap/cap_flag */ #define RNDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000 #define RNDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000 #define RNDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000 #define RNDIS_RSS_CAPS_USING_MSI_X 0x08000000 #define RNDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000 #define RNDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400 /* RNDIS_RECEIVE_SCALE_CAPABILITIES */ struct rndis_recv_scale_cap { struct rndis_obj_header hdr; uint32_t cap_flag; uint32_t num_int_msg; uint32_t num_recv_que; uint16_t num_indirect_tabent; } __packed; /* rndis_recv_scale_param flags */ #define RNDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001 #define RNDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002 #define RNDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004 #define RNDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008 #define RNDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010 /* Hash info bits */ #define RNDIS_HASH_FUNC_TOEPLITZ 0x00000001 #define RNDIS_HASH_IPV4 0x00000100 #define RNDIS_HASH_TCP_IPV4 0x00000200 #define RNDIS_HASH_IPV6 0x00000400 #define RNDIS_HASH_IPV6_EX 0x00000800 #define RNDIS_HASH_TCP_IPV6 0x00001000 #define RNDIS_HASH_TCP_IPV6_EX 0x00002000 #define RNDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4) #define RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40 #define ITAB_NUM 128 #define HASH_KEYLEN RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 /* RNDIS_RECEIVE_SCALE_PARAMETERS */ typedef struct rndis_recv_scale_param_ { struct rndis_obj_header hdr; /* Qualifies the rest of the information */ uint16_t flag; /* The base CPU number to do receive processing. 
not used */ uint16_t base_cpu_number; /* This describes the hash function and type being enabled */ uint32_t hashinfo; /* The size of indirection table array */ uint16_t indirect_tabsize; /* The offset of the indirection table from the beginning of this * structure */ uint32_t indirect_taboffset; /* The size of the hash secret key */ uint16_t hashkey_size; /* The offset of the secret key from the beginning of this structure */ uint32_t hashkey_offset; uint32_t processor_masks_offset; uint32_t num_processor_masks; uint32_t processor_masks_entry_size; } rndis_recv_scale_param; typedef enum nvsp_msg_type_ { nvsp_msg_type_none = 0, /* * Init Messages */ nvsp_msg_type_init = 1, nvsp_msg_type_init_complete = 2, nvsp_version_msg_start = 100, /* * Version 1 Messages */ nvsp_msg_1_type_send_ndis_vers = nvsp_version_msg_start, nvsp_msg_1_type_send_rx_buf, nvsp_msg_1_type_send_rx_buf_complete, nvsp_msg_1_type_revoke_rx_buf, nvsp_msg_1_type_send_send_buf, nvsp_msg_1_type_send_send_buf_complete, nvsp_msg_1_type_revoke_send_buf, nvsp_msg_1_type_send_rndis_pkt, nvsp_msg_1_type_send_rndis_pkt_complete, /* * Version 2 Messages */ nvsp_msg_2_type_send_chimney_delegated_buf, nvsp_msg_2_type_send_chimney_delegated_buf_complete, nvsp_msg_2_type_revoke_chimney_delegated_buf, nvsp_msg_2_type_resume_chimney_rx_indication, nvsp_msg_2_type_terminate_chimney, nvsp_msg_2_type_terminate_chimney_complete, nvsp_msg_2_type_indicate_chimney_event, nvsp_msg_2_type_send_chimney_packet, nvsp_msg_2_type_send_chimney_packet_complete, nvsp_msg_2_type_post_chimney_rx_request, nvsp_msg_2_type_post_chimney_rx_request_complete, nvsp_msg_2_type_alloc_rx_buf, nvsp_msg_2_type_alloc_rx_buf_complete, nvsp_msg_2_type_free_rx_buf, nvsp_msg_2_send_vmq_rndis_pkt, nvsp_msg_2_send_vmq_rndis_pkt_complete, nvsp_msg_2_type_send_ndis_config, nvsp_msg_2_type_alloc_chimney_handle, nvsp_msg_2_type_alloc_chimney_handle_complete, nvsp_msg2_max = nvsp_msg_2_type_alloc_chimney_handle_complete, /* * Version 4 Messages */ nvsp_msg4_type_send_vf_association, nvsp_msg4_type_switch_data_path, nvsp_msg4_type_uplink_connect_state_deprecated, nvsp_msg4_max = nvsp_msg4_type_uplink_connect_state_deprecated, /* * Version 5 Messages */ nvsp_msg5_type_oid_query_ex, nvsp_msg5_type_oid_query_ex_comp, nvsp_msg5_type_subchannel, nvsp_msg5_type_send_indirection_table, nvsp_msg5_max = nvsp_msg5_type_send_indirection_table, } nvsp_msg_type; typedef enum nvsp_status_ { nvsp_status_none = 0, nvsp_status_success, nvsp_status_failure, /* Deprecated */ nvsp_status_prot_vers_range_too_new, /* Deprecated */ nvsp_status_prot_vers_range_too_old, nvsp_status_invalid_rndis_pkt, nvsp_status_busy, nvsp_status_max, } nvsp_status; typedef struct nvsp_msg_hdr_ { uint32_t msg_type; } __packed nvsp_msg_hdr; /* * Init Messages */ /* * This message is used by the VSC to initialize the channel * after the channels has been opened. This message should * never include anything other then versioning (i.e. this * message will be the same for ever). * * Forever is a long time. The values have been redefined * in Win7 to indicate major and minor protocol version * number. */ typedef struct nvsp_msg_init_ { union { struct { uint16_t minor_protocol_version; uint16_t major_protocol_version; } s; /* Formerly min_protocol_version */ uint32_t protocol_version; } p1; /* Formerly max_protocol_version */ uint32_t protocol_version_2; } __packed nvsp_msg_init; /* * This message is used by the VSP to complete the initialization * of the channel. 
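/*
 * Illustrative sketch, not from the original header: the 32-bit NVSP
 * protocol version carries the major number in the high 16 bits and the
 * minor number in the low 16 bits, matching the union layout above on the
 * little-endian machines Hyper-V guests run on.  For example,
 * NVSP_PROTOCOL_VERSION_2 (0x30002) decodes to major 3, minor 2, and
 * NVSP_PROTOCOL_VERSION_5 (0x50000) to major 5, minor 0.
 */
static __inline uint16_t
example_nvsp_ver_major(uint32_t proto_ver)
{
	return (proto_ver >> 16);
}

static __inline uint16_t
example_nvsp_ver_minor(uint32_t proto_ver)
{
	return (proto_ver & 0xffff);
}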
This message should never include anything other * then versioning (i.e. this message will be the same forever). */ typedef struct nvsp_msg_init_complete_ { /* Deprecated */ uint32_t negotiated_prot_vers; uint32_t max_mdl_chain_len; uint32_t status; } __packed nvsp_msg_init_complete; typedef union nvsp_msg_init_uber_ { nvsp_msg_init init; nvsp_msg_init_complete init_compl; } __packed nvsp_msg_init_uber; /* * Version 1 Messages */ /* * This message is used by the VSC to send the NDIS version * to the VSP. The VSP can use this information when handling * OIDs sent by the VSC. */ typedef struct nvsp_1_msg_send_ndis_version_ { uint32_t ndis_major_vers; /* Deprecated */ uint32_t ndis_minor_vers; } __packed nvsp_1_msg_send_ndis_version; /* * This message is used by the VSC to send a receive buffer * to the VSP. The VSP can then use the receive buffer to * send data to the VSC. */ typedef struct nvsp_1_msg_send_rx_buf_ { uint32_t gpadl_handle; uint16_t id; } __packed nvsp_1_msg_send_rx_buf; typedef struct nvsp_1_rx_buf_section_ { uint32_t offset; uint32_t sub_allocation_size; uint32_t num_sub_allocations; uint32_t end_offset; } __packed nvsp_1_rx_buf_section; /* * This message is used by the VSP to acknowledge a receive * buffer send by the VSC. This message must be sent by the * VSP before the VSP uses the receive buffer. */ typedef struct nvsp_1_msg_send_rx_buf_complete_ { uint32_t status; uint32_t num_sections; /* * The receive buffer is split into two parts, a large * suballocation section and a small suballocation * section. These sections are then suballocated by a * certain size. * * For example, the following break up of the receive * buffer has 6 large suballocations and 10 small * suballocations. * * | Large Section | | Small Section | * ------------------------------------------------------------ * | | | | | | | | | | | | | | | | | | * | | * LargeOffset SmallOffset */ nvsp_1_rx_buf_section sections[1]; } __packed nvsp_1_msg_send_rx_buf_complete; /* * This message is sent by the VSC to revoke the receive buffer. * After the VSP completes this transaction, the VSP should never * use the receive buffer again. */ typedef struct nvsp_1_msg_revoke_rx_buf_ { uint16_t id; } __packed nvsp_1_msg_revoke_rx_buf; /* * This message is used by the VSC to send a send buffer * to the VSP. The VSC can then use the send buffer to * send data to the VSP. */ typedef struct nvsp_1_msg_send_send_buf_ { uint32_t gpadl_handle; uint16_t id; } __packed nvsp_1_msg_send_send_buf; /* * This message is used by the VSP to acknowledge a send * buffer sent by the VSC. This message must be sent by the * VSP before the VSP uses the sent buffer. */ typedef struct nvsp_1_msg_send_send_buf_complete_ { uint32_t status; /* * The VSC gets to choose the size of the send buffer and * the VSP gets to choose the sections size of the buffer. * This was done to enable dynamic reconfigurations when * the cost of GPA-direct buffers decreases. */ uint32_t section_size; } __packed nvsp_1_msg_send_send_buf_complete; /* * This message is sent by the VSC to revoke the send buffer. * After the VSP completes this transaction, the vsp should never * use the send buffer again. */ typedef struct nvsp_1_msg_revoke_send_buf_ { uint16_t id; } __packed nvsp_1_msg_revoke_send_buf; /* * This message is used by both the VSP and the VSC to send * an RNDIS message to the opposite channel endpoint. */ typedef struct nvsp_1_msg_send_rndis_pkt_ { /* * This field is specified by RNIDS. They assume there's * two different channels of communication. 
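/*
 * Illustrative sketch, not from the original header: each
 * nvsp_1_rx_buf_section describes a run of equally sized sub-allocations
 * inside the receive buffer, so the address of sub-allocation 'i' is the
 * buffer base plus the section offset plus i times the sub-allocation
 * size.  'rx_buf' stands for the GPADL-backed receive buffer mapped by
 * the VSC (hypothetical parameter, not a name from this header).
 */
static __inline void *
example_rx_sub_allocation(void *rx_buf, const nvsp_1_rx_buf_section *sec,
    uint32_t i)
{
	return ((uint8_t *)rx_buf + sec->offset +
	    (uint64_t)i * sec->sub_allocation_size);
}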
However, * the Network VSP only has one. Therefore, the channel * travels with the RNDIS packet. */ uint32_t chan_type; /* * This field is used to send part or all of the data * through a send buffer. This values specifies an * index into the send buffer. If the index is * 0xFFFFFFFF, then the send buffer is not being used * and all of the data was sent through other VMBus * mechanisms. */ uint32_t send_buf_section_idx; uint32_t send_buf_section_size; } __packed nvsp_1_msg_send_rndis_pkt; /* * This message is used by both the VSP and the VSC to complete * a RNDIS message to the opposite channel endpoint. At this * point, the initiator of this message cannot use any resources * associated with the original RNDIS packet. */ typedef struct nvsp_1_msg_send_rndis_pkt_complete_ { uint32_t status; } __packed nvsp_1_msg_send_rndis_pkt_complete; /* * Version 2 Messages */ /* * This message is used by the VSC to send the NDIS version * to the VSP. The VSP can use this information when handling * OIDs sent by the VSC. */ typedef struct nvsp_2_netvsc_capabilities_ { union { uint64_t as_uint64; struct { uint64_t vmq : 1; uint64_t chimney : 1; uint64_t sriov : 1; uint64_t ieee8021q : 1; uint64_t correlationid : 1; uint64_t teaming : 1; } u2; } u1; } __packed nvsp_2_netvsc_capabilities; typedef struct nvsp_2_msg_send_ndis_config_ { uint32_t mtu; uint32_t reserved; nvsp_2_netvsc_capabilities capabilities; } __packed nvsp_2_msg_send_ndis_config; /* * NvspMessage2TypeSendChimneyDelegatedBuffer */ typedef struct nvsp_2_msg_send_chimney_buf_ { /* * On WIN7 beta, delegated_obj_max_size is defined as a uint32_t * Since WIN7 RC, it was split into two uint16_t. To have the same * struct layout, delegated_obj_max_size shall be the first field. */ uint16_t delegated_obj_max_size; /* * The revision # of chimney protocol used between NVSC and NVSP. * * This revision is NOT related to the chimney revision between * NDIS protocol and miniport drivers. */ uint16_t revision; uint32_t gpadl_handle; } __packed nvsp_2_msg_send_chimney_buf; /* Unsupported chimney revision 0 (only present in WIN7 beta) */ #define NVSP_CHIMNEY_REVISION_0 0 /* WIN7 Beta Chimney QFE */ #define NVSP_CHIMNEY_REVISION_1 1 /* The chimney revision since WIN7 RC */ #define NVSP_CHIMNEY_REVISION_2 2 /* * NvspMessage2TypeSendChimneyDelegatedBufferComplete */ typedef struct nvsp_2_msg_send_chimney_buf_complete_ { uint32_t status; /* * Maximum number outstanding sends and pre-posted receives. * * NVSC should not post more than SendQuota/ReceiveQuota packets. * Otherwise, it can block the non-chimney path for an indefinite * amount of time. * (since chimney sends/receives are affected by the remote peer). * * Note: NVSP enforces the quota restrictions on a per-VMBCHANNEL * basis. It doesn't enforce the restriction separately for chimney * send/receive. If NVSC doesn't voluntarily enforce "SendQuota", * it may kill its own network connectivity. 
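/*
 * Illustrative sketch, not from the original header: send_buf_section_idx
 * is a real section index when the RNDIS message was copied into the
 * send buffer, and the 0xFFFFFFFF sentinel when the data was carried as a
 * GPA list through other VMBus mechanisms, so the receiving side can key
 * on that sentinel.
 */
static __inline int
example_uses_send_buffer(const nvsp_1_msg_send_rndis_pkt *pkt)
{
	return (pkt->send_buf_section_idx != 0xFFFFFFFF);
}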
*/ uint32_t send_quota; uint32_t rx_quota; } __packed nvsp_2_msg_send_chimney_buf_complete; /* * NvspMessage2TypeRevokeChimneyDelegatedBuffer */ typedef struct nvsp_2_msg_revoke_chimney_buf_ { uint32_t gpadl_handle; } __packed nvsp_2_msg_revoke_chimney_buf; #define NVSP_CHIMNEY_OBJECT_TYPE_NEIGHBOR 0 #define NVSP_CHIMNEY_OBJECT_TYPE_PATH4 1 #define NVSP_CHIMNEY_OBJECT_TYPE_PATH6 2 #define NVSP_CHIMNEY_OBJECT_TYPE_TCP 3 /* * NvspMessage2TypeAllocateChimneyHandle */ typedef struct nvsp_2_msg_alloc_chimney_handle_ { uint64_t vsc_context; uint32_t object_type; } __packed nvsp_2_msg_alloc_chimney_handle; /* * NvspMessage2TypeAllocateChimneyHandleComplete */ typedef struct nvsp_2_msg_alloc_chimney_handle_complete_ { uint32_t vsp_handle; } __packed nvsp_2_msg_alloc_chimney_handle_complete; /* * NvspMessage2TypeResumeChimneyRXIndication */ typedef struct nvsp_2_msg_resume_chimney_rx_indication { /* * Handle identifying the offloaded connection */ uint32_t vsp_tcp_handle; } __packed nvsp_2_msg_resume_chimney_rx_indication; #define NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_FIRST_STAGE (0x01u) #define NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_RESERVED (~(0x01u)) /* * NvspMessage2TypeTerminateChimney */ typedef struct nvsp_2_msg_terminate_chimney_ { /* * Handle identifying the offloaded object */ uint32_t vsp_handle; /* * Terminate Offload Flags * Bit 0: * When set to 0, terminate the offload at the destination NIC * Bit 1-31: Reserved, shall be zero */ uint32_t flags; union { /* * This field is valid only when bit 0 of flags is clear. * It specifies the index into the premapped delegated * object buffer. The buffer was sent through the * NvspMessage2TypeSendChimneyDelegatedBuffer * message at initialization time. * * NVSP will write the delegated state into the delegated * buffer upon upload completion. */ uint32_t index; /* * This field is valid only when bit 0 of flags is set. * * The seqence number of the most recently accepted RX * indication when VSC sets its TCP context into * "terminating" state. * * This allows NVSP to determines if there are any in-flight * RX indications for which the acceptance state is still * undefined. */ uint64_t last_accepted_rx_seq_no; } f0; } __packed nvsp_2_msg_terminate_chimney; #define NVSP_TERMINATE_CHIMNEY_COMPLETE_FLAG_DATA_CORRUPTED 0x0000001u /* * NvspMessage2TypeTerminateChimneyComplete */ typedef struct nvsp_2_msg_terminate_chimney_complete_ { uint64_t vsc_context; uint32_t flags; } __packed nvsp_2_msg_terminate_chimney_complete; /* * NvspMessage2TypeIndicateChimneyEvent */ typedef struct nvsp_2_msg_indicate_chimney_event_ { /* * When VscTcpContext is 0, event_type is an NDIS_STATUS event code * Otherwise, EventType is an TCP connection event (defined in * NdisTcpOffloadEventHandler chimney DDK document). */ uint32_t event_type; /* * When VscTcpContext is 0, EventType is an NDIS_STATUS event code * Otherwise, EventType is an TCP connection event specific information * (defined in NdisTcpOffloadEventHandler chimney DDK document). */ uint32_t event_specific_info; /* * If not 0, the event is per-TCP connection event. This field * contains the VSC's TCP context. * If 0, the event indication is global. 
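/*
 * Illustrative sketch, not from the original header: bit 0 of the
 * terminate-chimney flags selects which member of the trailing union is
 * meaningful (the delegated-buffer index when clear, the last accepted
 * RX sequence number when set), per the field comments above.
 */
static __inline int
example_terminate_chimney_first_stage(const nvsp_2_msg_terminate_chimney *msg)
{
	return ((msg->flags &
	    NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_FIRST_STAGE) != 0);
}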
*/ uint64_t vsc_tcp_context; } __packed nvsp_2_msg_indicate_chimney_event; #define NVSP_1_CHIMNEY_SEND_INVALID_OOB_INDEX 0xffffu #define NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX 0xffffffff /* * NvspMessage2TypeSendChimneyPacket */ typedef struct nvsp_2_msg_send_chimney_pkt_ { /* * Identify the TCP connection for which this chimney send is */ uint32_t vsp_tcp_handle; /* * This field is used to send part or all of the data * through a send buffer. This values specifies an * index into the send buffer. If the index is * 0xFFFF, then the send buffer is not being used * and all of the data was sent through other VMBus * mechanisms. */ uint16_t send_buf_section_index; uint16_t send_buf_section_size; /* * OOB Data Index * This an index to the OOB data buffer. If the index is 0xFFFFFFFF, * then there is no OOB data. * * This field shall be always 0xFFFFFFFF for now. It is reserved for * the future. */ uint16_t oob_data_index; /* * DisconnectFlags = 0 * Normal chimney send. See MiniportTcpOffloadSend for details. * * DisconnectFlags = TCP_DISCONNECT_GRACEFUL_CLOSE (0x01) * Graceful disconnect. See MiniportTcpOffloadDisconnect for details. * * DisconnectFlags = TCP_DISCONNECT_ABORTIVE_CLOSE (0x02) * Abortive disconnect. See MiniportTcpOffloadDisconnect for details. */ uint16_t disconnect_flags; uint32_t seq_no; } __packed nvsp_2_msg_send_chimney_pkt; /* * NvspMessage2TypeSendChimneyPacketComplete */ typedef struct nvsp_2_msg_send_chimney_pkt_complete_ { /* * The NDIS_STATUS for the chimney send */ uint32_t status; /* * Number of bytes that have been sent to the peer (and ACKed by the peer). */ uint32_t bytes_transferred; } __packed nvsp_2_msg_send_chimney_pkt_complete; #define NVSP_1_CHIMNEY_RECV_FLAG_NO_PUSH 0x0001u #define NVSP_1_CHIMNEY_RECV_INVALID_OOB_INDEX 0xffffu /* * NvspMessage2TypePostChimneyRecvRequest */ typedef struct nvsp_2_msg_post_chimney_rx_request_ { /* * Identify the TCP connection which this chimney receive request * is for. */ uint32_t vsp_tcp_handle; /* * OOB Data Index * This an index to the OOB data buffer. If the index is 0xFFFFFFFF, * then there is no OOB data. * * This field shall be always 0xFFFFFFFF for now. It is reserved for * the future. */ uint32_t oob_data_index; /* * Bit 0 * When it is set, this is a "no-push" receive. * When it is clear, this is a "push" receive. * * Bit 1-15: Reserved and shall be zero */ uint16_t flags; /* * For debugging and diagnoses purpose. * The SeqNo is per TCP connection and starts from 0. */ uint32_t seq_no; } __packed nvsp_2_msg_post_chimney_rx_request; /* * NvspMessage2TypePostChimneyRecvRequestComplete */ typedef struct nvsp_2_msg_post_chimney_rx_request_complete_ { /* * The NDIS_STATUS for the chimney send */ uint32_t status; /* * Number of bytes that have been sent to the peer (and ACKed by * the peer). 
*/ uint32_t bytes_xferred; } __packed nvsp_2_msg_post_chimney_rx_request_complete; /* * NvspMessage2TypeAllocateReceiveBuffer */ typedef struct nvsp_2_msg_alloc_rx_buf_ { /* * Allocation ID to match the allocation request and response */ uint32_t allocation_id; /* * Length of the VM shared memory receive buffer that needs to * be allocated */ uint32_t length; } __packed nvsp_2_msg_alloc_rx_buf; /* * NvspMessage2TypeAllocateReceiveBufferComplete */ typedef struct nvsp_2_msg_alloc_rx_buf_complete_ { /* * The NDIS_STATUS code for buffer allocation */ uint32_t status; /* * Allocation ID from NVSP_2_MESSAGE_ALLOCATE_RECEIVE_BUFFER */ uint32_t allocation_id; /* * GPADL handle for the allocated receive buffer */ uint32_t gpadl_handle; /* * Receive buffer ID that is further used in * NvspMessage2SendVmqRndisPacket */ uint64_t rx_buf_id; } __packed nvsp_2_msg_alloc_rx_buf_complete; /* * NvspMessage2TypeFreeReceiveBuffer */ typedef struct nvsp_2_msg_free_rx_buf_ { /* * Receive buffer ID previous returned in * NvspMessage2TypeAllocateReceiveBufferComplete message */ uint64_t rx_buf_id; } __packed nvsp_2_msg_free_rx_buf; /* * This structure is used in defining the buffers in * NVSP_2_MESSAGE_SEND_VMQ_RNDIS_PACKET structure */ typedef struct nvsp_xfer_page_range_ { /* * Specifies the ID of the receive buffer that has the buffer. This * ID can be the general receive buffer ID specified in * NvspMessage1TypeSendReceiveBuffer or it can be the shared memory * receive buffer ID allocated by the VSC and specified in * NvspMessage2TypeAllocateReceiveBufferComplete message */ uint64_t xfer_page_set_id; /* * Number of bytes */ uint32_t byte_count; /* * Offset in bytes from the beginning of the buffer */ uint32_t byte_offset; } __packed nvsp_xfer_page_range; /* * NvspMessage2SendVmqRndisPacket */ typedef struct nvsp_2_msg_send_vmq_rndis_pkt_ { /* * This field is specified by RNIDS. They assume there's * two different channels of communication. However, * the Network VSP only has one. Therefore, the channel * travels with the RNDIS packet. It must be RMC_DATA */ uint32_t channel_type; /* * Only the Range element corresponding to the RNDIS header of * the first RNDIS message in the multiple RNDIS messages sent * in one NVSP message. Information about the data portions as well * as the subsequent RNDIS messages in the same NVSP message are * embedded in the RNDIS header itself */ nvsp_xfer_page_range range; } __packed nvsp_2_msg_send_vmq_rndis_pkt; /* * This message is used by the VSC to complete * a RNDIS VMQ message to the VSP. At this point, * the initiator of this message can use any resources * associated with the original RNDIS VMQ packet. */ typedef struct nvsp_2_msg_send_vmq_rndis_pkt_complete_ { uint32_t status; } __packed nvsp_2_msg_send_vmq_rndis_pkt_complete; /* * Version 5 messages */ enum nvsp_subchannel_operation { NVSP_SUBCHANNEL_NONE = 0, NVSP_SUBCHANNE_ALLOCATE, NVSP_SUBCHANNE_MAX }; typedef struct nvsp_5_subchannel_request_ { uint32_t op; uint32_t num_subchannels; } __packed nvsp_5_subchannel_request; typedef struct nvsp_5_subchannel_complete_ { uint32_t status; /* Actual number of subchannels allocated */ uint32_t num_subchannels; } __packed nvsp_5_subchannel_complete; typedef struct nvsp_5_send_indirect_table_ { /* The number of entries in the send indirection table */ uint32_t count; /* * The offset of the send indireciton table from top of * this struct. The send indirection table tells which channel * to put the send traffic on. Each entry is a channel number. 
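/*
 * Illustrative sketch, not from the original header: a VSC asking for 'n'
 * extra channels fills an NVSP v5 subchannel request as below; the host
 * replies with an nvsp_5_subchannel_complete whose num_subchannels is the
 * count actually granted, which may be smaller than requested.  (The
 * NVSP_SUBCHANNE_ALLOCATE spelling is the enumerator defined above.)
 */
static __inline void
example_fill_subchannel_request(nvsp_5_subchannel_request *req, uint32_t n)
{
	req->op = NVSP_SUBCHANNE_ALLOCATE;
	req->num_subchannels = n;
}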
*/ uint32_t offset; } __packed nvsp_5_send_indirect_table; typedef union nvsp_1_msg_uber_ { nvsp_1_msg_send_ndis_version send_ndis_vers; nvsp_1_msg_send_rx_buf send_rx_buf; nvsp_1_msg_send_rx_buf_complete send_rx_buf_complete; nvsp_1_msg_revoke_rx_buf revoke_rx_buf; nvsp_1_msg_send_send_buf send_send_buf; nvsp_1_msg_send_send_buf_complete send_send_buf_complete; nvsp_1_msg_revoke_send_buf revoke_send_buf; nvsp_1_msg_send_rndis_pkt send_rndis_pkt; nvsp_1_msg_send_rndis_pkt_complete send_rndis_pkt_complete; } __packed nvsp_1_msg_uber; typedef union nvsp_2_msg_uber_ { nvsp_2_msg_send_ndis_config send_ndis_config; nvsp_2_msg_send_chimney_buf send_chimney_buf; nvsp_2_msg_send_chimney_buf_complete send_chimney_buf_complete; nvsp_2_msg_revoke_chimney_buf revoke_chimney_buf; nvsp_2_msg_resume_chimney_rx_indication resume_chimney_rx_indication; nvsp_2_msg_terminate_chimney terminate_chimney; nvsp_2_msg_terminate_chimney_complete terminate_chimney_complete; nvsp_2_msg_indicate_chimney_event indicate_chimney_event; nvsp_2_msg_send_chimney_pkt send_chimney_packet; nvsp_2_msg_send_chimney_pkt_complete send_chimney_packet_complete; nvsp_2_msg_post_chimney_rx_request post_chimney_rx_request; nvsp_2_msg_post_chimney_rx_request_complete post_chimney_rx_request_complete; nvsp_2_msg_alloc_rx_buf alloc_rx_buffer; nvsp_2_msg_alloc_rx_buf_complete alloc_rx_buffer_complete; nvsp_2_msg_free_rx_buf free_rx_buffer; nvsp_2_msg_send_vmq_rndis_pkt send_vmq_rndis_pkt; nvsp_2_msg_send_vmq_rndis_pkt_complete send_vmq_rndis_pkt_complete; nvsp_2_msg_alloc_chimney_handle alloc_chimney_handle; nvsp_2_msg_alloc_chimney_handle_complete alloc_chimney_handle_complete; } __packed nvsp_2_msg_uber; typedef union nvsp_5_msg_uber_ { nvsp_5_subchannel_request subchannel_request; nvsp_5_subchannel_complete subchn_complete; nvsp_5_send_indirect_table send_table; } __packed nvsp_5_msg_uber; typedef union nvsp_all_msgs_ { nvsp_msg_init_uber init_msgs; nvsp_1_msg_uber vers_1_msgs; nvsp_2_msg_uber vers_2_msgs; nvsp_5_msg_uber vers_5_msgs; } __packed nvsp_all_msgs; /* * ALL Messages */ typedef struct nvsp_msg_ { nvsp_msg_hdr hdr; nvsp_all_msgs msgs; } __packed nvsp_msg; /* * The following arguably belongs in a separate header file */ /* * Defines */ #define NETVSC_SEND_BUFFER_SIZE (1024*1024*15) /* 15M */ #define NETVSC_SEND_BUFFER_ID 0xface #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ #define NETVSC_RECEIVE_BUFFER_ID 0xcafe #define NETVSC_RECEIVE_SG_COUNT 1 /* Preallocated receive packets */ #define NETVSC_RECEIVE_PACKETLIST_COUNT 256 /* * Maximum MTU we permit to be configured for a netvsc interface. * When the code was developed, a max MTU of 12232 was tested and * proven to work. 9K is a reasonable maximum for an Ethernet. 
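/*
 * Illustrative sketch, not from the original header: the send indirection
 * table sits 'offset' bytes from the start of nvsp_5_send_indirect_table
 * and holds 'count' entries, each naming the channel to transmit on; the
 * 32-bit entry width matches the driver's vrss_send_table copy.  A lookup
 * by hash value could look like this (hypothetical helper).
 */
static __inline uint32_t
example_send_table_lookup(const nvsp_5_send_indirect_table *tbl,
    uint32_t hash)
{
	const uint32_t *ent;

	ent = (const uint32_t *)((const uint8_t *)tbl + tbl->offset);
	return (ent[hash % tbl->count]);
}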
*/ #define NETVSC_MAX_CONFIGURABLE_MTU (9 * 1024) #define NETVSC_PACKET_SIZE PAGE_SIZE #define VRSS_SEND_TABLE_SIZE 16 /* * Data types */ /* * Per netvsc channel-specific */ typedef struct netvsc_dev_ { struct hn_softc *sc; /* Send buffer allocated by us but manages by NetVSP */ void *send_buf; uint32_t send_buf_size; uint32_t send_buf_gpadl_handle; uint32_t send_section_size; uint32_t send_section_count; unsigned long bitsmap_words; unsigned long *send_section_bitsmap; /* Receive buffer allocated by us but managed by NetVSP */ void *rx_buf; uint32_t rx_buf_size; uint32_t rx_buf_gpadl_handle; uint32_t rx_section_count; nvsp_1_rx_buf_section *rx_sections; /* Used for NetVSP initialization protocol */ struct sema channel_init_sema; nvsp_msg channel_init_packet; nvsp_msg revoke_packet; /*uint8_t hw_mac_addr[ETHER_ADDR_LEN];*/ /* Holds rndis device info */ void *extension; uint8_t destroy; /* Negotiated NVSP version */ uint32_t nvsp_version; uint32_t num_channel; struct hyperv_dma rxbuf_dma; struct hyperv_dma txbuf_dma; uint32_t vrss_send_table[VRSS_SEND_TABLE_SIZE]; } netvsc_dev; struct vmbus_channel; typedef void (*pfn_on_send_rx_completion)(struct vmbus_channel *, void *); #define NETVSC_DEVICE_RING_BUFFER_SIZE (128 * PAGE_SIZE) +#define NETVSC_PACKET_MAXPAGE 32 #define NETVSC_VLAN_PRIO_MASK 0xe000 #define NETVSC_VLAN_PRIO_SHIFT 13 #define NETVSC_VLAN_VID_MASK 0x0fff #define TYPE_IPV4 2 #define TYPE_IPV6 4 #define TYPE_TCP 2 #define TYPE_UDP 4 #define TRANSPORT_TYPE_NOT_IP 0 #define TRANSPORT_TYPE_IPV4_TCP ((TYPE_IPV4 << 16) | TYPE_TCP) #define TRANSPORT_TYPE_IPV4_UDP ((TYPE_IPV4 << 16) | TYPE_UDP) #define TRANSPORT_TYPE_IPV6_TCP ((TYPE_IPV6 << 16) | TYPE_TCP) #define TRANSPORT_TYPE_IPV6_UDP ((TYPE_IPV6 << 16) | TYPE_UDP) #ifdef __LP64__ #define BITS_PER_LONG 64 #else #define BITS_PER_LONG 32 #endif typedef struct netvsc_packet_ { uint8_t is_data_pkt; /* One byte */ uint16_t vlan_tci; uint32_t status; /* Completion */ union { struct { uint64_t rx_completion_tid; void *rx_completion_context; /* This is no longer used */ pfn_on_send_rx_completion on_rx_completion; } rx; struct { uint64_t send_completion_tid; void *send_completion_context; /* Still used in netvsc and filter code */ pfn_on_send_rx_completion on_send_completion; } send; } compl; uint32_t send_buf_section_idx; uint32_t send_buf_section_size; void *rndis_mesg; uint32_t tot_data_buf_len; void *data; uint32_t gpa_cnt; - struct vmbus_gpa gpa[VMBUS_CHAN_SGLIST_MAX]; + struct vmbus_gpa gpa[NETVSC_PACKET_MAXPAGE]; } netvsc_packet; typedef struct { uint8_t mac_addr[6]; /* Assumption unsigned long */ uint8_t link_state; } netvsc_device_info; #ifndef HN_USE_TXDESC_BUFRING struct hn_txdesc; SLIST_HEAD(hn_txdesc_list, hn_txdesc); #else struct buf_ring; #endif struct hn_tx_ring; struct hn_rx_ring { struct ifnet *hn_ifp; struct hn_tx_ring *hn_txr; void *hn_rdbuf; int hn_rx_idx; /* Trust csum verification on host side */ int hn_trust_hcsum; /* HN_TRUST_HCSUM_ */ struct lro_ctrl hn_lro; u_long hn_csum_ip; u_long hn_csum_tcp; u_long hn_csum_udp; u_long hn_csum_trusted; u_long hn_lro_tried; u_long hn_small_pkts; u_long hn_pkts; u_long hn_rss_pkts; /* Rarely used stuffs */ struct sysctl_oid *hn_rx_sysctl_tree; int hn_rx_flags; } __aligned(CACHE_LINE_SIZE); #define HN_TRUST_HCSUM_IP 0x0001 #define HN_TRUST_HCSUM_TCP 0x0002 #define HN_TRUST_HCSUM_UDP 0x0004 #define HN_RX_FLAG_ATTACHED 0x1 struct hn_tx_ring { #ifndef HN_USE_TXDESC_BUFRING struct mtx hn_txlist_spin; struct hn_txdesc_list hn_txlist; #else struct buf_ring *hn_txdesc_br; #endif int 
hn_txdesc_cnt; int hn_txdesc_avail; u_short hn_has_txeof; u_short hn_txdone_cnt; int hn_sched_tx; void (*hn_txeof)(struct hn_tx_ring *); struct taskqueue *hn_tx_taskq; struct task hn_tx_task; struct task hn_txeof_task; struct buf_ring *hn_mbuf_br; int hn_oactive; int hn_tx_idx; struct mtx hn_tx_lock; struct hn_softc *hn_sc; struct vmbus_channel *hn_chan; int hn_direct_tx_size; int hn_tx_chimney_size; bus_dma_tag_t hn_tx_data_dtag; uint64_t hn_csum_assist; u_long hn_no_txdescs; u_long hn_send_failed; u_long hn_txdma_failed; u_long hn_tx_collapsed; u_long hn_tx_chimney_tried; u_long hn_tx_chimney; u_long hn_pkts; /* Rarely used stuffs */ struct hn_txdesc *hn_txdesc; bus_dma_tag_t hn_tx_rndis_dtag; struct sysctl_oid *hn_tx_sysctl_tree; int hn_tx_flags; } __aligned(CACHE_LINE_SIZE); #define HN_TX_FLAG_ATTACHED 0x1 /* * Device-specific softc structure */ typedef struct hn_softc { struct ifnet *hn_ifp; struct ifmedia hn_media; device_t hn_dev; uint8_t hn_unit; int hn_carrier; int hn_if_flags; struct mtx hn_lock; int hn_initdone; /* See hv_netvsc_drv_freebsd.c for rules on how to use */ int temp_unusable; netvsc_dev *net_dev; struct vmbus_channel *hn_prichan; int hn_rx_ring_cnt; int hn_rx_ring_inuse; struct hn_rx_ring *hn_rx_ring; int hn_tx_ring_cnt; int hn_tx_ring_inuse; struct hn_tx_ring *hn_tx_ring; int hn_cpu; int hn_tx_chimney_max; struct taskqueue *hn_tx_taskq; struct sysctl_oid *hn_tx_sysctl_tree; struct sysctl_oid *hn_rx_sysctl_tree; } hn_softc_t; /* * Externs */ extern int hv_promisc_mode; void netvsc_linkstatus_callback(struct hn_softc *sc, uint32_t status); netvsc_dev *hv_nv_on_device_add(struct hn_softc *sc, void *additional_info, struct hn_rx_ring *rxr); int hv_nv_on_device_remove(struct hn_softc *sc, boolean_t destroy_channel); int hv_nv_on_send(struct vmbus_channel *chan, netvsc_packet *pkt); int hv_nv_get_next_send_section(netvsc_dev *net_dev); void hv_nv_subchan_attach(struct vmbus_channel *chan, struct hn_rx_ring *rxr); #endif /* __HV_NET_VSC_H__ */ Index: user/alc/PQ_LAUNDRY/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 303642) @@ -1,3040 +1,3040 @@ /*- * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_net_vsc.h" #include "hv_rndis.h" #include "hv_rndis_filter.h" #include "vmbus_if.h" /* Short for Hyper-V network interface */ #define NETVSC_DEVNAME "hn" /* * It looks like offset 0 of buf is reserved to hold the softc pointer. * The sc pointer evidently not needed, and is not presently populated. * The packet offset is where the netvsc_packet starts in the buffer. 
*/ #define HV_NV_SC_PTR_OFFSET_IN_BUF 0 #define HV_NV_PACKET_OFFSET_IN_BUF 16 /* YYY should get it from the underlying channel */ #define HN_TX_DESC_CNT 512 #define HN_LROENT_CNT_DEF 128 #define HN_RING_CNT_DEF_MAX 8 #define HN_RNDIS_MSG_LEN \ (sizeof(rndis_msg) + \ RNDIS_HASHVAL_PPI_SIZE + \ RNDIS_VLAN_PPI_SIZE + \ RNDIS_TSO_PPI_SIZE + \ RNDIS_CSUM_PPI_SIZE) #define HN_RNDIS_MSG_BOUNDARY PAGE_SIZE #define HN_RNDIS_MSG_ALIGN CACHE_LINE_SIZE #define HN_TX_DATA_BOUNDARY PAGE_SIZE #define HN_TX_DATA_MAXSIZE IP_MAXPACKET #define HN_TX_DATA_SEGSIZE PAGE_SIZE #define HN_TX_DATA_SEGCNT_MAX \ - (VMBUS_CHAN_SGLIST_MAX - HV_RF_NUM_TX_RESERVED_PAGE_BUFS) + (NETVSC_PACKET_MAXPAGE - HV_RF_NUM_TX_RESERVED_PAGE_BUFS) #define HN_DIRECT_TX_SIZE_DEF 128 #define HN_EARLY_TXEOF_THRESH 8 struct hn_txdesc { #ifndef HN_USE_TXDESC_BUFRING SLIST_ENTRY(hn_txdesc) link; #endif struct mbuf *m; struct hn_tx_ring *txr; int refs; uint32_t flags; /* HN_TXD_FLAG_ */ netvsc_packet netvsc_pkt; /* XXX to be removed */ bus_dmamap_t data_dmap; bus_addr_t rndis_msg_paddr; rndis_msg *rndis_msg; bus_dmamap_t rndis_msg_dmap; }; #define HN_TXD_FLAG_ONLIST 0x1 #define HN_TXD_FLAG_DMAMAP 0x2 /* * Only enable UDP checksum offloading when it is on 2012R2 or * later. UDP checksum offloading doesn't work on earlier * Windows releases. */ #define HN_CSUM_ASSIST_WIN8 (CSUM_IP | CSUM_TCP) #define HN_CSUM_ASSIST (CSUM_IP | CSUM_UDP | CSUM_TCP) #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) /* YYY 2*MTU is a bit rough, but should be good enough. */ #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) #define HN_LRO_ACKCNT_DEF 1 /* * Be aware that this sleepable mutex will exhibit WITNESS errors when * certain TCP and ARP code paths are taken. This appears to be a * well-known condition, as all other drivers checked use a sleeping * mutex to protect their transmit paths. * Also Be aware that mutexes do not play well with semaphores, and there * is a conflicting semaphore in a certain channel code path. */ #define NV_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF) #define NV_LOCK(_sc) mtx_lock(&(_sc)->hn_lock) #define NV_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->hn_lock, MA_OWNED) #define NV_UNLOCK(_sc) mtx_unlock(&(_sc)->hn_lock) #define NV_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->hn_lock) /* * Globals */ int hv_promisc_mode = 0; /* normal mode by default */ SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Hyper-V network interface"); /* Trust tcp segements verification on host side. */ static int hn_trust_hosttcp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, &hn_trust_hosttcp, 0, "Trust tcp segement verification on host side, " "when csum info is missing (global setting)"); /* Trust udp datagrams verification on host side. */ static int hn_trust_hostudp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, &hn_trust_hostudp, 0, "Trust udp datagram verification on host side, " "when csum info is missing (global setting)"); /* Trust ip packets verification on host side. 
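/*
 * Illustrative check, not from the original source: with this revision
 * the per-packet GPA array is sized by NETVSC_PACKET_MAXPAGE instead of
 * the VMBus scatter/gather limit, and HN_TX_DATA_SEGCNT_MAX above leaves
 * room for the reserved page buffer(s) that carry the RNDIS message.  A
 * compile-time statement of that relationship would be:
 */
CTASSERT(HN_TX_DATA_SEGCNT_MAX + HV_RF_NUM_TX_RESERVED_PAGE_BUFS ==
    NETVSC_PACKET_MAXPAGE);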
*/ static int hn_trust_hostip = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, &hn_trust_hostip, 0, "Trust ip packet verification on host side, " "when csum info is missing (global setting)"); #if __FreeBSD_version >= 1100045 /* Limit TSO burst size */ static int hn_tso_maxlen = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, &hn_tso_maxlen, 0, "TSO burst limit"); #endif /* Limit chimney send size */ static int hn_tx_chimney_size = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, &hn_tx_chimney_size, 0, "Chimney send packet size limit"); /* Limit the size of packet for direct transmission */ static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, &hn_lro_entry_count, 0, "LRO entry count"); #endif #endif static int hn_share_tx_taskq = 0; SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN, &hn_share_tx_taskq, 0, "Enable shared TX taskqueue"); static struct taskqueue *hn_tx_taskq; #ifndef HN_USE_TXDESC_BUFRING static int hn_use_txdesc_bufring = 0; #else static int hn_use_txdesc_bufring = 1; #endif SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); static int hn_bind_tx_taskq = -1; SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN, &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu"); static int hn_use_if_start = 0; SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, &hn_use_if_start, 0, "Use if_start TX method"); static int hn_chan_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, &hn_chan_cnt, 0, "# of channels to use; each channel has one RX ring and one TX ring"); static int hn_tx_ring_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, &hn_tx_ring_cnt, 0, "# of TX rings to use"); static int hn_tx_swq_depth = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); #if __FreeBSD_version >= 1100095 static u_int hn_lro_mbufq_depth = 0; SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); #endif static u_int hn_cpu_index; /* * Forward declarations */ static void hn_stop(hn_softc_t *sc); static void hn_ifinit_locked(hn_softc_t *sc); static void hn_ifinit(void *xsc); static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int hn_start_locked(struct hn_tx_ring *txr, int len); static void hn_start(struct ifnet *ifp); static void hn_start_txeof(struct hn_tx_ring *); static int hn_ifmedia_upd(struct ifnet *ifp); static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); static int hn_check_iplen(const struct mbuf *, int); static int hn_create_tx_ring(struct hn_softc *, int); static void 
hn_destroy_tx_ring(struct hn_tx_ring *); static int hn_create_tx_data(struct hn_softc *, int); static void hn_destroy_tx_data(struct hn_softc *); static void hn_start_taskfunc(void *, int); static void hn_start_txeof_taskfunc(void *, int); static void hn_stop_tx_tasks(struct hn_softc *); static int hn_encap(struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **); static void hn_create_rx_data(struct hn_softc *sc, int); static void hn_destroy_rx_data(struct hn_softc *sc); static void hn_set_tx_chimney_size(struct hn_softc *, int); static void hn_channel_attach(struct hn_softc *, struct vmbus_channel *); static void hn_subchan_attach(struct hn_softc *, struct vmbus_channel *); static void hn_subchan_setup(struct hn_softc *); static int hn_transmit(struct ifnet *, struct mbuf *); static void hn_xmit_qflush(struct ifnet *); static int hn_xmit(struct hn_tx_ring *, int); static void hn_xmit_txeof(struct hn_tx_ring *); static void hn_xmit_taskfunc(void *, int); static void hn_xmit_txeof_taskfunc(void *, int); #if __FreeBSD_version >= 1100099 static void hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) { int i; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; } #endif static int hn_get_txswq_depth(const struct hn_tx_ring *txr) { KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); if (hn_tx_swq_depth < txr->hn_txdesc_cnt) return txr->hn_txdesc_cnt; return hn_tx_swq_depth; } static int hn_ifmedia_upd(struct ifnet *ifp __unused) { return EOPNOTSUPP; } static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct hn_softc *sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!sc->hn_carrier) { ifmr->ifm_active |= IFM_NONE; return; } ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_10G_T | IFM_FDX; } /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ static const struct hyperv_guid g_net_vsc_device_type = { .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} }; /* * Standard probe entry point. * */ static int netvsc_probe(device_t dev) { if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &g_net_vsc_device_type) == 0) { device_set_desc(dev, "Hyper-V Network Interface"); return BUS_PROBE_DEFAULT; } return ENXIO; } /* * Standard attach entry point. * * Called when the driver is loaded. It allocates needed resources, * and initializes the "hardware" and software. 
*/ static int netvsc_attach(device_t dev) { netvsc_device_info device_info; hn_softc_t *sc; int unit = device_get_unit(dev); struct ifnet *ifp = NULL; int error, ring_cnt, tx_ring_cnt; #if __FreeBSD_version >= 1100045 int tso_maxlen; #endif sc = device_get_softc(dev); sc->hn_unit = unit; sc->hn_dev = dev; sc->hn_prichan = vmbus_get_channel(dev); if (hn_tx_taskq == NULL) { sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&sc->hn_tx_taskq, 1, PI_NET, &cpu_set, "%s tx", device_get_nameunit(dev)); } else { taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx", device_get_nameunit(dev)); } } else { sc->hn_tx_taskq = hn_tx_taskq; } NV_LOCK_INIT(sc, "NetVSCLock"); ifp = sc->hn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); /* * Figure out the # of RX rings (ring_cnt) and the # of TX rings * to use (tx_ring_cnt). * * NOTE: * The # of RX rings to use is same as the # of channels to use. */ ring_cnt = hn_chan_cnt; if (ring_cnt <= 0) { /* Default */ ring_cnt = mp_ncpus; if (ring_cnt > HN_RING_CNT_DEF_MAX) ring_cnt = HN_RING_CNT_DEF_MAX; } else if (ring_cnt > mp_ncpus) { ring_cnt = mp_ncpus; } tx_ring_cnt = hn_tx_ring_cnt; if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) tx_ring_cnt = ring_cnt; if (hn_use_if_start) { /* ifnet.if_start only needs one TX ring. */ tx_ring_cnt = 1; } /* * Set the leader CPU for channels. */ sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; error = hn_create_tx_data(sc, tx_ring_cnt); if (error) goto failed; hn_create_rx_data(sc, ring_cnt); /* * Associate the first TX/RX ring w/ the primary channel. */ hn_channel_attach(sc, sc->hn_prichan); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = hn_ioctl; ifp->if_init = hn_ifinit; /* needed by hv_rf_on_device_add() code */ ifp->if_mtu = ETHERMTU; if (hn_use_if_start) { int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); ifp->if_start = hn_start; IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); ifp->if_snd.ifq_drv_maxlen = qdepth - 1; IFQ_SET_READY(&ifp->if_snd); } else { ifp->if_transmit = hn_transmit; ifp->if_qflush = hn_xmit_qflush; } ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); /* XXX ifmedia_set really should do this for us */ sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; /* * Tell upper layers that we support full VLAN capability. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO; error = hv_rf_on_device_add(sc, &device_info, ring_cnt, &sc->hn_rx_ring[0]); if (error) goto failed; KASSERT(sc->net_dev->num_channel > 0 && sc->net_dev->num_channel <= sc->hn_rx_ring_inuse, ("invalid channel count %u, should be less than %d", sc->net_dev->num_channel, sc->hn_rx_ring_inuse)); /* * Set the # of TX/RX rings that could be used according to * the # of channels that host offered. 
*/ if (sc->hn_tx_ring_inuse > sc->net_dev->num_channel) sc->hn_tx_ring_inuse = sc->net_dev->num_channel; sc->hn_rx_ring_inuse = sc->net_dev->num_channel; device_printf(dev, "%d TX ring, %d RX ring\n", sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); if (sc->net_dev->num_channel > 1) hn_subchan_setup(sc); #if __FreeBSD_version >= 1100099 if (sc->hn_rx_ring_inuse > 1) { /* * Reduce TCP segment aggregation limit for multiple * RX rings to increase ACK timeliness. */ hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); } #endif if (device_info.link_state == 0) { sc->hn_carrier = 1; } #if __FreeBSD_version >= 1100045 tso_maxlen = hn_tso_maxlen; if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET) tso_maxlen = IP_MAXPACKET; ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); #endif ether_ifattach(ifp, device_info.mac_addr); #if __FreeBSD_version >= 1100045 if_printf(ifp, "TSO: %u/%u/%u\n", ifp->if_hw_tsomax, ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); #endif sc->hn_tx_chimney_max = sc->net_dev->send_section_size; hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max); if (hn_tx_chimney_size > 0 && hn_tx_chimney_size < sc->hn_tx_chimney_max) hn_set_tx_chimney_size(sc, hn_tx_chimney_size); return (0); failed: hn_destroy_tx_data(sc); if (ifp != NULL) if_free(ifp); return (error); } /* * Standard detach entry point */ static int netvsc_detach(device_t dev) { struct hn_softc *sc = device_get_softc(dev); if (bootverbose) printf("netvsc_detach\n"); /* * XXXKYS: Need to clean up all our * driver state; this is the driver * unloading. */ /* * XXXKYS: Need to stop outgoing traffic and unregister * the netdevice. */ hv_rf_on_device_remove(sc, HV_RF_NV_DESTROY_CHANNEL); hn_stop_tx_tasks(sc); ifmedia_removeall(&sc->hn_media); hn_destroy_rx_data(sc); hn_destroy_tx_data(sc); if (sc->hn_tx_taskq != hn_tx_taskq) taskqueue_free(sc->hn_tx_taskq); return (0); } /* * Standard shutdown entry point */ static int netvsc_shutdown(device_t dev) { return (0); } static __inline int hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) { struct mbuf *m = *m_head; int error; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { struct mbuf *m_new; m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); if (m_new == NULL) return ENOBUFS; else *m_head = m = m_new; txr->hn_tx_collapsed++; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); } if (!error) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_PREWRITE); txd->flags |= HN_TXD_FLAG_DMAMAP; } return error; } static __inline void hn_txdesc_dmamap_unload(struct hn_tx_ring *txr, struct hn_txdesc *txd) { if (txd->flags & HN_TXD_FLAG_DMAMAP) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->hn_tx_data_dtag, txd->data_dmap); txd->flags &= ~HN_TXD_FLAG_DMAMAP; } } static __inline int hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, ("put an onlist txd %#x", txd->flags)); KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); if (atomic_fetchadd_int(&txd->refs, -1) != 1) return 0; hn_txdesc_dmamap_unload(txr, txd); if (txd->m != NULL) { m_freem(txd->m); txd->m = NULL; } txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING 
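/*
 * Note added for illustration, not from the original source: the DMA map
 * helper above retries bus_dmamap_load_mbuf_sg() exactly once when it
 * returns EFBIG, first compacting the mbuf chain with m_collapse() so it
 * fits in at most HN_TX_DATA_SEGCNT_MAX segments; if the compaction
 * itself fails the helper gives up with ENOBUFS.
 */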
mtx_lock_spin(&txr->hn_txlist_spin); KASSERT(txr->hn_txdesc_avail >= 0 && txr->hn_txdesc_avail < txr->hn_txdesc_cnt, ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail++; SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); mtx_unlock_spin(&txr->hn_txlist_spin); #else atomic_add_int(&txr->hn_txdesc_avail, 1); buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif return 1; } static __inline struct hn_txdesc * hn_txdesc_get(struct hn_tx_ring *txr) { struct hn_txdesc *txd; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); txd = SLIST_FIRST(&txr->hn_txlist); if (txd != NULL) { KASSERT(txr->hn_txdesc_avail > 0, ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail--; SLIST_REMOVE_HEAD(&txr->hn_txlist, link); } mtx_unlock_spin(&txr->hn_txlist_spin); #else txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); #endif if (txd != NULL) { #ifdef HN_USE_TXDESC_BUFRING atomic_subtract_int(&txr->hn_txdesc_avail, 1); #endif KASSERT(txd->m == NULL && txd->refs == 0 && (txd->flags & HN_TXD_FLAG_ONLIST), ("invalid txd")); txd->flags &= ~HN_TXD_FLAG_ONLIST; txd->refs = 1; } return txd; } static __inline void hn_txdesc_hold(struct hn_txdesc *txd) { /* 0->1 transition will never work */ KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs)); atomic_add_int(&txd->refs, 1); } static __inline void hn_txeof(struct hn_tx_ring *txr) { txr->hn_has_txeof = 0; txr->hn_txeof(txr); } static void hn_tx_done(struct vmbus_channel *chan, void *xpkt) { netvsc_packet *packet = xpkt; struct hn_txdesc *txd; struct hn_tx_ring *txr; txd = (struct hn_txdesc *)(uintptr_t) packet->compl.send.send_completion_tid; txr = txd->txr; KASSERT(txr->hn_chan == chan, ("channel mismatch, on chan%u, should be chan%u", vmbus_chan_subidx(chan), vmbus_chan_subidx(txr->hn_chan))); txr->hn_has_txeof = 1; hn_txdesc_put(txr, txd); ++txr->hn_txdone_cnt; if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { txr->hn_txdone_cnt = 0; if (txr->hn_oactive) hn_txeof(txr); } } void netvsc_channel_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) { #if defined(INET) || defined(INET6) tcp_lro_flush_all(&rxr->hn_lro); #endif /* * NOTE: * 'txr' could be NULL, if multiple channels and * ifnet.if_start method are enabled. */ if (txr == NULL || !txr->hn_has_txeof) return; txr->hn_txdone_cnt = 0; hn_txeof(txr); } /* * NOTE: * If this function fails, then both txd and m_head0 will be freed. */ static int hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) { bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; int error, nsegs, i; struct mbuf *m_head = *m_head0; netvsc_packet *packet; rndis_msg *rndis_mesg; rndis_packet *rndis_pkt; rndis_per_packet_info *rppi; struct rndis_hash_value *hash_value; uint32_t rndis_msg_size; packet = &txd->netvsc_pkt; packet->is_data_pkt = TRUE; packet->tot_data_buf_len = m_head->m_pkthdr.len; /* * extension points to the area reserved for the * rndis_filter_packet, which is placed just after * the netvsc_packet (and rppi struct, if present; * length is updated later). 
*/ rndis_mesg = txd->rndis_msg; /* XXX not necessary */ memset(rndis_mesg, 0, HN_RNDIS_MSG_LEN); rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG; rndis_pkt = &rndis_mesg->msg.packet; rndis_pkt->data_offset = sizeof(rndis_packet); rndis_pkt->data_length = packet->tot_data_buf_len; rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet); rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet); /* * Set the hash value for this packet, so that the host could * dispatch the TX done event for this packet back to this TX * ring's channel. */ rndis_msg_size += RNDIS_HASHVAL_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_HASHVAL_PPI_SIZE, nbl_hash_value); hash_value = (struct rndis_hash_value *)((uint8_t *)rppi + rppi->per_packet_info_offset); hash_value->hash_value = txr->hn_tx_idx; if (m_head->m_flags & M_VLANTAG) { ndis_8021q_info *rppi_vlan_info; rndis_msg_size += RNDIS_VLAN_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE, ieee_8021q_info); rppi_vlan_info = (ndis_8021q_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); rppi_vlan_info->u1.s1.vlan_id = m_head->m_pkthdr.ether_vtag & 0xfff; } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { rndis_tcp_tso_info *tso_info; struct ether_vlan_header *eh; int ether_len; /* * XXX need m_pullup and use mtodo */ eh = mtod(m_head, struct ether_vlan_header*); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; else ether_len = ETHER_HDR_LEN; rndis_msg_size += RNDIS_TSO_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_TSO_PPI_SIZE, tcp_large_send_info); tso_info = (rndis_tcp_tso_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); tso_info->lso_v2_xmit.type = RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; #ifdef INET if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { struct ip *ip = (struct ip *)(m_head->m_data + ether_len); unsigned long iph_len = ip->ip_hl << 2; struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iph_len); tso_info->lso_v2_xmit.ip_version = RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; ip->ip_len = 0; ip->ip_sum = 0; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { struct ip6_hdr *ip6 = (struct ip6_hdr *) (m_head->m_data + ether_len); struct tcphdr *th = (struct tcphdr *)(ip6 + 1); tso_info->lso_v2_xmit.ip_version = RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; ip6->ip6_plen = 0; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); } #endif tso_info->lso_v2_xmit.tcp_header_offset = 0; tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz; } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { rndis_tcp_ip_csum_info *csum_info; rndis_msg_size += RNDIS_CSUM_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE, tcpip_chksum_info); csum_info = (rndis_tcp_ip_csum_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); csum_info->xmit.is_ipv4 = 1; if (m_head->m_pkthdr.csum_flags & CSUM_IP) csum_info->xmit.ip_header_csum = 1; if (m_head->m_pkthdr.csum_flags & CSUM_TCP) { csum_info->xmit.tcp_csum = 1; csum_info->xmit.tcp_header_offset = 0; } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { csum_info->xmit.udp_csum = 1; } } rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size; packet->tot_data_buf_len = rndis_mesg->msg_len; /* * Chimney send, if the packet could fit into one chimney buffer. 
*/ if (packet->tot_data_buf_len < txr->hn_tx_chimney_size) { netvsc_dev *net_dev = txr->hn_sc->net_dev; uint32_t send_buf_section_idx; txr->hn_tx_chimney_tried++; send_buf_section_idx = hv_nv_get_next_send_section(net_dev); if (send_buf_section_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { uint8_t *dest = ((uint8_t *)net_dev->send_buf + (send_buf_section_idx * net_dev->send_section_size)); memcpy(dest, rndis_mesg, rndis_msg_size); dest += rndis_msg_size; m_copydata(m_head, 0, m_head->m_pkthdr.len, dest); packet->send_buf_section_idx = send_buf_section_idx; packet->send_buf_section_size = packet->tot_data_buf_len; packet->gpa_cnt = 0; txr->hn_tx_chimney++; goto done; } } error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); if (error) { int freed; /* * This mbuf is not linked w/ the txd yet, so free it now. */ m_freem(m_head); *m_head0 = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon txdma error")); txr->hn_txdma_failed++; if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1); return error; } *m_head0 = m_head; packet->gpa_cnt = nsegs + HV_RF_NUM_TX_RESERVED_PAGE_BUFS; /* send packet with page buffer */ packet->gpa[0].gpa_page = atop(txd->rndis_msg_paddr); packet->gpa[0].gpa_ofs = txd->rndis_msg_paddr & PAGE_MASK; packet->gpa[0].gpa_len = rndis_msg_size; /* * Fill the page buffers with mbuf info starting at index * HV_RF_NUM_TX_RESERVED_PAGE_BUFS. */ for (i = 0; i < nsegs; ++i) { struct vmbus_gpa *gpa = &packet->gpa[ i + HV_RF_NUM_TX_RESERVED_PAGE_BUFS]; gpa->gpa_page = atop(segs[i].ds_addr); gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; gpa->gpa_len = segs[i].ds_len; } packet->send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; packet->send_buf_section_size = 0; done: txd->m = m_head; /* Set the completion routine */ packet->compl.send.on_send_completion = hn_tx_done; packet->compl.send.send_completion_context = packet; packet->compl.send.send_completion_tid = (uint64_t)(uintptr_t)txd; return 0; } /* * NOTE: * If this function fails, then txd will be freed, but the mbuf * associated w/ the txd will _not_ be freed. */ static int hn_send_pkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) { int error, send_failed = 0; again: /* * Make sure that txd is not freed before ETHER_BPF_MTAP. */ hn_txdesc_hold(txd); error = hv_nv_on_send(txr->hn_chan, &txd->netvsc_pkt); if (!error) { ETHER_BPF_MTAP(ifp, txd->m); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (!hn_use_if_start) { if_inc_counter(ifp, IFCOUNTER_OBYTES, txd->m->m_pkthdr.len); if (txd->m->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } txr->hn_pkts++; } hn_txdesc_put(txr, txd); if (__predict_false(error)) { int freed; /* * This should "really rarely" happen. * * XXX Too many RX to be acked or too many sideband * commands to run? Ask netvsc_channel_rollup() * to kick start later. */ txr->hn_has_txeof = 1; if (!send_failed) { txr->hn_send_failed++; send_failed = 1; /* * Try sending again after set hn_has_txeof; * in case that we missed the last * netvsc_channel_rollup(). */ goto again; } if_printf(ifp, "send failed\n"); /* * Caller will perform further processing on the * associated mbuf, so don't free it in hn_txdesc_put(); * only unload it from the DMA map in hn_txdesc_put(), * if it was loaded. 
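/*
 * Note added for illustration, not from the original source: on a
 * successful transmit a descriptor leaves hn_txdesc_get() with refs == 1,
 * hn_send_pkt() takes an extra reference before hv_nv_on_send() so the
 * mbuf stays valid for ETHER_BPF_MTAP(), then drops it again, and the
 * send-completion callback (hn_tx_done()) drops the last reference;
 * whichever hn_txdesc_put() call brings refs to zero unloads the DMA map,
 * frees the mbuf and returns the descriptor to the free list.
 */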
*/ txd->m = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon send error")); txr->hn_send_failed++; } return error; } /* * Start a transmit of one or more packets */ static int hn_start_locked(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(hn_use_if_start, ("hn_start_locked is called, when if_start is disabled")); KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); mtx_assert(&txr->hn_tx_lock, MA_OWNED); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return 0; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { struct hn_txdesc *txd; struct mbuf *m_head; int error; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); return 1; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } error = hn_encap(txr, txd, &m_head); if (error) { /* Both txd and m_head are freed */ continue; } error = hn_send_pkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } return 0; } /* * Link up/down notification */ void netvsc_linkstatus_callback(struct hn_softc *sc, uint32_t status) { if (status == 1) { sc->hn_carrier = 1; } else { sc->hn_carrier = 0; } } /* * Append the specified data to the indicated mbuf chain, * Extend the mbuf chain if the new data does not fit in * existing space. * * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. * There should be an equivalent in the kernel mbuf code, * but there does not appear to be one yet. * * Differs from m_append() in that additional mbufs are * allocated with cluster size MJUMPAGESIZE, and filled * accordingly. * * Return 1 if able to complete the job; otherwise 0. */ static int hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) { struct mbuf *m, *n; int remainder, space; for (m = m0; m->m_next != NULL; m = m->m_next) ; remainder = len; space = M_TRAILINGSPACE(m); if (space > 0) { /* * Copy into available space. */ if (space > remainder) space = remainder; bcopy(cp, mtod(m, caddr_t) + m->m_len, space); m->m_len += space; cp += space; remainder -= space; } while (remainder > 0) { /* * Allocate a new mbuf; could check space * and allocate a cluster instead. 
*/ n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); if (n == NULL) break; n->m_len = min(MJUMPAGESIZE, remainder); bcopy(cp, mtod(n, caddr_t), n->m_len); cp += n->m_len; remainder -= n->m_len; m->m_next = n; m = n; } if (m0->m_flags & M_PKTHDR) m0->m_pkthdr.len += len - remainder; return (remainder == 0); } #if defined(INET) || defined(INET6) static __inline int hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) { #if __FreeBSD_version >= 1100095 if (hn_lro_mbufq_depth) { tcp_lro_queue_mbuf(lc, m); return 0; } #endif return tcp_lro_rx(lc, m, 0); } #endif /* * Called when we receive a data packet from the "wire" on the * specified device * * Note: This is no longer used as a callback */ int netvsc_recv(struct hn_rx_ring *rxr, netvsc_packet *packet, const rndis_tcp_ip_csum_info *csum_info, const struct rndis_hash_info *hash_info, const struct rndis_hash_value *hash_value) { struct ifnet *ifp = rxr->hn_ifp; struct mbuf *m_new; int size, do_lro = 0, do_csum = 1; int hash_type = M_HASHTYPE_OPAQUE_HASH; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return (0); /* * Bail out if packet contains more data than configured MTU. */ if (packet->tot_data_buf_len > (ifp->if_mtu + ETHER_HDR_LEN)) { return (0); } else if (packet->tot_data_buf_len <= MHLEN) { m_new = m_gethdr(M_NOWAIT, MT_DATA); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } memcpy(mtod(m_new, void *), packet->data, packet->tot_data_buf_len); m_new->m_pkthdr.len = m_new->m_len = packet->tot_data_buf_len; rxr->hn_small_pkts++; } else { /* * Get an mbuf with a cluster. For packets 2K or less, * get a standard 2K cluster. For anything larger, get a * 4K cluster. Any buffers larger than 4K can cause problems * if looped around to the Hyper-V TX channel, so avoid them. */ size = MCLBYTES; if (packet->tot_data_buf_len > MCLBYTES) { /* 4096 */ size = MJUMPAGESIZE; } m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } hv_m_append(m_new, packet->tot_data_buf_len, packet->data); } m_new->m_pkthdr.rcvif = ifp; if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0)) do_csum = 0; /* receive side checksum offload */ if (csum_info != NULL) { /* IP csum offload */ if (csum_info->receive.ip_csum_succeeded && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); rxr->hn_csum_ip++; } /* TCP/UDP csum offload */ if ((csum_info->receive.tcp_csum_succeeded || csum_info->receive.udp_csum_succeeded) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; if (csum_info->receive.tcp_csum_succeeded) rxr->hn_csum_tcp++; else rxr->hn_csum_udp++; } if (csum_info->receive.ip_csum_succeeded && csum_info->receive.tcp_csum_succeeded) do_lro = 1; } else { const struct ether_header *eh; uint16_t etype; int hoff; hoff = sizeof(*eh); if (m_new->m_len < hoff) goto skip; eh = mtod(m_new, struct ether_header *); etype = ntohs(eh->ether_type); if (etype == ETHERTYPE_VLAN) { const struct ether_vlan_header *evl; hoff = sizeof(*evl); if (m_new->m_len < hoff) goto skip; evl = mtod(m_new, struct ether_vlan_header *); etype = ntohs(evl->evl_proto); } if (etype == ETHERTYPE_IP) { int pr; pr = hn_check_iplen(m_new, hoff); if (pr == IPPROTO_TCP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_TCP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } do_lro = 1; } else if (pr == 
IPPROTO_UDP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_UDP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } } else if (pr != IPPROTO_DONE && do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); } } } skip: if ((packet->vlan_tci != 0) && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { m_new->m_pkthdr.ether_vtag = packet->vlan_tci; m_new->m_flags |= M_VLANTAG; } if (hash_info != NULL && hash_value != NULL) { rxr->hn_rss_pkts++; m_new->m_pkthdr.flowid = hash_value->hash_value; if ((hash_info->hash_info & NDIS_HASH_FUNCTION_MASK) == NDIS_HASH_FUNCTION_TOEPLITZ) { uint32_t type = (hash_info->hash_info & NDIS_HASH_TYPE_MASK); switch (type) { case NDIS_HASH_IPV4: hash_type = M_HASHTYPE_RSS_IPV4; break; case NDIS_HASH_TCP_IPV4: hash_type = M_HASHTYPE_RSS_TCP_IPV4; break; case NDIS_HASH_IPV6: hash_type = M_HASHTYPE_RSS_IPV6; break; case NDIS_HASH_IPV6_EX: hash_type = M_HASHTYPE_RSS_IPV6_EX; break; case NDIS_HASH_TCP_IPV6: hash_type = M_HASHTYPE_RSS_TCP_IPV6; break; case NDIS_HASH_TCP_IPV6_EX: hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; break; } } } else { if (hash_value != NULL) { m_new->m_pkthdr.flowid = hash_value->hash_value; } else { m_new->m_pkthdr.flowid = rxr->hn_rx_idx; hash_type = M_HASHTYPE_OPAQUE; } } M_HASHTYPE_SET(m_new, hash_type); /* * Note: Moved RX completion back to hv_nv_on_receive() so all * messages (not just data messages) will trigger a response. */ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); rxr->hn_pkts++; if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { #if defined(INET) || defined(INET6) struct lro_ctrl *lro = &rxr->hn_lro; if (lro->lro_cnt) { rxr->hn_lro_tried++; if (hn_lro_rx(lro, m_new) == 0) { /* DONE! */ return 0; } } #endif } /* We're not holding the lock here, so don't release it */ (*ifp->if_input)(ifp, m_new); return (0); } /* * Rules for using sc->temp_unusable: * 1. sc->temp_unusable can only be read or written while holding NV_LOCK() * 2. code reading sc->temp_unusable under NV_LOCK(), and finding * sc->temp_unusable set, must release NV_LOCK() and exit * 3. to retain exclusive control of the interface, * sc->temp_unusable must be set by code before releasing NV_LOCK() * 4. only code setting sc->temp_unusable can clear sc->temp_unusable * 5. code setting sc->temp_unusable must eventually clear sc->temp_unusable */ /* * Standard ioctl entry point. Called when the user wants to configure * the interface. */ static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { hn_softc_t *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; #ifdef INET struct ifaddr *ifa = (struct ifaddr *)data; #endif netvsc_device_info device_info; int mask, error = 0; int retry_cnt = 500; switch(cmd) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) hn_ifinit(sc); arp_ifinit(ifp, ifa); } else #endif error = ether_ioctl(ifp, cmd, data); break; case SIOCSIFMTU: /* Check MTU value change */ if (ifp->if_mtu == ifr->ifr_mtu) break; if (ifr->ifr_mtu > NETVSC_MAX_CONFIGURABLE_MTU) { error = EINVAL; break; } /* Obtain and record requested MTU */ ifp->if_mtu = ifr->ifr_mtu; #if __FreeBSD_version >= 1100099 /* * Make sure that LRO aggregation length limit is still * valid, after the MTU change. 
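 *
 * HN_LRO_LENLIM_MIN(ifp) is presumably computed from the interface
 * MTU (hence the ifp argument), so after growing the MTU the
 * per-ring lro_length_lim may fall below that minimum and has to be
 * bumped via hn_set_lro_lenlim() before the device is reinitialized
 * below.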
*/ NV_LOCK(sc); if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); NV_UNLOCK(sc); #endif do { NV_LOCK(sc); if (!sc->temp_unusable) { sc->temp_unusable = TRUE; retry_cnt = -1; } NV_UNLOCK(sc); if (retry_cnt > 0) { retry_cnt--; DELAY(5 * 1000); } } while (retry_cnt > 0); if (retry_cnt == 0) { error = EINVAL; break; } /* We must remove and add back the device to cause the new * MTU to take effect. This includes tearing down, but not * deleting the channel, then bringing it back up. */ error = hv_rf_on_device_remove(sc, HV_RF_NV_RETAIN_CHANNEL); if (error) { NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; } /* Wait for subchannels to be destroyed */ vmbus_subchan_drain(sc->hn_prichan); error = hv_rf_on_device_add(sc, &device_info, sc->hn_rx_ring_inuse, &sc->hn_rx_ring[0]); if (error) { NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; } KASSERT(sc->hn_rx_ring_cnt == sc->net_dev->num_channel, ("RX ring count %d and channel count %u mismatch", sc->hn_rx_ring_cnt, sc->net_dev->num_channel)); if (sc->net_dev->num_channel > 1) { int r; /* * Skip the rings on primary channel; they are * handled by the hv_rf_on_device_add() above. */ for (r = 1; r < sc->hn_rx_ring_cnt; ++r) { sc->hn_rx_ring[r].hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; } for (r = 1; r < sc->hn_tx_ring_cnt; ++r) { sc->hn_tx_ring[r].hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; } hn_subchan_setup(sc); } sc->hn_tx_chimney_max = sc->net_dev->send_section_size; if (sc->hn_tx_ring[0].hn_tx_chimney_size > sc->hn_tx_chimney_max) hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max); hn_ifinit_locked(sc); NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; case SIOCSIFFLAGS: do { NV_LOCK(sc); if (!sc->temp_unusable) { sc->temp_unusable = TRUE; retry_cnt = -1; } NV_UNLOCK(sc); if (retry_cnt > 0) { retry_cnt--; DELAY(5 * 1000); } } while (retry_cnt > 0); if (retry_cnt == 0) { error = EINVAL; break; } if (ifp->if_flags & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. */ #ifdef notyet /* Fixme: Promiscuous mode? 
*/ if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->hn_if_flags & IFF_PROMISC)) { /* do something here for Hyper-V */ } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->hn_if_flags & IFF_PROMISC) { /* do something here for Hyper-V */ } else #endif hn_ifinit_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { hn_stop(sc); } } NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); sc->hn_if_flags = ifp->if_flags; error = 0; break; case SIOCSIFCAP: NV_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= sc->hn_tx_ring[0].hn_csum_assist; } else { ifp->if_hwassist &= ~sc->hn_tx_ring[0].hn_csum_assist; } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; else ifp->if_hwassist &= ~CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; else ifp->if_hwassist &= ~CSUM_IP6_TSO; } NV_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: #ifdef notyet /* Fixme: Multicast mode? */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) { NV_LOCK(sc); netvsc_setmulti(sc); NV_UNLOCK(sc); error = 0; } #endif error = EINVAL; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } /* * */ static void hn_stop(hn_softc_t *sc) { struct ifnet *ifp; int ret, i; ifp = sc->hn_ifp; if (bootverbose) printf(" Closing Device ...\n"); atomic_clear_int(&ifp->if_drv_flags, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; if_link_state_change(ifp, LINK_STATE_DOWN); sc->hn_initdone = 0; ret = hv_rf_on_close(sc); } /* * FreeBSD transmit entry point */ static void hn_start(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } static void hn_start_txeof(struct hn_tx_ring *txr) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the OACTIVE earlier, with the hope, that * others could catch up. The task will clear the * flag again with the hn_tx_lock to avoid possible * races. 
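 *
 * The race being tolerated here: clearing IFF_DRV_OACTIVE without
 * holding hn_tx_lock may interleave with a concurrent sender that
 * sets the flag again.  The hn_txeof_task scheduled below re-clears
 * the flag under hn_tx_lock (see hn_start_txeof_taskfunc()), so
 * forward progress is still guaranteed.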
*/ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } /* * */ static void hn_ifinit_locked(hn_softc_t *sc) { struct ifnet *ifp; int ret, i; ifp = sc->hn_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { return; } hv_promisc_mode = 1; ret = hv_rf_on_open(sc); if (ret != 0) { return; } else { sc->hn_initdone = 1; } atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); if_link_state_change(ifp, LINK_STATE_UP); } /* * */ static void hn_ifinit(void *xsc) { hn_softc_t *sc = xsc; NV_LOCK(sc); if (sc->temp_unusable) { NV_UNLOCK(sc); return; } sc->temp_unusable = TRUE; NV_UNLOCK(sc); hn_ifinit_locked(sc); NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); } #ifdef LATER /* * */ static void hn_watchdog(struct ifnet *ifp) { hn_softc_t *sc; sc = ifp->if_softc; printf("hn%d: watchdog timeout -- resetting\n", sc->hn_unit); hn_ifinit(sc); /*???*/ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } #endif #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; unsigned int lenlim; int error; lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; error = sysctl_handle_int(oidp, &lenlim, 0, req); if (error || req->newptr == NULL) return error; if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || lenlim > TCP_LRO_LENGTH_MAX) return EINVAL; NV_LOCK(sc); hn_set_lro_lenlim(sc, lenlim); NV_UNLOCK(sc); return 0; } static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ackcnt, error, i; /* * lro_ackcnt_lim is append count limit, * +1 to turn it into aggregation limit. */ ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; error = sysctl_handle_int(oidp, &ackcnt, 0, req); if (error || req->newptr == NULL) return error; if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) return EINVAL; /* * Convert aggregation limit back to append * count limit. */ --ackcnt; NV_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_inuse; ++i) sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; NV_UNLOCK(sc); return 0; } #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int hcsum = arg2; int on, error, i; on = 0; if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) on = 1; error = sysctl_handle_int(oidp, &on, 0, req); if (error || req->newptr == NULL) return error; NV_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (on) rxr->hn_trust_hcsum |= hcsum; else rxr->hn_trust_hcsum &= ~hcsum; } NV_UNLOCK(sc); return 0; } static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int chimney_size, error; chimney_size = sc->hn_tx_ring[0].hn_tx_chimney_size; error = sysctl_handle_int(oidp, &chimney_size, 0, req); if (error || req->newptr == NULL) return error; if (chimney_size > sc->hn_tx_chimney_max || chimney_size <= 0) return EINVAL; hn_set_tx_chimney_size(sc, chimney_size); return 0; } static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; u_long stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((u_long *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. 
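 *
 * The aggregated counter is only zeroed when the sysctl is written
 * (req->newptr != NULL); plain reads, e.g. through sysctl(8), leave
 * the per-ring values intact.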
*/ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; *((u_long *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; uint64_t stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((uint64_t *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_64(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_tx_ring *txr; u_long stat; stat = 0; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; stat += *((u_long *)((uint8_t *)txr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; *((u_long *)((uint8_t *)txr + ofs)) = 0; } return 0; } static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error, conf; struct hn_tx_ring *txr; txr = &sc->hn_tx_ring[0]; conf = *((int *)((uint8_t *)txr + ofs)); error = sysctl_handle_int(oidp, &conf, 0, req); if (error || req->newptr == NULL) return error; NV_LOCK(sc); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; *((int *)((uint8_t *)txr + ofs)) = conf; } NV_UNLOCK(sc); return 0; } static int hn_check_iplen(const struct mbuf *m, int hoff) { const struct ip *ip; int len, iphlen, iplen; const struct tcphdr *th; int thoff; /* TCP data offset */ len = hoff + sizeof(struct ip); /* The packet must be at least the size of an IP header. */ if (m->m_pkthdr.len < len) return IPPROTO_DONE; /* The fixed IP header must reside completely in the first mbuf. */ if (m->m_len < len) return IPPROTO_DONE; ip = mtodo(m, hoff); /* Bound check the packet's stated IP header length. */ iphlen = ip->ip_hl << 2; if (iphlen < sizeof(struct ip)) /* minimum header length */ return IPPROTO_DONE; /* The full IP header must reside completely in the one mbuf. */ if (m->m_len < hoff + iphlen) return IPPROTO_DONE; iplen = ntohs(ip->ip_len); /* * Check that the amount of data in the buffers is as * at least much as the IP header would have us expect. */ if (m->m_pkthdr.len < hoff + iplen) return IPPROTO_DONE; /* * Ignore IP fragments. */ if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) return IPPROTO_DONE; /* * The TCP/IP or UDP/IP header must be entirely contained within * the first fragment of a packet. 
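 *
 * The return value is the IP protocol (ip_p) when the headers pass
 * these checks, or IPPROTO_DONE when the packet is fragmented,
 * truncated or otherwise unsafe to treat as a complete TCP/UDP
 * segment; callers use IPPROTO_DONE as "do not trust the host
 * checksum and do not attempt LRO" (see netvsc_recv()).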
*/ switch (ip->ip_p) { case IPPROTO_TCP: if (iplen < iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); thoff = th->th_off << 2; if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + thoff) return IPPROTO_DONE; break; case IPPROTO_UDP: if (iplen < iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; break; default: if (iplen < iphlen) return IPPROTO_DONE; break; } return ip->ip_p; } static void hn_create_rx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; device_t dev = sc->hn_dev; #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 int lroent_cnt; #endif #endif int i; sc->hn_rx_ring_cnt = ring_cnt; sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, M_NETVSC, M_WAITOK | M_ZERO); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 lroent_cnt = hn_lro_entry_count; if (lroent_cnt < TCP_LRO_ENTRIES) lroent_cnt = TCP_LRO_ENTRIES; device_printf(dev, "LRO: entry count %d\n", lroent_cnt); #endif #endif /* INET || INET6 */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Create dev.hn.UNIT.rx sysctl tree */ sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (hn_trust_hosttcp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; if (hn_trust_hostudp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; if (hn_trust_hostip) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; rxr->hn_ifp = sc->hn_ifp; if (i < sc->hn_tx_ring_cnt) rxr->hn_txr = &sc->hn_tx_ring[i]; rxr->hn_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK); rxr->hn_rx_idx = i; /* * Initialize LRO. 
*/ #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, hn_lro_mbufq_depth); #else tcp_lro_init(&rxr->hn_lro); rxr->hn_lro.ifp = sc->hn_ifp; #endif #if __FreeBSD_version >= 1100099 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; #endif #endif /* INET || INET6 */ if (sc->hn_rx_sysctl_tree != NULL) { char name[16]; /* * Create per RX ring sysctl tree: * dev.hn.UNIT.rx.RINGID */ snprintf(name, sizeof(name), "%d", i); rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (rxr->hn_rx_sysctl_tree != NULL) { SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "packets", CTLFLAG_RW, &rxr->hn_pkts, "# of packets received"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "rss_pkts", CTLFLAG_RW, &rxr->hn_rss_pkts, "# of packets w/ RSS info received"); } } } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_queued), hn_rx_stat_u64_sysctl, "LU", "LRO queued"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), hn_rx_stat_u64_sysctl, "LU", "LRO flushed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro_tried), hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); #if __FreeBSD_version >= 1100099 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_lenlim_sysctl, "IU", "Max # of data bytes to be aggregated by LRO"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_ackcnt_sysctl, "I", "Max # of ACKs to be aggregated by LRO"); #endif SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, hn_trust_hcsum_sysctl, "I", "Trust tcp segement verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, hn_trust_hcsum_sysctl, "I", "Trust udp datagram verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, hn_trust_hcsum_sysctl, "I", "Trust ip packet verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_ip), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_tcp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_udp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_trusted), hn_rx_stat_ulong_sysctl, "LU", "# of packets that we trust host's csum verification"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", CTLTYPE_ULONG | 
CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_small_pkts), hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); } static void hn_destroy_rx_data(struct hn_softc *sc) { int i; if (sc->hn_rx_ring_cnt == 0) return; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; #if defined(INET) || defined(INET6) tcp_lro_free(&rxr->hn_lro); #endif free(rxr->hn_rdbuf, M_NETVSC); } free(sc->hn_rx_ring, M_NETVSC); sc->hn_rx_ring = NULL; sc->hn_rx_ring_cnt = 0; sc->hn_rx_ring_inuse = 0; } static int hn_create_tx_ring(struct hn_softc *sc, int id) { struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; device_t dev = sc->hn_dev; bus_dma_tag_t parent_dtag; int error, i; uint32_t version; txr->hn_sc = sc; txr->hn_tx_idx = id; #ifndef HN_USE_TXDESC_BUFRING mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); #endif mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); txr->hn_txdesc_cnt = HN_TX_DESC_CNT; txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK | M_ZERO); #ifndef HN_USE_TXDESC_BUFRING SLIST_INIT(&txr->hn_txlist); #else txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK, &txr->hn_tx_lock); #endif txr->hn_tx_taskq = sc->hn_tx_taskq; if (hn_use_if_start) { txr->hn_txeof = hn_start_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); } else { int br_depth; txr->hn_txeof = hn_xmit_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); br_depth = hn_get_txswq_depth(txr); txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_NETVSC, M_WAITOK, &txr->hn_tx_lock); } txr->hn_direct_tx_size = hn_direct_tx_size; version = VMBUS_GET_VERSION(device_get_parent(dev), dev); if (version >= VMBUS_VERSION_WIN8_1) { txr->hn_csum_assist = HN_CSUM_ASSIST; } else { txr->hn_csum_assist = HN_CSUM_ASSIST_WIN8; if (id == 0) { device_printf(dev, "bus version %u.%u, " "no UDP checksum offloading\n", VMBUS_VERSION_MAJOR(version), VMBUS_VERSION_MINOR(version)); } } /* * Always schedule transmission instead of trying to do direct * transmission. This one gives the best performance so far. */ txr->hn_sched_tx = 1; parent_dtag = bus_get_dma_tag(dev); /* DMA tag for RNDIS messages. */ error = bus_dma_tag_create(parent_dtag, /* parent */ HN_RNDIS_MSG_ALIGN, /* alignment */ HN_RNDIS_MSG_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_RNDIS_MSG_LEN, /* maxsize */ 1, /* nsegments */ HN_RNDIS_MSG_LEN, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_rndis_dtag); if (error) { device_printf(dev, "failed to create rndis dmatag\n"); return error; } /* DMA tag for data. 
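 *
 * This tag bounds a loaded mbuf chain to HN_TX_DATA_SEGCNT_MAX
 * segments of at most HN_TX_DATA_SEGSIZE bytes each, which keeps
 * the GPA array built in hn_encap() (nsegs plus the
 * HV_RF_NUM_TX_RESERVED_PAGE_BUFS reserved RNDIS entries) within
 * the bounds of packet->gpa[].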
*/ error = bus_dma_tag_create(parent_dtag, /* parent */ 1, /* alignment */ HN_TX_DATA_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_TX_DATA_MAXSIZE, /* maxsize */ HN_TX_DATA_SEGCNT_MAX, /* nsegments */ HN_TX_DATA_SEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_data_dtag); if (error) { device_printf(dev, "failed to create data dmatag\n"); return error; } for (i = 0; i < txr->hn_txdesc_cnt; ++i) { struct hn_txdesc *txd = &txr->hn_txdesc[i]; txd->txr = txr; /* * Allocate and load RNDIS messages. */ error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, (void **)&txd->rndis_msg, BUS_DMA_WAITOK | BUS_DMA_COHERENT, &txd->rndis_msg_dmap); if (error) { device_printf(dev, "failed to allocate rndis_msg, %d\n", i); return error; } error = bus_dmamap_load(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap, txd->rndis_msg, HN_RNDIS_MSG_LEN, hyperv_dma_map_paddr, &txd->rndis_msg_paddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "failed to load rndis_msg, %d\n", i); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); return error; } /* DMA map for TX data. */ error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, &txd->data_dmap); if (error) { device_printf(dev, "failed to allocate tx data dmamap\n"); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); return error; } /* All set, put it to list */ txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); #else buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif } txr->hn_txdesc_avail = txr->hn_txdesc_cnt; if (sc->hn_tx_sysctl_tree != NULL) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; char name[16]; /* * Create per TX ring sysctl tree: * dev.hn.UNIT.tx.RINGID */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); snprintf(name, sizeof(name), "%d", id); txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (txr->hn_tx_sysctl_tree != NULL) { child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", CTLFLAG_RD, &txr->hn_txdesc_avail, 0, "# of available TX descs"); if (!hn_use_if_start) { SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", CTLFLAG_RD, &txr->hn_oactive, 0, "over active"); } SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", CTLFLAG_RW, &txr->hn_pkts, "# of packets transmitted"); } } return 0; } static void hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) { struct hn_tx_ring *txr = txd->txr; KASSERT(txd->m == NULL, ("still has mbuf installed")); KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); } static void hn_destroy_tx_ring(struct hn_tx_ring *txr) { struct hn_txdesc *txd; if (txr->hn_txdesc == NULL) return; #ifndef HN_USE_TXDESC_BUFRING while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) { SLIST_REMOVE_HEAD(&txr->hn_txlist, link); hn_txdesc_dmamap_destroy(txd); } #else mtx_lock(&txr->hn_tx_lock); while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL) hn_txdesc_dmamap_destroy(txd); mtx_unlock(&txr->hn_tx_lock); #endif if (txr->hn_tx_data_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_data_dtag); if 
(txr->hn_tx_rndis_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); #ifdef HN_USE_TXDESC_BUFRING buf_ring_free(txr->hn_txdesc_br, M_NETVSC); #endif free(txr->hn_txdesc, M_NETVSC); txr->hn_txdesc = NULL; if (txr->hn_mbuf_br != NULL) buf_ring_free(txr->hn_mbuf_br, M_NETVSC); #ifndef HN_USE_TXDESC_BUFRING mtx_destroy(&txr->hn_txlist_spin); #endif mtx_destroy(&txr->hn_tx_lock); } static int hn_create_tx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int i; sc->hn_tx_ring_cnt = ring_cnt; sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, M_NETVSC, M_WAITOK | M_ZERO); ctx = device_get_sysctl_ctx(sc->hn_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); /* Create dev.hn.UNIT.tx sysctl tree */ sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { int error; error = hn_create_tx_ring(sc, i); if (error) return error; } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_no_txdescs), hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_send_failed), hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_txdma_failed), hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_collapsed), hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, "# of total TX descs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", CTLFLAG_RD, &sc->hn_tx_chimney_max, 0, "Chimney send packet size upper boundary"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_tx_chimney_size_sysctl, "I", "Chimney send packet size limit"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_direct_tx_size), hn_tx_conf_int_sysctl, "I", "Size of the packet for direct transmission"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_sched_tx), hn_tx_conf_int_sysctl, "I", "Always schedule transmission " "instead of doing direct transmission"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); return 0; } static void hn_set_tx_chimney_size(struct hn_softc *sc, int chimney_size) { int i; NV_LOCK(sc); 
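	/*
	 * Apply the new chimney (copy-mode) threshold to every TX ring
	 * currently in use; hn_encap() compares each outgoing packet
	 * against this per-ring value when choosing between the
	 * send-buffer copy path and the GPA page-list path.
	 */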
for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_tx_chimney_size = chimney_size; NV_UNLOCK(sc); } static void hn_destroy_tx_data(struct hn_softc *sc) { int i; if (sc->hn_tx_ring_cnt == 0) return; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) hn_destroy_tx_ring(&sc->hn_tx_ring[i]); free(sc->hn_tx_ring, M_NETVSC); sc->hn_tx_ring = NULL; sc->hn_tx_ring_cnt = 0; sc->hn_tx_ring_inuse = 0; } static void hn_start_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_start_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_stop_tx_tasks(struct hn_softc *sc) { int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static int hn_xmit(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; struct mbuf *m_head; mtx_assert(&txr->hn_tx_lock, MA_OWNED); KASSERT(hn_use_if_start == 0, ("hn_xmit is called, when if_start is enabled")); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) return 0; while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { struct hn_txdesc *txd; int error; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); return 1; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } error = hn_encap(txr, txd, &m_head); if (error) { /* Both txd and m_head are freed; discard */ drbr_advance(ifp, txr->hn_mbuf_br); continue; } error = hn_send_pkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } /* Sent */ drbr_advance(ifp, txr->hn_mbuf_br); } return 0; } static int hn_transmit(struct ifnet *ifp, struct mbuf *m) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr; int error, idx = 0; /* * Select the TX ring based on flowid */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; txr = &sc->hn_tx_ring[idx]; error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); if (error) { if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); return error; } if (txr->hn_oactive) return 0; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return 0; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); return 0; } static void hn_xmit_qflush(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; struct mbuf *m; mtx_lock(&txr->hn_tx_lock); while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) m_freem(m); mtx_unlock(&txr->hn_tx_lock); } if_qflush(ifp); } static void hn_xmit_txeof(struct hn_tx_ring *txr) { if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; txr->hn_oactive = 0; sched = hn_xmit(txr, 
txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the oactive earlier, with the hope, that * others could catch up. The task will clear the * oactive again with the hn_tx_lock to avoid possible * races. */ txr->hn_oactive = 0; taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_xmit_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); txr->hn_oactive = 0; hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_channel_attach(struct hn_softc *sc, struct vmbus_channel *chan) { struct hn_rx_ring *rxr; int idx; idx = vmbus_chan_subidx(chan); KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, ("invalid channel index %d, should > 0 && < %d", idx, sc->hn_rx_ring_inuse)); rxr = &sc->hn_rx_ring[idx]; KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, ("RX ring %d already attached", idx)); rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; if (bootverbose) { if_printf(sc->hn_ifp, "link RX ring %d to channel%u\n", idx, vmbus_chan_id(chan)); } if (idx < sc->hn_tx_ring_inuse) { struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, ("TX ring %d already attached", idx)); txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; txr->hn_chan = chan; if (bootverbose) { if_printf(sc->hn_ifp, "link TX ring %d to channel%u\n", idx, vmbus_chan_id(chan)); } } /* Bind channel to a proper CPU */ vmbus_chan_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus); } static void hn_subchan_attach(struct hn_softc *sc, struct vmbus_channel *chan) { KASSERT(!vmbus_chan_is_primary(chan), ("subchannel callback on primary channel")); hn_channel_attach(sc, chan); } static void hn_subchan_setup(struct hn_softc *sc) { struct vmbus_channel **subchans; int subchan_cnt = sc->net_dev->num_channel - 1; int i; /* Wait for sub-channels setup to complete. */ subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); /* Attach the sub-channels. */ for (i = 0; i < subchan_cnt; ++i) { struct vmbus_channel *subchan = subchans[i]; /* NOTE: Calling order is critical. 
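 *
 * hn_subchan_attach() must run first so the sub-channel is bound to
 * its CPU and linked to the matching RX/TX rings before
 * hv_nv_subchan_attach() makes it operational (assumption: the
 * latter opens the channel, after which packets may arrive at any
 * time and the ring linkage has to already be in place).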
*/ hn_subchan_attach(sc, subchan); hv_nv_subchan_attach(subchan, &sc->hn_rx_ring[vmbus_chan_subidx(subchan)]); } /* Release the sub-channels */ vmbus_subchan_rel(subchans, subchan_cnt); if_printf(sc->hn_ifp, "%d sub-channels setup done\n", subchan_cnt); } static void hn_tx_taskq_create(void *arg __unused) { if (!hn_share_tx_taskq) return; hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&hn_tx_taskq, 1, PI_NET, &cpu_set, "hn tx"); } else { taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx"); } } SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_create, NULL); static void hn_tx_taskq_destroy(void *arg __unused) { if (hn_tx_taskq != NULL) taskqueue_free(hn_tx_taskq); } SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_destroy, NULL); static device_method_t netvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, netvsc_probe), DEVMETHOD(device_attach, netvsc_attach), DEVMETHOD(device_detach, netvsc_detach), DEVMETHOD(device_shutdown, netvsc_shutdown), { 0, 0 } }; static driver_t netvsc_driver = { NETVSC_DEVNAME, netvsc_methods, sizeof(hn_softc_t) }; static devclass_t netvsc_devclass; DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0); MODULE_VERSION(hn, 1); MODULE_DEPEND(hn, vmbus, 1, 1, 1); Index: user/alc/PQ_LAUNDRY/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 303642) @@ -1,2203 +1,2234 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * StorVSC driver for Hyper-V. This driver presents a SCSI HBA interface * to the Comman Access Method (CAM) layer. CAM control blocks (CCBs) are * converted into VSCSI protocol messages which are delivered to the parent * partition StorVSP driver over the Hyper-V VMBUS. 
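 *
 * Rough request flow, as implemented in this file: storvsc_action()
 * receives a CCB from CAM, create_storvsc_request() translates it
 * into a struct hv_storvsc_request carrying a vstor_packet plus a
 * GPA range describing the data buffer, hv_storvsc_io_request()
 * sends it over a VMBUS channel, and the completion returns through
 * hv_storvsc_on_channel_callback() and hv_storvsc_on_iocompletion()
 * to storvsc_io_done(), which hands the CCB back to CAM.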
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_vstorage.h" #include "vmbus_if.h" #define STORVSC_RINGBUFFER_SIZE (20*PAGE_SIZE) #define STORVSC_MAX_LUNS_PER_TARGET (64) #define STORVSC_MAX_IO_REQUESTS (STORVSC_MAX_LUNS_PER_TARGET * 2) #define BLKVSC_MAX_IDE_DISKS_PER_TARGET (1) #define BLKVSC_MAX_IO_REQUESTS STORVSC_MAX_IO_REQUESTS #define STORVSC_MAX_TARGETS (2) #define VSTOR_PKT_SIZE (sizeof(struct vstor_packet) - vmscsi_size_delta) -#define STORVSC_DATA_SEGCNT_MAX VMBUS_CHAN_PRPLIST_MAX +/* + * 33 segments are needed to allow 128KB maxio, in case the data + * in the first page is _not_ PAGE_SIZE aligned, e.g. + * + * |<----------- 128KB ----------->| + * | | + * 0 2K 4K 8K 16K 124K 128K 130K + * | | | | | | | | + * +--+--+-----+-----+.......+-----+--+--+ + * | | | | | | | | | DATA + * | | | | | | | | | + * +--+--+-----+-----+.......------+--+--+ + * | | | | + * | 1| 31 | 1| ...... # of segments + */ +#define STORVSC_DATA_SEGCNT_MAX 33 #define STORVSC_DATA_SEGSZ_MAX PAGE_SIZE #define STORVSC_DATA_SIZE_MAX \ - (STORVSC_DATA_SEGCNT_MAX * STORVSC_DATA_SEGSZ_MAX) + ((STORVSC_DATA_SEGCNT_MAX - 1) * STORVSC_DATA_SEGSZ_MAX) struct storvsc_softc; struct hv_sgl_node { LIST_ENTRY(hv_sgl_node) link; struct sglist *sgl_data; }; struct hv_sgl_page_pool{ LIST_HEAD(, hv_sgl_node) in_use_sgl_list; LIST_HEAD(, hv_sgl_node) free_sgl_list; boolean_t is_init; } g_hv_sgl_page_pool; #define STORVSC_MAX_SG_PAGE_CNT STORVSC_MAX_IO_REQUESTS * STORVSC_DATA_SEGCNT_MAX enum storvsc_request_type { WRITE_TYPE, READ_TYPE, UNKNOWN_TYPE }; SYSCTL_NODE(_hw, OID_AUTO, storvsc, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Hyper-V storage interface"); static u_int hv_storvsc_use_pim_unmapped = 1; SYSCTL_INT(_hw_storvsc, OID_AUTO, use_pim_unmapped, CTLFLAG_RDTUN, &hv_storvsc_use_pim_unmapped, 0, "Optimize storvsc by using unmapped I/O"); struct hv_storvsc_sysctl { u_long data_bio_cnt; u_long data_vaddr_cnt; u_long data_sg_cnt; }; struct storvsc_gpa_range { struct vmbus_gpa_range gpa_range; uint64_t gpa_page[STORVSC_DATA_SEGCNT_MAX]; } __packed; struct hv_storvsc_request { LIST_ENTRY(hv_storvsc_request) link; struct vstor_packet vstor_packet; int prp_cnt; struct storvsc_gpa_range prp_list; void *sense_data; uint8_t sense_info_len; uint8_t retries; union ccb *ccb; struct storvsc_softc *softc; struct callout callout; struct sema synch_sema; /*Synchronize the request/response if needed */ struct sglist *bounce_sgl; unsigned int bounce_sgl_count; uint64_t not_aligned_seg_bits; bus_dmamap_t data_dmap; }; struct storvsc_softc { struct vmbus_channel *hs_chan; LIST_HEAD(, hv_storvsc_request) hs_free_list; struct mtx hs_lock; struct storvsc_driver_props *hs_drv_props; int hs_unit; uint32_t hs_frozen; struct cam_sim *hs_sim; struct cam_path *hs_path; uint32_t hs_num_out_reqs; boolean_t hs_destroy; boolean_t hs_drain_notify; struct sema hs_drain_sema; struct hv_storvsc_request hs_init_req; struct hv_storvsc_request hs_reset_req; device_t hs_dev; bus_dma_tag_t storvsc_req_dtag; struct hv_storvsc_sysctl sysctl_data; struct vmbus_channel *hs_cpu2chan[MAXCPU]; }; /** * HyperV storvsc timeout testing cases: * a. IO returned after first timeout; * b. IO returned after second timeout and queue freeze; * c. 
IO returned while timer handler is running * The first can be tested by "sg_senddiag -vv /dev/daX", * and the second and third can be done by * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX". */ #define HVS_TIMEOUT_TEST 0 /* * Bus/adapter reset functionality on the Hyper-V host is * buggy and it will be disabled until * it can be further tested. */ #define HVS_HOST_RESET 0 struct storvsc_driver_props { char *drv_name; char *drv_desc; uint8_t drv_max_luns_per_target; uint8_t drv_max_ios_per_target; uint32_t drv_ringbuffer_size; }; enum hv_storage_type { DRIVER_BLKVSC, DRIVER_STORVSC, DRIVER_UNKNOWN }; #define HS_MAX_ADAPTERS 10 #define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1 /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */ static const struct hyperv_guid gStorVscDeviceType={ .hv_guid = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f} }; /* {32412632-86cb-44a2-9b5c-50d1417354f5} */ static const struct hyperv_guid gBlkVscDeviceType={ .hv_guid = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5} }; static struct storvsc_driver_props g_drv_props_table[] = { {"blkvsc", "Hyper-V IDE Storage Interface", BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS, STORVSC_RINGBUFFER_SIZE}, {"storvsc", "Hyper-V SCSI Storage Interface", STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS, STORVSC_RINGBUFFER_SIZE} }; /* * Sense buffer size changed in win8; have a run-time * variable to track the size we should use. */ static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE; /* * The size of the vmscsi_request has changed in win8. The * additional size is for the newly added elements in the * structure. These elements are valid only when we are talking * to a win8 host. * Track the correct size we need to apply. */ static int vmscsi_size_delta; /* * The storage protocol version is determined during the * initial exchange with the host. It will indicate which * storage functionality is available in the host. 
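 *
 * The value is negotiated in hv_storvsc_channel_init(): the driver
 * walks vmstor_proto_list from the newest to the oldest entry and
 * settles on the first version the host accepts (status == 0),
 * which also fixes sense_buffer_size and vmscsi_size_delta.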
*/ static int vmstor_proto_version; struct vmstor_proto { int proto_version; int sense_buffer_size; int vmscsi_size_delta; }; static const struct vmstor_proto vmstor_proto_list[] = { { VMSTOR_PROTOCOL_VERSION_WIN10, POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, 0 }, { VMSTOR_PROTOCOL_VERSION_WIN8_1, POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, 0 }, { VMSTOR_PROTOCOL_VERSION_WIN8, POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, 0 }, { VMSTOR_PROTOCOL_VERSION_WIN7, PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE, sizeof(struct vmscsi_win8_extension), }, { VMSTOR_PROTOCOL_VERSION_WIN6, PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE, sizeof(struct vmscsi_win8_extension), } }; /* static functions */ static int storvsc_probe(device_t dev); static int storvsc_attach(device_t dev); static int storvsc_detach(device_t dev); static void storvsc_poll(struct cam_sim * sim); static void storvsc_action(struct cam_sim * sim, union ccb * ccb); static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp); static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp); static enum hv_storage_type storvsc_get_storage_type(device_t dev); static void hv_storvsc_rescan_target(struct storvsc_softc *sc); static void hv_storvsc_on_channel_callback(struct vmbus_channel *chan, void *xsc); static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request); static int hv_storvsc_connect_vsp(struct storvsc_softc *); static void storvsc_io_done(struct hv_storvsc_request *reqp); static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, bus_dma_segment_t *orig_sgl, unsigned int orig_sgl_count, uint64_t seg_bits); void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, unsigned int dest_sgl_count, struct sglist* src_sgl, uint64_t seg_bits); static device_method_t storvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, storvsc_probe), DEVMETHOD(device_attach, storvsc_attach), DEVMETHOD(device_detach, storvsc_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD_END }; static driver_t storvsc_driver = { "storvsc", storvsc_methods, sizeof(struct storvsc_softc), }; static devclass_t storvsc_devclass; DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0); MODULE_VERSION(storvsc, 1); MODULE_DEPEND(storvsc, vmbus, 1, 1, 1); static void storvsc_subchan_attach(struct storvsc_softc *sc, struct vmbus_channel *new_channel) { struct vmstor_chan_props props; int ret = 0; memset(&props, 0, sizeof(props)); vmbus_chan_cpu_rr(new_channel); ret = vmbus_chan_open(new_channel, sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_ringbuffer_size, (void *)&props, sizeof(struct vmstor_chan_props), hv_storvsc_on_channel_callback, sc); } /** * @brief Send multi-channel creation request to host * * @param device a Hyper-V device pointer * @param max_chans the max channels supported by vmbus */ static void storvsc_send_multichannel_request(struct storvsc_softc *sc, int max_chans) { struct vmbus_channel **subchan; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; int request_channels_cnt = 0; int ret, i; /* get multichannels count that need to create */ request_channels_cnt = MIN(max_chans, mp_ncpus); request = &sc->hs_init_req; /* request the host to create multi-channel */ memset(request, 0, sizeof(struct hv_storvsc_request)); sema_init(&request->synch_sema, 0, ("stor_synch_sema")); vstor_packet = &request->vstor_packet; vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS; 
vstor_packet->flags = REQUEST_COMPLETION_FLAG; vstor_packet->u.multi_channels_cnt = request_channels_cnt; ret = vmbus_chan_send(sc->hs_chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); /* wait for 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) { printf("Storvsc_error: create multi-channel timeout, %d\n", ret); return; } if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { printf("Storvsc_error: create multi-channel invalid operation " "(%d) or statue (%u)\n", vstor_packet->operation, vstor_packet->status); return; } /* Wait for sub-channels setup to complete. */ subchan = vmbus_subchan_get(sc->hs_chan, request_channels_cnt); /* Attach the sub-channels. */ for (i = 0; i < request_channels_cnt; ++i) storvsc_subchan_attach(sc, subchan[i]); /* Release the sub-channels. */ vmbus_subchan_rel(subchan, request_channels_cnt); if (bootverbose) printf("Storvsc create multi-channel success!\n"); } /** * @brief initialize channel connection to parent partition * * @param dev a Hyper-V device pointer * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_channel_init(struct storvsc_softc *sc) { int ret = 0, i; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; uint16_t max_chans = 0; boolean_t support_multichannel = FALSE; uint32_t version; max_chans = 0; support_multichannel = FALSE; request = &sc->hs_init_req; memset(request, 0, sizeof(struct hv_storvsc_request)); vstor_packet = &request->vstor_packet; request->softc = sc; /** * Initiate the vsc/vsp initialization protocol on the open channel */ sema_init(&request->synch_sema, 0, ("stor_synch_sema")); vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = vmbus_chan_send(sc->hs_chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); if (ret != 0) goto cleanup; /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) goto cleanup; if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { goto cleanup; } for (i = 0; i < nitems(vmstor_proto_list); i++) { /* reuse the packet for version range supported */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; vstor_packet->u.version.major_minor = vmstor_proto_list[i].proto_version; /* revision is only significant for Windows guests */ vstor_packet->u.version.revision = 0; ret = vmbus_chan_send(sc->hs_chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); if (ret != 0) goto cleanup; /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret) goto cleanup; if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) { ret = EINVAL; goto cleanup; } if (vstor_packet->status == 0) { vmstor_proto_version = vmstor_proto_list[i].proto_version; sense_buffer_size = vmstor_proto_list[i].sense_buffer_size; vmscsi_size_delta = vmstor_proto_list[i].vmscsi_size_delta; break; } } if (vstor_packet->status != 0) { ret = EINVAL; goto cleanup; } /** * Query channel properties */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = vmbus_chan_send(sc->hs_chan, 
VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); if ( ret != 0) goto cleanup; /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) goto cleanup; /* TODO: Check returned version */ if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { goto cleanup; } /* multi-channels feature is supported by WIN8 and above version */ max_chans = vstor_packet->u.chan_props.max_channel_cnt; version = VMBUS_GET_VERSION(device_get_parent(sc->hs_dev), sc->hs_dev); if (version != VMBUS_VERSION_WIN7 && version != VMBUS_VERSION_WS2008 && (vstor_packet->u.chan_props.flags & HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) { support_multichannel = TRUE; } memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = vmbus_chan_send(sc->hs_chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); if (ret != 0) { goto cleanup; } /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) goto cleanup; if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) goto cleanup; /* * If multi-channel is supported, send multichannel create * request to host. */ if (support_multichannel) storvsc_send_multichannel_request(sc, max_chans); cleanup: sema_destroy(&request->synch_sema); return (ret); } /** * @brief Open channel connection to paraent partition StorVSP driver * * Open and initialize channel connection to parent partition StorVSP driver. * * @param pointer to a Hyper-V device * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_connect_vsp(struct storvsc_softc *sc) { int ret = 0; struct vmstor_chan_props props; memset(&props, 0, sizeof(struct vmstor_chan_props)); /* * Open the channel */ vmbus_chan_cpu_rr(sc->hs_chan); ret = vmbus_chan_open( sc->hs_chan, sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_ringbuffer_size, (void *)&props, sizeof(struct vmstor_chan_props), hv_storvsc_on_channel_callback, sc); if (ret != 0) { return ret; } ret = hv_storvsc_channel_init(sc); return (ret); } #if HVS_HOST_RESET static int hv_storvsc_host_reset(struct storvsc_softc *sc) { int ret = 0; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; request = &sc->hs_reset_req; request->softc = sc; vstor_packet = &request->vstor_packet; sema_init(&request->synch_sema, 0, "stor synch sema"); vstor_packet->operation = VSTOR_OPERATION_RESETBUS; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = vmbus_chan_send(dev->channel, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)&sc->hs_reset_req); if (ret != 0) { goto cleanup; } ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */ if (ret) { goto cleanup; } /* * At this point, all outstanding requests in the adapter * should have been flushed out and return to us */ cleanup: sema_destroy(&request->synch_sema); return (ret); } #endif /* HVS_HOST_RESET */ /** * @brief Function to initiate an I/O request * * @param device Hyper-V device pointer * @param request pointer to a request structure * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_io_request(struct storvsc_softc *sc, struct hv_storvsc_request *request) { struct vstor_packet *vstor_packet = &request->vstor_packet; struct vmbus_channel* outgoing_channel = NULL; 
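	/*
	 * The packet is queued on the channel that storvsc_create_cpu2chan()
	 * mapped to the current CPU, and the softc lock is dropped around the
	 * ring buffer write below.
	 */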
int ret = 0; vstor_packet->flags |= REQUEST_COMPLETION_FLAG; vstor_packet->u.vm_srb.length = VSTOR_PKT_SIZE; vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size; vstor_packet->u.vm_srb.transfer_len = request->prp_list.gpa_range.gpa_len; vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB; outgoing_channel = sc->hs_cpu2chan[curcpu]; mtx_unlock(&request->softc->hs_lock); if (request->prp_list.gpa_range.gpa_len) { ret = vmbus_chan_send_prplist(outgoing_channel, &request->prp_list.gpa_range, request->prp_cnt, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); } else { ret = vmbus_chan_send(outgoing_channel, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); } mtx_lock(&request->softc->hs_lock); if (ret != 0) { printf("Unable to send packet %p ret %d", vstor_packet, ret); } else { atomic_add_int(&sc->hs_num_out_reqs, 1); } return (ret); } /** * Process IO_COMPLETION_OPERATION and ready * the result to be completed for upper layer * processing by the CAM layer. */ static void hv_storvsc_on_iocompletion(struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request) { struct vmscsi_req *vm_srb; vm_srb = &vstor_packet->u.vm_srb; /* * Copy some fields of the host's response into the request structure, * because the fields will be used later in storvsc_io_done(). */ request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status; request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len; if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) && (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) { /* Autosense data available */ KASSERT(vm_srb->sense_info_len <= request->sense_info_len, ("vm_srb->sense_info_len <= " "request->sense_info_len")); memcpy(request->sense_data, vm_srb->u.sense_data, vm_srb->sense_info_len); request->sense_info_len = vm_srb->sense_info_len; } /* Complete request by passing to the CAM layer */ storvsc_io_done(request); atomic_subtract_int(&sc->hs_num_out_reqs, 1); if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) { sema_post(&sc->hs_drain_sema); } } static void hv_storvsc_rescan_target(struct storvsc_softc *sc) { path_id_t pathid; target_id_t targetid; union ccb *ccb; pathid = cam_sim_path(sc->hs_sim); targetid = CAM_TARGET_WILDCARD; /* * Allocate a CCB and schedule a rescan. 
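	 * The wildcard target id is used, so the request below turns into an
	 * XPT_SCAN_BUS of the whole bus rather than a single-target scan.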
*/ ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { printf("unable to alloc CCB for rescan\n"); return; } if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { printf("unable to create path for rescan, pathid: %u," "targetid: %u\n", pathid, targetid); xpt_free_ccb(ccb); return; } if (targetid == CAM_TARGET_WILDCARD) ccb->ccb_h.func_code = XPT_SCAN_BUS; else ccb->ccb_h.func_code = XPT_SCAN_TGT; xpt_rescan(ccb); } static void hv_storvsc_on_channel_callback(struct vmbus_channel *channel, void *xsc) { int ret = 0; struct storvsc_softc *sc = xsc; uint32_t bytes_recvd; uint64_t request_id; uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)]; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8); ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id); KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough")); /* XXX check bytes_recvd to make sure that it contains enough data */ while ((ret == 0) && (bytes_recvd > 0)) { request = (struct hv_storvsc_request *)(uintptr_t)request_id; if ((request == &sc->hs_init_req) || (request == &sc->hs_reset_req)) { memcpy(&request->vstor_packet, packet, sizeof(struct vstor_packet)); sema_post(&request->synch_sema); } else { vstor_packet = (struct vstor_packet *)packet; switch(vstor_packet->operation) { case VSTOR_OPERATION_COMPLETEIO: if (request == NULL) panic("VMBUS: storvsc received a " "packet with NULL request id in " "COMPLETEIO operation."); hv_storvsc_on_iocompletion(sc, vstor_packet, request); break; case VSTOR_OPERATION_REMOVEDEVICE: printf("VMBUS: storvsc operation %d not " "implemented.\n", vstor_packet->operation); /* TODO: implement */ break; case VSTOR_OPERATION_ENUMERATE_BUS: hv_storvsc_rescan_target(sc); break; default: break; } } bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8), ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id); KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough")); /* * XXX check bytes_recvd to make sure that it contains * enough data */ } } /** * @brief StorVSC probe function * * Device probe function. Returns 0 if the input device is a StorVSC * device. Otherwise, a ENXIO is returned. If the input device is * for BlkVSC (paravirtual IDE) device and this support is disabled in * favor of the emulated ATA/IDE device, return ENXIO. 
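 * For BlkVSC devices the hw.ata.disk_enable tunable selects the emulated
 * ATA/IDE path; when the tunable is not set, the enlightened driver claims
 * the device.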
* * @param a device * @returns 0 on success, ENXIO if not a matcing StorVSC device */ static int storvsc_probe(device_t dev) { int ata_disk_enable = 0; int ret = ENXIO; switch (storvsc_get_storage_type(dev)) { case DRIVER_BLKVSC: if(bootverbose) device_printf(dev, "DRIVER_BLKVSC-Emulated ATA/IDE probe\n"); if (!getenv_int("hw.ata.disk_enable", &ata_disk_enable)) { if(bootverbose) device_printf(dev, "Enlightened ATA/IDE detected\n"); device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc); ret = BUS_PROBE_DEFAULT; } else if(bootverbose) device_printf(dev, "Emulated ATA/IDE set (hw.ata.disk_enable set)\n"); break; case DRIVER_STORVSC: if(bootverbose) device_printf(dev, "Enlightened SCSI device detected\n"); device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc); ret = BUS_PROBE_DEFAULT; break; default: ret = ENXIO; } return (ret); } static void storvsc_create_cpu2chan(struct storvsc_softc *sc) { int cpu; CPU_FOREACH(cpu) { sc->hs_cpu2chan[cpu] = vmbus_chan_cpu2chan(sc->hs_chan, cpu); if (bootverbose) { device_printf(sc->hs_dev, "cpu%d -> chan%u\n", cpu, vmbus_chan_id(sc->hs_cpu2chan[cpu])); } } } static int storvsc_init_requests(device_t dev) { struct storvsc_softc *sc = device_get_softc(dev); struct hv_storvsc_request *reqp; int error, i; LIST_INIT(&sc->hs_free_list); error = bus_dma_tag_create( bus_get_dma_tag(dev), /* parent */ 1, /* alignment */ PAGE_SIZE, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ STORVSC_DATA_SIZE_MAX, /* maxsize */ STORVSC_DATA_SEGCNT_MAX, /* nsegments */ STORVSC_DATA_SEGSZ_MAX, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &sc->storvsc_req_dtag); if (error) { device_printf(dev, "failed to create storvsc dma tag\n"); return (error); } for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) { reqp = malloc(sizeof(struct hv_storvsc_request), M_DEVBUF, M_WAITOK|M_ZERO); reqp->softc = sc; error = bus_dmamap_create(sc->storvsc_req_dtag, 0, &reqp->data_dmap); if (error) { device_printf(dev, "failed to allocate storvsc " "data dmamap\n"); goto cleanup; } LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } return (0); cleanup: while ((reqp = LIST_FIRST(&sc->hs_free_list)) != NULL) { LIST_REMOVE(reqp, link); bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap); free(reqp, M_DEVBUF); } return (error); } static void storvsc_sysctl(device_t dev) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; struct storvsc_softc *sc; sc = device_get_softc(dev); ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_bio_cnt", CTLFLAG_RW, &sc->sysctl_data.data_bio_cnt, "# of bio data block"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_vaddr_cnt", CTLFLAG_RW, &sc->sysctl_data.data_vaddr_cnt, "# of vaddr data block"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_sg_cnt", CTLFLAG_RW, &sc->sysctl_data.data_sg_cnt, "# of sg data block"); } /** * @brief StorVSC attach function * * Function responsible for allocating per-device structures, * setting up CAM interfaces and scanning for available LUNs to * be used for SCSI device peripherals. 
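 * A root mount hold is taken for the duration of the attach so that the
 * root file system is not mounted before this adapter has registered its
 * SCSI bus.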
* * @param a device * @returns 0 on success or an error on failure */ static int storvsc_attach(device_t dev) { enum hv_storage_type stor_type; struct storvsc_softc *sc; struct cam_devq *devq; int ret, i, j; struct hv_storvsc_request *reqp; struct root_hold_token *root_mount_token = NULL; struct hv_sgl_node *sgl_node = NULL; void *tmp_buff = NULL; /* * We need to serialize storvsc attach calls. */ root_mount_token = root_mount_hold("storvsc"); sc = device_get_softc(dev); sc->hs_chan = vmbus_get_channel(dev); stor_type = storvsc_get_storage_type(dev); if (stor_type == DRIVER_UNKNOWN) { ret = ENODEV; goto cleanup; } /* fill in driver specific properties */ sc->hs_drv_props = &g_drv_props_table[stor_type]; /* fill in device specific properties */ sc->hs_unit = device_get_unit(dev); sc->hs_dev = dev; mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF); ret = storvsc_init_requests(dev); if (ret != 0) goto cleanup; /* create sg-list page pool */ if (FALSE == g_hv_sgl_page_pool.is_init) { g_hv_sgl_page_pool.is_init = TRUE; LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list); LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list); /* * Pre-create SG list, each SG list with * STORVSC_DATA_SEGCNT_MAX segments, each * segment has one page buffer */ for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) { sgl_node = malloc(sizeof(struct hv_sgl_node), M_DEVBUF, M_WAITOK|M_ZERO); sgl_node->sgl_data = sglist_alloc(STORVSC_DATA_SEGCNT_MAX, M_WAITOK|M_ZERO); for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) { tmp_buff = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK|M_ZERO); sgl_node->sgl_data->sg_segs[j].ss_paddr = (vm_paddr_t)tmp_buff; } LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); } } sc->hs_destroy = FALSE; sc->hs_drain_notify = FALSE; sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema"); ret = hv_storvsc_connect_vsp(sc); if (ret != 0) { goto cleanup; } /* Construct cpu to channel mapping */ storvsc_create_cpu2chan(sc); /* * Create the device queue. * Hyper-V maps each target to one SCSI HBA */ devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target); if (devq == NULL) { device_printf(dev, "Failed to alloc device queue\n"); ret = ENOMEM; goto cleanup; } sc->hs_sim = cam_sim_alloc(storvsc_action, storvsc_poll, sc->hs_drv_props->drv_name, sc, sc->hs_unit, &sc->hs_lock, 1, sc->hs_drv_props->drv_max_ios_per_target, devq); if (sc->hs_sim == NULL) { device_printf(dev, "Failed to alloc sim\n"); cam_simq_free(devq); ret = ENOMEM; goto cleanup; } mtx_lock(&sc->hs_lock); /* bus_id is set to 0, need to get it from VMBUS channel query? 
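	 * Each storvsc instance currently registers a single CAM bus, so a
	 * fixed bus id of 0 is adequate until the channel properties provide
	 * something better.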
*/ if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) { cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to register SCSI bus\n"); ret = ENXIO; goto cleanup; } if (xpt_create_path(&sc->hs_path, /*periph*/NULL, cam_sim_path(sc->hs_sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_bus_deregister(cam_sim_path(sc->hs_sim)); cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to create path\n"); ret = ENXIO; goto cleanup; } mtx_unlock(&sc->hs_lock); storvsc_sysctl(dev); root_mount_rel(root_mount_token); return (0); cleanup: root_mount_rel(root_mount_token); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap); free(reqp, M_DEVBUF); } while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) { if (NULL != (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); } } sglist_free(sgl_node->sgl_data); free(sgl_node, M_DEVBUF); } return (ret); } /** * @brief StorVSC device detach function * * This function is responsible for safely detaching a * StorVSC device. This includes waiting for inbound responses * to complete and freeing associated per-device structures. * * @param dev a device * returns 0 on success */ static int storvsc_detach(device_t dev) { struct storvsc_softc *sc = device_get_softc(dev); struct hv_storvsc_request *reqp = NULL; struct hv_sgl_node *sgl_node = NULL; int j = 0; sc->hs_destroy = TRUE; /* * At this point, all outbound traffic should be disabled. We * only allow inbound traffic (responses) to proceed so that * outstanding requests can be completed. */ sc->hs_drain_notify = TRUE; sema_wait(&sc->hs_drain_sema); sc->hs_drain_notify = FALSE; /* * Since we have already drained, we don't need to busy wait. * The call to close the channel will reset the callback * under the protection of the incoming channel lock. */ vmbus_chan_close(sc->hs_chan); mtx_lock(&sc->hs_lock); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap); free(reqp, M_DEVBUF); } mtx_unlock(&sc->hs_lock); while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++){ if (NULL != (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); } } sglist_free(sgl_node->sgl_data); free(sgl_node, M_DEVBUF); } return (0); } #if HVS_TIMEOUT_TEST /** * @brief unit test for timed out operations * * This function provides unit testing capability to simulate * timed out operations. Recompilation with HV_TIMEOUT_TEST=1 * is required. 
* * @param reqp pointer to a request structure * @param opcode SCSI operation being performed * @param wait if 1, wait for I/O to complete */ static void storvsc_timeout_test(struct hv_storvsc_request *reqp, uint8_t opcode, int wait) { int ret; union ccb *ccb = reqp->ccb; struct storvsc_softc *sc = reqp->softc; if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) { return; } if (wait) { mtx_lock(&reqp->event.mtx); } ret = hv_storvsc_io_request(sc, reqp); if (ret != 0) { if (wait) { mtx_unlock(&reqp->event.mtx); } printf("%s: io_request failed with %d.\n", __func__, ret); ccb->ccb_h.status = CAM_PROVIDE_FAIL; mtx_lock(&sc->hs_lock); storvsc_free_request(sc, reqp); xpt_done(ccb); mtx_unlock(&sc->hs_lock); return; } if (wait) { xpt_print(ccb->ccb_h.path, "%u: %s: waiting for IO return.\n", ticks, __func__); ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz); mtx_unlock(&reqp->event.mtx); xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n", ticks, __func__, (ret == 0)? "IO return detected" : "IO return not detected"); /* * Now both the timer handler and io done are running * simultaneously. We want to confirm the io done always * finishes after the timer handler exits. So reqp used by * timer handler is not freed or stale. Do busy loop for * another 1/10 second to make sure io done does * wait for the timer handler to complete. */ DELAY(100*1000); mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: %s: finishing, queue frozen %d, " "ccb status 0x%x scsi_status 0x%x.\n", ticks, __func__, sc->hs_frozen, ccb->ccb_h.status, ccb->csio.scsi_status); mtx_unlock(&sc->hs_lock); } } #endif /* HVS_TIMEOUT_TEST */ #ifdef notyet /** * @brief timeout handler for requests * * This function is called as a result of a callout expiring. * * @param arg pointer to a request */ static void storvsc_timeout(void *arg) { struct hv_storvsc_request *reqp = arg; struct storvsc_softc *sc = reqp->softc; union ccb *ccb = reqp->ccb; if (reqp->retries == 0) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO timed out (req=0x%p), wait for another %u secs.\n", ticks, reqp, ccb->ccb_h.timeout / 1000); cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL); mtx_unlock(&sc->hs_lock); reqp->retries++; callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0); #endif return; } mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n", ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000, (sc->hs_frozen == 0)? "freezing the queue" : "the queue is already frozen"); if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1); } mtx_unlock(&sc->hs_lock); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, MODE_SELECT_10, 1); #endif } #endif /** * @brief StorVSC device poll function * * This function is responsible for servicing requests when * interrupts are disabled (i.e when we are dumping core.) * * @param sim a pointer to a CAM SCSI interface module */ static void storvsc_poll(struct cam_sim *sim) { struct storvsc_softc *sc = cam_sim_softc(sim); mtx_assert(&sc->hs_lock, MA_OWNED); mtx_unlock(&sc->hs_lock); hv_storvsc_on_channel_callback(sc->hs_chan, sc); mtx_lock(&sc->hs_lock); } /** * @brief StorVSC device action function * * This function is responsible for handling SCSI operations which * are passed from the CAM layer. The requests are in the form of * CAM control blocks which indicate the action being performed. 
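 * The requested action is identified by ccb->ccb_h.func_code.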
* Not all actions require converting the request to a VSCSI protocol * message - these actions can be responded to by this driver. * Requests which are destined for a backend storage device are converted * to a VSCSI protocol message and sent on the channel connection associated * with this device. * * @param sim pointer to a CAM SCSI interface module * @param ccb pointer to a CAM control block */ static void storvsc_action(struct cam_sim *sim, union ccb *ccb) { struct storvsc_softc *sc = cam_sim_softc(sim); int res; mtx_assert(&sc->hs_lock, MA_OWNED); switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE; cpi->target_sprt = 0; cpi->hba_misc = PIM_NOBUSRESET; if (hv_storvsc_use_pim_unmapped) cpi->hba_misc |= PIM_UNMAPPED; + cpi->maxio = STORVSC_DATA_SIZE_MAX; cpi->hba_eng_cnt = 0; cpi->max_target = STORVSC_MAX_TARGETS; cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target; cpi->initiator_id = cpi->max_target; cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 300000; cpi->transport = XPORT_SAS; cpi->transport_version = 0; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_SPC2; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; cts->transport = XPORT_SAS; cts->transport_version = 0; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_SPC2; /* enable tag queuing and disconnected mode */ cts->proto_specific.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB; cts->xport_specific.valid = CTS_SPI_VALID_DISC; cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB; ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_SET_TRAN_SETTINGS: { ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_CALC_GEOMETRY:{ cam_calc_geometry(&ccb->ccg, 1); xpt_done(ccb); return; } case XPT_RESET_BUS: case XPT_RESET_DEV:{ #if HVS_HOST_RESET if ((res = hv_storvsc_host_reset(sc)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_host_reset failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; xpt_done(ccb); return; } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; #else xpt_print(ccb->ccb_h.path, "%s reset not supported.\n", (ccb->ccb_h.func_code == XPT_RESET_BUS)? 
"bus" : "dev"); ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; #endif /* HVS_HOST_RESET */ } case XPT_SCSI_IO: case XPT_IMMED_NOTIFY: { struct hv_storvsc_request *reqp = NULL; bus_dmamap_t dmap_saved; if (ccb->csio.cdb_len == 0) { panic("cdl_len is 0\n"); } if (LIST_EMPTY(&sc->hs_free_list)) { ccb->ccb_h.status = CAM_REQUEUE_REQ; if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(sim, /* count*/1); } xpt_done(ccb); return; } reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); /* Save the data_dmap before reset request */ dmap_saved = reqp->data_dmap; /* XXX this is ugly */ bzero(reqp, sizeof(struct hv_storvsc_request)); /* Restore necessary bits */ reqp->data_dmap = dmap_saved; reqp->softc = sc; ccb->ccb_h.status |= CAM_SIM_QUEUED; if ((res = create_storvsc_request(ccb, reqp)) != 0) { ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; } #ifdef notyet if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_init(&reqp->callout, 1); callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST cv_init(&reqp->event.cv, "storvsc timeout cv"); mtx_init(&reqp->event.mtx, "storvsc timeout mutex", NULL, MTX_DEF); switch (reqp->vstor_packet.vm_srb.cdb[0]) { case MODE_SELECT_10: case SEND_DIAGNOSTIC: /* To have timer send the request. */ return; default: break; } #endif /* HVS_TIMEOUT_TEST */ } #endif if ((res = hv_storvsc_io_request(sc, reqp)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_io_request failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; storvsc_free_request(sc, reqp); xpt_done(ccb); return; } return; } default: ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; } } /** * @brief destroy bounce buffer * * This function is responsible for destroy a Scatter/Gather list * that create by storvsc_create_bounce_buffer() * * @param sgl- the Scatter/Gather need be destroy * @param sg_count- page count of the SG list. * */ static void storvsc_destroy_bounce_buffer(struct sglist *sgl) { struct hv_sgl_node *sgl_node = NULL; if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) { printf("storvsc error: not enough in use sgl\n"); return; } sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list); LIST_REMOVE(sgl_node, link); sgl_node->sgl_data = sgl; LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); } /** * @brief create bounce buffer * * This function is responsible for create a Scatter/Gather list, * which hold several pages that can be aligned with page size. * * @param seg_count- SG-list segments count * @param write - if WRITE_TYPE, set SG list page used size to 0, * otherwise set used size to page size. * * return NULL if create failed */ static struct sglist * storvsc_create_bounce_buffer(uint16_t seg_count, int write) { int i = 0; struct sglist *bounce_sgl = NULL; unsigned int buf_len = ((write == WRITE_TYPE) ? 
0 : PAGE_SIZE); struct hv_sgl_node *sgl_node = NULL; /* get struct sglist from free_sgl_list */ if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { printf("storvsc error: not enough free sgl\n"); return NULL; } sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); bounce_sgl = sgl_node->sgl_data; LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link); bounce_sgl->sg_maxseg = seg_count; if (write == WRITE_TYPE) bounce_sgl->sg_nseg = 0; else bounce_sgl->sg_nseg = seg_count; for (i = 0; i < seg_count; i++) bounce_sgl->sg_segs[i].ss_len = buf_len; return bounce_sgl; } /** * @brief copy data from SG list to bounce buffer * * This function is responsible for copy data from one SG list's segments * to another SG list which used as bounce buffer. * * @param bounce_sgl - the destination SG list * @param orig_sgl - the segment of the source SG list. * @param orig_sgl_count - the count of segments. * @param orig_sgl_count - indicate which segment need bounce buffer, * set 1 means need. * */ static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, bus_dma_segment_t *orig_sgl, unsigned int orig_sgl_count, uint64_t seg_bits) { int src_sgl_idx = 0; for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) { if (seg_bits & (1 << src_sgl_idx)) { memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr, (void*)orig_sgl[src_sgl_idx].ds_addr, orig_sgl[src_sgl_idx].ds_len); bounce_sgl->sg_segs[src_sgl_idx].ss_len = orig_sgl[src_sgl_idx].ds_len; } } } /** * @brief copy data from SG list which used as bounce to another SG list * * This function is responsible for copy data from one SG list with bounce * buffer to another SG list's segments. * * @param dest_sgl - the destination SG list's segments * @param dest_sgl_count - the count of destination SG list's segment. * @param src_sgl - the source SG list. * @param seg_bits - indicate which segment used bounce buffer of src SG-list. * */ void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, unsigned int dest_sgl_count, struct sglist* src_sgl, uint64_t seg_bits) { int sgl_idx = 0; for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) { if (seg_bits & (1 << sgl_idx)) { memcpy((void*)(dest_sgl[sgl_idx].ds_addr), (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr), src_sgl->sg_segs[sgl_idx].ss_len); } } } /** * @brief check SG list with bounce buffer or not * * This function is responsible for check if need bounce buffer for SG list. * * @param sgl - the SG list's segments * @param sg_count - the count of SG list's segment. 
* @param bits - segmengs number that need bounce buffer * * return -1 if SG list needless bounce buffer */ static int storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl, unsigned int sg_count, uint64_t *bits) { int i = 0; int offset = 0; uint64_t phys_addr = 0; uint64_t tmp_bits = 0; boolean_t found_hole = FALSE; boolean_t pre_aligned = TRUE; if (sg_count < 2){ return -1; } *bits = 0; phys_addr = vtophys(sgl[0].ds_addr); offset = phys_addr - trunc_page(phys_addr); if (offset != 0) { pre_aligned = FALSE; tmp_bits |= 1; } for (i = 1; i < sg_count; i++) { phys_addr = vtophys(sgl[i].ds_addr); offset = phys_addr - trunc_page(phys_addr); if (offset == 0) { if (FALSE == pre_aligned){ /* * This segment is aligned, if the previous * one is not aligned, find a hole */ found_hole = TRUE; } pre_aligned = TRUE; } else { tmp_bits |= 1 << i; if (!pre_aligned) { if (phys_addr != vtophys(sgl[i-1].ds_addr + sgl[i-1].ds_len)) { /* * Check whether connect to previous * segment,if not, find the hole */ found_hole = TRUE; } } else { found_hole = TRUE; } pre_aligned = FALSE; } } if (!found_hole) { return (-1); } else { *bits = tmp_bits; return 0; } } /** * Copy bus_dma segments to multiple page buffer, which requires * the pages are compact composed except for the 1st and last pages. */ static void storvsc_xferbuf_prepare(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct hv_storvsc_request *reqp = arg; union ccb *ccb = reqp->ccb; struct ccb_scsiio *csio = &ccb->csio; struct storvsc_gpa_range *prplist; int i; prplist = &reqp->prp_list; prplist->gpa_range.gpa_len = csio->dxfer_len; prplist->gpa_range.gpa_ofs = segs[0].ds_addr & PAGE_MASK; for (i = 0; i < nsegs; i++) { - prplist->gpa_page[i] = atop(segs[i].ds_addr); #ifdef INVARIANTS - if (i != 0 && i != nsegs - 1) { - KASSERT((segs[i].ds_addr & PAGE_MASK) == 0 && - segs[i].ds_len == PAGE_SIZE, ("not a full page")); + if (nsegs > 1) { + if (i == 0) { + KASSERT((segs[i].ds_addr & PAGE_MASK) + + segs[i].ds_len == PAGE_SIZE, + ("invalid 1st page, ofs 0x%jx, len %zu", + (uintmax_t)segs[i].ds_addr, + segs[i].ds_len)); + } else if (i == nsegs - 1) { + KASSERT((segs[i].ds_addr & PAGE_MASK) == 0, + ("invalid last page, ofs 0x%jx", + (uintmax_t)segs[i].ds_addr)); + } else { + KASSERT((segs[i].ds_addr & PAGE_MASK) == 0 && + segs[i].ds_len == PAGE_SIZE, + ("not a full page, ofs 0x%jx, len %zu", + (uintmax_t)segs[i].ds_addr, + segs[i].ds_len)); + } } #endif + prplist->gpa_page[i] = atop(segs[i].ds_addr); } reqp->prp_cnt = nsegs; } /** * @brief Fill in a request structure based on a CAM control block * * Fills in a request structure based on the contents of a CAM control * block. The request structure holds the payload information for * VSCSI protocol request. 
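 * CAM_DATA_BIO and CAM_DATA_VADDR payloads are mapped through
 * bus_dmamap_load_ccb(); CAM_DATA_SG payloads may additionally be staged
 * through the pre-allocated bounce page pool when their segments are not
 * page aligned.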
* * @param ccb pointer to a CAM contorl block * @param reqp pointer to a request structure */ static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp) { struct ccb_scsiio *csio = &ccb->csio; uint64_t phys_addr; uint32_t pfn; uint64_t not_aligned_seg_bits = 0; int error; /* refer to struct vmscsi_req for meanings of these two fields */ reqp->vstor_packet.u.vm_srb.port = cam_sim_unit(xpt_path_sim(ccb->ccb_h.path)); reqp->vstor_packet.u.vm_srb.path_id = cam_sim_bus(xpt_path_sim(ccb->ccb_h.path)); reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id; reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun; reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len; if(ccb->ccb_h.flags & CAM_CDB_POINTER) { memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr, csio->cdb_len); } else { memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes, csio->cdb_len); } switch (ccb->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_OUT: reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE; break; case CAM_DIR_IN: reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE; break; case CAM_DIR_NONE: reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE; break; default: reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE; break; } reqp->sense_data = &csio->sense_data; reqp->sense_info_len = csio->sense_len; reqp->ccb = ccb; if (0 == csio->dxfer_len) { return (0); } switch (ccb->ccb_h.flags & CAM_DATA_MASK) { case CAM_DATA_BIO: case CAM_DATA_VADDR: error = bus_dmamap_load_ccb(reqp->softc->storvsc_req_dtag, reqp->data_dmap, ccb, storvsc_xferbuf_prepare, reqp, BUS_DMA_NOWAIT); if (error) { xpt_print(ccb->ccb_h.path, "bus_dmamap_load_ccb failed: %d\n", error); return (error); } if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO) reqp->softc->sysctl_data.data_bio_cnt++; else reqp->softc->sysctl_data.data_vaddr_cnt++; break; case CAM_DATA_SG: { struct storvsc_gpa_range *prplist; int i = 0; int offset = 0; int ret; bus_dma_segment_t *storvsc_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr; u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt; prplist = &reqp->prp_list; prplist->gpa_range.gpa_len = csio->dxfer_len; printf("Storvsc: get SG I/O operation, %d\n", reqp->vstor_packet.u.vm_srb.data_in); if (storvsc_sg_count > STORVSC_DATA_SEGCNT_MAX){ printf("Storvsc: %d segments is too much, " "only support %d segments\n", storvsc_sg_count, STORVSC_DATA_SEGCNT_MAX); return (EINVAL); } /* * We create our own bounce buffer function currently. Idealy * we should use BUS_DMA(9) framework. But with current BUS_DMA * code there is no callback API to check the page alignment of * middle segments before busdma can decide if a bounce buffer * is needed for particular segment. There is callback, * "bus_dma_filter_t *filter", but the parrameters are not * sufficient for storvsc driver. * TODO: * Add page alignment check in BUS_DMA(9) callback. Once * this is complete, switch the following code to use * BUS_DMA(9) for storvsc bounce buffer support. 
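	 *
	 * Roughly, the code below does:
	 *
	 *	if (storvsc_check_bounce_buffer_sgl(sgl, cnt, &bits) != -1)
	 *		copy the flagged segments through the bounce page pool
	 *		and build the PRP list from the bounce pages;
	 *	else
	 *		build the PRP list directly from the original segments;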
*/ /* check if we need to create bounce buffer */ ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist, storvsc_sg_count, ¬_aligned_seg_bits); if (ret != -1) { reqp->bounce_sgl = storvsc_create_bounce_buffer(storvsc_sg_count, reqp->vstor_packet.u.vm_srb.data_in); if (NULL == reqp->bounce_sgl) { printf("Storvsc_error: " "create bounce buffer failed.\n"); return (ENOMEM); } reqp->bounce_sgl_count = storvsc_sg_count; reqp->not_aligned_seg_bits = not_aligned_seg_bits; /* * if it is write, we need copy the original data *to bounce buffer */ if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { storvsc_copy_sgl_to_bounce_buf( reqp->bounce_sgl, storvsc_sglist, storvsc_sg_count, reqp->not_aligned_seg_bits); } /* transfer virtual address to physical frame number */ if (reqp->not_aligned_seg_bits & 0x1){ phys_addr = vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr); }else{ phys_addr = vtophys(storvsc_sglist[0].ds_addr); } prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK; pfn = phys_addr >> PAGE_SHIFT; prplist->gpa_page[0] = pfn; for (i = 1; i < storvsc_sg_count; i++) { if (reqp->not_aligned_seg_bits & (1 << i)) { phys_addr = vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr); } else { phys_addr = vtophys(storvsc_sglist[i].ds_addr); } pfn = phys_addr >> PAGE_SHIFT; prplist->gpa_page[i] = pfn; } reqp->prp_cnt = i; } else { phys_addr = vtophys(storvsc_sglist[0].ds_addr); prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK; for (i = 0; i < storvsc_sg_count; i++) { phys_addr = vtophys(storvsc_sglist[i].ds_addr); pfn = phys_addr >> PAGE_SHIFT; prplist->gpa_page[i] = pfn; } reqp->prp_cnt = i; /* check the last segment cross boundary or not */ offset = phys_addr & PAGE_MASK; if (offset) { /* Add one more PRP entry */ phys_addr = vtophys(storvsc_sglist[i-1].ds_addr + PAGE_SIZE - offset); pfn = phys_addr >> PAGE_SHIFT; prplist->gpa_page[i] = pfn; reqp->prp_cnt++; } reqp->bounce_sgl_count = 0; } reqp->softc->sysctl_data.data_sg_cnt++; break; } default: printf("Unknow flags: %d\n", ccb->ccb_h.flags); return(EINVAL); } return(0); } /* * SCSI Inquiry checks qualifier and type. * If qualifier is 011b, means the device server is not capable * of supporting a peripheral device on this logical unit, and * the type should be set to 1Fh. * * Return 1 if it is valid, 0 otherwise. */ static inline int is_inquiry_valid(const struct scsi_inquiry_data *inq_data) { uint8_t type; if (SID_QUAL(inq_data) != SID_QUAL_LU_CONNECTED) { return (0); } type = SID_TYPE(inq_data); if (type == T_NODEVICE) { return (0); } return (1); } /** * @brief completion function before returning to CAM * * I/O process has been completed and the result needs * to be passed to the CAM layer. * Free resources related to this request. * * @param reqp pointer to a request structure */ static void storvsc_io_done(struct hv_storvsc_request *reqp) { union ccb *ccb = reqp->ccb; struct ccb_scsiio *csio = &ccb->csio; struct storvsc_softc *sc = reqp->softc; struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb; bus_dma_segment_t *ori_sglist = NULL; int ori_sg_count = 0; /* destroy bounce buffer if it is used */ if (reqp->bounce_sgl_count) { ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr; ori_sg_count = ccb->csio.sglist_cnt; /* * If it is READ operation, we should copy back the data * to original SG list. 
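	 * (Write data was already staged into the bounce pages by
	 * create_storvsc_request(), so only reads need the copy here.)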
*/ if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { storvsc_copy_from_bounce_buf_to_sgl(ori_sglist, ori_sg_count, reqp->bounce_sgl, reqp->not_aligned_seg_bits); } storvsc_destroy_bounce_buffer(reqp->bounce_sgl); reqp->bounce_sgl_count = 0; } if (reqp->retries > 0) { mtx_lock(&sc->hs_lock); #if HVS_TIMEOUT_TEST xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "waking up timer handler if any.\n", ticks); mtx_lock(&reqp->event.mtx); cv_signal(&reqp->event.cv); mtx_unlock(&reqp->event.mtx); #endif reqp->retries = 0; xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "stopping timer if any.\n", ticks); mtx_unlock(&sc->hs_lock); } #ifdef notyet /* * callout_drain() will wait for the timer handler to finish * if it is running. So we don't need any lock to synchronize * between this routine and the timer handler. * Note that we need to make sure reqp is not freed when timer * handler is using or will use it. */ if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_drain(&reqp->callout); } #endif ccb->ccb_h.status &= ~CAM_SIM_QUEUED; ccb->ccb_h.status &= ~CAM_STATUS_MASK; if (vm_srb->scsi_status == SCSI_STATUS_OK) { const struct scsi_generic *cmd; /* * Check whether the data for INQUIRY cmd is valid or * not. Windows 10 and Windows 2016 send all zero * inquiry data to VM even for unpopulated slots. */ cmd = (const struct scsi_generic *) ((ccb->ccb_h.flags & CAM_CDB_POINTER) ? csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes); if (cmd->opcode == INQUIRY) { /* * The host of Windows 10 or 2016 server will response * the inquiry request with invalid data for unexisted device: [0x7f 0x0 0x5 0x2 0x1f ... ] * But on windows 2012 R2, the response is: [0x7f 0x0 0x0 0x0 0x0 ] * That is why here wants to validate the inquiry response. * The validation will skip the INQUIRY whose response is short, * which is less than SHORT_INQUIRY_LENGTH (36). * * For more information about INQUIRY, please refer to: * ftp://ftp.avc-pioneer.com/Mtfuji_7/Proposal/Jun09/INQUIRY.pdf */ const struct scsi_inquiry_data *inq_data = (const struct scsi_inquiry_data *)csio->data_ptr; uint8_t* resp_buf = (uint8_t*)csio->data_ptr; /* Get the buffer length reported by host */ int resp_xfer_len = vm_srb->transfer_len; /* Get the available buffer length */ int resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0; int data_len = (resp_buf_len < resp_xfer_len) ? 
resp_buf_len : resp_xfer_len; if (data_len < SHORT_INQUIRY_LENGTH) { ccb->ccb_h.status |= CAM_REQ_CMP; if (bootverbose && data_len >= 5) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc skips the validation for short inquiry (%d)" " [%x %x %x %x %x]\n", data_len,resp_buf[0],resp_buf[1],resp_buf[2], resp_buf[3],resp_buf[4]); mtx_unlock(&sc->hs_lock); } } else if (is_inquiry_valid(inq_data) == 0) { ccb->ccb_h.status |= CAM_DEV_NOT_THERE; if (bootverbose && data_len >= 5) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc uninstalled invalid device" " [%x %x %x %x %x]\n", resp_buf[0],resp_buf[1],resp_buf[2],resp_buf[3],resp_buf[4]); mtx_unlock(&sc->hs_lock); } } else { ccb->ccb_h.status |= CAM_REQ_CMP; if (bootverbose) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc has passed inquiry response (%d) validation\n", data_len); mtx_unlock(&sc->hs_lock); } } } else { ccb->ccb_h.status |= CAM_REQ_CMP; } } else { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc scsi_status = %d\n", vm_srb->scsi_status); mtx_unlock(&sc->hs_lock); ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; } ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF); ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len; if (reqp->sense_info_len != 0) { csio->sense_resid = csio->sense_len - reqp->sense_info_len; ccb->ccb_h.status |= CAM_AUTOSNS_VALID; } mtx_lock(&sc->hs_lock); if (reqp->softc->hs_frozen == 1) { xpt_print(ccb->ccb_h.path, "%u: storvsc unfreezing softc 0x%p.\n", ticks, reqp->softc); ccb->ccb_h.status |= CAM_RELEASE_SIMQ; reqp->softc->hs_frozen = 0; } storvsc_free_request(sc, reqp); mtx_unlock(&sc->hs_lock); xpt_done_direct(ccb); } /** * @brief Free a request structure * * Free a request structure by returning it to the free list * * @param sc pointer to a softc * @param reqp pointer to a request structure */ static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp) { LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } /** * @brief Determine type of storage device from GUID * * Using the type GUID, determine if this is a StorVSC (paravirtual * SCSI or BlkVSC (paravirtual IDE) device. * * @param dev a device * returns an enum */ static enum hv_storage_type storvsc_get_storage_type(device_t dev) { device_t parent = device_get_parent(dev); if (VMBUS_PROBE_GUID(parent, dev, &gBlkVscDeviceType) == 0) return DRIVER_BLKVSC; if (VMBUS_PROBE_GUID(parent, dev, &gStorVscDeviceType) == 0) return DRIVER_STORVSC; return DRIVER_UNKNOWN; } Index: user/alc/PQ_LAUNDRY/sys/dev/hyperv/vmbus/vmbus_brvar.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/hyperv/vmbus/vmbus_brvar.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/hyperv/vmbus/vmbus_brvar.h (revision 303642) @@ -1,93 +1,100 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMBUS_BRVAR_H_ #define _VMBUS_BRVAR_H_ #include #include #include #include struct vmbus_br { struct vmbus_bufring *vbr; uint32_t vbr_dsize; /* total data size */ }; #define vbr_windex vbr->br_windex #define vbr_rindex vbr->br_rindex #define vbr_imask vbr->br_imask #define vbr_data vbr->br_data struct vmbus_rxbr { struct mtx rxbr_lock; struct vmbus_br rxbr; }; #define rxbr_windex rxbr.vbr_windex #define rxbr_rindex rxbr.vbr_rindex #define rxbr_imask rxbr.vbr_imask #define rxbr_data rxbr.vbr_data #define rxbr_dsize rxbr.vbr_dsize struct vmbus_txbr { struct mtx txbr_lock; struct vmbus_br txbr; }; #define txbr_windex txbr.vbr_windex #define txbr_rindex txbr.vbr_rindex #define txbr_imask txbr.vbr_imask #define txbr_data txbr.vbr_data #define txbr_dsize txbr.vbr_dsize struct sysctl_ctx_list; struct sysctl_oid; +static __inline int +vmbus_txbr_maxpktsz(const struct vmbus_txbr *tbr) +{ + /* 1/2 data size */ + return (tbr->txbr_dsize / 2); +} + void vmbus_br_sysctl_create(struct sysctl_ctx_list *ctx, struct sysctl_oid *br_tree, struct vmbus_br *br, const char *name); void vmbus_rxbr_init(struct vmbus_rxbr *rbr); void vmbus_rxbr_deinit(struct vmbus_rxbr *rbr); void vmbus_rxbr_setup(struct vmbus_rxbr *rbr, void *buf, int blen); int vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen); int vmbus_rxbr_read(struct vmbus_rxbr *rbr, void *data, int dlen, uint32_t skip); void vmbus_rxbr_intr_mask(struct vmbus_rxbr *rbr); uint32_t vmbus_rxbr_intr_unmask(struct vmbus_rxbr *rbr); void vmbus_txbr_init(struct vmbus_txbr *tbr); void vmbus_txbr_deinit(struct vmbus_txbr *tbr); void vmbus_txbr_setup(struct vmbus_txbr *tbr, void *buf, int blen); int vmbus_txbr_write(struct vmbus_txbr *tbr, const struct iovec iov[], int iovlen, boolean_t *need_sig); #endif /* _VMBUS_BRVAR_H_ */ Index: user/alc/PQ_LAUNDRY/sys/dev/hyperv/vmbus/vmbus_chan.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/hyperv/vmbus/vmbus_chan.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/hyperv/vmbus/vmbus_chan.c (revision 303642) @@ -1,1413 +1,1413 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void vmbus_chan_update_evtflagcnt( struct vmbus_softc *, const struct vmbus_channel *); static void vmbus_chan_close_internal( struct vmbus_channel *); static int vmbus_chan_sysctl_mnf(SYSCTL_HANDLER_ARGS); static void vmbus_chan_sysctl_create( struct vmbus_channel *); static struct vmbus_channel *vmbus_chan_alloc(struct vmbus_softc *); static void vmbus_chan_free(struct vmbus_channel *); static int vmbus_chan_add(struct vmbus_channel *); static void vmbus_chan_cpu_default(struct vmbus_channel *); static void vmbus_chan_task(void *, int); static void vmbus_chan_task_nobatch(void *, int); static void vmbus_chan_detach_task(void *, int); static void vmbus_chan_msgproc_choffer(struct vmbus_softc *, const struct vmbus_message *); static void vmbus_chan_msgproc_chrescind( struct vmbus_softc *, const struct vmbus_message *); /* * Vmbus channel message processing. */ static const vmbus_chanmsg_proc_t vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = { VMBUS_CHANMSG_PROC(CHOFFER, vmbus_chan_msgproc_choffer), VMBUS_CHANMSG_PROC(CHRESCIND, vmbus_chan_msgproc_chrescind), VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP), VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP), VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP) }; /* * Notify host that there are data pending on our TX bufring. */ static __inline void vmbus_chan_signal_tx(const struct vmbus_channel *chan) { atomic_set_long(chan->ch_evtflag, chan->ch_evtflag_mask); if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF) atomic_set_int(chan->ch_montrig, chan->ch_montrig_mask); else hypercall_signal_event(chan->ch_monprm_dma.hv_paddr); } static int vmbus_chan_sysctl_mnf(SYSCTL_HANDLER_ARGS) { struct vmbus_channel *chan = arg1; int mnf = 0; if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF) mnf = 1; return sysctl_handle_int(oidp, &mnf, 0, req); } static void vmbus_chan_sysctl_create(struct vmbus_channel *chan) { struct sysctl_oid *ch_tree, *chid_tree, *br_tree; struct sysctl_ctx_list *ctx; uint32_t ch_id; char name[16]; /* * Add sysctl nodes related to this channel to this * channel's sysctl ctx, so that they can be destroyed * independently upon close of this channel, which can * happen even if the device is not detached. */ ctx = &chan->ch_sysctl_ctx; sysctl_ctx_init(ctx); /* * Create dev.NAME.UNIT.channel tree. */ ch_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(chan->ch_dev)), OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (ch_tree == NULL) return; /* * Create dev.NAME.UNIT.channel.CHANID tree. 
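	 * For a sub-channel the id of its primary channel is used here; a
	 * .sub.SUBIDX level naming the sub-channel is added further below.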
*/ if (VMBUS_CHAN_ISPRIMARY(chan)) ch_id = chan->ch_id; else ch_id = chan->ch_prichan->ch_id; snprintf(name, sizeof(name), "%d", ch_id); chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (chid_tree == NULL) return; if (!VMBUS_CHAN_ISPRIMARY(chan)) { /* * Create dev.NAME.UNIT.channel.CHANID.sub tree. */ ch_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (ch_tree == NULL) return; /* * Create dev.NAME.UNIT.channel.CHANID.sub.SUBIDX tree. * * NOTE: * chid_tree is changed to this new sysctl tree. */ snprintf(name, sizeof(name), "%d", chan->ch_subidx); chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (chid_tree == NULL) return; SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "chanid", CTLFLAG_RD, &chan->ch_id, 0, "channel id"); } SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "cpu", CTLFLAG_RD, &chan->ch_cpuid, 0, "owner CPU id"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "mnf", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, chan, 0, vmbus_chan_sysctl_mnf, "I", "has monitor notification facilities"); br_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "br", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (br_tree != NULL) { /* * Create sysctl tree for RX bufring. */ vmbus_br_sysctl_create(ctx, br_tree, &chan->ch_rxbr.rxbr, "rx"); /* * Create sysctl tree for TX bufring. */ vmbus_br_sysctl_create(ctx, br_tree, &chan->ch_txbr.txbr, "tx"); } } int vmbus_chan_open(struct vmbus_channel *chan, int txbr_size, int rxbr_size, const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg) { struct vmbus_softc *sc = chan->ch_vmbus; const struct vmbus_chanmsg_chopen_resp *resp; const struct vmbus_message *msg; struct vmbus_chanmsg_chopen *req; struct vmbus_msghc *mh; uint32_t status; int error; uint8_t *br; if (udlen > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) { device_printf(sc->vmbus_dev, "invalid udata len %d for chan%u\n", udlen, chan->ch_id); return EINVAL; } KASSERT((txbr_size & PAGE_MASK) == 0, ("send bufring size is not multiple page")); KASSERT((rxbr_size & PAGE_MASK) == 0, ("recv bufring size is not multiple page")); if (atomic_testandset_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED_SHIFT)) panic("double-open chan%u", chan->ch_id); chan->ch_cb = cb; chan->ch_cbarg = cbarg; vmbus_chan_update_evtflagcnt(sc, chan); chan->ch_tq = VMBUS_PCPU_GET(chan->ch_vmbus, event_tq, chan->ch_cpuid); if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) TASK_INIT(&chan->ch_task, 0, vmbus_chan_task, chan); else TASK_INIT(&chan->ch_task, 0, vmbus_chan_task_nobatch, chan); /* * Allocate the TX+RX bufrings. * XXX should use ch_dev dtag */ br = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev), PAGE_SIZE, 0, txbr_size + rxbr_size, &chan->ch_bufring_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (br == NULL) { device_printf(sc->vmbus_dev, "bufring allocation failed\n"); error = ENOMEM; goto failed; } chan->ch_bufring = br; /* TX bufring comes first */ vmbus_txbr_setup(&chan->ch_txbr, br, txbr_size); /* RX bufring immediately follows TX bufring */ vmbus_rxbr_setup(&chan->ch_rxbr, br + txbr_size, rxbr_size); /* Create sysctl tree for this channel */ vmbus_chan_sysctl_create(chan); /* * Connect the bufrings, both RX and TX, to this channel. 
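	 * A single GPADL covers both rings, since they were carved out of one
	 * contiguous DMA allocation above (TX bufring first, RX bufring
	 * immediately after it).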
*/ error = vmbus_chan_gpadl_connect(chan, chan->ch_bufring_dma.hv_paddr, txbr_size + rxbr_size, &chan->ch_bufring_gpadl); if (error) { device_printf(sc->vmbus_dev, "failed to connect bufring GPADL to chan%u\n", chan->ch_id); goto failed; } /* * Open channel w/ the bufring GPADL on the target CPU. */ mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for chopen(chan%u)\n", chan->ch_id); error = ENXIO; goto failed; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN; req->chm_chanid = chan->ch_id; req->chm_openid = chan->ch_id; req->chm_gpadl = chan->ch_bufring_gpadl; req->chm_vcpuid = chan->ch_vcpuid; req->chm_txbr_pgcnt = txbr_size >> PAGE_SHIFT; if (udlen > 0) memcpy(req->chm_udata, udata, udlen); error = vmbus_msghc_exec(sc, mh); if (error) { device_printf(sc->vmbus_dev, "chopen(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); goto failed; } msg = vmbus_msghc_wait_result(sc, mh); resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data; status = resp->chm_status; vmbus_msghc_put(sc, mh); if (status == 0) { if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u opened\n", chan->ch_id); } return 0; } device_printf(sc->vmbus_dev, "failed to open chan%u\n", chan->ch_id); error = ENXIO; failed: if (chan->ch_bufring_gpadl) { vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl); chan->ch_bufring_gpadl = 0; } if (chan->ch_bufring != NULL) { hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring); chan->ch_bufring = NULL; } atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED); return error; } int vmbus_chan_gpadl_connect(struct vmbus_channel *chan, bus_addr_t paddr, int size, uint32_t *gpadl0) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_gpadl_conn *req; const struct vmbus_message *msg; size_t reqsz; uint32_t gpadl, status; int page_count, range_len, i, cnt, error; uint64_t page_id; /* * Preliminary checks. */ KASSERT((size & PAGE_MASK) == 0, ("invalid GPA size %d, not multiple page size", size)); page_count = size >> PAGE_SHIFT; KASSERT((paddr & PAGE_MASK) == 0, ("GPA is not page aligned %jx", (uintmax_t)paddr)); page_id = paddr >> PAGE_SHIFT; range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]); /* * We don't support multiple GPA ranges. */ if (range_len > UINT16_MAX) { device_printf(sc->vmbus_dev, "GPA too large, %d pages\n", page_count); return EOPNOTSUPP; } /* * Allocate GPADL id. */ gpadl = vmbus_gpadl_alloc(sc); *gpadl0 = gpadl; /* * Connect this GPADL to the target channel. * * NOTE: * Since each message can only hold small set of page * addresses, several messages may be required to * complete the connection. 
*/ if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX) cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX; else cnt = page_count; page_count -= cnt; reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn, chm_range.gpa_page[cnt]); mh = vmbus_msghc_get(sc, reqsz); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for gpadl->chan%u\n", chan->ch_id); return EIO; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN; req->chm_chanid = chan->ch_id; req->chm_gpadl = gpadl; req->chm_range_len = range_len; req->chm_range_cnt = 1; req->chm_range.gpa_len = size; req->chm_range.gpa_ofs = 0; for (i = 0; i < cnt; ++i) req->chm_range.gpa_page[i] = page_id++; error = vmbus_msghc_exec(sc, mh); if (error) { device_printf(sc->vmbus_dev, "gpadl->chan%u msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); return error; } while (page_count > 0) { struct vmbus_chanmsg_gpadl_subconn *subreq; if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX) cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX; else cnt = page_count; page_count -= cnt; reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn, chm_gpa_page[cnt]); vmbus_msghc_reset(mh, reqsz); subreq = vmbus_msghc_dataptr(mh); subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN; subreq->chm_gpadl = gpadl; for (i = 0; i < cnt; ++i) subreq->chm_gpa_page[i] = page_id++; vmbus_msghc_exec_noresult(mh); } KASSERT(page_count == 0, ("invalid page count %d", page_count)); msg = vmbus_msghc_wait_result(sc, mh); status = ((const struct vmbus_chanmsg_gpadl_connresp *) msg->msg_data)->chm_status; vmbus_msghc_put(sc, mh); if (status != 0) { device_printf(sc->vmbus_dev, "gpadl->chan%u failed: " "status %u\n", chan->ch_id, status); return EIO; } else { if (bootverbose) { device_printf(sc->vmbus_dev, "gpadl->chan%u " "succeeded\n", chan->ch_id); } } return 0; } /* * Disconnect the GPA from the target channel */ int vmbus_chan_gpadl_disconnect(struct vmbus_channel *chan, uint32_t gpadl) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_gpadl_disconn *req; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for gpa x->chan%u\n", chan->ch_id); return EBUSY; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN; req->chm_chanid = chan->ch_id; req->chm_gpadl = gpadl; error = vmbus_msghc_exec(sc, mh); if (error) { device_printf(sc->vmbus_dev, "gpa x->chan%u msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); return error; } vmbus_msghc_wait_result(sc, mh); /* Discard result; no useful information */ vmbus_msghc_put(sc, mh); return 0; } static void vmbus_chan_close_internal(struct vmbus_channel *chan) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_chclose *req; struct taskqueue *tq = chan->ch_tq; int error; /* TODO: stringent check */ atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED); /* * Free this channel's sysctl tree attached to its device's * sysctl tree. */ sysctl_ctx_free(&chan->ch_sysctl_ctx); /* * Set ch_tq to NULL to avoid more requests be scheduled. * XXX pretty broken; need rework. */ chan->ch_tq = NULL; taskqueue_drain(tq, &chan->ch_task); chan->ch_cb = NULL; /* * Close this channel. 
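	 * The CHCLOSE message is sent without waiting for a host response
	 * (vmbus_msghc_exec_noresult() below); the bufring GPADL and the ring
	 * memory are then torn down locally.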
*/ mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for chclose(chan%u)\n", chan->ch_id); return; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE; req->chm_chanid = chan->ch_id; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { device_printf(sc->vmbus_dev, "chclose(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); return; } else if (bootverbose) { device_printf(sc->vmbus_dev, "close chan%u\n", chan->ch_id); } /* * Disconnect the TX+RX bufrings from this channel. */ if (chan->ch_bufring_gpadl) { vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl); chan->ch_bufring_gpadl = 0; } /* * Destroy the TX+RX bufrings. */ if (chan->ch_bufring != NULL) { hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring); chan->ch_bufring = NULL; } } /* * Caller should make sure that all sub-channels have * been added to 'chan' and all to-be-closed channels * are not being opened. */ void vmbus_chan_close(struct vmbus_channel *chan) { int subchan_cnt; if (!VMBUS_CHAN_ISPRIMARY(chan)) { /* * Sub-channel is closed when its primary channel * is closed; done. */ return; } /* * Close all sub-channels, if any. */ subchan_cnt = chan->ch_subchan_cnt; if (subchan_cnt > 0) { struct vmbus_channel **subchan; int i; subchan = vmbus_subchan_get(chan, subchan_cnt); for (i = 0; i < subchan_cnt; ++i) vmbus_chan_close_internal(subchan[i]); vmbus_subchan_rel(subchan, subchan_cnt); } /* Then close the primary channel. */ vmbus_chan_close_internal(chan); } int vmbus_chan_send(struct vmbus_channel *chan, uint16_t type, uint16_t flags, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt pkt; int pktlen, pad_pktlen, hlen, error; uint64_t pad = 0; struct iovec iov[3]; boolean_t send_evt; hlen = sizeof(pkt); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); + KASSERT(pad_pktlen <= vmbus_txbr_maxpktsz(&chan->ch_txbr), + ("invalid packet size %d", pad_pktlen)); pkt.cp_hdr.cph_type = type; pkt.cp_hdr.cph_flags = flags; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; iov[0].iov_base = &pkt; iov[0].iov_len = hlen; iov[1].iov_base = data; iov[1].iov_len = dlen; iov[2].iov_base = &pad; iov[2].iov_len = pad_pktlen - pktlen; error = vmbus_txbr_write(&chan->ch_txbr, iov, 3, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_send_sglist(struct vmbus_channel *chan, struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt_sglist pkt; int pktlen, pad_pktlen, hlen, error; struct iovec iov[4]; boolean_t send_evt; uint64_t pad = 0; - KASSERT(sglen < VMBUS_CHAN_SGLIST_MAX, - ("invalid sglist len %d", sglen)); - hlen = __offsetof(struct vmbus_chanpkt_sglist, cp_gpa[sglen]); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); + KASSERT(pad_pktlen <= vmbus_txbr_maxpktsz(&chan->ch_txbr), + ("invalid packet size %d", pad_pktlen)); pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA; pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; pkt.cp_rsvd = 0; pkt.cp_gpa_cnt = sglen; iov[0].iov_base = &pkt; iov[0].iov_len = sizeof(pkt); iov[1].iov_base = sg; iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen; iov[2].iov_base = data; iov[2].iov_len = dlen; iov[3].iov_base = &pad; iov[3].iov_len = 
pad_pktlen - pktlen; error = vmbus_txbr_write(&chan->ch_txbr, iov, 4, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_send_prplist(struct vmbus_channel *chan, struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt_prplist pkt; int pktlen, pad_pktlen, hlen, error; struct iovec iov[4]; boolean_t send_evt; uint64_t pad = 0; - KASSERT(prp_cnt < VMBUS_CHAN_PRPLIST_MAX, - ("invalid prplist entry count %d", prp_cnt)); - hlen = __offsetof(struct vmbus_chanpkt_prplist, cp_range[0].gpa_page[prp_cnt]); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); + KASSERT(pad_pktlen <= vmbus_txbr_maxpktsz(&chan->ch_txbr), + ("invalid packet size %d", pad_pktlen)); pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA; pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; pkt.cp_rsvd = 0; pkt.cp_range_cnt = 1; iov[0].iov_base = &pkt; iov[0].iov_len = sizeof(pkt); iov[1].iov_base = prp; iov[1].iov_len = __offsetof(struct vmbus_gpa_range, gpa_page[prp_cnt]); iov[2].iov_base = data; iov[2].iov_len = dlen; iov[3].iov_base = &pad; iov[3].iov_len = pad_pktlen - pktlen; error = vmbus_txbr_write(&chan->ch_txbr, iov, 4, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_recv(struct vmbus_channel *chan, void *data, int *dlen0, uint64_t *xactid) { struct vmbus_chanpkt_hdr pkt; int error, dlen, hlen; error = vmbus_rxbr_peek(&chan->ch_rxbr, &pkt, sizeof(pkt)); if (error) return error; hlen = VMBUS_CHANPKT_GETLEN(pkt.cph_hlen); dlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen) - hlen; if (*dlen0 < dlen) { /* Return the size of this packet's data. */ *dlen0 = dlen; return ENOBUFS; } *xactid = pkt.cph_xactid; *dlen0 = dlen; /* Skip packet header */ error = vmbus_rxbr_read(&chan->ch_rxbr, data, dlen, hlen); KASSERT(!error, ("vmbus_rxbr_read failed")); return 0; } int vmbus_chan_recv_pkt(struct vmbus_channel *chan, struct vmbus_chanpkt_hdr *pkt0, int *pktlen0) { struct vmbus_chanpkt_hdr pkt; int error, pktlen; error = vmbus_rxbr_peek(&chan->ch_rxbr, &pkt, sizeof(pkt)); if (error) return error; pktlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen); if (*pktlen0 < pktlen) { /* Return the size of this packet. */ *pktlen0 = pktlen; return ENOBUFS; } *pktlen0 = pktlen; /* Include packet header */ error = vmbus_rxbr_read(&chan->ch_rxbr, pkt0, pktlen, 0); KASSERT(!error, ("vmbus_rxbr_read failed")); return 0; } static void vmbus_chan_task(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; vmbus_chan_callback_t cb = chan->ch_cb; void *cbarg = chan->ch_cbarg; /* * Optimize host to guest signaling by ensuring: * 1. While reading the channel, we disable interrupts from * host. * 2. Ensure that we process all posted messages from the host * before returning from this callback. * 3. Once we return, enable signaling from the host. Once this * state is set we check to see if additional packets are * available to read. In this case we repeat the process. * * NOTE: Interrupt has been disabled in the ISR. 
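* The loop below re-checks the RX bufring after unmasking the
* interrupt: if more data arrived while the callback was running,
* the interrupt is masked again and the callback is re-run, so no
* pending packets are missed.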
*/ for (;;) { uint32_t left; cb(chan, cbarg); left = vmbus_rxbr_intr_unmask(&chan->ch_rxbr); if (left == 0) { /* No more data in RX bufring; done */ break; } vmbus_rxbr_intr_mask(&chan->ch_rxbr); } } static void vmbus_chan_task_nobatch(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; chan->ch_cb(chan, chan->ch_cbarg); } static __inline void vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags, int flag_cnt) { int f; for (f = 0; f < flag_cnt; ++f) { uint32_t chid_base; u_long flags; int chid_ofs; if (event_flags[f] == 0) continue; flags = atomic_swap_long(&event_flags[f], 0); chid_base = f << VMBUS_EVTFLAG_SHIFT; while ((chid_ofs = ffsl(flags)) != 0) { struct vmbus_channel *chan; --chid_ofs; /* NOTE: ffsl is 1-based */ flags &= ~(1UL << chid_ofs); chan = sc->vmbus_chmap[chid_base + chid_ofs]; /* if channel is closed or closing */ if (chan == NULL || chan->ch_tq == NULL) continue; if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) vmbus_rxbr_intr_mask(&chan->ch_rxbr); taskqueue_enqueue(chan->ch_tq, &chan->ch_task); } } } void vmbus_event_proc(struct vmbus_softc *sc, int cpu) { struct vmbus_evtflags *eventf; /* * On Host with Win8 or above, the event page can be checked directly * to get the id of the channel that has the pending interrupt. */ eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; vmbus_event_flags_proc(sc, eventf->evt_flags, VMBUS_PCPU_GET(sc, event_flags_cnt, cpu)); } void vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu) { struct vmbus_evtflags *eventf; eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) { vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags, VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT); } } static void vmbus_chan_update_evtflagcnt(struct vmbus_softc *sc, const struct vmbus_channel *chan) { volatile int *flag_cnt_ptr; int flag_cnt; flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1; flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->ch_cpuid); for (;;) { int old_flag_cnt; old_flag_cnt = *flag_cnt_ptr; if (old_flag_cnt >= flag_cnt) break; if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) { if (bootverbose) { device_printf(sc->vmbus_dev, "channel%u update cpu%d flag_cnt to %d\n", chan->ch_id, chan->ch_cpuid, flag_cnt); } break; } } } static struct vmbus_channel * vmbus_chan_alloc(struct vmbus_softc *sc) { struct vmbus_channel *chan; chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO); chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev), HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param), &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (chan->ch_monprm == NULL) { device_printf(sc->vmbus_dev, "monprm alloc failed\n"); free(chan, M_DEVBUF); return NULL; } chan->ch_vmbus = sc; mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF); TAILQ_INIT(&chan->ch_subchans); TASK_INIT(&chan->ch_detach_task, 0, vmbus_chan_detach_task, chan); vmbus_rxbr_init(&chan->ch_rxbr); vmbus_txbr_init(&chan->ch_txbr); return chan; } static void vmbus_chan_free(struct vmbus_channel *chan) { /* TODO: assert sub-channel list is empty */ /* TODO: asset no longer on the primary channel's sub-channel list */ /* TODO: asset no longer on the vmbus channel list */ hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm); mtx_destroy(&chan->ch_subchan_lock); vmbus_rxbr_deinit(&chan->ch_rxbr); vmbus_txbr_deinit(&chan->ch_txbr); free(chan, M_DEVBUF); } static int vmbus_chan_add(struct vmbus_channel 
*newchan) { struct vmbus_softc *sc = newchan->ch_vmbus; struct vmbus_channel *prichan; if (newchan->ch_id == 0) { /* * XXX * Chan0 will neither be processed nor should be offered; * skip it. */ device_printf(sc->vmbus_dev, "got chan0 offer, discard\n"); return EINVAL; } else if (newchan->ch_id >= VMBUS_CHAN_MAX) { device_printf(sc->vmbus_dev, "invalid chan%u offer\n", newchan->ch_id); return EINVAL; } sc->vmbus_chmap[newchan->ch_id] = newchan; if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u subidx%u offer\n", newchan->ch_id, newchan->ch_subidx); } mtx_lock(&sc->vmbus_prichan_lock); TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) { /* * Sub-channel will have the same type GUID and instance * GUID as its primary channel. */ if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type, sizeof(struct hyperv_guid)) == 0 && memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst, sizeof(struct hyperv_guid)) == 0) break; } if (VMBUS_CHAN_ISPRIMARY(newchan)) { if (prichan == NULL) { /* Install the new primary channel */ TAILQ_INSERT_TAIL(&sc->vmbus_prichans, newchan, ch_prilink); mtx_unlock(&sc->vmbus_prichan_lock); return 0; } else { mtx_unlock(&sc->vmbus_prichan_lock); device_printf(sc->vmbus_dev, "duplicated primary " "chan%u\n", newchan->ch_id); return EINVAL; } } else { /* Sub-channel */ if (prichan == NULL) { mtx_unlock(&sc->vmbus_prichan_lock); device_printf(sc->vmbus_dev, "no primary chan for " "chan%u\n", newchan->ch_id); return EINVAL; } /* * Found the primary channel for this sub-channel and * move on. * * XXX refcnt prichan */ } mtx_unlock(&sc->vmbus_prichan_lock); /* * This is a sub-channel; link it with the primary channel. */ KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan), ("new channel is not sub-channel")); KASSERT(prichan != NULL, ("no primary channel")); newchan->ch_prichan = prichan; newchan->ch_dev = prichan->ch_dev; mtx_lock(&prichan->ch_subchan_lock); TAILQ_INSERT_TAIL(&prichan->ch_subchans, newchan, ch_sublink); /* * Bump up sub-channel count and notify anyone that is * interested in this sub-channel, after this sub-channel * is setup. */ prichan->ch_subchan_cnt++; mtx_unlock(&prichan->ch_subchan_lock); wakeup(prichan); return 0; } void vmbus_chan_cpu_set(struct vmbus_channel *chan, int cpu) { KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu)); if (chan->ch_vmbus->vmbus_version == VMBUS_VERSION_WS2008 || chan->ch_vmbus->vmbus_version == VMBUS_VERSION_WIN7) { /* Only cpu0 is supported */ cpu = 0; } chan->ch_cpuid = cpu; chan->ch_vcpuid = VMBUS_PCPU_GET(chan->ch_vmbus, vcpuid, cpu); if (bootverbose) { printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n", chan->ch_id, chan->ch_cpuid, chan->ch_vcpuid); } } void vmbus_chan_cpu_rr(struct vmbus_channel *chan) { static uint32_t vmbus_chan_nextcpu; int cpu; cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus; vmbus_chan_cpu_set(chan, cpu); } static void vmbus_chan_cpu_default(struct vmbus_channel *chan) { /* * By default, pin the channel to cpu0. Devices having * special channel-cpu mapping requirement should call * vmbus_chan_cpu_{set,rr}(). 
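* For example, a multi-queue driver would typically call
* vmbus_chan_cpu_rr() on each sub-channel to spread the interrupt
* load across CPUs instead of relying on this default.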
*/ vmbus_chan_cpu_set(chan, 0); } static void vmbus_chan_msgproc_choffer(struct vmbus_softc *sc, const struct vmbus_message *msg) { const struct vmbus_chanmsg_choffer *offer; struct vmbus_channel *chan; int error; offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data; chan = vmbus_chan_alloc(sc); if (chan == NULL) { device_printf(sc->vmbus_dev, "allocate chan%u failed\n", offer->chm_chanid); return; } chan->ch_id = offer->chm_chanid; chan->ch_subidx = offer->chm_subidx; chan->ch_guid_type = offer->chm_chtype; chan->ch_guid_inst = offer->chm_chinst; /* Batch reading is on by default */ chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD; chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT; if (sc->vmbus_version != VMBUS_VERSION_WS2008) chan->ch_monprm->mp_connid = offer->chm_connid; if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) { int trig_idx; /* * Setup MNF stuffs. */ chan->ch_txflags |= VMBUS_CHAN_TXF_HASMNF; trig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN; if (trig_idx >= VMBUS_MONTRIGS_MAX) panic("invalid monitor trigger %u", offer->chm_montrig); chan->ch_montrig = &sc->vmbus_mnf2->mnf_trigs[trig_idx].mt_pending; chan->ch_montrig_mask = 1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN); } /* * Setup event flag. */ chan->ch_evtflag = &sc->vmbus_tx_evtflags[chan->ch_id >> VMBUS_EVTFLAG_SHIFT]; chan->ch_evtflag_mask = 1UL << (chan->ch_id & VMBUS_EVTFLAG_MASK); /* Select default cpu for this channel. */ vmbus_chan_cpu_default(chan); error = vmbus_chan_add(chan); if (error) { device_printf(sc->vmbus_dev, "add chan%u failed: %d\n", chan->ch_id, error); vmbus_chan_free(chan); return; } if (VMBUS_CHAN_ISPRIMARY(chan)) { /* * Add device for this primary channel. * * NOTE: * Error is ignored here; don't have much to do if error * really happens. */ vmbus_add_child(chan); } } /* * XXX pretty broken; need rework. */ static void vmbus_chan_msgproc_chrescind(struct vmbus_softc *sc, const struct vmbus_message *msg) { const struct vmbus_chanmsg_chrescind *note; struct vmbus_channel *chan; note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data; if (note->chm_chanid > VMBUS_CHAN_MAX) { device_printf(sc->vmbus_dev, "invalid rescinded chan%u\n", note->chm_chanid); return; } if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u rescinded\n", note->chm_chanid); } chan = sc->vmbus_chmap[note->chm_chanid]; if (chan == NULL) return; sc->vmbus_chmap[note->chm_chanid] = NULL; taskqueue_enqueue(taskqueue_thread, &chan->ch_detach_task); } static void vmbus_chan_detach_task(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; if (VMBUS_CHAN_ISPRIMARY(chan)) { /* Only primary channel owns the device */ vmbus_delete_child(chan); /* NOTE: DO NOT free primary channel for now */ } else { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_channel *pri_chan = chan->ch_prichan; struct vmbus_chanmsg_chfree *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for chfree(chan%u)\n", chan->ch_id); goto remove; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE; req->chm_chanid = chan->ch_id; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { device_printf(sc->vmbus_dev, "chfree(chan%u) failed: %d", chan->ch_id, error); /* NOTE: Move on! 
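* The host has already rescinded this channel, so there is nothing
* more to do about the failure; proceed to remove the sub-channel
* from its primary channel below.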
*/ } else { if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u freed\n", chan->ch_id); } } remove: mtx_lock(&pri_chan->ch_subchan_lock); TAILQ_REMOVE(&pri_chan->ch_subchans, chan, ch_sublink); KASSERT(pri_chan->ch_subchan_cnt > 0, ("invalid subchan_cnt %d", pri_chan->ch_subchan_cnt)); pri_chan->ch_subchan_cnt--; mtx_unlock(&pri_chan->ch_subchan_lock); wakeup(pri_chan); vmbus_chan_free(chan); } } /* * Detach all devices and destroy the corresponding primary channels. */ void vmbus_chan_destroy_all(struct vmbus_softc *sc) { struct vmbus_channel *chan; mtx_lock(&sc->vmbus_prichan_lock); while ((chan = TAILQ_FIRST(&sc->vmbus_prichans)) != NULL) { KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel")); TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink); mtx_unlock(&sc->vmbus_prichan_lock); vmbus_delete_child(chan); vmbus_chan_free(chan); mtx_lock(&sc->vmbus_prichan_lock); } bzero(sc->vmbus_chmap, sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX); mtx_unlock(&sc->vmbus_prichan_lock); } /* * The channel whose vcpu binding is closest to the currect vcpu will * be selected. * If no multi-channel, always select primary channel. */ struct vmbus_channel * vmbus_chan_cpu2chan(struct vmbus_channel *prichan, int cpu) { struct vmbus_channel *sel, *chan; uint32_t vcpu, sel_dist; KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpuid %d", cpu)); if (TAILQ_EMPTY(&prichan->ch_subchans)) return prichan; vcpu = VMBUS_PCPU_GET(prichan->ch_vmbus, vcpuid, cpu); #define CHAN_VCPU_DIST(ch, vcpu) \ (((ch)->ch_vcpuid > (vcpu)) ? \ ((ch)->ch_vcpuid - (vcpu)) : ((vcpu) - (ch)->ch_vcpuid)) #define CHAN_SELECT(ch) \ do { \ sel = ch; \ sel_dist = CHAN_VCPU_DIST(ch, vcpu); \ } while (0) CHAN_SELECT(prichan); mtx_lock(&prichan->ch_subchan_lock); TAILQ_FOREACH(chan, &prichan->ch_subchans, ch_sublink) { uint32_t dist; KASSERT(chan->ch_stflags & VMBUS_CHAN_ST_OPENED, ("chan%u is not opened", chan->ch_id)); if (chan->ch_vcpuid == vcpu) { /* Exact match; done */ CHAN_SELECT(chan); break; } dist = CHAN_VCPU_DIST(chan, vcpu); if (sel_dist <= dist) { /* Far or same distance; skip */ continue; } /* Select the closer channel. 
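* "Closer" means a smaller absolute difference between the channel's
* vcpuid and the vcpuid of the requested cpu, as computed by
* CHAN_VCPU_DIST() above.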
*/ CHAN_SELECT(chan); } mtx_unlock(&prichan->ch_subchan_lock); #undef CHAN_SELECT #undef CHAN_VCPU_DIST return sel; } struct vmbus_channel ** vmbus_subchan_get(struct vmbus_channel *pri_chan, int subchan_cnt) { struct vmbus_channel **ret, *chan; int i; ret = malloc(subchan_cnt * sizeof(struct vmbus_channel *), M_TEMP, M_WAITOK); mtx_lock(&pri_chan->ch_subchan_lock); while (pri_chan->ch_subchan_cnt < subchan_cnt) mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0); i = 0; TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) { /* TODO: refcnt chan */ ret[i] = chan; ++i; if (i == subchan_cnt) break; } KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d", pri_chan->ch_subchan_cnt, subchan_cnt)); mtx_unlock(&pri_chan->ch_subchan_lock); return ret; } void vmbus_subchan_rel(struct vmbus_channel **subchan, int subchan_cnt __unused) { free(subchan, M_TEMP); } void vmbus_subchan_drain(struct vmbus_channel *pri_chan) { mtx_lock(&pri_chan->ch_subchan_lock); while (pri_chan->ch_subchan_cnt > 0) mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0); mtx_unlock(&pri_chan->ch_subchan_lock); } void vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg) { vmbus_chanmsg_proc_t msg_proc; uint32_t msg_type; msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type; KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX, ("invalid message type %u", msg_type)); msg_proc = vmbus_chan_msgprocs[msg_type]; if (msg_proc != NULL) msg_proc(sc, msg); } void vmbus_chan_set_readbatch(struct vmbus_channel *chan, bool on) { if (!on) chan->ch_flags &= ~VMBUS_CHAN_FLAG_BATCHREAD; else chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD; } uint32_t vmbus_chan_id(const struct vmbus_channel *chan) { return chan->ch_id; } uint32_t vmbus_chan_subidx(const struct vmbus_channel *chan) { return chan->ch_subidx; } bool vmbus_chan_is_primary(const struct vmbus_channel *chan) { if (VMBUS_CHAN_ISPRIMARY(chan)) return true; else return false; } const struct hyperv_guid * vmbus_chan_guid_inst(const struct vmbus_channel *chan) { return &chan->ch_guid_inst; } Index: user/alc/PQ_LAUNDRY/sys/dev/iwm/if_iwm.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/iwm/if_iwm.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/iwm/if_iwm.c (revision 303642) @@ -1,6189 +1,6189 @@ /* $OpenBSD: if_iwm.c,v 1.42 2015/05/30 02:49:23 deraadt Exp $ */ /* * Copyright (c) 2014 genua mbh * Copyright (c) 2014 Fixup Software Ltd. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /*- * Based on BSD-licensed source modules in the Linux iwlwifi driver, * which were used as the reference documentation for this implementation. 
* * Driver version we are currently based off of is * Linux 3.14.3 (tag id a2df521e42b1d9a23f620ac79dbfe8655a8391dd) * *********************************************************************** * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * Copyright(c) 2007 - 2013 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, * USA * * The full GNU General Public License is included in this distribution * in the file called COPYING. * * Contact Information: * Intel Linux Wireless * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 * * * BSD LICENSE * * Copyright(c) 2005 - 2013 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2007-2010 Damien Bergamini * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include const uint8_t iwm_nvm_channels[] = { /* 2.4 GHz */ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 5 GHz */ 36, 40, 44, 48, 52, 56, 60, 64, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 149, 153, 157, 161, 165 }; _Static_assert(nitems(iwm_nvm_channels) <= IWM_NUM_CHANNELS, "IWM_NUM_CHANNELS is too small"); const uint8_t iwm_nvm_channels_8000[] = { /* 2.4 GHz */ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 5 GHz */ 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 149, 153, 157, 161, 165, 169, 173, 177, 181 }; _Static_assert(nitems(iwm_nvm_channels_8000) <= IWM_NUM_CHANNELS_8000, "IWM_NUM_CHANNELS_8000 is too small"); #define IWM_NUM_2GHZ_CHANNELS 14 #define IWM_N_HW_ADDR_MASK 0xF /* * XXX For now, there's simply a fixed set of rate table entries * that are populated. */ const struct iwm_rate { uint8_t rate; uint8_t plcp; } iwm_rates[] = { { 2, IWM_RATE_1M_PLCP }, { 4, IWM_RATE_2M_PLCP }, { 11, IWM_RATE_5M_PLCP }, { 22, IWM_RATE_11M_PLCP }, { 12, IWM_RATE_6M_PLCP }, { 18, IWM_RATE_9M_PLCP }, { 24, IWM_RATE_12M_PLCP }, { 36, IWM_RATE_18M_PLCP }, { 48, IWM_RATE_24M_PLCP }, { 72, IWM_RATE_36M_PLCP }, { 96, IWM_RATE_48M_PLCP }, { 108, IWM_RATE_54M_PLCP }, }; #define IWM_RIDX_CCK 0 #define IWM_RIDX_OFDM 4 #define IWM_RIDX_MAX (nitems(iwm_rates)-1) #define IWM_RIDX_IS_CCK(_i_) ((_i_) < IWM_RIDX_OFDM) #define IWM_RIDX_IS_OFDM(_i_) ((_i_) >= IWM_RIDX_OFDM) struct iwm_nvm_section { uint16_t length; uint8_t *data; }; static int iwm_store_cscheme(struct iwm_softc *, const uint8_t *, size_t); static int iwm_firmware_store_section(struct iwm_softc *, enum iwm_ucode_type, const uint8_t *, size_t); static int iwm_set_default_calib(struct iwm_softc *, const void *); static void iwm_fw_info_free(struct iwm_fw_info *); static int iwm_read_firmware(struct iwm_softc *, enum iwm_ucode_type); static void iwm_dma_map_addr(void *, bus_dma_segment_t *, int, int); static int iwm_dma_contig_alloc(bus_dma_tag_t, struct iwm_dma_info *, bus_size_t, bus_size_t); static void iwm_dma_contig_free(struct iwm_dma_info *); static int iwm_alloc_fwmem(struct iwm_softc *); static void iwm_free_fwmem(struct iwm_softc *); static int iwm_alloc_sched(struct iwm_softc *); static void iwm_free_sched(struct iwm_softc *); static int iwm_alloc_kw(struct iwm_softc *); static void iwm_free_kw(struct iwm_softc *); static int iwm_alloc_ict(struct iwm_softc *); static void iwm_free_ict(struct iwm_softc *); static int iwm_alloc_rx_ring(struct iwm_softc *, struct iwm_rx_ring *); static void iwm_disable_rx_dma(struct iwm_softc *); static void iwm_reset_rx_ring(struct iwm_softc *, struct iwm_rx_ring *); static void iwm_free_rx_ring(struct 
iwm_softc *, struct iwm_rx_ring *); static int iwm_alloc_tx_ring(struct iwm_softc *, struct iwm_tx_ring *, int); static void iwm_reset_tx_ring(struct iwm_softc *, struct iwm_tx_ring *); static void iwm_free_tx_ring(struct iwm_softc *, struct iwm_tx_ring *); static void iwm_enable_interrupts(struct iwm_softc *); static void iwm_restore_interrupts(struct iwm_softc *); static void iwm_disable_interrupts(struct iwm_softc *); static void iwm_ict_reset(struct iwm_softc *); static int iwm_allow_mcast(struct ieee80211vap *, struct iwm_softc *); static void iwm_stop_device(struct iwm_softc *); static void iwm_mvm_nic_config(struct iwm_softc *); static int iwm_nic_rx_init(struct iwm_softc *); static int iwm_nic_tx_init(struct iwm_softc *); static int iwm_nic_init(struct iwm_softc *); static int iwm_enable_txq(struct iwm_softc *, int, int, int); static int iwm_post_alive(struct iwm_softc *); static int iwm_nvm_read_chunk(struct iwm_softc *, uint16_t, uint16_t, uint16_t, uint8_t *, uint16_t *); static int iwm_nvm_read_section(struct iwm_softc *, uint16_t, uint8_t *, uint16_t *, size_t); static uint32_t iwm_eeprom_channel_flags(uint16_t); static void iwm_add_channel_band(struct iwm_softc *, struct ieee80211_channel[], int, int *, int, size_t, const uint8_t[]); static void iwm_init_channel_map(struct ieee80211com *, int, int *, struct ieee80211_channel[]); static int iwm_parse_nvm_data(struct iwm_softc *, const uint16_t *, const uint16_t *, const uint16_t *, const uint16_t *, const uint16_t *, const uint16_t *); static void iwm_set_hw_address_8000(struct iwm_softc *, struct iwm_nvm_data *, const uint16_t *, const uint16_t *); static int iwm_get_sku(const struct iwm_softc *, const uint16_t *, const uint16_t *); static int iwm_get_nvm_version(const struct iwm_softc *, const uint16_t *); static int iwm_get_radio_cfg(const struct iwm_softc *, const uint16_t *, const uint16_t *); static int iwm_get_n_hw_addrs(const struct iwm_softc *, const uint16_t *); static void iwm_set_radio_cfg(const struct iwm_softc *, struct iwm_nvm_data *, uint32_t); static int iwm_parse_nvm_sections(struct iwm_softc *, struct iwm_nvm_section *); static int iwm_nvm_init(struct iwm_softc *); static int iwm_firmware_load_sect(struct iwm_softc *, uint32_t, const uint8_t *, uint32_t); static int iwm_firmware_load_chunk(struct iwm_softc *, uint32_t, const uint8_t *, uint32_t); static int iwm_load_firmware_7000(struct iwm_softc *, enum iwm_ucode_type); static int iwm_load_cpu_sections_8000(struct iwm_softc *, struct iwm_fw_sects *, int , int *); static int iwm_load_firmware_8000(struct iwm_softc *, enum iwm_ucode_type); static int iwm_load_firmware(struct iwm_softc *, enum iwm_ucode_type); static int iwm_start_fw(struct iwm_softc *, enum iwm_ucode_type); static int iwm_send_tx_ant_cfg(struct iwm_softc *, uint8_t); static int iwm_send_phy_cfg_cmd(struct iwm_softc *); static int iwm_mvm_load_ucode_wait_alive(struct iwm_softc *, enum iwm_ucode_type); static int iwm_run_init_mvm_ucode(struct iwm_softc *, int); static int iwm_rx_addbuf(struct iwm_softc *, int, int); static int iwm_mvm_calc_rssi(struct iwm_softc *, struct iwm_rx_phy_info *); static int iwm_mvm_get_signal_strength(struct iwm_softc *, struct iwm_rx_phy_info *); static void iwm_mvm_rx_rx_phy_cmd(struct iwm_softc *, struct iwm_rx_packet *, struct iwm_rx_data *); static int iwm_get_noise(const struct iwm_mvm_statistics_rx_non_phy *); static void iwm_mvm_rx_rx_mpdu(struct iwm_softc *, struct iwm_rx_packet *, struct iwm_rx_data *); static int iwm_mvm_rx_tx_cmd_single(struct iwm_softc 
*, struct iwm_rx_packet *, struct iwm_node *); static void iwm_mvm_rx_tx_cmd(struct iwm_softc *, struct iwm_rx_packet *, struct iwm_rx_data *); static void iwm_cmd_done(struct iwm_softc *, struct iwm_rx_packet *); #if 0 static void iwm_update_sched(struct iwm_softc *, int, int, uint8_t, uint16_t); #endif static const struct iwm_rate * iwm_tx_fill_cmd(struct iwm_softc *, struct iwm_node *, struct ieee80211_frame *, struct iwm_tx_cmd *); static int iwm_tx(struct iwm_softc *, struct mbuf *, struct ieee80211_node *, int); static int iwm_raw_xmit(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static int iwm_mvm_send_add_sta_cmd_status(struct iwm_softc *, struct iwm_mvm_add_sta_cmd_v7 *, int *); static int iwm_mvm_sta_send_to_fw(struct iwm_softc *, struct iwm_node *, int); static int iwm_mvm_add_sta(struct iwm_softc *, struct iwm_node *); static int iwm_mvm_update_sta(struct iwm_softc *, struct iwm_node *); static int iwm_mvm_add_int_sta_common(struct iwm_softc *, struct iwm_int_sta *, const uint8_t *, uint16_t, uint16_t); static int iwm_mvm_add_aux_sta(struct iwm_softc *); static int iwm_mvm_update_quotas(struct iwm_softc *, struct iwm_node *); static int iwm_auth(struct ieee80211vap *, struct iwm_softc *); static int iwm_assoc(struct ieee80211vap *, struct iwm_softc *); static int iwm_release(struct iwm_softc *, struct iwm_node *); static struct ieee80211_node * iwm_node_alloc(struct ieee80211vap *, const uint8_t[IEEE80211_ADDR_LEN]); static void iwm_setrates(struct iwm_softc *, struct iwm_node *); static int iwm_media_change(struct ifnet *); static int iwm_newstate(struct ieee80211vap *, enum ieee80211_state, int); static void iwm_endscan_cb(void *, int); static void iwm_mvm_fill_sf_command(struct iwm_softc *, struct iwm_sf_cfg_cmd *, struct ieee80211_node *); static int iwm_mvm_sf_config(struct iwm_softc *, enum iwm_sf_state); static int iwm_send_bt_init_conf(struct iwm_softc *); static int iwm_send_update_mcc_cmd(struct iwm_softc *, const char *); static void iwm_mvm_tt_tx_backoff(struct iwm_softc *, uint32_t); static int iwm_init_hw(struct iwm_softc *); static void iwm_init(struct iwm_softc *); static void iwm_start(struct iwm_softc *); static void iwm_stop(struct iwm_softc *); static void iwm_watchdog(void *); static void iwm_parent(struct ieee80211com *); #ifdef IWM_DEBUG static const char * iwm_desc_lookup(uint32_t); static void iwm_nic_error(struct iwm_softc *); static void iwm_nic_umac_error(struct iwm_softc *); #endif static void iwm_notif_intr(struct iwm_softc *); static void iwm_intr(void *); static int iwm_attach(device_t); static int iwm_is_valid_ether_addr(uint8_t *); static void iwm_preinit(void *); static int iwm_detach_local(struct iwm_softc *sc, int); static void iwm_init_task(void *); static void iwm_radiotap_attach(struct iwm_softc *); static struct ieee80211vap * iwm_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void iwm_vap_delete(struct ieee80211vap *); static void iwm_scan_start(struct ieee80211com *); static void iwm_scan_end(struct ieee80211com *); static void iwm_update_mcast(struct ieee80211com *); static void iwm_set_channel(struct ieee80211com *); static void iwm_scan_curchan(struct ieee80211_scan_state *, unsigned long); static void iwm_scan_mindwell(struct ieee80211_scan_state *); static int iwm_detach(device_t); /* * Firmware parser. 
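* The firmware image begins with a struct iwm_tlv_ucode_header and is
* followed by a sequence of (type, length, data) records, each padded
* to a 4-byte boundary.  iwm_read_firmware() walks these records and
* dispatches on the TLV type.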
*/ static int iwm_store_cscheme(struct iwm_softc *sc, const uint8_t *data, size_t dlen) { const struct iwm_fw_cscheme_list *l = (const void *)data; if (dlen < sizeof(*l) || dlen < sizeof(l->size) + l->size * sizeof(*l->cs)) return EINVAL; /* we don't actually store anything for now, always use s/w crypto */ return 0; } static int iwm_firmware_store_section(struct iwm_softc *sc, enum iwm_ucode_type type, const uint8_t *data, size_t dlen) { struct iwm_fw_sects *fws; struct iwm_fw_onesect *fwone; if (type >= IWM_UCODE_TYPE_MAX) return EINVAL; if (dlen < sizeof(uint32_t)) return EINVAL; fws = &sc->sc_fw.fw_sects[type]; if (fws->fw_count >= IWM_UCODE_SECT_MAX) return EINVAL; fwone = &fws->fw_sect[fws->fw_count]; /* first 32bit are device load offset */ memcpy(&fwone->fws_devoff, data, sizeof(uint32_t)); /* rest is data */ fwone->fws_data = data + sizeof(uint32_t); fwone->fws_len = dlen - sizeof(uint32_t); fws->fw_count++; fws->fw_totlen += fwone->fws_len; return 0; } /* iwlwifi: iwl-drv.c */ struct iwm_tlv_calib_data { uint32_t ucode_type; struct iwm_tlv_calib_ctrl calib; } __packed; static int iwm_set_default_calib(struct iwm_softc *sc, const void *data) { const struct iwm_tlv_calib_data *def_calib = data; uint32_t ucode_type = le32toh(def_calib->ucode_type); if (ucode_type >= IWM_UCODE_TYPE_MAX) { device_printf(sc->sc_dev, "Wrong ucode_type %u for default " "calibration.\n", ucode_type); return EINVAL; } sc->sc_default_calib[ucode_type].flow_trigger = def_calib->calib.flow_trigger; sc->sc_default_calib[ucode_type].event_trigger = def_calib->calib.event_trigger; return 0; } static void iwm_fw_info_free(struct iwm_fw_info *fw) { firmware_put(fw->fw_fp, FIRMWARE_UNLOAD); fw->fw_fp = NULL; /* don't touch fw->fw_status */ memset(fw->fw_sects, 0, sizeof(fw->fw_sects)); } static int iwm_read_firmware(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { struct iwm_fw_info *fw = &sc->sc_fw; const struct iwm_tlv_ucode_header *uhdr; struct iwm_ucode_tlv tlv; enum iwm_ucode_tlv_type tlv_type; const struct firmware *fwp; const uint8_t *data; int error = 0; size_t len; if (fw->fw_status == IWM_FW_STATUS_DONE && ucode_type != IWM_UCODE_TYPE_INIT) return 0; while (fw->fw_status == IWM_FW_STATUS_INPROGRESS) msleep(&sc->sc_fw, &sc->sc_mtx, 0, "iwmfwp", 0); fw->fw_status = IWM_FW_STATUS_INPROGRESS; if (fw->fw_fp != NULL) iwm_fw_info_free(fw); /* * Load firmware into driver memory. * fw_fp will be set. */ IWM_UNLOCK(sc); fwp = firmware_get(sc->sc_fwname); IWM_LOCK(sc); if (fwp == NULL) { device_printf(sc->sc_dev, "could not read firmware %s (error %d)\n", sc->sc_fwname, error); goto out; } fw->fw_fp = fwp; /* (Re-)Initialize default values. 
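* These capability fields may still hold values from a previous
* firmware load, so clear them before parsing the new image.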
*/ sc->sc_capaflags = 0; sc->sc_capa_n_scan_channels = IWM_MAX_NUM_SCAN_CHANNELS; memset(sc->sc_enabled_capa, 0, sizeof(sc->sc_enabled_capa)); memset(sc->sc_fw_mcc, 0, sizeof(sc->sc_fw_mcc)); /* * Parse firmware contents */ uhdr = (const void *)fw->fw_fp->data; if (*(const uint32_t *)fw->fw_fp->data != 0 || le32toh(uhdr->magic) != IWM_TLV_UCODE_MAGIC) { device_printf(sc->sc_dev, "invalid firmware %s\n", sc->sc_fwname); error = EINVAL; goto out; } snprintf(sc->sc_fwver, sizeof(sc->sc_fwver), "%d.%d (API ver %d)", IWM_UCODE_MAJOR(le32toh(uhdr->ver)), IWM_UCODE_MINOR(le32toh(uhdr->ver)), IWM_UCODE_API(le32toh(uhdr->ver))); data = uhdr->data; len = fw->fw_fp->datasize - sizeof(*uhdr); while (len >= sizeof(tlv)) { size_t tlv_len; const void *tlv_data; memcpy(&tlv, data, sizeof(tlv)); tlv_len = le32toh(tlv.length); tlv_type = le32toh(tlv.type); len -= sizeof(tlv); data += sizeof(tlv); tlv_data = data; if (len < tlv_len) { device_printf(sc->sc_dev, "firmware too short: %zu bytes\n", len); error = EINVAL; goto parse_out; } switch ((int)tlv_type) { case IWM_UCODE_TLV_PROBE_MAX_LEN: if (tlv_len < sizeof(uint32_t)) { device_printf(sc->sc_dev, "%s: PROBE_MAX_LEN (%d) < sizeof(uint32_t)\n", __func__, (int) tlv_len); error = EINVAL; goto parse_out; } sc->sc_capa_max_probe_len = le32toh(*(const uint32_t *)tlv_data); /* limit it to something sensible */ if (sc->sc_capa_max_probe_len > IWM_SCAN_OFFLOAD_PROBE_REQ_SIZE) { IWM_DPRINTF(sc, IWM_DEBUG_FIRMWARE_TLV, "%s: IWM_UCODE_TLV_PROBE_MAX_LEN " "ridiculous\n", __func__); error = EINVAL; goto parse_out; } break; case IWM_UCODE_TLV_PAN: if (tlv_len) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_PAN: tlv_len (%d) > 0\n", __func__, (int) tlv_len); error = EINVAL; goto parse_out; } sc->sc_capaflags |= IWM_UCODE_TLV_FLAGS_PAN; break; case IWM_UCODE_TLV_FLAGS: if (tlv_len < sizeof(uint32_t)) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_FLAGS: tlv_len (%d) < sizeof(uint32_t)\n", __func__, (int) tlv_len); error = EINVAL; goto parse_out; } /* * Apparently there can be many flags, but Linux driver * parses only the first one, and so do we. * * XXX: why does this override IWM_UCODE_TLV_PAN? * Intentional or a bug? Observations from * current firmware file: * 1) TLV_PAN is parsed first * 2) TLV_FLAGS contains TLV_FLAGS_PAN * ==> this resets TLV_PAN to itself... 
hnnnk */ sc->sc_capaflags = le32toh(*(const uint32_t *)tlv_data); break; case IWM_UCODE_TLV_CSCHEME: if ((error = iwm_store_cscheme(sc, tlv_data, tlv_len)) != 0) { device_printf(sc->sc_dev, "%s: iwm_store_cscheme(): returned %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_NUM_OF_CPU: { uint32_t num_cpu; if (tlv_len != sizeof(uint32_t)) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_NUM_OF_CPU: tlv_len (%d) < sizeof(uint32_t)\n", __func__, (int) tlv_len); error = EINVAL; goto parse_out; } num_cpu = le32toh(*(const uint32_t *)tlv_data); if (num_cpu < 1 || num_cpu > 2) { device_printf(sc->sc_dev, "%s: Driver supports only 1 or 2 CPUs\n", __func__); error = EINVAL; goto parse_out; } break; } case IWM_UCODE_TLV_SEC_RT: if ((error = iwm_firmware_store_section(sc, IWM_UCODE_TYPE_REGULAR, tlv_data, tlv_len)) != 0) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TYPE_REGULAR: iwm_firmware_store_section() failed; %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_SEC_INIT: if ((error = iwm_firmware_store_section(sc, IWM_UCODE_TYPE_INIT, tlv_data, tlv_len)) != 0) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TYPE_INIT: iwm_firmware_store_section() failed; %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_SEC_WOWLAN: if ((error = iwm_firmware_store_section(sc, IWM_UCODE_TYPE_WOW, tlv_data, tlv_len)) != 0) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TYPE_WOW: iwm_firmware_store_section() failed; %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_DEF_CALIB: if (tlv_len != sizeof(struct iwm_tlv_calib_data)) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_DEV_CALIB: tlv_len (%d) < sizeof(iwm_tlv_calib_data) (%d)\n", __func__, (int) tlv_len, (int) sizeof(struct iwm_tlv_calib_data)); error = EINVAL; goto parse_out; } if ((error = iwm_set_default_calib(sc, tlv_data)) != 0) { device_printf(sc->sc_dev, "%s: iwm_set_default_calib() failed: %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_PHY_SKU: if (tlv_len != sizeof(uint32_t)) { error = EINVAL; device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_PHY_SKU: tlv_len (%d) < sizeof(uint32_t)\n", __func__, (int) tlv_len); goto parse_out; } sc->sc_fw_phy_config = le32toh(*(const uint32_t *)tlv_data); break; case IWM_UCODE_TLV_API_CHANGES_SET: { const struct iwm_ucode_api *api; if (tlv_len != sizeof(*api)) { error = EINVAL; goto parse_out; } api = (const struct iwm_ucode_api *)tlv_data; /* Flags may exceed 32 bits in future firmware. 
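* Only api_index 0 is handled here; a non-zero index would refer to
* additional 32-bit flag words that this driver does not parse yet.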
*/ if (le32toh(api->api_index) > 0) { device_printf(sc->sc_dev, "unsupported API index %d\n", le32toh(api->api_index)); goto parse_out; } sc->sc_ucode_api = le32toh(api->api_flags); break; } case IWM_UCODE_TLV_ENABLED_CAPABILITIES: { const struct iwm_ucode_capa *capa; int idx, i; if (tlv_len != sizeof(*capa)) { error = EINVAL; goto parse_out; } capa = (const struct iwm_ucode_capa *)tlv_data; idx = le32toh(capa->api_index); if (idx > howmany(IWM_NUM_UCODE_TLV_CAPA, 32)) { device_printf(sc->sc_dev, "unsupported API index %d\n", idx); goto parse_out; } for (i = 0; i < 32; i++) { if ((le32toh(capa->api_capa) & (1U << i)) == 0) continue; setbit(sc->sc_enabled_capa, i + (32 * idx)); } break; } case 48: /* undocumented TLV */ case IWM_UCODE_TLV_SDIO_ADMA_ADDR: case IWM_UCODE_TLV_FW_GSCAN_CAPA: /* ignore, not used by current driver */ break; case IWM_UCODE_TLV_SEC_RT_USNIFFER: if ((error = iwm_firmware_store_section(sc, IWM_UCODE_TYPE_REGULAR_USNIFFER, tlv_data, tlv_len)) != 0) goto parse_out; break; case IWM_UCODE_TLV_N_SCAN_CHANNELS: if (tlv_len != sizeof(uint32_t)) { error = EINVAL; goto parse_out; } sc->sc_capa_n_scan_channels = le32toh(*(const uint32_t *)tlv_data); break; case IWM_UCODE_TLV_FW_VERSION: if (tlv_len != sizeof(uint32_t) * 3) { error = EINVAL; goto parse_out; } snprintf(sc->sc_fwver, sizeof(sc->sc_fwver), "%d.%d.%d", le32toh(((const uint32_t *)tlv_data)[0]), le32toh(((const uint32_t *)tlv_data)[1]), le32toh(((const uint32_t *)tlv_data)[2])); break; default: device_printf(sc->sc_dev, "%s: unknown firmware section %d, abort\n", __func__, tlv_type); error = EINVAL; goto parse_out; } len -= roundup(tlv_len, 4); data += roundup(tlv_len, 4); } KASSERT(error == 0, ("unhandled error")); parse_out: if (error) { device_printf(sc->sc_dev, "firmware parse error %d, " "section type %d\n", error, tlv_type); } if (!(sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_PM_CMD_SUPPORT)) { device_printf(sc->sc_dev, "device uses unsupported power ops\n"); error = ENOTSUP; } out: if (error) { fw->fw_status = IWM_FW_STATUS_NONE; if (fw->fw_fp != NULL) iwm_fw_info_free(fw); } else fw->fw_status = IWM_FW_STATUS_DONE; wakeup(&sc->sc_fw); return error; } /* * DMA resource routines */ static void iwm_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { if (error != 0) return; KASSERT(nsegs == 1, ("too many DMA segments, %d should be 1", nsegs)); *(bus_addr_t *)arg = segs[0].ds_addr; } static int iwm_dma_contig_alloc(bus_dma_tag_t tag, struct iwm_dma_info *dma, bus_size_t size, bus_size_t alignment) { int error; dma->tag = NULL; dma->map = NULL; dma->size = size; dma->vaddr = NULL; error = bus_dma_tag_create(tag, alignment, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size, 0, NULL, NULL, &dma->tag); if (error != 0) goto fail; error = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr, BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, &dma->map); if (error != 0) goto fail; error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size, iwm_dma_map_addr, &dma->paddr, BUS_DMA_NOWAIT); if (error != 0) { bus_dmamem_free(dma->tag, dma->vaddr, dma->map); dma->vaddr = NULL; goto fail; } bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); return 0; fail: iwm_dma_contig_free(dma); return error; } static void iwm_dma_contig_free(struct iwm_dma_info *dma) { if (dma->vaddr != NULL) { bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->tag, dma->map); bus_dmamem_free(dma->tag, dma->vaddr, dma->map); dma->vaddr = NULL; } if (dma->tag != 
NULL) { bus_dma_tag_destroy(dma->tag); dma->tag = NULL; } } /* fwmem is used to load firmware onto the card */ static int iwm_alloc_fwmem(struct iwm_softc *sc) { /* Must be aligned on a 16-byte boundary. */ return iwm_dma_contig_alloc(sc->sc_dmat, &sc->fw_dma, sc->sc_fwdmasegsz, 16); } static void iwm_free_fwmem(struct iwm_softc *sc) { iwm_dma_contig_free(&sc->fw_dma); } /* tx scheduler rings. not used? */ static int iwm_alloc_sched(struct iwm_softc *sc) { /* TX scheduler rings must be aligned on a 1KB boundary. */ return iwm_dma_contig_alloc(sc->sc_dmat, &sc->sched_dma, nitems(sc->txq) * sizeof(struct iwm_agn_scd_bc_tbl), 1024); } static void iwm_free_sched(struct iwm_softc *sc) { iwm_dma_contig_free(&sc->sched_dma); } /* keep-warm page is used internally by the card. see iwl-fh.h for more info */ static int iwm_alloc_kw(struct iwm_softc *sc) { return iwm_dma_contig_alloc(sc->sc_dmat, &sc->kw_dma, 4096, 4096); } static void iwm_free_kw(struct iwm_softc *sc) { iwm_dma_contig_free(&sc->kw_dma); } /* interrupt cause table */ static int iwm_alloc_ict(struct iwm_softc *sc) { return iwm_dma_contig_alloc(sc->sc_dmat, &sc->ict_dma, IWM_ICT_SIZE, 1<ict_dma); } static int iwm_alloc_rx_ring(struct iwm_softc *sc, struct iwm_rx_ring *ring) { bus_size_t size; int i, error; ring->cur = 0; /* Allocate RX descriptors (256-byte aligned). */ size = IWM_RX_RING_COUNT * sizeof(uint32_t); error = iwm_dma_contig_alloc(sc->sc_dmat, &ring->desc_dma, size, 256); if (error != 0) { device_printf(sc->sc_dev, "could not allocate RX ring DMA memory\n"); goto fail; } ring->desc = ring->desc_dma.vaddr; /* Allocate RX status area (16-byte aligned). */ error = iwm_dma_contig_alloc(sc->sc_dmat, &ring->stat_dma, sizeof(*ring->stat), 16); if (error != 0) { device_printf(sc->sc_dev, "could not allocate RX status DMA memory\n"); goto fail; } ring->stat = ring->stat_dma.vaddr; /* Create RX buffer DMA tag. */ error = bus_dma_tag_create(sc->sc_dmat, 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, IWM_RBUF_SIZE, 1, IWM_RBUF_SIZE, 0, NULL, NULL, &ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create RX buf DMA tag, error %d\n", __func__, error); goto fail; } /* Allocate spare bus_dmamap_t for iwm_rx_addbuf() */ error = bus_dmamap_create(ring->data_dmat, 0, &ring->spare_map); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create RX buf DMA map, error %d\n", __func__, error); goto fail; } /* * Allocate and map RX buffers. */ for (i = 0; i < IWM_RX_RING_COUNT; i++) { struct iwm_rx_data *data = &ring->data[i]; error = bus_dmamap_create(ring->data_dmat, 0, &data->map); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create RX buf DMA map, error %d\n", __func__, error); goto fail; } data->m = NULL; if ((error = iwm_rx_addbuf(sc, IWM_RBUF_SIZE, i)) != 0) { goto fail; } } return 0; fail: iwm_free_rx_ring(sc, ring); return error; } static void iwm_disable_rx_dma(struct iwm_softc *sc) { /* XXX conditional nic locks are stupid */ /* XXX print out if we can't lock the NIC? */ if (iwm_nic_lock(sc)) { /* XXX handle if RX stop doesn't finish? */ (void) iwm_pcie_rx_stop(sc); iwm_nic_unlock(sc); } } static void iwm_reset_rx_ring(struct iwm_softc *sc, struct iwm_rx_ring *ring) { /* Reset the ring state */ ring->cur = 0; /* * The hw rx ring index in shared memory must also be cleared, * otherwise the discrepancy can cause reprocessing chaos. 
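* The memset below clears the entire RX status area, which also
* resets that index.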
*/ memset(sc->rxq.stat, 0, sizeof(*sc->rxq.stat)); } static void iwm_free_rx_ring(struct iwm_softc *sc, struct iwm_rx_ring *ring) { int i; iwm_dma_contig_free(&ring->desc_dma); iwm_dma_contig_free(&ring->stat_dma); for (i = 0; i < IWM_RX_RING_COUNT; i++) { struct iwm_rx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } if (data->map != NULL) { bus_dmamap_destroy(ring->data_dmat, data->map); data->map = NULL; } } if (ring->spare_map != NULL) { bus_dmamap_destroy(ring->data_dmat, ring->spare_map); ring->spare_map = NULL; } if (ring->data_dmat != NULL) { bus_dma_tag_destroy(ring->data_dmat); ring->data_dmat = NULL; } } static int iwm_alloc_tx_ring(struct iwm_softc *sc, struct iwm_tx_ring *ring, int qid) { bus_addr_t paddr; bus_size_t size; size_t maxsize; int nsegments; int i, error; ring->qid = qid; ring->queued = 0; ring->cur = 0; /* Allocate TX descriptors (256-byte aligned). */ size = IWM_TX_RING_COUNT * sizeof (struct iwm_tfd); error = iwm_dma_contig_alloc(sc->sc_dmat, &ring->desc_dma, size, 256); if (error != 0) { device_printf(sc->sc_dev, "could not allocate TX ring DMA memory\n"); goto fail; } ring->desc = ring->desc_dma.vaddr; /* * We only use rings 0 through 9 (4 EDCA + cmd) so there is no need * to allocate commands space for other rings. */ if (qid > IWM_MVM_CMD_QUEUE) return 0; size = IWM_TX_RING_COUNT * sizeof(struct iwm_device_cmd); error = iwm_dma_contig_alloc(sc->sc_dmat, &ring->cmd_dma, size, 4); if (error != 0) { device_printf(sc->sc_dev, "could not allocate TX cmd DMA memory\n"); goto fail; } ring->cmd = ring->cmd_dma.vaddr; /* FW commands may require more mapped space than packets. */ if (qid == IWM_MVM_CMD_QUEUE) { maxsize = IWM_RBUF_SIZE; nsegments = 1; } else { maxsize = MCLBYTES; nsegments = IWM_MAX_SCATTER - 2; } error = bus_dma_tag_create(sc->sc_dmat, 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, maxsize, nsegments, maxsize, 0, NULL, NULL, &ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "could not create TX buf DMA tag\n"); goto fail; } paddr = ring->cmd_dma.paddr; for (i = 0; i < IWM_TX_RING_COUNT; i++) { struct iwm_tx_data *data = &ring->data[i]; data->cmd_paddr = paddr; data->scratch_paddr = paddr + sizeof(struct iwm_cmd_header) + offsetof(struct iwm_tx_cmd, scratch); paddr += sizeof(struct iwm_device_cmd); error = bus_dmamap_create(ring->data_dmat, 0, &data->map); if (error != 0) { device_printf(sc->sc_dev, "could not create TX buf DMA map\n"); goto fail; } } KASSERT(paddr == ring->cmd_dma.paddr + size, ("invalid physical address")); return 0; fail: iwm_free_tx_ring(sc, ring); return error; } static void iwm_reset_tx_ring(struct iwm_softc *sc, struct iwm_tx_ring *ring) { int i; for (i = 0; i < IWM_TX_RING_COUNT; i++) { struct iwm_tx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } } /* Clear TX descriptors. 
*/ memset(ring->desc, 0, ring->desc_dma.size); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); sc->qfullmsk &= ~(1 << ring->qid); ring->queued = 0; ring->cur = 0; } static void iwm_free_tx_ring(struct iwm_softc *sc, struct iwm_tx_ring *ring) { int i; iwm_dma_contig_free(&ring->desc_dma); iwm_dma_contig_free(&ring->cmd_dma); for (i = 0; i < IWM_TX_RING_COUNT; i++) { struct iwm_tx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } if (data->map != NULL) { bus_dmamap_destroy(ring->data_dmat, data->map); data->map = NULL; } } if (ring->data_dmat != NULL) { bus_dma_tag_destroy(ring->data_dmat); ring->data_dmat = NULL; } } /* * High-level hardware frobbing routines */ static void iwm_enable_interrupts(struct iwm_softc *sc) { sc->sc_intmask = IWM_CSR_INI_SET_MASK; IWM_WRITE(sc, IWM_CSR_INT_MASK, sc->sc_intmask); } static void iwm_restore_interrupts(struct iwm_softc *sc) { IWM_WRITE(sc, IWM_CSR_INT_MASK, sc->sc_intmask); } static void iwm_disable_interrupts(struct iwm_softc *sc) { /* disable interrupts */ IWM_WRITE(sc, IWM_CSR_INT_MASK, 0); /* acknowledge all interrupts */ IWM_WRITE(sc, IWM_CSR_INT, ~0); IWM_WRITE(sc, IWM_CSR_FH_INT_STATUS, ~0); } static void iwm_ict_reset(struct iwm_softc *sc) { iwm_disable_interrupts(sc); /* Reset ICT table. */ memset(sc->ict_dma.vaddr, 0, IWM_ICT_SIZE); sc->ict_cur = 0; /* Set physical address of ICT table (4KB aligned). */ IWM_WRITE(sc, IWM_CSR_DRAM_INT_TBL_REG, IWM_CSR_DRAM_INT_TBL_ENABLE | IWM_CSR_DRAM_INIT_TBL_WRITE_POINTER | IWM_CSR_DRAM_INIT_TBL_WRAP_CHECK | sc->ict_dma.paddr >> IWM_ICT_PADDR_SHIFT); /* Switch to ICT interrupt mode in driver. */ sc->sc_flags |= IWM_FLAG_USE_ICT; /* Re-enable interrupts. */ IWM_WRITE(sc, IWM_CSR_INT, ~0); iwm_enable_interrupts(sc); } /* iwlwifi pcie/trans.c */ /* * Since this .. hard-resets things, it's time to actually * mark the first vap (if any) as having no mac context. * It's annoying, but since the driver is potentially being * stop/start'ed whilst active (thanks openbsd port!) we * have to correctly track this. */ static void iwm_stop_device(struct iwm_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); int chnl, qid; uint32_t mask = 0; /* tell the device to stop sending interrupts */ iwm_disable_interrupts(sc); /* * FreeBSD-local: mark the first vap as not-uploaded, * so the next transition through auth/assoc * will correctly populate the MAC context. */ if (vap) { struct iwm_vap *iv = IWM_VAP(vap); iv->is_uploaded = 0; } /* device going down, Stop using ICT table */ sc->sc_flags &= ~IWM_FLAG_USE_ICT; /* stop tx and rx. tx and rx bits, as usual, are from if_iwn */ iwm_write_prph(sc, IWM_SCD_TXFACT, 0); if (iwm_nic_lock(sc)) { /* Stop each Tx DMA channel */ for (chnl = 0; chnl < IWM_FH_TCSR_CHNL_NUM; chnl++) { IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_CONFIG_REG(chnl), 0); mask |= IWM_FH_TSSR_TX_STATUS_REG_MSK_CHNL_IDLE(chnl); } /* Wait for DMA channels to be idle */ - if (iwm_poll_bit(sc, IWM_FH_TSSR_TX_STATUS_REG, mask, mask, - 5000) < 0) { + if (!iwm_poll_bit(sc, IWM_FH_TSSR_TX_STATUS_REG, mask, mask, + 5000)) { device_printf(sc->sc_dev, "Failing on timeout while stopping DMA channel: [0x%08x]\n", IWM_READ(sc, IWM_FH_TSSR_TX_STATUS_REG)); } iwm_nic_unlock(sc); } iwm_disable_rx_dma(sc); /* Stop RX ring. */ iwm_reset_rx_ring(sc, &sc->rxq); /* Reset all TX rings. 
*/ for (qid = 0; qid < nitems(sc->txq); qid++) iwm_reset_tx_ring(sc, &sc->txq[qid]); /* * Power-down device's busmaster DMA clocks */ iwm_write_prph(sc, IWM_APMG_CLK_DIS_REG, IWM_APMG_CLK_VAL_DMA_CLK_RQT); DELAY(5); /* Make sure (redundant) we've released our request to stay awake */ IWM_CLRBITS(sc, IWM_CSR_GP_CNTRL, IWM_CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); /* Stop the device, and put it in low power state */ iwm_apm_stop(sc); /* Upon stop, the APM issues an interrupt if HW RF kill is set. * Clean again the interrupt here */ iwm_disable_interrupts(sc); /* stop and reset the on-board processor */ IWM_WRITE(sc, IWM_CSR_RESET, IWM_CSR_RESET_REG_FLAG_SW_RESET); /* * Even if we stop the HW, we still want the RF kill * interrupt */ iwm_enable_rfkill_int(sc); iwm_check_rfkill(sc); } /* iwlwifi: mvm/ops.c */ static void iwm_mvm_nic_config(struct iwm_softc *sc) { uint8_t radio_cfg_type, radio_cfg_step, radio_cfg_dash; uint32_t reg_val = 0; radio_cfg_type = (sc->sc_fw_phy_config & IWM_FW_PHY_CFG_RADIO_TYPE) >> IWM_FW_PHY_CFG_RADIO_TYPE_POS; radio_cfg_step = (sc->sc_fw_phy_config & IWM_FW_PHY_CFG_RADIO_STEP) >> IWM_FW_PHY_CFG_RADIO_STEP_POS; radio_cfg_dash = (sc->sc_fw_phy_config & IWM_FW_PHY_CFG_RADIO_DASH) >> IWM_FW_PHY_CFG_RADIO_DASH_POS; /* SKU control */ reg_val |= IWM_CSR_HW_REV_STEP(sc->sc_hw_rev) << IWM_CSR_HW_IF_CONFIG_REG_POS_MAC_STEP; reg_val |= IWM_CSR_HW_REV_DASH(sc->sc_hw_rev) << IWM_CSR_HW_IF_CONFIG_REG_POS_MAC_DASH; /* radio configuration */ reg_val |= radio_cfg_type << IWM_CSR_HW_IF_CONFIG_REG_POS_PHY_TYPE; reg_val |= radio_cfg_step << IWM_CSR_HW_IF_CONFIG_REG_POS_PHY_STEP; reg_val |= radio_cfg_dash << IWM_CSR_HW_IF_CONFIG_REG_POS_PHY_DASH; IWM_WRITE(sc, IWM_CSR_HW_IF_CONFIG_REG, reg_val); IWM_DPRINTF(sc, IWM_DEBUG_RESET, "Radio type=0x%x-0x%x-0x%x\n", radio_cfg_type, radio_cfg_step, radio_cfg_dash); /* * W/A : NIC is stuck in a reset state after Early PCIe power off * (PCIe power is lost before PERST# is asserted), causing ME FW * to lose ownership and not being able to obtain it back. */ if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) { iwm_set_bits_mask_prph(sc, IWM_APMG_PS_CTRL_REG, IWM_APMG_PS_CTRL_EARLY_PWR_OFF_RESET_DIS, ~IWM_APMG_PS_CTRL_EARLY_PWR_OFF_RESET_DIS); } } static int iwm_nic_rx_init(struct iwm_softc *sc) { if (!iwm_nic_lock(sc)) return EBUSY; /* * Initialize RX ring. This is from the iwn driver. */ memset(sc->rxq.stat, 0, sizeof(*sc->rxq.stat)); /* stop DMA */ iwm_disable_rx_dma(sc); IWM_WRITE(sc, IWM_FH_MEM_RCSR_CHNL0_RBDCB_WPTR, 0); IWM_WRITE(sc, IWM_FH_MEM_RCSR_CHNL0_FLUSH_RB_REQ, 0); IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_RDPTR, 0); IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_RBDCB_WPTR_REG, 0); /* Set physical address of RX ring (256-byte aligned). */ IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_RBDCB_BASE_REG, sc->rxq.desc_dma.paddr >> 8); /* Set physical address of RX status (16-byte aligned). */ IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_STTS_WPTR_REG, sc->rxq.stat_dma.paddr >> 4); /* Enable RX. 
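 *
 * (The ">> 8" and ">> 4" in the base-address writes above work because the
 * RX descriptor ring and status area were allocated 256-byte and 16-byte
 * aligned respectively, so the registers can hold the address divided by the
 * alignment without losing bits.  Spelled out, illustrative only:
 *
 *	KASSERT((sc->rxq.desc_dma.paddr & 0xff) == 0,
 *	    ("RX ring not 256-byte aligned"));
 *	IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_RBDCB_BASE_REG,
 *	    sc->rxq.desc_dma.paddr >> 8);
 *
 * and similarly for the 16-byte aligned status area written with ">> 4".)
 *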
*/ IWM_WRITE(sc, IWM_FH_MEM_RCSR_CHNL0_CONFIG_REG, IWM_FH_RCSR_RX_CONFIG_CHNL_EN_ENABLE_VAL | IWM_FH_RCSR_CHNL0_RX_IGNORE_RXF_EMPTY | /* HW bug */ IWM_FH_RCSR_CHNL0_RX_CONFIG_IRQ_DEST_INT_HOST_VAL | IWM_FH_RCSR_CHNL0_RX_CONFIG_SINGLE_FRAME_MSK | (IWM_RX_RB_TIMEOUT << IWM_FH_RCSR_RX_CONFIG_REG_IRQ_RBTH_POS) | IWM_FH_RCSR_RX_CONFIG_REG_VAL_RB_SIZE_4K | IWM_RX_QUEUE_SIZE_LOG << IWM_FH_RCSR_RX_CONFIG_RBDCB_SIZE_POS); IWM_WRITE_1(sc, IWM_CSR_INT_COALESCING, IWM_HOST_INT_TIMEOUT_DEF); /* W/A for interrupt coalescing bug in 7260 and 3160 */ if (sc->host_interrupt_operation_mode) IWM_SETBITS(sc, IWM_CSR_INT_COALESCING, IWM_HOST_INT_OPER_MODE); /* * Thus sayeth el jefe (iwlwifi) via a comment: * * This value should initially be 0 (before preparing any * RBs), should be 8 after preparing the first 8 RBs (for example) */ IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_WPTR, 8); iwm_nic_unlock(sc); return 0; } static int iwm_nic_tx_init(struct iwm_softc *sc) { int qid; if (!iwm_nic_lock(sc)) return EBUSY; /* Deactivate TX scheduler. */ iwm_write_prph(sc, IWM_SCD_TXFACT, 0); /* Set physical address of "keep warm" page (16-byte aligned). */ IWM_WRITE(sc, IWM_FH_KW_MEM_ADDR_REG, sc->kw_dma.paddr >> 4); /* Initialize TX rings. */ for (qid = 0; qid < nitems(sc->txq); qid++) { struct iwm_tx_ring *txq = &sc->txq[qid]; /* Set physical address of TX ring (256-byte aligned). */ IWM_WRITE(sc, IWM_FH_MEM_CBBC_QUEUE(qid), txq->desc_dma.paddr >> 8); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "%s: loading ring %d descriptors (%p) at %lx\n", __func__, qid, txq->desc, (unsigned long) (txq->desc_dma.paddr >> 8)); } iwm_write_prph(sc, IWM_SCD_GP_CTRL, IWM_SCD_GP_CTRL_AUTO_ACTIVE_MODE); iwm_nic_unlock(sc); return 0; } static int iwm_nic_init(struct iwm_softc *sc) { int error; iwm_apm_init(sc); if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) iwm_set_pwr(sc); iwm_mvm_nic_config(sc); if ((error = iwm_nic_rx_init(sc)) != 0) return error; /* * Ditto for TX, from iwn */ if ((error = iwm_nic_tx_init(sc)) != 0) return error; IWM_DPRINTF(sc, IWM_DEBUG_RESET, "%s: shadow registers enabled\n", __func__); IWM_SETBITS(sc, IWM_CSR_MAC_SHADOW_REG_CTRL, 0x800fffff); return 0; } const uint8_t iwm_mvm_ac_to_tx_fifo[] = { IWM_MVM_TX_FIFO_VO, IWM_MVM_TX_FIFO_VI, IWM_MVM_TX_FIFO_BE, IWM_MVM_TX_FIFO_BK, }; static int iwm_enable_txq(struct iwm_softc *sc, int sta_id, int qid, int fifo) { if (!iwm_nic_lock(sc)) { device_printf(sc->sc_dev, "%s: cannot enable txq %d\n", __func__, qid); return EBUSY; } IWM_WRITE(sc, IWM_HBUS_TARG_WRPTR, qid << 8 | 0); if (qid == IWM_MVM_CMD_QUEUE) { /* unactivate before configuration */ iwm_write_prph(sc, IWM_SCD_QUEUE_STATUS_BITS(qid), (0 << IWM_SCD_QUEUE_STTS_REG_POS_ACTIVE) | (1 << IWM_SCD_QUEUE_STTS_REG_POS_SCD_ACT_EN)); iwm_clear_bits_prph(sc, IWM_SCD_AGGR_SEL, (1 << qid)); iwm_write_prph(sc, IWM_SCD_QUEUE_RDPTR(qid), 0); iwm_write_mem32(sc, sc->sched_base + IWM_SCD_CONTEXT_QUEUE_OFFSET(qid), 0); /* Set scheduler window size and frame limit. 
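 *
 * (The iwm_mvm_ac_to_tx_fifo[] table above pairs each WME access category
 * with a hardware TX FIFO.  A data queue for, say, the video AC would be
 * enabled through this same function along the following lines; this is an
 * illustrative call, not a verbatim quote of the driver:
 *
 *	int ac = WME_AC_VI;
 *
 *	error = iwm_enable_txq(sc, IWM_STATION_ID, ac,
 *	    iwm_mvm_ac_to_tx_fifo[ac]);
 *
 * which takes the non-command-queue branch below and configures the queue
 * through an IWM_SCD_QUEUE_CFG command instead of programming the scheduler
 * registers directly.)
 *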
*/ iwm_write_mem32(sc, sc->sched_base + IWM_SCD_CONTEXT_QUEUE_OFFSET(qid) + sizeof(uint32_t), ((IWM_FRAME_LIMIT << IWM_SCD_QUEUE_CTX_REG2_WIN_SIZE_POS) & IWM_SCD_QUEUE_CTX_REG2_WIN_SIZE_MSK) | ((IWM_FRAME_LIMIT << IWM_SCD_QUEUE_CTX_REG2_FRAME_LIMIT_POS) & IWM_SCD_QUEUE_CTX_REG2_FRAME_LIMIT_MSK)); iwm_write_prph(sc, IWM_SCD_QUEUE_STATUS_BITS(qid), (1 << IWM_SCD_QUEUE_STTS_REG_POS_ACTIVE) | (fifo << IWM_SCD_QUEUE_STTS_REG_POS_TXF) | (1 << IWM_SCD_QUEUE_STTS_REG_POS_WSL) | IWM_SCD_QUEUE_STTS_REG_MSK); } else { struct iwm_scd_txq_cfg_cmd cmd; int error; iwm_nic_unlock(sc); memset(&cmd, 0, sizeof(cmd)); cmd.scd_queue = qid; cmd.enable = 1; cmd.sta_id = sta_id; cmd.tx_fifo = fifo; cmd.aggregate = 0; cmd.window = IWM_FRAME_LIMIT; error = iwm_mvm_send_cmd_pdu(sc, IWM_SCD_QUEUE_CFG, IWM_CMD_SYNC, sizeof(cmd), &cmd); if (error) { device_printf(sc->sc_dev, "cannot enable txq %d\n", qid); return error; } if (!iwm_nic_lock(sc)) return EBUSY; } iwm_write_prph(sc, IWM_SCD_EN_CTRL, iwm_read_prph(sc, IWM_SCD_EN_CTRL) | qid); iwm_nic_unlock(sc); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "%s: enabled txq %d FIFO %d\n", __func__, qid, fifo); return 0; } static int iwm_post_alive(struct iwm_softc *sc) { int nwords; int error, chnl; uint32_t base; if (!iwm_nic_lock(sc)) return EBUSY; base = iwm_read_prph(sc, IWM_SCD_SRAM_BASE_ADDR); if (sc->sched_base != base) { device_printf(sc->sc_dev, "%s: sched addr mismatch: alive: 0x%x prph: 0x%x\n", __func__, sc->sched_base, base); } iwm_ict_reset(sc); /* Clear TX scheduler state in SRAM. */ nwords = (IWM_SCD_TRANS_TBL_MEM_UPPER_BOUND - IWM_SCD_CONTEXT_MEM_LOWER_BOUND) / sizeof(uint32_t); error = iwm_write_mem(sc, sc->sched_base + IWM_SCD_CONTEXT_MEM_LOWER_BOUND, NULL, nwords); if (error) goto out; /* Set physical address of TX scheduler rings (1KB aligned). */ iwm_write_prph(sc, IWM_SCD_DRAM_BASE_ADDR, sc->sched_dma.paddr >> 10); iwm_write_prph(sc, IWM_SCD_CHAINEXT_EN, 0); iwm_nic_unlock(sc); /* enable command channel */ error = iwm_enable_txq(sc, 0 /* unused */, IWM_MVM_CMD_QUEUE, 7); if (error) return error; if (!iwm_nic_lock(sc)) return EBUSY; iwm_write_prph(sc, IWM_SCD_TXFACT, 0xff); /* Enable DMA channels. */ for (chnl = 0; chnl < IWM_FH_TCSR_CHNL_NUM; chnl++) { IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_CONFIG_REG(chnl), IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE | IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_ENABLE); } IWM_SETBITS(sc, IWM_FH_TX_CHICKEN_BITS_REG, IWM_FH_TX_CHICKEN_BITS_SCD_AUTO_RETRY_EN); /* Enable L1-Active */ if (sc->sc_device_family != IWM_DEVICE_FAMILY_8000) { iwm_clear_bits_prph(sc, IWM_APMG_PCIDEV_STT_REG, IWM_APMG_PCIDEV_STT_VAL_L1_ACT_DIS); } out: iwm_nic_unlock(sc); return error; } /* * NVM read access and content parsing. We do not support * external NVM or writing NVM. 
* iwlwifi/mvm/nvm.c */ /* list of NVM sections we are allowed/need to read */ const int nvm_to_read[] = { IWM_NVM_SECTION_TYPE_HW, IWM_NVM_SECTION_TYPE_SW, IWM_NVM_SECTION_TYPE_REGULATORY, IWM_NVM_SECTION_TYPE_CALIBRATION, IWM_NVM_SECTION_TYPE_PRODUCTION, IWM_NVM_SECTION_TYPE_HW_8000, IWM_NVM_SECTION_TYPE_MAC_OVERRIDE, IWM_NVM_SECTION_TYPE_PHY_SKU, }; /* Default NVM size to read */ #define IWM_NVM_DEFAULT_CHUNK_SIZE (2*1024) #define IWM_MAX_NVM_SECTION_SIZE 8192 #define IWM_NVM_WRITE_OPCODE 1 #define IWM_NVM_READ_OPCODE 0 /* load nvm chunk response */ #define IWM_READ_NVM_CHUNK_SUCCEED 0 #define IWM_READ_NVM_CHUNK_INVALID_ADDRESS 1 static int iwm_nvm_read_chunk(struct iwm_softc *sc, uint16_t section, uint16_t offset, uint16_t length, uint8_t *data, uint16_t *len) { offset = 0; struct iwm_nvm_access_cmd nvm_access_cmd = { .offset = htole16(offset), .length = htole16(length), .type = htole16(section), .op_code = IWM_NVM_READ_OPCODE, }; struct iwm_nvm_access_resp *nvm_resp; struct iwm_rx_packet *pkt; struct iwm_host_cmd cmd = { .id = IWM_NVM_ACCESS_CMD, .flags = IWM_CMD_SYNC | IWM_CMD_WANT_SKB | IWM_CMD_SEND_IN_RFKILL, .data = { &nvm_access_cmd, }, }; int ret, offset_read; size_t bytes_read; uint8_t *resp_data; cmd.len[0] = sizeof(struct iwm_nvm_access_cmd); ret = iwm_send_cmd(sc, &cmd); if (ret) { device_printf(sc->sc_dev, "Could not send NVM_ACCESS command (error=%d)\n", ret); return ret; } pkt = cmd.resp_pkt; if (pkt->hdr.flags & IWM_CMD_FAILED_MSK) { device_printf(sc->sc_dev, "Bad return from IWM_NVM_ACCES_COMMAND (0x%08X)\n", pkt->hdr.flags); ret = EIO; goto exit; } /* Extract NVM response */ nvm_resp = (void *)pkt->data; ret = le16toh(nvm_resp->status); bytes_read = le16toh(nvm_resp->length); offset_read = le16toh(nvm_resp->offset); resp_data = nvm_resp->data; if (ret) { IWM_DPRINTF(sc, IWM_DEBUG_RESET, "NVM access command failed with status %d\n", ret); ret = EINVAL; goto exit; } if (offset_read != offset) { device_printf(sc->sc_dev, "NVM ACCESS response with invalid offset %d\n", offset_read); ret = EINVAL; goto exit; } if (bytes_read > length) { device_printf(sc->sc_dev, "NVM ACCESS response with too much data " "(%d bytes requested, %zd bytes received)\n", length, bytes_read); ret = EINVAL; goto exit; } memcpy(data + offset, resp_data, bytes_read); *len = bytes_read; exit: iwm_free_resp(sc, &cmd); return ret; } /* * Reads an NVM section completely. * NICs prior to 7000 family don't have a real NVM, but just read * section 0 which is the EEPROM. Because the EEPROM reading is unlimited * by uCode, we need to manually check in this case that we don't * overflow and try to read more than the EEPROM size. * For 7000 family NICs, we supply the maximal size we can read, and * the uCode fills the response with as much data as we can, * without overflowing, so no check is needed. 
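 *
 * As a concrete illustration: with the 2 KiB default chunk size, a
 * hypothetical 5000-byte section is returned as successive reads of
 *
 *	2048, 2048, 904 bytes    (*len: 0 -> 2048 -> 4096 -> 5000)
 *
 * and the short final read (seglen < chunklen) is what terminates the loop
 * in iwm_nvm_read_section() below.
 *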
*/ static int iwm_nvm_read_section(struct iwm_softc *sc, uint16_t section, uint8_t *data, uint16_t *len, size_t max_len) { uint16_t chunklen, seglen; int error = 0; IWM_DPRINTF(sc, IWM_DEBUG_RESET, "reading NVM section %d\n", section); chunklen = seglen = IWM_NVM_DEFAULT_CHUNK_SIZE; *len = 0; /* Read NVM chunks until exhausted (reading less than requested) */ while (seglen == chunklen && *len < max_len) { error = iwm_nvm_read_chunk(sc, section, *len, chunklen, data, &seglen); if (error) { IWM_DPRINTF(sc, IWM_DEBUG_RESET, "Cannot read from NVM section " "%d at offset %d\n", section, *len); return error; } *len += seglen; } IWM_DPRINTF(sc, IWM_DEBUG_RESET, "NVM section %d read completed (%d bytes, error=%d)\n", section, *len, error); return error; } /* * BEGIN IWM_NVM_PARSE */ /* iwlwifi/iwl-nvm-parse.c */ /* NVM offsets (in words) definitions */ enum iwm_nvm_offsets { /* NVM HW-Section offset (in words) definitions */ IWM_HW_ADDR = 0x15, /* NVM SW-Section offset (in words) definitions */ IWM_NVM_SW_SECTION = 0x1C0, IWM_NVM_VERSION = 0, IWM_RADIO_CFG = 1, IWM_SKU = 2, IWM_N_HW_ADDRS = 3, IWM_NVM_CHANNELS = 0x1E0 - IWM_NVM_SW_SECTION, /* NVM calibration section offset (in words) definitions */ IWM_NVM_CALIB_SECTION = 0x2B8, IWM_XTAL_CALIB = 0x316 - IWM_NVM_CALIB_SECTION }; enum iwm_8000_nvm_offsets { /* NVM HW-Section offset (in words) definitions */ IWM_HW_ADDR0_WFPM_8000 = 0x12, IWM_HW_ADDR1_WFPM_8000 = 0x16, IWM_HW_ADDR0_PCIE_8000 = 0x8A, IWM_HW_ADDR1_PCIE_8000 = 0x8E, IWM_MAC_ADDRESS_OVERRIDE_8000 = 1, /* NVM SW-Section offset (in words) definitions */ IWM_NVM_SW_SECTION_8000 = 0x1C0, IWM_NVM_VERSION_8000 = 0, IWM_RADIO_CFG_8000 = 0, IWM_SKU_8000 = 2, IWM_N_HW_ADDRS_8000 = 3, /* NVM REGULATORY -Section offset (in words) definitions */ IWM_NVM_CHANNELS_8000 = 0, IWM_NVM_LAR_OFFSET_8000_OLD = 0x4C7, IWM_NVM_LAR_OFFSET_8000 = 0x507, IWM_NVM_LAR_ENABLED_8000 = 0x7, /* NVM calibration section offset (in words) definitions */ IWM_NVM_CALIB_SECTION_8000 = 0x2B8, IWM_XTAL_CALIB_8000 = 0x316 - IWM_NVM_CALIB_SECTION_8000 }; /* SKU Capabilities (actual values from NVM definition) */ enum nvm_sku_bits { IWM_NVM_SKU_CAP_BAND_24GHZ = (1 << 0), IWM_NVM_SKU_CAP_BAND_52GHZ = (1 << 1), IWM_NVM_SKU_CAP_11N_ENABLE = (1 << 2), IWM_NVM_SKU_CAP_11AC_ENABLE = (1 << 3), }; /* radio config bits (actual values from NVM definition) */ #define IWM_NVM_RF_CFG_DASH_MSK(x) (x & 0x3) /* bits 0-1 */ #define IWM_NVM_RF_CFG_STEP_MSK(x) ((x >> 2) & 0x3) /* bits 2-3 */ #define IWM_NVM_RF_CFG_TYPE_MSK(x) ((x >> 4) & 0x3) /* bits 4-5 */ #define IWM_NVM_RF_CFG_PNUM_MSK(x) ((x >> 6) & 0x3) /* bits 6-7 */ #define IWM_NVM_RF_CFG_TX_ANT_MSK(x) ((x >> 8) & 0xF) /* bits 8-11 */ #define IWM_NVM_RF_CFG_RX_ANT_MSK(x) ((x >> 12) & 0xF) /* bits 12-15 */ #define IWM_NVM_RF_CFG_FLAVOR_MSK_8000(x) (x & 0xF) #define IWM_NVM_RF_CFG_DASH_MSK_8000(x) ((x >> 4) & 0xF) #define IWM_NVM_RF_CFG_STEP_MSK_8000(x) ((x >> 8) & 0xF) #define IWM_NVM_RF_CFG_TYPE_MSK_8000(x) ((x >> 12) & 0xFFF) #define IWM_NVM_RF_CFG_TX_ANT_MSK_8000(x) ((x >> 24) & 0xF) #define IWM_NVM_RF_CFG_RX_ANT_MSK_8000(x) ((x >> 28) & 0xF) #define DEFAULT_MAX_TX_POWER 16 /** * enum iwm_nvm_channel_flags - channel flags in NVM * @IWM_NVM_CHANNEL_VALID: channel is usable for this SKU/geo * @IWM_NVM_CHANNEL_IBSS: usable as an IBSS channel * @IWM_NVM_CHANNEL_ACTIVE: active scanning allowed * @IWM_NVM_CHANNEL_RADAR: radar detection required * XXX cannot find this (DFS) flag in iwl-nvm-parse.c * @IWM_NVM_CHANNEL_DFS: dynamic freq selection candidate * @IWM_NVM_CHANNEL_WIDE: 20 MHz channel 
okay (?) * @IWM_NVM_CHANNEL_40MHZ: 40 MHz channel okay (?) * @IWM_NVM_CHANNEL_80MHZ: 80 MHz channel okay (?) * @IWM_NVM_CHANNEL_160MHZ: 160 MHz channel okay (?) */ enum iwm_nvm_channel_flags { IWM_NVM_CHANNEL_VALID = (1 << 0), IWM_NVM_CHANNEL_IBSS = (1 << 1), IWM_NVM_CHANNEL_ACTIVE = (1 << 3), IWM_NVM_CHANNEL_RADAR = (1 << 4), IWM_NVM_CHANNEL_DFS = (1 << 7), IWM_NVM_CHANNEL_WIDE = (1 << 8), IWM_NVM_CHANNEL_40MHZ = (1 << 9), IWM_NVM_CHANNEL_80MHZ = (1 << 10), IWM_NVM_CHANNEL_160MHZ = (1 << 11), }; /* * Translate EEPROM flags to net80211. */ static uint32_t iwm_eeprom_channel_flags(uint16_t ch_flags) { uint32_t nflags; nflags = 0; if ((ch_flags & IWM_NVM_CHANNEL_ACTIVE) == 0) nflags |= IEEE80211_CHAN_PASSIVE; if ((ch_flags & IWM_NVM_CHANNEL_IBSS) == 0) nflags |= IEEE80211_CHAN_NOADHOC; if (ch_flags & IWM_NVM_CHANNEL_RADAR) { nflags |= IEEE80211_CHAN_DFS; /* Just in case. */ nflags |= IEEE80211_CHAN_NOADHOC; } return (nflags); } static void iwm_add_channel_band(struct iwm_softc *sc, struct ieee80211_channel chans[], int maxchans, int *nchans, int ch_idx, size_t ch_num, const uint8_t bands[]) { const uint16_t * const nvm_ch_flags = sc->sc_nvm.nvm_ch_flags; uint32_t nflags; uint16_t ch_flags; uint8_t ieee; int error; for (; ch_idx < ch_num; ch_idx++) { ch_flags = le16_to_cpup(nvm_ch_flags + ch_idx); if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) ieee = iwm_nvm_channels[ch_idx]; else ieee = iwm_nvm_channels_8000[ch_idx]; if (!(ch_flags & IWM_NVM_CHANNEL_VALID)) { IWM_DPRINTF(sc, IWM_DEBUG_EEPROM, "Ch. %d Flags %x [%sGHz] - No traffic\n", ieee, ch_flags, (ch_idx >= IWM_NUM_2GHZ_CHANNELS) ? "5.2" : "2.4"); continue; } nflags = iwm_eeprom_channel_flags(ch_flags); error = ieee80211_add_channel(chans, maxchans, nchans, ieee, 0, 0, nflags, bands); if (error != 0) break; IWM_DPRINTF(sc, IWM_DEBUG_EEPROM, "Ch. %d Flags %x [%sGHz] - Added\n", ieee, ch_flags, (ch_idx >= IWM_NUM_2GHZ_CHANNELS) ? "5.2" : "2.4"); } } static void iwm_init_channel_map(struct ieee80211com *ic, int maxchans, int *nchans, struct ieee80211_channel chans[]) { struct iwm_softc *sc = ic->ic_softc; struct iwm_nvm_data *data = &sc->sc_nvm; uint8_t bands[IEEE80211_MODE_BYTES]; size_t ch_num; memset(bands, 0, sizeof(bands)); /* 1-13: 11b/g channels. */ setbit(bands, IEEE80211_MODE_11B); setbit(bands, IEEE80211_MODE_11G); iwm_add_channel_band(sc, chans, maxchans, nchans, 0, IWM_NUM_2GHZ_CHANNELS - 1, bands); /* 14: 11b channel only. */ clrbit(bands, IEEE80211_MODE_11G); iwm_add_channel_band(sc, chans, maxchans, nchans, IWM_NUM_2GHZ_CHANNELS - 1, IWM_NUM_2GHZ_CHANNELS, bands); if (data->sku_cap_band_52GHz_enable) { if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) ch_num = nitems(iwm_nvm_channels); else ch_num = nitems(iwm_nvm_channels_8000); memset(bands, 0, sizeof(bands)); setbit(bands, IEEE80211_MODE_11A); iwm_add_channel_band(sc, chans, maxchans, nchans, IWM_NUM_2GHZ_CHANNELS, ch_num, bands); } } static void iwm_set_hw_address_8000(struct iwm_softc *sc, struct iwm_nvm_data *data, const uint16_t *mac_override, const uint16_t *nvm_hw) { const uint8_t *hw_addr; if (mac_override) { static const uint8_t reserved_mac[] = { 0x02, 0xcc, 0xaa, 0xff, 0xee, 0x00 }; hw_addr = (const uint8_t *)(mac_override + IWM_MAC_ADDRESS_OVERRIDE_8000); /* * Store the MAC address from MAO section. * No byte swapping is required in MAO section */ IEEE80211_ADDR_COPY(data->hw_addr, hw_addr); /* * Force the use of the OTP MAC address in case of reserved MAC * address in the NVM, or if address is given but invalid. 
*/ if (!IEEE80211_ADDR_EQ(reserved_mac, hw_addr) && !IEEE80211_ADDR_EQ(ieee80211broadcastaddr, data->hw_addr) && iwm_is_valid_ether_addr(data->hw_addr) && !IEEE80211_IS_MULTICAST(data->hw_addr)) return; IWM_DPRINTF(sc, IWM_DEBUG_RESET, "%s: mac address from nvm override section invalid\n", __func__); } if (nvm_hw) { /* read the mac address from WFMP registers */ uint32_t mac_addr0 = htole32(iwm_read_prph(sc, IWM_WFMP_MAC_ADDR_0)); uint32_t mac_addr1 = htole32(iwm_read_prph(sc, IWM_WFMP_MAC_ADDR_1)); hw_addr = (const uint8_t *)&mac_addr0; data->hw_addr[0] = hw_addr[3]; data->hw_addr[1] = hw_addr[2]; data->hw_addr[2] = hw_addr[1]; data->hw_addr[3] = hw_addr[0]; hw_addr = (const uint8_t *)&mac_addr1; data->hw_addr[4] = hw_addr[1]; data->hw_addr[5] = hw_addr[0]; return; } device_printf(sc->sc_dev, "%s: mac address not found\n", __func__); memset(data->hw_addr, 0, sizeof(data->hw_addr)); } static int iwm_get_sku(const struct iwm_softc *sc, const uint16_t *nvm_sw, const uint16_t *phy_sku) { if (sc->sc_device_family != IWM_DEVICE_FAMILY_8000) return le16_to_cpup(nvm_sw + IWM_SKU); return le32_to_cpup((const uint32_t *)(phy_sku + IWM_SKU_8000)); } static int iwm_get_nvm_version(const struct iwm_softc *sc, const uint16_t *nvm_sw) { if (sc->sc_device_family != IWM_DEVICE_FAMILY_8000) return le16_to_cpup(nvm_sw + IWM_NVM_VERSION); else return le32_to_cpup((const uint32_t *)(nvm_sw + IWM_NVM_VERSION_8000)); } static int iwm_get_radio_cfg(const struct iwm_softc *sc, const uint16_t *nvm_sw, const uint16_t *phy_sku) { if (sc->sc_device_family != IWM_DEVICE_FAMILY_8000) return le16_to_cpup(nvm_sw + IWM_RADIO_CFG); return le32_to_cpup((const uint32_t *)(phy_sku + IWM_RADIO_CFG_8000)); } static int iwm_get_n_hw_addrs(const struct iwm_softc *sc, const uint16_t *nvm_sw) { int n_hw_addr; if (sc->sc_device_family != IWM_DEVICE_FAMILY_8000) return le16_to_cpup(nvm_sw + IWM_N_HW_ADDRS); n_hw_addr = le32_to_cpup((const uint32_t *)(nvm_sw + IWM_N_HW_ADDRS_8000)); return n_hw_addr & IWM_N_HW_ADDR_MASK; } static void iwm_set_radio_cfg(const struct iwm_softc *sc, struct iwm_nvm_data *data, uint32_t radio_cfg) { if (sc->sc_device_family != IWM_DEVICE_FAMILY_8000) { data->radio_cfg_type = IWM_NVM_RF_CFG_TYPE_MSK(radio_cfg); data->radio_cfg_step = IWM_NVM_RF_CFG_STEP_MSK(radio_cfg); data->radio_cfg_dash = IWM_NVM_RF_CFG_DASH_MSK(radio_cfg); data->radio_cfg_pnum = IWM_NVM_RF_CFG_PNUM_MSK(radio_cfg); return; } /* set the radio configuration for family 8000 */ data->radio_cfg_type = IWM_NVM_RF_CFG_TYPE_MSK_8000(radio_cfg); data->radio_cfg_step = IWM_NVM_RF_CFG_STEP_MSK_8000(radio_cfg); data->radio_cfg_dash = IWM_NVM_RF_CFG_DASH_MSK_8000(radio_cfg); data->radio_cfg_pnum = IWM_NVM_RF_CFG_FLAVOR_MSK_8000(radio_cfg); data->valid_tx_ant = IWM_NVM_RF_CFG_TX_ANT_MSK_8000(radio_cfg); data->valid_rx_ant = IWM_NVM_RF_CFG_RX_ANT_MSK_8000(radio_cfg); } static int iwm_parse_nvm_data(struct iwm_softc *sc, const uint16_t *nvm_hw, const uint16_t *nvm_sw, const uint16_t *nvm_calib, const uint16_t *mac_override, const uint16_t *phy_sku, const uint16_t *regulatory) { struct iwm_nvm_data *data = &sc->sc_nvm; uint8_t hw_addr[IEEE80211_ADDR_LEN]; uint32_t sku, radio_cfg; data->nvm_version = iwm_get_nvm_version(sc, nvm_sw); radio_cfg = iwm_get_radio_cfg(sc, nvm_sw, phy_sku); iwm_set_radio_cfg(sc, data, radio_cfg); sku = iwm_get_sku(sc, nvm_sw, phy_sku); data->sku_cap_band_24GHz_enable = sku & IWM_NVM_SKU_CAP_BAND_24GHZ; data->sku_cap_band_52GHz_enable = sku & IWM_NVM_SKU_CAP_BAND_52GHZ; data->sku_cap_11n_enable = 0; data->n_hw_addrs = 
iwm_get_n_hw_addrs(sc, nvm_sw); /* The byte order is little endian 16 bit, meaning 214365 */ if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) { IEEE80211_ADDR_COPY(hw_addr, nvm_hw + IWM_HW_ADDR); data->hw_addr[0] = hw_addr[1]; data->hw_addr[1] = hw_addr[0]; data->hw_addr[2] = hw_addr[3]; data->hw_addr[3] = hw_addr[2]; data->hw_addr[4] = hw_addr[5]; data->hw_addr[5] = hw_addr[4]; } else { iwm_set_hw_address_8000(sc, data, mac_override, nvm_hw); } if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) { memcpy(data->nvm_ch_flags, &nvm_sw[IWM_NVM_CHANNELS], IWM_NUM_CHANNELS * sizeof(uint16_t)); } else { memcpy(data->nvm_ch_flags, ®ulatory[IWM_NVM_CHANNELS_8000], IWM_NUM_CHANNELS_8000 * sizeof(uint16_t)); } data->calib_version = 255; /* TODO: this value will prevent some checks from failing, we need to check if this field is still needed, and if it does, where is it in the NVM */ return 0; } /* * END NVM PARSE */ static int iwm_parse_nvm_sections(struct iwm_softc *sc, struct iwm_nvm_section *sections) { const uint16_t *hw, *sw, *calib, *regulatory, *mac_override, *phy_sku; /* Checking for required sections */ if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) { if (!sections[IWM_NVM_SECTION_TYPE_SW].data || !sections[IWM_NVM_SECTION_TYPE_HW].data) { device_printf(sc->sc_dev, "Can't parse empty OTP/NVM sections\n"); return ENOENT; } hw = (const uint16_t *) sections[IWM_NVM_SECTION_TYPE_HW].data; } else if (sc->sc_device_family == IWM_DEVICE_FAMILY_8000) { /* SW and REGULATORY sections are mandatory */ if (!sections[IWM_NVM_SECTION_TYPE_SW].data || !sections[IWM_NVM_SECTION_TYPE_REGULATORY].data) { device_printf(sc->sc_dev, "Can't parse empty OTP/NVM sections\n"); return ENOENT; } /* MAC_OVERRIDE or at least HW section must exist */ if (!sections[IWM_NVM_SECTION_TYPE_HW_8000].data && !sections[IWM_NVM_SECTION_TYPE_MAC_OVERRIDE].data) { device_printf(sc->sc_dev, "Can't parse mac_address, empty sections\n"); return ENOENT; } /* PHY_SKU section is mandatory in B0 */ if (!sections[IWM_NVM_SECTION_TYPE_PHY_SKU].data) { device_printf(sc->sc_dev, "Can't parse phy_sku in B0, empty sections\n"); return ENOENT; } hw = (const uint16_t *) sections[IWM_NVM_SECTION_TYPE_HW_8000].data; } else { panic("unknown device family %d\n", sc->sc_device_family); } sw = (const uint16_t *)sections[IWM_NVM_SECTION_TYPE_SW].data; calib = (const uint16_t *) sections[IWM_NVM_SECTION_TYPE_CALIBRATION].data; regulatory = (const uint16_t *) sections[IWM_NVM_SECTION_TYPE_REGULATORY].data; mac_override = (const uint16_t *) sections[IWM_NVM_SECTION_TYPE_MAC_OVERRIDE].data; phy_sku = (const uint16_t *)sections[IWM_NVM_SECTION_TYPE_PHY_SKU].data; return iwm_parse_nvm_data(sc, hw, sw, calib, mac_override, phy_sku, regulatory); } static int iwm_nvm_init(struct iwm_softc *sc) { struct iwm_nvm_section nvm_sections[IWM_NVM_NUM_OF_SECTIONS]; int i, section, error; uint16_t len; uint8_t *buf; const size_t bufsz = IWM_MAX_NVM_SECTION_SIZE; memset(nvm_sections, 0 , sizeof(nvm_sections)); buf = malloc(bufsz, M_DEVBUF, M_NOWAIT); if (buf == NULL) return ENOMEM; for (i = 0; i < nitems(nvm_to_read); i++) { section = nvm_to_read[i]; KASSERT(section <= nitems(nvm_sections), ("too many sections")); error = iwm_nvm_read_section(sc, section, buf, &len, bufsz); if (error) { error = 0; continue; } nvm_sections[section].data = malloc(len, M_DEVBUF, M_NOWAIT); if (nvm_sections[section].data == NULL) { error = ENOMEM; break; } memcpy(nvm_sections[section].data, buf, len); nvm_sections[section].length = len; } free(buf, M_DEVBUF); if (error == 0) error = 
iwm_parse_nvm_sections(sc, nvm_sections); for (i = 0; i < IWM_NVM_NUM_OF_SECTIONS; i++) { if (nvm_sections[i].data != NULL) free(nvm_sections[i].data, M_DEVBUF); } return error; } /* * Firmware loading gunk. This is kind of a weird hybrid between the * iwn driver and the Linux iwlwifi driver. */ static int iwm_firmware_load_sect(struct iwm_softc *sc, uint32_t dst_addr, const uint8_t *section, uint32_t byte_cnt) { int error = EINVAL; uint32_t chunk_sz, offset; chunk_sz = MIN(IWM_FH_MEM_TB_MAX_LENGTH, byte_cnt); for (offset = 0; offset < byte_cnt; offset += chunk_sz) { uint32_t addr, len; const uint8_t *data; addr = dst_addr + offset; len = MIN(chunk_sz, byte_cnt - offset); data = section + offset; error = iwm_firmware_load_chunk(sc, addr, data, len); if (error) break; } return error; } static int iwm_firmware_load_chunk(struct iwm_softc *sc, uint32_t dst_addr, const uint8_t *chunk, uint32_t byte_cnt) { struct iwm_dma_info *dma = &sc->fw_dma; int error; /* Copy firmware chunk into pre-allocated DMA-safe memory. */ memcpy(dma->vaddr, chunk, byte_cnt); bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); if (dst_addr >= IWM_FW_MEM_EXTENDED_START && dst_addr <= IWM_FW_MEM_EXTENDED_END) { iwm_set_bits_prph(sc, IWM_LMPM_CHICK, IWM_LMPM_CHICK_EXTENDED_ADDR_SPACE); } sc->sc_fw_chunk_done = 0; if (!iwm_nic_lock(sc)) return EBUSY; IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_CONFIG_REG(IWM_FH_SRVC_CHNL), IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_PAUSE); IWM_WRITE(sc, IWM_FH_SRVC_CHNL_SRAM_ADDR_REG(IWM_FH_SRVC_CHNL), dst_addr); IWM_WRITE(sc, IWM_FH_TFDIB_CTRL0_REG(IWM_FH_SRVC_CHNL), dma->paddr & IWM_FH_MEM_TFDIB_DRAM_ADDR_LSB_MSK); IWM_WRITE(sc, IWM_FH_TFDIB_CTRL1_REG(IWM_FH_SRVC_CHNL), (iwm_get_dma_hi_addr(dma->paddr) << IWM_FH_MEM_TFDIB_REG1_ADDR_BITSHIFT) | byte_cnt); IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_BUF_STS_REG(IWM_FH_SRVC_CHNL), 1 << IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_POS_TB_NUM | 1 << IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_POS_TB_IDX | IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_VAL_TFDB_VALID); IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_CONFIG_REG(IWM_FH_SRVC_CHNL), IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE | IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_DISABLE | IWM_FH_TCSR_TX_CONFIG_REG_VAL_CIRQ_HOST_ENDTFD); iwm_nic_unlock(sc); /* wait 1s for this segment to load */ while (!sc->sc_fw_chunk_done) if ((error = msleep(&sc->sc_fw, &sc->sc_mtx, 0, "iwmfw", hz)) != 0) break; if (!sc->sc_fw_chunk_done) { device_printf(sc->sc_dev, "fw chunk addr 0x%x len %d failed to load\n", dst_addr, byte_cnt); } if (dst_addr >= IWM_FW_MEM_EXTENDED_START && dst_addr <= IWM_FW_MEM_EXTENDED_END && iwm_nic_lock(sc)) { iwm_clear_bits_prph(sc, IWM_LMPM_CHICK, IWM_LMPM_CHICK_EXTENDED_ADDR_SPACE); iwm_nic_unlock(sc); } return error; } int iwm_load_cpu_sections_8000(struct iwm_softc *sc, struct iwm_fw_sects *fws, int cpu, int *first_ucode_section) { int shift_param; int i, error = 0, sec_num = 0x1; uint32_t val, last_read_idx = 0; const void *data; uint32_t dlen; uint32_t offset; if (cpu == 1) { shift_param = 0; *first_ucode_section = 0; } else { shift_param = 16; (*first_ucode_section)++; } for (i = *first_ucode_section; i < IWM_UCODE_SECT_MAX; i++) { last_read_idx = i; data = fws->fw_sect[i].fws_data; dlen = fws->fw_sect[i].fws_len; offset = fws->fw_sect[i].fws_devoff; /* * CPU1_CPU2_SEPARATOR_SECTION delimiter - separate between * CPU1 to CPU2. * PAGING_SEPARATOR_SECTION delimiter - separate between * CPU2 non paged to CPU2 paging sec. 
*/ if (!data || offset == IWM_CPU1_CPU2_SEPARATOR_SECTION || offset == IWM_PAGING_SEPARATOR_SECTION) break; IWM_DPRINTF(sc, IWM_DEBUG_RESET, "LOAD FIRMWARE chunk %d offset 0x%x len %d for cpu %d\n", i, offset, dlen, cpu); if (dlen > sc->sc_fwdmasegsz) { IWM_DPRINTF(sc, IWM_DEBUG_RESET, "chunk %d too large (%d bytes)\n", i, dlen); error = EFBIG; } else { error = iwm_firmware_load_sect(sc, offset, data, dlen); } if (error) { device_printf(sc->sc_dev, "could not load firmware chunk %d (error %d)\n", i, error); return error; } /* Notify the ucode of the loaded section number and status */ if (iwm_nic_lock(sc)) { val = IWM_READ(sc, IWM_FH_UCODE_LOAD_STATUS); val = val | (sec_num << shift_param); IWM_WRITE(sc, IWM_FH_UCODE_LOAD_STATUS, val); sec_num = (sec_num << 1) | 0x1; iwm_nic_unlock(sc); /* * The firmware won't load correctly without this delay. */ DELAY(8000); } } *first_ucode_section = last_read_idx; if (iwm_nic_lock(sc)) { if (cpu == 1) IWM_WRITE(sc, IWM_FH_UCODE_LOAD_STATUS, 0xFFFF); else IWM_WRITE(sc, IWM_FH_UCODE_LOAD_STATUS, 0xFFFFFFFF); iwm_nic_unlock(sc); } return 0; } int iwm_load_firmware_8000(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { struct iwm_fw_sects *fws; int error = 0; int first_ucode_section; IWM_DPRINTF(sc, IWM_DEBUG_RESET, "loading ucode type %d\n", ucode_type); fws = &sc->sc_fw.fw_sects[ucode_type]; /* configure the ucode to be ready to get the secured image */ /* release CPU reset */ iwm_write_prph(sc, IWM_RELEASE_CPU_RESET, IWM_RELEASE_CPU_RESET_BIT); /* load to FW the binary Secured sections of CPU1 */ error = iwm_load_cpu_sections_8000(sc, fws, 1, &first_ucode_section); if (error) return error; /* load to FW the binary sections of CPU2 */ return iwm_load_cpu_sections_8000(sc, fws, 2, &first_ucode_section); } static int iwm_load_firmware_7000(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { struct iwm_fw_sects *fws; int error, i; const void *data; uint32_t dlen; uint32_t offset; sc->sc_uc.uc_intr = 0; fws = &sc->sc_fw.fw_sects[ucode_type]; for (i = 0; i < fws->fw_count; i++) { data = fws->fw_sect[i].fws_data; dlen = fws->fw_sect[i].fws_len; offset = fws->fw_sect[i].fws_devoff; IWM_DPRINTF(sc, IWM_DEBUG_FIRMWARE_TLV, "LOAD FIRMWARE type %d offset %u len %d\n", ucode_type, offset, dlen); if (dlen > sc->sc_fwdmasegsz) { IWM_DPRINTF(sc, IWM_DEBUG_FIRMWARE_TLV, "chunk %d too large (%d bytes)\n", i, dlen); error = EFBIG; } else { error = iwm_firmware_load_sect(sc, offset, data, dlen); } if (error) { device_printf(sc->sc_dev, "could not load firmware chunk %u of %u " "(error=%d)\n", i, fws->fw_count, error); return error; } } IWM_WRITE(sc, IWM_CSR_RESET, 0); return 0; } static int iwm_load_firmware(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { int error, w; if (sc->sc_device_family == IWM_DEVICE_FAMILY_8000) error = iwm_load_firmware_8000(sc, ucode_type); else error = iwm_load_firmware_7000(sc, ucode_type); if (error) return error; /* wait for the firmware to load */ for (w = 0; !sc->sc_uc.uc_intr && w < 10; w++) { error = msleep(&sc->sc_uc, &sc->sc_mtx, 0, "iwmuc", hz/10); } if (error || !sc->sc_uc.uc_ok) { device_printf(sc->sc_dev, "could not load firmware\n"); if (sc->sc_device_family == IWM_DEVICE_FAMILY_8000) { device_printf(sc->sc_dev, "cpu1 status: 0x%x\n", iwm_read_prph(sc, IWM_SB_CPU_1_STATUS)); device_printf(sc->sc_dev, "cpu2 status: 0x%x\n", iwm_read_prph(sc, IWM_SB_CPU_2_STATUS)); } } /* * Give the firmware some time to initialize. * Accessing it too early causes errors. 
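 *
 * (For reference, the IWM_FH_UCODE_LOAD_STATUS handshake above builds a
 * simple bitmap: sec_num starts at 0x1 and after every section becomes
 * (sec_num << 1) | 1, so for CPU1 (shift_param == 0) the low half of the
 * register reads 0x0001, 0x0003, 0x0007, ... as sections land, while CPU2
 * (shift_param == 16) builds the same pattern in the high half.  In
 * miniature:
 *
 *	val = 0;
 *	sec_num = 0x1;
 *	for (k = 0; k < 3; k++) {
 *		val |= sec_num << shift_param;
 *		sec_num = (sec_num << 1) | 0x1;
 *	}
 *
 * leaves val at 0x0007 for CPU1 and 0x00070000 for CPU2, before the final
 * 0xFFFF / 0xFFFFFFFF "all done" write.)
 *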
*/ msleep(&w, &sc->sc_mtx, 0, "iwmfwinit", hz); return error; } /* iwlwifi: pcie/trans.c */ static int iwm_start_fw(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { int error; IWM_WRITE(sc, IWM_CSR_INT, ~0); if ((error = iwm_nic_init(sc)) != 0) { device_printf(sc->sc_dev, "unable to init nic\n"); return error; } /* make sure rfkill handshake bits are cleared */ IWM_WRITE(sc, IWM_CSR_UCODE_DRV_GP1_CLR, IWM_CSR_UCODE_SW_BIT_RFKILL); IWM_WRITE(sc, IWM_CSR_UCODE_DRV_GP1_CLR, IWM_CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED); /* clear (again), then enable host interrupts */ IWM_WRITE(sc, IWM_CSR_INT, ~0); iwm_enable_interrupts(sc); /* really make sure rfkill handshake bits are cleared */ /* maybe we should write a few times more? just to make sure */ IWM_WRITE(sc, IWM_CSR_UCODE_DRV_GP1_CLR, IWM_CSR_UCODE_SW_BIT_RFKILL); IWM_WRITE(sc, IWM_CSR_UCODE_DRV_GP1_CLR, IWM_CSR_UCODE_SW_BIT_RFKILL); /* Load the given image to the HW */ return iwm_load_firmware(sc, ucode_type); } static int iwm_send_tx_ant_cfg(struct iwm_softc *sc, uint8_t valid_tx_ant) { struct iwm_tx_ant_cfg_cmd tx_ant_cmd = { .valid = htole32(valid_tx_ant), }; return iwm_mvm_send_cmd_pdu(sc, IWM_TX_ANT_CONFIGURATION_CMD, IWM_CMD_SYNC, sizeof(tx_ant_cmd), &tx_ant_cmd); } /* iwlwifi: mvm/fw.c */ static int iwm_send_phy_cfg_cmd(struct iwm_softc *sc) { struct iwm_phy_cfg_cmd phy_cfg_cmd; enum iwm_ucode_type ucode_type = sc->sc_uc_current; /* Set parameters */ phy_cfg_cmd.phy_cfg = htole32(sc->sc_fw_phy_config); phy_cfg_cmd.calib_control.event_trigger = sc->sc_default_calib[ucode_type].event_trigger; phy_cfg_cmd.calib_control.flow_trigger = sc->sc_default_calib[ucode_type].flow_trigger; IWM_DPRINTF(sc, IWM_DEBUG_CMD | IWM_DEBUG_RESET, "Sending Phy CFG command: 0x%x\n", phy_cfg_cmd.phy_cfg); return iwm_mvm_send_cmd_pdu(sc, IWM_PHY_CONFIGURATION_CMD, IWM_CMD_SYNC, sizeof(phy_cfg_cmd), &phy_cfg_cmd); } static int iwm_mvm_load_ucode_wait_alive(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { enum iwm_ucode_type old_type = sc->sc_uc_current; int error; if ((error = iwm_read_firmware(sc, ucode_type)) != 0) { device_printf(sc->sc_dev, "iwm_read_firmware: failed %d\n", error); return error; } sc->sc_uc_current = ucode_type; error = iwm_start_fw(sc, ucode_type); if (error) { device_printf(sc->sc_dev, "iwm_start_fw: failed %d\n", error); sc->sc_uc_current = old_type; return error; } error = iwm_post_alive(sc); if (error) { device_printf(sc->sc_dev, "iwm_fw_alive: failed %d\n", error); } return error; } /* * mvm misc bits */ /* * follows iwlwifi/fw.c */ static int iwm_run_init_mvm_ucode(struct iwm_softc *sc, int justnvm) { int error; /* do not operate with rfkill switch turned on */ if ((sc->sc_flags & IWM_FLAG_RFKILL) && !justnvm) { device_printf(sc->sc_dev, "radio is disabled by hardware switch\n"); return EPERM; } sc->sc_init_complete = 0; if ((error = iwm_mvm_load_ucode_wait_alive(sc, IWM_UCODE_TYPE_INIT)) != 0) { device_printf(sc->sc_dev, "failed to load init firmware\n"); return error; } if (justnvm) { if ((error = iwm_nvm_init(sc)) != 0) { device_printf(sc->sc_dev, "failed to read nvm\n"); return error; } IEEE80211_ADDR_COPY(sc->sc_ic.ic_macaddr, sc->sc_nvm.hw_addr); return 0; } if ((error = iwm_send_bt_init_conf(sc)) != 0) { device_printf(sc->sc_dev, "failed to send bt coex configuration: %d\n", error); return error; } /* Init Smart FIFO. 
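 *
 * (At this point the init image is alive; the remainder of the bring-up done
 * by this function, condensed from the code that follows with error handling
 * elided, is:
 *
 *	iwm_mvm_sf_config(sc, IWM_SF_INIT_OFF);
 *	iwm_send_tx_ant_cfg(sc, iwm_fw_valid_tx_ant(sc));
 *	iwm_send_phy_cfg_cmd(sc);
 *	while (!sc->sc_init_complete)
 *		msleep(&sc->sc_init_complete, &sc->sc_mtx, 0, "iwminit", 2*hz);
 *
 * i.e. the phy configuration command kicks off the init image's internal
 * calibrations, and the msleep() loop waits for the resulting "init
 * complete" notification.)
 *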
*/ error = iwm_mvm_sf_config(sc, IWM_SF_INIT_OFF); if (error != 0) return error; /* Send TX valid antennas before triggering calibrations */ if ((error = iwm_send_tx_ant_cfg(sc, iwm_fw_valid_tx_ant(sc))) != 0) { device_printf(sc->sc_dev, "failed to send antennas before calibration: %d\n", error); return error; } /* * Send phy configurations command to init uCode * to start the 16.0 uCode init image internal calibrations. */ if ((error = iwm_send_phy_cfg_cmd(sc)) != 0 ) { device_printf(sc->sc_dev, "%s: failed to run internal calibration: %d\n", __func__, error); return error; } /* * Nothing to do but wait for the init complete notification * from the firmware */ while (!sc->sc_init_complete) { error = msleep(&sc->sc_init_complete, &sc->sc_mtx, 0, "iwminit", 2*hz); if (error) { device_printf(sc->sc_dev, "init complete failed: %d\n", sc->sc_init_complete); break; } } IWM_DPRINTF(sc, IWM_DEBUG_RESET, "init %scomplete\n", sc->sc_init_complete ? "" : "not "); return error; } /* * receive side */ /* (re)stock rx ring, called at init-time and at runtime */ static int iwm_rx_addbuf(struct iwm_softc *sc, int size, int idx) { struct iwm_rx_ring *ring = &sc->rxq; struct iwm_rx_data *data = &ring->data[idx]; struct mbuf *m; bus_dmamap_t dmamap = NULL; bus_dma_segment_t seg; int nsegs, error; m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, IWM_RBUF_SIZE); if (m == NULL) return ENOBUFS; m->m_len = m->m_pkthdr.len = m->m_ext.ext_size; error = bus_dmamap_load_mbuf_sg(ring->data_dmat, ring->spare_map, m, &seg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "%s: can't map mbuf, error %d\n", __func__, error); goto fail; } if (data->m != NULL) bus_dmamap_unload(ring->data_dmat, data->map); /* Swap ring->spare_map with data->map */ dmamap = data->map; data->map = ring->spare_map; ring->spare_map = dmamap; bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_PREREAD); data->m = m; /* Update RX descriptor. */ KASSERT((seg.ds_addr & 255) == 0, ("seg.ds_addr not aligned")); ring->desc[idx] = htole32(seg.ds_addr >> 8); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); return 0; fail: m_freem(m); return error; } /* iwlwifi: mvm/rx.c */ #define IWM_RSSI_OFFSET 50 static int iwm_mvm_calc_rssi(struct iwm_softc *sc, struct iwm_rx_phy_info *phy_info) { int rssi_a, rssi_b, rssi_a_dbm, rssi_b_dbm, max_rssi_dbm; uint32_t agc_a, agc_b; uint32_t val; val = le32toh(phy_info->non_cfg_phy[IWM_RX_INFO_AGC_IDX]); agc_a = (val & IWM_OFDM_AGC_A_MSK) >> IWM_OFDM_AGC_A_POS; agc_b = (val & IWM_OFDM_AGC_B_MSK) >> IWM_OFDM_AGC_B_POS; val = le32toh(phy_info->non_cfg_phy[IWM_RX_INFO_RSSI_AB_IDX]); rssi_a = (val & IWM_OFDM_RSSI_INBAND_A_MSK) >> IWM_OFDM_RSSI_A_POS; rssi_b = (val & IWM_OFDM_RSSI_INBAND_B_MSK) >> IWM_OFDM_RSSI_B_POS; /* * dBm = rssi dB - agc dB - constant. * Higher AGC (higher radio gain) means lower signal. */ rssi_a_dbm = rssi_a - IWM_RSSI_OFFSET - agc_a; rssi_b_dbm = rssi_b - IWM_RSSI_OFFSET - agc_b; max_rssi_dbm = MAX(rssi_a_dbm, rssi_b_dbm); IWM_DPRINTF(sc, IWM_DEBUG_RECV, "Rssi In A %d B %d Max %d AGCA %d AGCB %d\n", rssi_a_dbm, rssi_b_dbm, max_rssi_dbm, agc_a, agc_b); return max_rssi_dbm; } /* iwlwifi: mvm/rx.c */ /* * iwm_mvm_get_signal_strength - use new rx PHY INFO API * values are reported by the fw as positive values - need to negate * to obtain their dBM. Account for missing antennas by replacing 0 * values by -256dBm: practically 0 power and a non-feasible 8 bit value. 
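 *
 * A quick worked example of that rule, with hypothetical firmware values:
 *
 *	raw_a = 45;  raw_b = 0;
 *	energy_a = raw_a ? -raw_a : -256;	(-45 dBm)
 *	energy_b = raw_b ? -raw_b : -256;	(-256 dBm, antenna not present)
 *	max_energy = MAX(energy_a, energy_b);	(-45 dBm)
 *
 * so a missing antenna can never win the MAX() comparison below.
 *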
*/ static int iwm_mvm_get_signal_strength(struct iwm_softc *sc, struct iwm_rx_phy_info *phy_info) { int energy_a, energy_b, energy_c, max_energy; uint32_t val; val = le32toh(phy_info->non_cfg_phy[IWM_RX_INFO_ENERGY_ANT_ABC_IDX]); energy_a = (val & IWM_RX_INFO_ENERGY_ANT_A_MSK) >> IWM_RX_INFO_ENERGY_ANT_A_POS; energy_a = energy_a ? -energy_a : -256; energy_b = (val & IWM_RX_INFO_ENERGY_ANT_B_MSK) >> IWM_RX_INFO_ENERGY_ANT_B_POS; energy_b = energy_b ? -energy_b : -256; energy_c = (val & IWM_RX_INFO_ENERGY_ANT_C_MSK) >> IWM_RX_INFO_ENERGY_ANT_C_POS; energy_c = energy_c ? -energy_c : -256; max_energy = MAX(energy_a, energy_b); max_energy = MAX(max_energy, energy_c); IWM_DPRINTF(sc, IWM_DEBUG_RECV, "energy In A %d B %d C %d , and max %d\n", energy_a, energy_b, energy_c, max_energy); return max_energy; } static void iwm_mvm_rx_rx_phy_cmd(struct iwm_softc *sc, struct iwm_rx_packet *pkt, struct iwm_rx_data *data) { struct iwm_rx_phy_info *phy_info = (void *)pkt->data; IWM_DPRINTF(sc, IWM_DEBUG_RECV, "received PHY stats\n"); bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); memcpy(&sc->sc_last_phy_info, phy_info, sizeof(sc->sc_last_phy_info)); } /* * Retrieve the average noise (in dBm) among receivers. */ static int iwm_get_noise(const struct iwm_mvm_statistics_rx_non_phy *stats) { int i, total, nbant, noise; total = nbant = noise = 0; for (i = 0; i < 3; i++) { noise = le32toh(stats->beacon_silence_rssi[i]) & 0xff; if (noise) { total += noise; nbant++; } } /* There should be at least one antenna but check anyway. */ return (nbant == 0) ? -127 : (total / nbant) - 107; } /* * iwm_mvm_rx_rx_mpdu - IWM_REPLY_RX_MPDU_CMD handler * * Handles the actual data of the Rx packet from the fw */ static void iwm_mvm_rx_rx_mpdu(struct iwm_softc *sc, struct iwm_rx_packet *pkt, struct iwm_rx_data *data) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct ieee80211_frame *wh; struct ieee80211_node *ni; struct ieee80211_rx_stats rxs; struct mbuf *m; struct iwm_rx_phy_info *phy_info; struct iwm_rx_mpdu_res_start *rx_res; uint32_t len; uint32_t rx_pkt_status; int rssi; bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); phy_info = &sc->sc_last_phy_info; rx_res = (struct iwm_rx_mpdu_res_start *)pkt->data; wh = (struct ieee80211_frame *)(pkt->data + sizeof(*rx_res)); len = le16toh(rx_res->byte_count); rx_pkt_status = le32toh(*(uint32_t *)(pkt->data + sizeof(*rx_res) + len)); m = data->m; m->m_data = pkt->data + sizeof(*rx_res); m->m_pkthdr.len = m->m_len = len; if (__predict_false(phy_info->cfg_phy_cnt > 20)) { device_printf(sc->sc_dev, "dsp size out of range [0,20]: %d\n", phy_info->cfg_phy_cnt); return; } if (!(rx_pkt_status & IWM_RX_MPDU_RES_STATUS_CRC_OK) || !(rx_pkt_status & IWM_RX_MPDU_RES_STATUS_OVERRUN_OK)) { IWM_DPRINTF(sc, IWM_DEBUG_RECV, "Bad CRC or FIFO: 0x%08X.\n", rx_pkt_status); return; /* drop */ } if (sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_RX_ENERGY_API) { rssi = iwm_mvm_get_signal_strength(sc, phy_info); } else { rssi = iwm_mvm_calc_rssi(sc, phy_info); } rssi = (0 - IWM_MIN_DBM) + rssi; /* normalize */ rssi = MIN(rssi, sc->sc_max_rssi); /* clip to max. 
100% */ /* replenish ring for the buffer we're going to feed to the sharks */ if (iwm_rx_addbuf(sc, IWM_RBUF_SIZE, sc->rxq.cur) != 0) { device_printf(sc->sc_dev, "%s: unable to add more buffers\n", __func__); return; } ni = ieee80211_find_rxnode(ic, (struct ieee80211_frame_min *)wh); IWM_DPRINTF(sc, IWM_DEBUG_RECV, "%s: phy_info: channel=%d, flags=0x%08x\n", __func__, le16toh(phy_info->channel), le16toh(phy_info->phy_flags)); /* * Populate an RX state struct with the provided information. */ bzero(&rxs, sizeof(rxs)); rxs.r_flags |= IEEE80211_R_IEEE | IEEE80211_R_FREQ; rxs.r_flags |= IEEE80211_R_NF | IEEE80211_R_RSSI; rxs.c_ieee = le16toh(phy_info->channel); if (le16toh(phy_info->phy_flags & IWM_RX_RES_PHY_FLAGS_BAND_24)) { rxs.c_freq = ieee80211_ieee2mhz(rxs.c_ieee, IEEE80211_CHAN_2GHZ); } else { rxs.c_freq = ieee80211_ieee2mhz(rxs.c_ieee, IEEE80211_CHAN_5GHZ); } rxs.rssi = rssi - sc->sc_noise; rxs.nf = sc->sc_noise; if (ieee80211_radiotap_active_vap(vap)) { struct iwm_rx_radiotap_header *tap = &sc->sc_rxtap; tap->wr_flags = 0; if (phy_info->phy_flags & htole16(IWM_PHY_INFO_FLAG_SHPREAMBLE)) tap->wr_flags |= IEEE80211_RADIOTAP_F_SHORTPRE; tap->wr_chan_freq = htole16(rxs.c_freq); /* XXX only if ic->ic_curchan->ic_ieee == rxs.c_ieee */ tap->wr_chan_flags = htole16(ic->ic_curchan->ic_flags); tap->wr_dbm_antsignal = (int8_t)rssi; tap->wr_dbm_antnoise = (int8_t)sc->sc_noise; tap->wr_tsft = phy_info->system_timestamp; switch (phy_info->rate) { /* CCK rates. */ case 10: tap->wr_rate = 2; break; case 20: tap->wr_rate = 4; break; case 55: tap->wr_rate = 11; break; case 110: tap->wr_rate = 22; break; /* OFDM rates. */ case 0xd: tap->wr_rate = 12; break; case 0xf: tap->wr_rate = 18; break; case 0x5: tap->wr_rate = 24; break; case 0x7: tap->wr_rate = 36; break; case 0x9: tap->wr_rate = 48; break; case 0xb: tap->wr_rate = 72; break; case 0x1: tap->wr_rate = 96; break; case 0x3: tap->wr_rate = 108; break; /* Unknown rate: should not happen. */ default: tap->wr_rate = 0; } } IWM_UNLOCK(sc); if (ni != NULL) { IWM_DPRINTF(sc, IWM_DEBUG_RECV, "input m %p\n", m); ieee80211_input_mimo(ni, m, &rxs); ieee80211_free_node(ni); } else { IWM_DPRINTF(sc, IWM_DEBUG_RECV, "inputall m %p\n", m); ieee80211_input_mimo_all(ic, m, &rxs); } IWM_LOCK(sc); } static int iwm_mvm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_rx_packet *pkt, struct iwm_node *in) { struct iwm_mvm_tx_resp *tx_resp = (void *)pkt->data; struct ieee80211_node *ni = &in->in_ni; struct ieee80211vap *vap = ni->ni_vap; int status = le16toh(tx_resp->status.status) & IWM_TX_STATUS_MSK; int failack = tx_resp->failure_frame; KASSERT(tx_resp->frame_count == 1, ("too many frames")); /* Update rate control statistics. 
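 *
 * (Condensed, the handling below is: mask out the status field, treat
 * anything other than SUCCESS/DIRECT_DONE as a failed transmission, and feed
 * the firmware's per-frame failure count to net80211's rate control:
 *
 *	status = le16toh(tx_resp->status.status) & IWM_TX_STATUS_MSK;
 *	ok = (status == IWM_TX_STATUS_SUCCESS ||
 *	      status == IWM_TX_STATUS_DIRECT_DONE);
 *	ieee80211_ratectl_tx_complete(vap, ni,
 *	    ok ? IEEE80211_RATECTL_TX_SUCCESS : IEEE80211_RATECTL_TX_FAILURE,
 *	    &failack, NULL);
 *
 * with failack being tx_resp->failure_frame.)
 *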
*/ IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "%s: status=0x%04x, seq=%d, fc=%d, btc=%d, frts=%d, ff=%d, irate=%08x, wmt=%d\n", __func__, (int) le16toh(tx_resp->status.status), (int) le16toh(tx_resp->status.sequence), tx_resp->frame_count, tx_resp->bt_kill_count, tx_resp->failure_rts, tx_resp->failure_frame, le32toh(tx_resp->initial_rate), (int) le16toh(tx_resp->wireless_media_time)); if (status != IWM_TX_STATUS_SUCCESS && status != IWM_TX_STATUS_DIRECT_DONE) { ieee80211_ratectl_tx_complete(vap, ni, IEEE80211_RATECTL_TX_FAILURE, &failack, NULL); return (1); } else { ieee80211_ratectl_tx_complete(vap, ni, IEEE80211_RATECTL_TX_SUCCESS, &failack, NULL); return (0); } } static void iwm_mvm_rx_tx_cmd(struct iwm_softc *sc, struct iwm_rx_packet *pkt, struct iwm_rx_data *data) { struct iwm_cmd_header *cmd_hdr = &pkt->hdr; int idx = cmd_hdr->idx; int qid = cmd_hdr->qid; struct iwm_tx_ring *ring = &sc->txq[qid]; struct iwm_tx_data *txd = &ring->data[idx]; struct iwm_node *in = txd->in; struct mbuf *m = txd->m; int status; KASSERT(txd->done == 0, ("txd not done")); KASSERT(txd->in != NULL, ("txd without node")); KASSERT(txd->m != NULL, ("txd without mbuf")); bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); sc->sc_tx_timer = 0; status = iwm_mvm_rx_tx_cmd_single(sc, pkt, in); /* Unmap and free mbuf. */ bus_dmamap_sync(ring->data_dmat, txd->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, txd->map); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "free txd %p, in %p\n", txd, txd->in); txd->done = 1; txd->m = NULL; txd->in = NULL; ieee80211_tx_complete(&in->in_ni, m, status); if (--ring->queued < IWM_TX_RING_LOMARK) { sc->qfullmsk &= ~(1 << ring->qid); if (sc->qfullmsk == 0) { /* * Well, we're in interrupt context, but then again * I guess net80211 does all sorts of stunts in * interrupt context, so maybe this is no biggie. */ iwm_start(sc); } } } /* * transmit side */ /* * Process a "command done" firmware notification. This is where we wakeup * processes waiting for a synchronous command completion. * from if_iwn */ static void iwm_cmd_done(struct iwm_softc *sc, struct iwm_rx_packet *pkt) { struct iwm_tx_ring *ring = &sc->txq[IWM_MVM_CMD_QUEUE]; struct iwm_tx_data *data; if (pkt->hdr.qid != IWM_MVM_CMD_QUEUE) { return; /* Not a command ack. */ } data = &ring->data[pkt->hdr.idx]; /* If the command was mapped in an mbuf, free it. */ if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } wakeup(&ring->desc[pkt->hdr.idx]); } #if 0 /* * necessary only for block ack mode */ void iwm_update_sched(struct iwm_softc *sc, int qid, int idx, uint8_t sta_id, uint16_t len) { struct iwm_agn_scd_bc_tbl *scd_bc_tbl; uint16_t w_val; scd_bc_tbl = sc->sched_dma.vaddr; len += 8; /* magic numbers came naturally from paris */ if (sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_DW_BC_TABLE) len = roundup(len, 4) / 4; w_val = htole16(sta_id << 12 | len); /* Update TX scheduler. */ scd_bc_tbl[qid].tfd_offset[idx] = w_val; bus_dmamap_sync(sc->sched_dma.tag, sc->sched_dma.map, BUS_DMASYNC_PREWRITE); /* I really wonder what this is ?!? */ if (idx < IWM_TFD_QUEUE_SIZE_BC_DUP) { scd_bc_tbl[qid].tfd_offset[IWM_TFD_QUEUE_SIZE_MAX + idx] = w_val; bus_dmamap_sync(sc->sched_dma.tag, sc->sched_dma.map, BUS_DMASYNC_PREWRITE); } } #endif /* * Take an 802.11 (non-n) rate, find the relevant rate * table entry. return the index into in_ridx[]. 
* * The caller then uses that index back into in_ridx * to figure out the rate index programmed /into/ * the firmware for this given node. */ static int iwm_tx_rateidx_lookup(struct iwm_softc *sc, struct iwm_node *in, uint8_t rate) { int i; uint8_t r; for (i = 0; i < nitems(in->in_ridx); i++) { r = iwm_rates[in->in_ridx[i]].rate; if (rate == r) return (i); } /* XXX Return the first */ /* XXX TODO: have it return the /lowest/ */ return (0); } /* * Fill in the rate related information for a transmit command. */ static const struct iwm_rate * iwm_tx_fill_cmd(struct iwm_softc *sc, struct iwm_node *in, struct ieee80211_frame *wh, struct iwm_tx_cmd *tx) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_node *ni = &in->in_ni; const struct iwm_rate *rinfo; int type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; int ridx, rate_flags; tx->rts_retry_limit = IWM_RTS_DFAULT_RETRY_LIMIT; tx->data_retry_limit = IWM_DEFAULT_TX_RETRY; /* * XXX TODO: everything about the rate selection here is terrible! */ if (type == IEEE80211_FC0_TYPE_DATA) { int i; /* for data frames, use RS table */ (void) ieee80211_ratectl_rate(ni, NULL, 0); i = iwm_tx_rateidx_lookup(sc, in, ni->ni_txrate); ridx = in->in_ridx[i]; /* This is the index into the programmed table */ tx->initial_rate_index = i; tx->tx_flags |= htole32(IWM_TX_CMD_FLG_STA_RATE); IWM_DPRINTF(sc, IWM_DEBUG_XMIT | IWM_DEBUG_TXRATE, "%s: start with i=%d, txrate %d\n", __func__, i, iwm_rates[ridx].rate); } else { /* * For non-data, use the lowest supported rate for the given * operational mode. * * Note: there may not be any rate control information available. * This driver currently assumes if we're transmitting data * frames, use the rate control table. Grr. * * XXX TODO: use the configured rate for the traffic type! * XXX TODO: this should be per-vap, not curmode; as we later * on we'll want to handle off-channel stuff (eg TDLS). */ if (ic->ic_curmode == IEEE80211_MODE_11A) { /* * XXX this assumes the mode is either 11a or not 11a; * definitely won't work for 11n. */ ridx = IWM_RIDX_OFDM; } else { ridx = IWM_RIDX_CCK; } } rinfo = &iwm_rates[ridx]; IWM_DPRINTF(sc, IWM_DEBUG_TXRATE, "%s: ridx=%d; rate=%d, CCK=%d\n", __func__, ridx, rinfo->rate, !! (IWM_RIDX_IS_CCK(ridx)) ); /* XXX TODO: hard-coded TX antenna? 
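 *
 * For a data frame the selection above boils down to: take net80211's
 * current rate for the node (ni_txrate, in 500 kbit/s units), find the slot
 * in the node's programmed rate table carrying that rate, and hand the
 * firmware that slot index together with IWM_TX_CMD_FLG_STA_RATE.  E.g. with
 * ni_txrate == 36 (18 Mbit/s):
 *
 *	i = iwm_tx_rateidx_lookup(sc, in, 36);	(slot whose rate field is 36)
 *	ridx = in->in_ridx[i];			(index into iwm_rates[])
 *	tx->initial_rate_index = i;
 *	tx->tx_flags |= htole32(IWM_TX_CMD_FLG_STA_RATE);
 *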
*/ rate_flags = 1 << IWM_RATE_MCS_ANT_POS; if (IWM_RIDX_IS_CCK(ridx)) rate_flags |= IWM_RATE_MCS_CCK_MSK; tx->rate_n_flags = htole32(rate_flags | rinfo->plcp); return rinfo; } #define TB0_SIZE 16 static int iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ieee80211_node *ni, int ac) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct iwm_node *in = IWM_NODE(ni); struct iwm_tx_ring *ring; struct iwm_tx_data *data; struct iwm_tfd *desc; struct iwm_device_cmd *cmd; struct iwm_tx_cmd *tx; struct ieee80211_frame *wh; struct ieee80211_key *k = NULL; struct mbuf *m1; const struct iwm_rate *rinfo; uint32_t flags; u_int hdrlen; bus_dma_segment_t *seg, segs[IWM_MAX_SCATTER]; int nsegs; uint8_t tid, type; int i, totlen, error, pad; wh = mtod(m, struct ieee80211_frame *); hdrlen = ieee80211_anyhdrsize(wh); type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; tid = 0; ring = &sc->txq[ac]; desc = &ring->desc[ring->cur]; memset(desc, 0, sizeof(*desc)); data = &ring->data[ring->cur]; /* Fill out iwm_tx_cmd to send to the firmware */ cmd = &ring->cmd[ring->cur]; cmd->hdr.code = IWM_TX_CMD; cmd->hdr.flags = 0; cmd->hdr.qid = ring->qid; cmd->hdr.idx = ring->cur; tx = (void *)cmd->data; memset(tx, 0, sizeof(*tx)); rinfo = iwm_tx_fill_cmd(sc, in, wh, tx); /* Encrypt the frame if need be. */ if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { /* Retrieve key for TX && do software encryption. */ k = ieee80211_crypto_encap(ni, m); if (k == NULL) { m_freem(m); return (ENOBUFS); } /* 802.11 header may have moved. */ wh = mtod(m, struct ieee80211_frame *); } if (ieee80211_radiotap_active_vap(vap)) { struct iwm_tx_radiotap_header *tap = &sc->sc_txtap; tap->wt_flags = 0; tap->wt_chan_freq = htole16(ni->ni_chan->ic_freq); tap->wt_chan_flags = htole16(ni->ni_chan->ic_flags); tap->wt_rate = rinfo->rate; if (k != NULL) tap->wt_flags |= IEEE80211_RADIOTAP_F_WEP; ieee80211_radiotap_tx(vap, m); } totlen = m->m_pkthdr.len; flags = 0; if (!IEEE80211_IS_MULTICAST(wh->i_addr1)) { flags |= IWM_TX_CMD_FLG_ACK; } if (type == IEEE80211_FC0_TYPE_DATA && (totlen + IEEE80211_CRC_LEN > vap->iv_rtsthreshold) && !IEEE80211_IS_MULTICAST(wh->i_addr1)) { flags |= IWM_TX_CMD_FLG_PROT_REQUIRE; } if (IEEE80211_IS_MULTICAST(wh->i_addr1) || type != IEEE80211_FC0_TYPE_DATA) tx->sta_id = sc->sc_aux_sta.sta_id; else tx->sta_id = IWM_STATION_ID; if (type == IEEE80211_FC0_TYPE_MGT) { uint8_t subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK; if (subtype == IEEE80211_FC0_SUBTYPE_ASSOC_REQ || subtype == IEEE80211_FC0_SUBTYPE_REASSOC_REQ) { tx->pm_frame_timeout = htole16(IWM_PM_FRAME_ASSOC); } else if (subtype == IEEE80211_FC0_SUBTYPE_ACTION) { tx->pm_frame_timeout = htole16(IWM_PM_FRAME_NONE); } else { tx->pm_frame_timeout = htole16(IWM_PM_FRAME_MGMT); } } else { tx->pm_frame_timeout = htole16(IWM_PM_FRAME_NONE); } if (hdrlen & 3) { /* First segment length must be a multiple of 4. */ flags |= IWM_TX_CMD_FLG_MH_PAD; pad = 4 - (hdrlen & 3); } else pad = 0; tx->driver_txop = 0; tx->next_frame_len = 0; tx->len = htole16(totlen); tx->tid_tspec = tid; tx->life_time = htole32(IWM_TX_CMD_LIFE_TIME_INFINITE); /* Set physical address of "scratch area". */ tx->dram_lsb_ptr = htole32(data->scratch_paddr); tx->dram_msb_ptr = iwm_get_dma_hi_addr(data->scratch_paddr); /* Copy 802.11 header in TX command. */ memcpy(((uint8_t *)tx) + sizeof(*tx), wh, hdrlen); flags |= IWM_TX_CMD_FLG_BT_DIS | IWM_TX_CMD_FLG_SEQ_CTL; tx->sec_ctl = 0; tx->tx_flags |= htole32(flags); /* Trim 802.11 header. 
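 *
 * (On the IWM_TX_CMD_FLG_MH_PAD handling above: the first segment length
 * must be a multiple of 4, so e.g. a 26-byte QoS data header is padded by
 * two bytes while a plain 24-byte data header needs no padding:
 *
 *	hdrlen = 26;
 *	pad = (hdrlen & 3) ? 4 - (hdrlen & 3) : 0;	(pad == 2)
 *
 * and the pad is accounted for in the second TB length further down.)
 *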
*/ m_adj(m, hdrlen); error = bus_dmamap_load_mbuf_sg(ring->data_dmat, data->map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { if (error != EFBIG) { device_printf(sc->sc_dev, "can't map mbuf (error %d)\n", error); m_freem(m); return error; } /* Too many DMA segments, linearize mbuf. */ m1 = m_collapse(m, M_NOWAIT, IWM_MAX_SCATTER - 2); if (m1 == NULL) { device_printf(sc->sc_dev, "%s: could not defrag mbuf\n", __func__); m_freem(m); return (ENOBUFS); } m = m1; error = bus_dmamap_load_mbuf_sg(ring->data_dmat, data->map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "can't map mbuf (error %d)\n", error); m_freem(m); return error; } } data->m = m; data->in = in; data->done = 0; IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "sending txd %p, in %p\n", data, data->in); KASSERT(data->in != NULL, ("node is NULL")); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "sending data: qid=%d idx=%d len=%d nsegs=%d txflags=0x%08x rate_n_flags=0x%08x rateidx=%u\n", ring->qid, ring->cur, totlen, nsegs, le32toh(tx->tx_flags), le32toh(tx->rate_n_flags), tx->initial_rate_index ); /* Fill TX descriptor. */ desc->num_tbs = 2 + nsegs; desc->tbs[0].lo = htole32(data->cmd_paddr); desc->tbs[0].hi_n_len = htole16(iwm_get_dma_hi_addr(data->cmd_paddr)) | (TB0_SIZE << 4); desc->tbs[1].lo = htole32(data->cmd_paddr + TB0_SIZE); desc->tbs[1].hi_n_len = htole16(iwm_get_dma_hi_addr(data->cmd_paddr)) | ((sizeof(struct iwm_cmd_header) + sizeof(*tx) + hdrlen + pad - TB0_SIZE) << 4); /* Other DMA segments are for data payload. */ for (i = 0; i < nsegs; i++) { seg = &segs[i]; desc->tbs[i+2].lo = htole32(seg->ds_addr); desc->tbs[i+2].hi_n_len = \ htole16(iwm_get_dma_hi_addr(seg->ds_addr)) | ((seg->ds_len) << 4); } bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->cmd_dma.tag, ring->cmd_dma.map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); #if 0 iwm_update_sched(sc, ring->qid, ring->cur, tx->sta_id, le16toh(tx->len)); #endif /* Kick TX ring. */ ring->cur = (ring->cur + 1) % IWM_TX_RING_COUNT; IWM_WRITE(sc, IWM_HBUS_TARG_WRPTR, ring->qid << 8 | ring->cur); /* Mark TX ring as full if we reach a certain threshold. */ if (++ring->queued > IWM_TX_RING_HIMARK) { sc->qfullmsk |= 1 << ring->qid; } return 0; } static int iwm_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { struct ieee80211com *ic = ni->ni_ic; struct iwm_softc *sc = ic->ic_softc; int error = 0; IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "->%s begin\n", __func__); if ((sc->sc_flags & IWM_FLAG_HW_INITED) == 0) { m_freem(m); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "<-%s not RUNNING\n", __func__); return (ENETDOWN); } IWM_LOCK(sc); /* XXX fix this */ if (params == NULL) { error = iwm_tx(sc, m, ni, 0); } else { error = iwm_tx(sc, m, ni, 0); } sc->sc_tx_timer = 5; IWM_UNLOCK(sc); return (error); } /* * mvm/tx.c */ #if 0 /* * Note that there are transports that buffer frames before they reach * the firmware. This means that after flush_tx_path is called, the * queue might not be empty. The race-free way to handle this is to: * 1) set the station as draining * 2) flush the Tx path * 3) wait for the transport queues to be empty */ int iwm_mvm_flush_tx_path(struct iwm_softc *sc, int tfd_msk, int sync) { struct iwm_tx_path_flush_cmd flush_cmd = { .queues_ctl = htole32(tfd_msk), .flush_ctl = htole16(IWM_DUMP_TX_FIFO_FLUSH), }; int ret; ret = iwm_mvm_send_cmd_pdu(sc, IWM_TXPATH_FLUSH, sync ? 
IWM_CMD_SYNC : IWM_CMD_ASYNC, sizeof(flush_cmd), &flush_cmd); if (ret) device_printf(sc->sc_dev, "Flushing tx queue failed: %d\n", ret); return ret; } #endif /* * BEGIN mvm/sta.c */ static int iwm_mvm_send_add_sta_cmd_status(struct iwm_softc *sc, struct iwm_mvm_add_sta_cmd_v7 *cmd, int *status) { return iwm_mvm_send_cmd_pdu_status(sc, IWM_ADD_STA, sizeof(*cmd), cmd, status); } /* send station add/update command to firmware */ static int iwm_mvm_sta_send_to_fw(struct iwm_softc *sc, struct iwm_node *in, int update) { struct iwm_mvm_add_sta_cmd_v7 add_sta_cmd; int ret; uint32_t status; memset(&add_sta_cmd, 0, sizeof(add_sta_cmd)); add_sta_cmd.sta_id = IWM_STATION_ID; add_sta_cmd.mac_id_n_color = htole32(IWM_FW_CMD_ID_AND_COLOR(IWM_DEFAULT_MACID, IWM_DEFAULT_COLOR)); if (!update) { int ac; for (ac = 0; ac < WME_NUM_AC; ac++) { add_sta_cmd.tfd_queue_msk |= htole32(1 << iwm_mvm_ac_to_tx_fifo[ac]); } IEEE80211_ADDR_COPY(&add_sta_cmd.addr, in->in_ni.ni_bssid); } add_sta_cmd.add_modify = update ? 1 : 0; add_sta_cmd.station_flags_msk |= htole32(IWM_STA_FLG_FAT_EN_MSK | IWM_STA_FLG_MIMO_EN_MSK); add_sta_cmd.tid_disable_tx = htole16(0xffff); if (update) add_sta_cmd.modify_mask |= (IWM_STA_MODIFY_TID_DISABLE_TX); status = IWM_ADD_STA_SUCCESS; ret = iwm_mvm_send_add_sta_cmd_status(sc, &add_sta_cmd, &status); if (ret) return ret; switch (status) { case IWM_ADD_STA_SUCCESS: break; default: ret = EIO; device_printf(sc->sc_dev, "IWM_ADD_STA failed\n"); break; } return ret; } static int iwm_mvm_add_sta(struct iwm_softc *sc, struct iwm_node *in) { return iwm_mvm_sta_send_to_fw(sc, in, 0); } static int iwm_mvm_update_sta(struct iwm_softc *sc, struct iwm_node *in) { return iwm_mvm_sta_send_to_fw(sc, in, 1); } static int iwm_mvm_add_int_sta_common(struct iwm_softc *sc, struct iwm_int_sta *sta, const uint8_t *addr, uint16_t mac_id, uint16_t color) { struct iwm_mvm_add_sta_cmd_v7 cmd; int ret; uint32_t status; memset(&cmd, 0, sizeof(cmd)); cmd.sta_id = sta->sta_id; cmd.mac_id_n_color = htole32(IWM_FW_CMD_ID_AND_COLOR(mac_id, color)); cmd.tfd_queue_msk = htole32(sta->tfd_queue_msk); cmd.tid_disable_tx = htole16(0xffff); if (addr) IEEE80211_ADDR_COPY(cmd.addr, addr); ret = iwm_mvm_send_add_sta_cmd_status(sc, &cmd, &status); if (ret) return ret; switch (status) { case IWM_ADD_STA_SUCCESS: IWM_DPRINTF(sc, IWM_DEBUG_RESET, "%s: Internal station added.\n", __func__); return 0; default: device_printf(sc->sc_dev, "%s: Add internal station failed, status=0x%x\n", __func__, status); ret = EIO; break; } return ret; } static int iwm_mvm_add_aux_sta(struct iwm_softc *sc) { int ret; sc->sc_aux_sta.sta_id = IWM_AUX_STA_ID; sc->sc_aux_sta.tfd_queue_msk = (1 << IWM_MVM_AUX_QUEUE); ret = iwm_enable_txq(sc, 0, IWM_MVM_AUX_QUEUE, IWM_MVM_TX_FIFO_MCAST); if (ret) return ret; ret = iwm_mvm_add_int_sta_common(sc, &sc->sc_aux_sta, NULL, IWM_MAC_INDEX_AUX, 0); if (ret) memset(&sc->sc_aux_sta, 0, sizeof(sc->sc_aux_sta)); return ret; } /* * END mvm/sta.c */ /* * BEGIN mvm/quota.c */ static int iwm_mvm_update_quotas(struct iwm_softc *sc, struct iwm_node *in) { struct iwm_time_quota_cmd cmd; int i, idx, ret, num_active_macs, quota, quota_rem; int colors[IWM_MAX_BINDINGS] = { -1, -1, -1, -1, }; int n_ifs[IWM_MAX_BINDINGS] = {0, }; uint16_t id; memset(&cmd, 0, sizeof(cmd)); /* currently, PHY ID == binding ID */ if (in) { id = in->in_phyctxt->id; KASSERT(id < IWM_MAX_BINDINGS, ("invalid id")); colors[id] = in->in_phyctxt->color; if (1) n_ifs[id] = 1; } /* * The FW's scheduling session consists of * IWM_MVM_MAX_QUOTA fragments. 
Divide these fragments * equally between all the bindings that require quota */ num_active_macs = 0; for (i = 0; i < IWM_MAX_BINDINGS; i++) { cmd.quotas[i].id_and_color = htole32(IWM_FW_CTXT_INVALID); num_active_macs += n_ifs[i]; } quota = 0; quota_rem = 0; if (num_active_macs) { quota = IWM_MVM_MAX_QUOTA / num_active_macs; quota_rem = IWM_MVM_MAX_QUOTA % num_active_macs; } for (idx = 0, i = 0; i < IWM_MAX_BINDINGS; i++) { if (colors[i] < 0) continue; cmd.quotas[idx].id_and_color = htole32(IWM_FW_CMD_ID_AND_COLOR(i, colors[i])); if (n_ifs[i] <= 0) { cmd.quotas[idx].quota = htole32(0); cmd.quotas[idx].max_duration = htole32(0); } else { cmd.quotas[idx].quota = htole32(quota * n_ifs[i]); cmd.quotas[idx].max_duration = htole32(0); } idx++; } /* Give the remainder of the session to the first binding */ cmd.quotas[0].quota = htole32(le32toh(cmd.quotas[0].quota) + quota_rem); ret = iwm_mvm_send_cmd_pdu(sc, IWM_TIME_QUOTA_CMD, IWM_CMD_SYNC, sizeof(cmd), &cmd); if (ret) device_printf(sc->sc_dev, "%s: Failed to send quota: %d\n", __func__, ret); return ret; } /* * END mvm/quota.c */ /* * ieee80211 routines */ /* * Change to AUTH state in 80211 state machine. Roughly matches what * Linux does in bss_info_changed(). */ static int iwm_auth(struct ieee80211vap *vap, struct iwm_softc *sc) { struct ieee80211_node *ni; struct iwm_node *in; struct iwm_vap *iv = IWM_VAP(vap); uint32_t duration; int error; /* * XXX i have a feeling that the vap node is being * freed from underneath us. Grr. */ ni = ieee80211_ref_node(vap->iv_bss); in = IWM_NODE(ni); IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_STATE, "%s: called; vap=%p, bss ni=%p\n", __func__, vap, ni); in->in_assoc = 0; error = iwm_mvm_sf_config(sc, IWM_SF_FULL_ON); if (error != 0) return error; error = iwm_allow_mcast(vap, sc); if (error) { device_printf(sc->sc_dev, "%s: failed to set multicast\n", __func__); goto out; } /* * This is where it deviates from what Linux does. * * Linux iwlwifi doesn't reset the nic each time, nor does it * call ctxt_add() here. Instead, it adds it during vap creation, * and always does a mac_ctx_changed(). * * The openbsd port doesn't attempt to do that - it reset things * at odd states and does the add here. * * So, until the state handling is fixed (ie, we never reset * the NIC except for a firmware failure, which should drag * the NIC back to IDLE, re-setup and re-add all the mac/phy * contexts that are required), let's do a dirty hack here. 
*/ if (iv->is_uploaded) { if ((error = iwm_mvm_mac_ctxt_changed(sc, vap)) != 0) { device_printf(sc->sc_dev, "%s: failed to update MAC\n", __func__); goto out; } if ((error = iwm_mvm_phy_ctxt_changed(sc, &sc->sc_phyctxt[0], in->in_ni.ni_chan, 1, 1)) != 0) { device_printf(sc->sc_dev, "%s: failed update phy ctxt\n", __func__); goto out; } in->in_phyctxt = &sc->sc_phyctxt[0]; if ((error = iwm_mvm_binding_update(sc, in)) != 0) { device_printf(sc->sc_dev, "%s: binding update cmd\n", __func__); goto out; } if ((error = iwm_mvm_update_sta(sc, in)) != 0) { device_printf(sc->sc_dev, "%s: failed to update sta\n", __func__); goto out; } } else { if ((error = iwm_mvm_mac_ctxt_add(sc, vap)) != 0) { device_printf(sc->sc_dev, "%s: failed to add MAC\n", __func__); goto out; } if ((error = iwm_mvm_phy_ctxt_changed(sc, &sc->sc_phyctxt[0], in->in_ni.ni_chan, 1, 1)) != 0) { device_printf(sc->sc_dev, "%s: failed add phy ctxt!\n", __func__); error = ETIMEDOUT; goto out; } in->in_phyctxt = &sc->sc_phyctxt[0]; if ((error = iwm_mvm_binding_add_vif(sc, in)) != 0) { device_printf(sc->sc_dev, "%s: binding add cmd\n", __func__); goto out; } if ((error = iwm_mvm_add_sta(sc, in)) != 0) { device_printf(sc->sc_dev, "%s: failed to add sta\n", __func__); goto out; } } /* * Prevent the FW from wandering off channel during association * by "protecting" the session with a time event. */ /* XXX duration is in units of TU, not MS */ duration = IWM_MVM_TE_SESSION_PROTECTION_MAX_TIME_MS; iwm_mvm_protect_session(sc, in, duration, 500 /* XXX magic number */); DELAY(100); error = 0; out: ieee80211_free_node(ni); return (error); } static int iwm_assoc(struct ieee80211vap *vap, struct iwm_softc *sc) { struct iwm_node *in = IWM_NODE(vap->iv_bss); int error; if ((error = iwm_mvm_update_sta(sc, in)) != 0) { device_printf(sc->sc_dev, "%s: failed to update STA\n", __func__); return error; } in->in_assoc = 1; if ((error = iwm_mvm_mac_ctxt_changed(sc, vap)) != 0) { device_printf(sc->sc_dev, "%s: failed to update MAC\n", __func__); return error; } return 0; } static int iwm_release(struct iwm_softc *sc, struct iwm_node *in) { /* * Ok, so *technically* the proper set of calls for going * from RUN back to SCAN is: * * iwm_mvm_power_mac_disable(sc, in); * iwm_mvm_mac_ctxt_changed(sc, in); * iwm_mvm_rm_sta(sc, in); * iwm_mvm_update_quotas(sc, NULL); * iwm_mvm_mac_ctxt_changed(sc, in); * iwm_mvm_binding_remove_vif(sc, in); * iwm_mvm_mac_ctxt_remove(sc, in); * * However, that freezes the device not matter which permutations * and modifications are attempted. Obviously, this driver is missing * something since it works in the Linux driver, but figuring out what * is missing is a little more complicated. Now, since we're going * back to nothing anyway, we'll just do a complete device reset. * Up your's, device! 
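 *
 * Concretely, the "complete device reset" below is iwm_stop_device()
 * followed by iwm_init_hw(), which reloads the regular firmware and
 * re-runs the whole init sequence (aux station, PHY contexts, TX
 * queues); afterwards the node is simply marked as not associated.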
*/ /* iwm_mvm_flush_tx_path(sc, 0xf, 1); */ iwm_stop_device(sc); iwm_init_hw(sc); if (in) in->in_assoc = 0; return 0; #if 0 int error; iwm_mvm_power_mac_disable(sc, in); if ((error = iwm_mvm_mac_ctxt_changed(sc, in)) != 0) { device_printf(sc->sc_dev, "mac ctxt change fail 1 %d\n", error); return error; } if ((error = iwm_mvm_rm_sta(sc, in)) != 0) { device_printf(sc->sc_dev, "sta remove fail %d\n", error); return error; } error = iwm_mvm_rm_sta(sc, in); in->in_assoc = 0; iwm_mvm_update_quotas(sc, NULL); if ((error = iwm_mvm_mac_ctxt_changed(sc, in)) != 0) { device_printf(sc->sc_dev, "mac ctxt change fail 2 %d\n", error); return error; } iwm_mvm_binding_remove_vif(sc, in); iwm_mvm_mac_ctxt_remove(sc, in); return error; #endif } static struct ieee80211_node * iwm_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { return malloc(sizeof (struct iwm_node), M_80211_NODE, M_NOWAIT | M_ZERO); } static void iwm_setrates(struct iwm_softc *sc, struct iwm_node *in) { struct ieee80211_node *ni = &in->in_ni; struct iwm_lq_cmd *lq = &in->in_lq; int nrates = ni->ni_rates.rs_nrates; int i, ridx, tab = 0; int txant = 0; if (nrates > nitems(lq->rs_table)) { device_printf(sc->sc_dev, "%s: node supports %d rates, driver handles " "only %zu\n", __func__, nrates, nitems(lq->rs_table)); return; } if (nrates == 0) { device_printf(sc->sc_dev, "%s: node supports 0 rates, odd!\n", __func__); return; } /* * XXX .. and most of iwm_node is not initialised explicitly; * it's all just 0x0 passed to the firmware. */ /* first figure out which rates we should support */ /* XXX TODO: this isn't 11n aware /at all/ */ memset(&in->in_ridx, -1, sizeof(in->in_ridx)); IWM_DPRINTF(sc, IWM_DEBUG_TXRATE, "%s: nrates=%d\n", __func__, nrates); /* * Loop over nrates and populate in_ridx from the highest * rate to the lowest rate. Remember, in_ridx[] has * IEEE80211_RATE_MAXSIZE entries! */ for (i = 0; i < min(nrates, IEEE80211_RATE_MAXSIZE); i++) { int rate = ni->ni_rates.rs_rates[(nrates - 1) - i] & IEEE80211_RATE_VAL; /* Map 802.11 rate to HW rate index. */ for (ridx = 0; ridx <= IWM_RIDX_MAX; ridx++) if (iwm_rates[ridx].rate == rate) break; if (ridx > IWM_RIDX_MAX) { device_printf(sc->sc_dev, "%s: WARNING: device rate for %d not found!\n", __func__, rate); } else { IWM_DPRINTF(sc, IWM_DEBUG_TXRATE, "%s: rate: i: %d, rate=%d, ridx=%d\n", __func__, i, rate, ridx); in->in_ridx[i] = ridx; } } /* then construct a lq_cmd based on those */ memset(lq, 0, sizeof(*lq)); lq->sta_id = IWM_STATION_ID; /* For HT, always enable RTS/CTS to avoid excessive retries. */ if (ni->ni_flags & IEEE80211_NODE_HT) lq->flags |= IWM_LQ_FLAG_USE_RTS_MSK; /* * are these used? (we don't do SISO or MIMO) * need to set them to non-zero, though, or we get an error. */ lq->single_stream_ant_msk = 1; lq->dual_stream_ant_msk = 1; /* * Build the actual rate selection table. * The lowest bits are the rates. Additionally, * CCK needs bit 9 to be set. The rest of the bits * we add to the table select the tx antenna * Note that we add the rates in the highest rate first * (opposite of ni_rates). */ /* * XXX TODO: this should be looping over the min of nrates * and LQ_MAX_RETRY_NUM. Sigh. */ for (i = 0; i < nrates; i++) { int nextant; if (txant == 0) txant = iwm_fw_valid_tx_ant(sc); nextant = 1<<(ffs(txant)-1); txant &= ~nextant; /* * Map the rate id into a rate index into * our hardware table containing the * configuration to use for this rate. 
*/ ridx = in->in_ridx[i]; tab = iwm_rates[ridx].plcp; tab |= nextant << IWM_RATE_MCS_ANT_POS; if (IWM_RIDX_IS_CCK(ridx)) tab |= IWM_RATE_MCS_CCK_MSK; IWM_DPRINTF(sc, IWM_DEBUG_TXRATE, "station rate i=%d, rate=%d, hw=%x\n", i, iwm_rates[ridx].rate, tab); lq->rs_table[i] = htole32(tab); } /* then fill the rest with the lowest possible rate */ for (i = nrates; i < nitems(lq->rs_table); i++) { KASSERT(tab != 0, ("invalid tab")); lq->rs_table[i] = htole32(tab); } } static int iwm_media_change(struct ifnet *ifp) { struct ieee80211vap *vap = ifp->if_softc; struct ieee80211com *ic = vap->iv_ic; struct iwm_softc *sc = ic->ic_softc; int error; error = ieee80211_media_change(ifp); if (error != ENETRESET) return error; IWM_LOCK(sc); if (ic->ic_nrunning > 0) { iwm_stop(sc); iwm_init(sc); } IWM_UNLOCK(sc); return error; } static int iwm_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct iwm_vap *ivp = IWM_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct iwm_softc *sc = ic->ic_softc; struct iwm_node *in; int error; IWM_DPRINTF(sc, IWM_DEBUG_STATE, "switching state %s -> %s\n", ieee80211_state_name[vap->iv_state], ieee80211_state_name[nstate]); IEEE80211_UNLOCK(ic); IWM_LOCK(sc); if (vap->iv_state == IEEE80211_S_SCAN && nstate != vap->iv_state) iwm_led_blink_stop(sc); /* disable beacon filtering if we're hopping out of RUN */ if (vap->iv_state == IEEE80211_S_RUN && nstate != vap->iv_state) { iwm_mvm_disable_beacon_filter(sc); if (((in = IWM_NODE(vap->iv_bss)) != NULL)) in->in_assoc = 0; iwm_release(sc, NULL); /* * It's impossible to directly go RUN->SCAN. If we iwm_release() * above then the card will be completely reinitialized, * so the driver must do everything necessary to bring the card * from INIT to SCAN. * * Additionally, upon receiving deauth frame from AP, * OpenBSD 802.11 stack puts the driver in IEEE80211_S_AUTH * state. This will also fail with this driver, so bring the FSM * from IEEE80211_S_RUN to IEEE80211_S_SCAN in this case as well. * * XXX TODO: fix this for FreeBSD! */ if (nstate == IEEE80211_S_SCAN || nstate == IEEE80211_S_AUTH || nstate == IEEE80211_S_ASSOC) { IWM_DPRINTF(sc, IWM_DEBUG_STATE, "Force transition to INIT; MGT=%d\n", arg); IWM_UNLOCK(sc); IEEE80211_LOCK(ic); /* Always pass arg as -1 since we can't Tx right now. */ /* * XXX arg is just ignored anyway when transitioning * to IEEE80211_S_INIT. 
*/ vap->iv_newstate(vap, IEEE80211_S_INIT, -1); IWM_DPRINTF(sc, IWM_DEBUG_STATE, "Going INIT->SCAN\n"); nstate = IEEE80211_S_SCAN; IEEE80211_UNLOCK(ic); IWM_LOCK(sc); } } switch (nstate) { case IEEE80211_S_INIT: break; case IEEE80211_S_AUTH: if ((error = iwm_auth(vap, sc)) != 0) { device_printf(sc->sc_dev, "%s: could not move to auth state: %d\n", __func__, error); break; } break; case IEEE80211_S_ASSOC: if ((error = iwm_assoc(vap, sc)) != 0) { device_printf(sc->sc_dev, "%s: failed to associate: %d\n", __func__, error); break; } break; case IEEE80211_S_RUN: { struct iwm_host_cmd cmd = { .id = IWM_LQ_CMD, .len = { sizeof(in->in_lq), }, .flags = IWM_CMD_SYNC, }; /* Update the association state, now we have it all */ /* (eg associd comes in at this point */ error = iwm_assoc(vap, sc); if (error != 0) { device_printf(sc->sc_dev, "%s: failed to update association state: %d\n", __func__, error); break; } in = IWM_NODE(vap->iv_bss); iwm_mvm_power_mac_update_mode(sc, in); iwm_mvm_enable_beacon_filter(sc, in); iwm_mvm_update_quotas(sc, in); iwm_setrates(sc, in); cmd.data[0] = &in->in_lq; if ((error = iwm_send_cmd(sc, &cmd)) != 0) { device_printf(sc->sc_dev, "%s: IWM_LQ_CMD failed\n", __func__); } iwm_mvm_led_enable(sc); break; } default: break; } IWM_UNLOCK(sc); IEEE80211_LOCK(ic); return (ivp->iv_newstate(vap, nstate, arg)); } void iwm_endscan_cb(void *arg, int pending) { struct iwm_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; IWM_DPRINTF(sc, IWM_DEBUG_SCAN | IWM_DEBUG_TRACE, "%s: scan ended\n", __func__); ieee80211_scan_done(TAILQ_FIRST(&ic->ic_vaps)); } /* * Aging and idle timeouts for the different possible scenarios * in default configuration */ static const uint32_t iwm_sf_full_timeout_def[IWM_SF_NUM_SCENARIO][IWM_SF_NUM_TIMEOUT_TYPES] = { { htole32(IWM_SF_SINGLE_UNICAST_AGING_TIMER_DEF), htole32(IWM_SF_SINGLE_UNICAST_IDLE_TIMER_DEF) }, { htole32(IWM_SF_AGG_UNICAST_AGING_TIMER_DEF), htole32(IWM_SF_AGG_UNICAST_IDLE_TIMER_DEF) }, { htole32(IWM_SF_MCAST_AGING_TIMER_DEF), htole32(IWM_SF_MCAST_IDLE_TIMER_DEF) }, { htole32(IWM_SF_BA_AGING_TIMER_DEF), htole32(IWM_SF_BA_IDLE_TIMER_DEF) }, { htole32(IWM_SF_TX_RE_AGING_TIMER_DEF), htole32(IWM_SF_TX_RE_IDLE_TIMER_DEF) }, }; /* * Aging and idle timeouts for the different possible scenarios * in single BSS MAC configuration. */ static const uint32_t iwm_sf_full_timeout[IWM_SF_NUM_SCENARIO][IWM_SF_NUM_TIMEOUT_TYPES] = { { htole32(IWM_SF_SINGLE_UNICAST_AGING_TIMER), htole32(IWM_SF_SINGLE_UNICAST_IDLE_TIMER) }, { htole32(IWM_SF_AGG_UNICAST_AGING_TIMER), htole32(IWM_SF_AGG_UNICAST_IDLE_TIMER) }, { htole32(IWM_SF_MCAST_AGING_TIMER), htole32(IWM_SF_MCAST_IDLE_TIMER) }, { htole32(IWM_SF_BA_AGING_TIMER), htole32(IWM_SF_BA_IDLE_TIMER) }, { htole32(IWM_SF_TX_RE_AGING_TIMER), htole32(IWM_SF_TX_RE_IDLE_TIMER) }, }; static void iwm_mvm_fill_sf_command(struct iwm_softc *sc, struct iwm_sf_cfg_cmd *sf_cmd, struct ieee80211_node *ni) { int i, j, watermark; sf_cmd->watermark[IWM_SF_LONG_DELAY_ON] = htole32(IWM_SF_W_MARK_SCAN); /* * If we are in association flow - check antenna configuration * capabilities of the AP station, and choose the watermark accordingly. */ if (ni) { if (ni->ni_flags & IEEE80211_NODE_HT) { #ifdef notyet if (ni->ni_rxmcs[2] != 0) watermark = IWM_SF_W_MARK_MIMO3; else if (ni->ni_rxmcs[1] != 0) watermark = IWM_SF_W_MARK_MIMO2; else #endif watermark = IWM_SF_W_MARK_SISO; } else { watermark = IWM_SF_W_MARK_LEGACY; } /* default watermark value for unassociated mode. 
*/ } else { watermark = IWM_SF_W_MARK_MIMO2; } sf_cmd->watermark[IWM_SF_FULL_ON] = htole32(watermark); for (i = 0; i < IWM_SF_NUM_SCENARIO; i++) { for (j = 0; j < IWM_SF_NUM_TIMEOUT_TYPES; j++) { sf_cmd->long_delay_timeouts[i][j] = htole32(IWM_SF_LONG_DELAY_AGING_TIMER); } } if (ni) { memcpy(sf_cmd->full_on_timeouts, iwm_sf_full_timeout, sizeof(iwm_sf_full_timeout)); } else { memcpy(sf_cmd->full_on_timeouts, iwm_sf_full_timeout_def, sizeof(iwm_sf_full_timeout_def)); } } static int iwm_mvm_sf_config(struct iwm_softc *sc, enum iwm_sf_state new_state) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct iwm_sf_cfg_cmd sf_cmd = { .state = htole32(IWM_SF_FULL_ON), }; int ret = 0; if (sc->sc_device_family == IWM_DEVICE_FAMILY_8000) sf_cmd.state |= htole32(IWM_SF_CFG_DUMMY_NOTIF_OFF); switch (new_state) { case IWM_SF_UNINIT: case IWM_SF_INIT_OFF: iwm_mvm_fill_sf_command(sc, &sf_cmd, NULL); break; case IWM_SF_FULL_ON: iwm_mvm_fill_sf_command(sc, &sf_cmd, vap->iv_bss); break; default: IWM_DPRINTF(sc, IWM_DEBUG_PWRSAVE, "Invalid state: %d. not sending Smart Fifo cmd\n", new_state); return EINVAL; } ret = iwm_mvm_send_cmd_pdu(sc, IWM_REPLY_SF_CFG_CMD, IWM_CMD_ASYNC, sizeof(sf_cmd), &sf_cmd); return ret; } static int iwm_send_bt_init_conf(struct iwm_softc *sc) { struct iwm_bt_coex_cmd bt_cmd; bt_cmd.mode = htole32(IWM_BT_COEX_WIFI); bt_cmd.enabled_modules = htole32(IWM_BT_COEX_HIGH_BAND_RET); return iwm_mvm_send_cmd_pdu(sc, IWM_BT_CONFIG, 0, sizeof(bt_cmd), &bt_cmd); } static int iwm_send_update_mcc_cmd(struct iwm_softc *sc, const char *alpha2) { struct iwm_mcc_update_cmd mcc_cmd; struct iwm_host_cmd hcmd = { .id = IWM_MCC_UPDATE_CMD, .flags = (IWM_CMD_SYNC | IWM_CMD_WANT_SKB), .data = { &mcc_cmd }, }; int ret; #ifdef IWM_DEBUG struct iwm_rx_packet *pkt; struct iwm_mcc_update_resp_v1 *mcc_resp_v1 = NULL; struct iwm_mcc_update_resp *mcc_resp; int n_channels; uint16_t mcc; #endif int resp_v2 = isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_LAR_SUPPORT_V2); memset(&mcc_cmd, 0, sizeof(mcc_cmd)); mcc_cmd.mcc = htole16(alpha2[0] << 8 | alpha2[1]); if ((sc->sc_ucode_api & IWM_UCODE_TLV_API_WIFI_MCC_UPDATE) || isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_LAR_MULTI_MCC)) mcc_cmd.source_id = IWM_MCC_SOURCE_GET_CURRENT; else mcc_cmd.source_id = IWM_MCC_SOURCE_OLD_FW; if (resp_v2) hcmd.len[0] = sizeof(struct iwm_mcc_update_cmd); else hcmd.len[0] = sizeof(struct iwm_mcc_update_cmd_v1); IWM_DPRINTF(sc, IWM_DEBUG_NODE, "send MCC update to FW with '%c%c' src = %d\n", alpha2[0], alpha2[1], mcc_cmd.source_id); ret = iwm_send_cmd(sc, &hcmd); if (ret) return ret; #ifdef IWM_DEBUG pkt = hcmd.resp_pkt; /* Extract MCC response */ if (resp_v2) { mcc_resp = (void *)pkt->data; mcc = mcc_resp->mcc; n_channels = le32toh(mcc_resp->n_channels); } else { mcc_resp_v1 = (void *)pkt->data; mcc = mcc_resp_v1->mcc; n_channels = le32toh(mcc_resp_v1->n_channels); } /* W/A for a FW/NVM issue - returns 0x00 for the world domain */ if (mcc == 0) mcc = 0x3030; /* "00" - world */ IWM_DPRINTF(sc, IWM_DEBUG_NODE, "regulatory domain '%c%c' (%d channels available)\n", mcc >> 8, mcc & 0xff, n_channels); #endif iwm_free_resp(sc, &hcmd); return 0; } static void iwm_mvm_tt_tx_backoff(struct iwm_softc *sc, uint32_t backoff) { struct iwm_host_cmd cmd = { .id = IWM_REPLY_THERMAL_MNG_BACKOFF, .len = { sizeof(uint32_t), }, .data = { &backoff, }, }; if (iwm_send_cmd(sc, &cmd) != 0) { device_printf(sc->sc_dev, "failed to change thermal tx backoff\n"); } } static int iwm_init_hw(struct iwm_softc *sc) { struct 
ieee80211com *ic = &sc->sc_ic; int error, i, ac; if ((error = iwm_start_hw(sc)) != 0) { printf("iwm_start_hw: failed %d\n", error); return error; } if ((error = iwm_run_init_mvm_ucode(sc, 0)) != 0) { printf("iwm_run_init_mvm_ucode: failed %d\n", error); return error; } /* * should stop and start HW since that INIT * image just loaded */ iwm_stop_device(sc); if ((error = iwm_start_hw(sc)) != 0) { device_printf(sc->sc_dev, "could not initialize hardware\n"); return error; } /* omstart, this time with the regular firmware */ error = iwm_mvm_load_ucode_wait_alive(sc, IWM_UCODE_TYPE_REGULAR); if (error) { device_printf(sc->sc_dev, "could not load firmware\n"); goto error; } if ((error = iwm_send_bt_init_conf(sc)) != 0) { device_printf(sc->sc_dev, "bt init conf failed\n"); goto error; } if ((error = iwm_send_tx_ant_cfg(sc, iwm_fw_valid_tx_ant(sc))) != 0) { device_printf(sc->sc_dev, "antenna config failed\n"); goto error; } /* Send phy db control command and then phy db calibration*/ if ((error = iwm_send_phy_db_data(sc)) != 0) { device_printf(sc->sc_dev, "phy_db_data failed\n"); goto error; } if ((error = iwm_send_phy_cfg_cmd(sc)) != 0) { device_printf(sc->sc_dev, "phy_cfg_cmd failed\n"); goto error; } /* Add auxiliary station for scanning */ if ((error = iwm_mvm_add_aux_sta(sc)) != 0) { device_printf(sc->sc_dev, "add_aux_sta failed\n"); goto error; } for (i = 0; i < IWM_NUM_PHY_CTX; i++) { /* * The channel used here isn't relevant as it's * going to be overwritten in the other flows. * For now use the first channel we have. */ if ((error = iwm_mvm_phy_ctxt_add(sc, &sc->sc_phyctxt[i], &ic->ic_channels[1], 1, 1)) != 0) goto error; } /* Initialize tx backoffs to the minimum. */ if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) iwm_mvm_tt_tx_backoff(sc, 0); error = iwm_mvm_power_update_device(sc); if (error) goto error; if (isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_LAR_SUPPORT)) { if ((error = iwm_send_update_mcc_cmd(sc, "ZZ")) != 0) goto error; } if (isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_UMAC_SCAN)) { if ((error = iwm_mvm_config_umac_scan(sc)) != 0) goto error; } /* Enable Tx queues. */ for (ac = 0; ac < WME_NUM_AC; ac++) { error = iwm_enable_txq(sc, IWM_STATION_ID, ac, iwm_mvm_ac_to_tx_fifo[ac]); if (error) goto error; } if ((error = iwm_mvm_disable_beacon_filter(sc)) != 0) { device_printf(sc->sc_dev, "failed to disable beacon filter\n"); goto error; } return 0; error: iwm_stop_device(sc); return error; } /* Allow multicast from our BSSID. 
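 *
 * In practice the command built below sets pass_all (with count 0),
 * so no multicast filtering is actually applied; only filter_own and
 * our BSSID are filled in.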
*/ static int iwm_allow_mcast(struct ieee80211vap *vap, struct iwm_softc *sc) { struct ieee80211_node *ni = vap->iv_bss; struct iwm_mcast_filter_cmd *cmd; size_t size; int error; size = roundup(sizeof(*cmd), 4); cmd = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO); if (cmd == NULL) return ENOMEM; cmd->filter_own = 1; cmd->port_id = 0; cmd->count = 0; cmd->pass_all = 1; IEEE80211_ADDR_COPY(cmd->bssid, ni->ni_bssid); error = iwm_mvm_send_cmd_pdu(sc, IWM_MCAST_FILTER_CMD, IWM_CMD_SYNC, size, cmd); free(cmd, M_DEVBUF); return (error); } /* * ifnet interfaces */ static void iwm_init(struct iwm_softc *sc) { int error; if (sc->sc_flags & IWM_FLAG_HW_INITED) { return; } sc->sc_generation++; sc->sc_flags &= ~IWM_FLAG_STOPPED; if ((error = iwm_init_hw(sc)) != 0) { printf("iwm_init_hw failed %d\n", error); iwm_stop(sc); return; } /* * Ok, firmware loaded and we are jogging */ sc->sc_flags |= IWM_FLAG_HW_INITED; callout_reset(&sc->sc_watchdog_to, hz, iwm_watchdog, sc); } static int iwm_transmit(struct ieee80211com *ic, struct mbuf *m) { struct iwm_softc *sc; int error; sc = ic->ic_softc; IWM_LOCK(sc); if ((sc->sc_flags & IWM_FLAG_HW_INITED) == 0) { IWM_UNLOCK(sc); return (ENXIO); } error = mbufq_enqueue(&sc->sc_snd, m); if (error) { IWM_UNLOCK(sc); return (error); } iwm_start(sc); IWM_UNLOCK(sc); return (0); } /* * Dequeue packets from sendq and call send. */ static void iwm_start(struct iwm_softc *sc) { struct ieee80211_node *ni; struct mbuf *m; int ac = 0; IWM_DPRINTF(sc, IWM_DEBUG_XMIT | IWM_DEBUG_TRACE, "->%s\n", __func__); while (sc->qfullmsk == 0 && (m = mbufq_dequeue(&sc->sc_snd)) != NULL) { ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; if (iwm_tx(sc, m, ni, ac) != 0) { if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); ieee80211_free_node(ni); continue; } sc->sc_tx_timer = 15; } IWM_DPRINTF(sc, IWM_DEBUG_XMIT | IWM_DEBUG_TRACE, "<-%s\n", __func__); } static void iwm_stop(struct iwm_softc *sc) { sc->sc_flags &= ~IWM_FLAG_HW_INITED; sc->sc_flags |= IWM_FLAG_STOPPED; sc->sc_generation++; iwm_led_blink_stop(sc); sc->sc_tx_timer = 0; iwm_stop_device(sc); } static void iwm_watchdog(void *arg) { struct iwm_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; if (sc->sc_tx_timer > 0) { if (--sc->sc_tx_timer == 0) { device_printf(sc->sc_dev, "device timeout\n"); #ifdef IWM_DEBUG iwm_nic_error(sc); #endif ieee80211_restart_all(ic); counter_u64_add(sc->sc_ic.ic_oerrors, 1); return; } } callout_reset(&sc->sc_watchdog_to, hz, iwm_watchdog, sc); } static void iwm_parent(struct ieee80211com *ic) { struct iwm_softc *sc = ic->ic_softc; int startall = 0; IWM_LOCK(sc); if (ic->ic_nrunning > 0) { if (!(sc->sc_flags & IWM_FLAG_HW_INITED)) { iwm_init(sc); startall = 1; } } else if (sc->sc_flags & IWM_FLAG_HW_INITED) iwm_stop(sc); IWM_UNLOCK(sc); if (startall) ieee80211_start_all(ic); } /* * The interrupt side of things */ /* * error dumping routines are from iwlwifi/mvm/utils.c */ /* * Note: This structure is read from the device with IO accesses, * and the reading already does the endian conversion. As it is * read with uint32_t-sized accesses, any members with a different size * need to be ordered correctly though! 
*/ struct iwm_error_event_table { uint32_t valid; /* (nonzero) valid, (0) log is empty */ uint32_t error_id; /* type of error */ uint32_t trm_hw_status0; /* TRM HW status */ uint32_t trm_hw_status1; /* TRM HW status */ uint32_t blink2; /* branch link */ uint32_t ilink1; /* interrupt link */ uint32_t ilink2; /* interrupt link */ uint32_t data1; /* error-specific data */ uint32_t data2; /* error-specific data */ uint32_t data3; /* error-specific data */ uint32_t bcon_time; /* beacon timer */ uint32_t tsf_low; /* network timestamp function timer */ uint32_t tsf_hi; /* network timestamp function timer */ uint32_t gp1; /* GP1 timer register */ uint32_t gp2; /* GP2 timer register */ uint32_t fw_rev_type; /* firmware revision type */ uint32_t major; /* uCode version major */ uint32_t minor; /* uCode version minor */ uint32_t hw_ver; /* HW Silicon version */ uint32_t brd_ver; /* HW board version */ uint32_t log_pc; /* log program counter */ uint32_t frame_ptr; /* frame pointer */ uint32_t stack_ptr; /* stack pointer */ uint32_t hcmd; /* last host command header */ uint32_t isr0; /* isr status register LMPM_NIC_ISR0: * rxtx_flag */ uint32_t isr1; /* isr status register LMPM_NIC_ISR1: * host_flag */ uint32_t isr2; /* isr status register LMPM_NIC_ISR2: * enc_flag */ uint32_t isr3; /* isr status register LMPM_NIC_ISR3: * time_flag */ uint32_t isr4; /* isr status register LMPM_NIC_ISR4: * wico interrupt */ uint32_t last_cmd_id; /* last HCMD id handled by the firmware */ uint32_t wait_event; /* wait event() caller address */ uint32_t l2p_control; /* L2pControlField */ uint32_t l2p_duration; /* L2pDurationField */ uint32_t l2p_mhvalid; /* L2pMhValidBits */ uint32_t l2p_addr_match; /* L2pAddrMatchStat */ uint32_t lmpm_pmg_sel; /* indicate which clocks are turned on * (LMPM_PMG_SEL) */ uint32_t u_timestamp; /* indicate when the date and time of the * compilation */ uint32_t flow_handler; /* FH read/write pointers, RX credit */ } __packed /* LOG_ERROR_TABLE_API_S_VER_3 */; /* * UMAC error struct - relevant starting from family 8000 chip. * Note: This structure is read from the device with IO accesses, * and the reading already does the endian conversion. As it is * read with u32-sized accesses, any members with a different size * need to be ordered correctly though! 
*/ struct iwm_umac_error_event_table { uint32_t valid; /* (nonzero) valid, (0) log is empty */ uint32_t error_id; /* type of error */ uint32_t blink1; /* branch link */ uint32_t blink2; /* branch link */ uint32_t ilink1; /* interrupt link */ uint32_t ilink2; /* interrupt link */ uint32_t data1; /* error-specific data */ uint32_t data2; /* error-specific data */ uint32_t data3; /* error-specific data */ uint32_t umac_major; uint32_t umac_minor; uint32_t frame_pointer; /* core register 27*/ uint32_t stack_pointer; /* core register 28 */ uint32_t cmd_header; /* latest host cmd sent to UMAC */ uint32_t nic_isr_pref; /* ISR status register */ } __packed; #define ERROR_START_OFFSET (1 * sizeof(uint32_t)) #define ERROR_ELEM_SIZE (7 * sizeof(uint32_t)) #ifdef IWM_DEBUG struct { const char *name; uint8_t num; } advanced_lookup[] = { { "NMI_INTERRUPT_WDG", 0x34 }, { "SYSASSERT", 0x35 }, { "UCODE_VERSION_MISMATCH", 0x37 }, { "BAD_COMMAND", 0x38 }, { "NMI_INTERRUPT_DATA_ACTION_PT", 0x3C }, { "FATAL_ERROR", 0x3D }, { "NMI_TRM_HW_ERR", 0x46 }, { "NMI_INTERRUPT_TRM", 0x4C }, { "NMI_INTERRUPT_BREAK_POINT", 0x54 }, { "NMI_INTERRUPT_WDG_RXF_FULL", 0x5C }, { "NMI_INTERRUPT_WDG_NO_RBD_RXF_FULL", 0x64 }, { "NMI_INTERRUPT_HOST", 0x66 }, { "NMI_INTERRUPT_ACTION_PT", 0x7C }, { "NMI_INTERRUPT_UNKNOWN", 0x84 }, { "NMI_INTERRUPT_INST_ACTION_PT", 0x86 }, { "ADVANCED_SYSASSERT", 0 }, }; static const char * iwm_desc_lookup(uint32_t num) { int i; for (i = 0; i < nitems(advanced_lookup) - 1; i++) if (advanced_lookup[i].num == num) return advanced_lookup[i].name; /* No entry matches 'num', so it is the last: ADVANCED_SYSASSERT */ return advanced_lookup[i].name; } static void iwm_nic_umac_error(struct iwm_softc *sc) { struct iwm_umac_error_event_table table; uint32_t base; base = sc->sc_uc.uc_umac_error_event_table; if (base < 0x800000) { device_printf(sc->sc_dev, "Invalid error log pointer 0x%08x\n", base); return; } if (iwm_read_mem(sc, base, &table, sizeof(table)/sizeof(uint32_t))) { device_printf(sc->sc_dev, "reading errlog failed\n"); return; } if (ERROR_START_OFFSET <= table.valid * ERROR_ELEM_SIZE) { device_printf(sc->sc_dev, "Start UMAC Error Log Dump:\n"); device_printf(sc->sc_dev, "Status: 0x%x, count: %d\n", sc->sc_flags, table.valid); } device_printf(sc->sc_dev, "0x%08X | %s\n", table.error_id, iwm_desc_lookup(table.error_id)); device_printf(sc->sc_dev, "0x%08X | umac branchlink1\n", table.blink1); device_printf(sc->sc_dev, "0x%08X | umac branchlink2\n", table.blink2); device_printf(sc->sc_dev, "0x%08X | umac interruptlink1\n", table.ilink1); device_printf(sc->sc_dev, "0x%08X | umac interruptlink2\n", table.ilink2); device_printf(sc->sc_dev, "0x%08X | umac data1\n", table.data1); device_printf(sc->sc_dev, "0x%08X | umac data2\n", table.data2); device_printf(sc->sc_dev, "0x%08X | umac data3\n", table.data3); device_printf(sc->sc_dev, "0x%08X | umac major\n", table.umac_major); device_printf(sc->sc_dev, "0x%08X | umac minor\n", table.umac_minor); device_printf(sc->sc_dev, "0x%08X | frame pointer\n", table.frame_pointer); device_printf(sc->sc_dev, "0x%08X | stack pointer\n", table.stack_pointer); device_printf(sc->sc_dev, "0x%08X | last host cmd\n", table.cmd_header); device_printf(sc->sc_dev, "0x%08X | isr status reg\n", table.nic_isr_pref); } /* * Support for dumping the error log seemed like a good idea ... * but it's mostly hex junk and the only sensible thing is the * hw/ucode revision (which we know anyway). Since it's here, * I'll just leave it in, just in case e.g. 
the Intel guys want to * help us decipher some "ADVANCED_SYSASSERT" later. */ static void iwm_nic_error(struct iwm_softc *sc) { struct iwm_error_event_table table; uint32_t base; device_printf(sc->sc_dev, "dumping device error log\n"); base = sc->sc_uc.uc_error_event_table; if (base < 0x800000) { device_printf(sc->sc_dev, "Invalid error log pointer 0x%08x\n", base); return; } if (iwm_read_mem(sc, base, &table, sizeof(table)/sizeof(uint32_t))) { device_printf(sc->sc_dev, "reading errlog failed\n"); return; } if (!table.valid) { device_printf(sc->sc_dev, "errlog not found, skipping\n"); return; } if (ERROR_START_OFFSET <= table.valid * ERROR_ELEM_SIZE) { device_printf(sc->sc_dev, "Start Error Log Dump:\n"); device_printf(sc->sc_dev, "Status: 0x%x, count: %d\n", sc->sc_flags, table.valid); } device_printf(sc->sc_dev, "0x%08X | %-28s\n", table.error_id, iwm_desc_lookup(table.error_id)); device_printf(sc->sc_dev, "%08X | trm_hw_status0\n", table.trm_hw_status0); device_printf(sc->sc_dev, "%08X | trm_hw_status1\n", table.trm_hw_status1); device_printf(sc->sc_dev, "%08X | branchlink2\n", table.blink2); device_printf(sc->sc_dev, "%08X | interruptlink1\n", table.ilink1); device_printf(sc->sc_dev, "%08X | interruptlink2\n", table.ilink2); device_printf(sc->sc_dev, "%08X | data1\n", table.data1); device_printf(sc->sc_dev, "%08X | data2\n", table.data2); device_printf(sc->sc_dev, "%08X | data3\n", table.data3); device_printf(sc->sc_dev, "%08X | beacon time\n", table.bcon_time); device_printf(sc->sc_dev, "%08X | tsf low\n", table.tsf_low); device_printf(sc->sc_dev, "%08X | tsf hi\n", table.tsf_hi); device_printf(sc->sc_dev, "%08X | time gp1\n", table.gp1); device_printf(sc->sc_dev, "%08X | time gp2\n", table.gp2); device_printf(sc->sc_dev, "%08X | uCode revision type\n", table.fw_rev_type); device_printf(sc->sc_dev, "%08X | uCode version major\n", table.major); device_printf(sc->sc_dev, "%08X | uCode version minor\n", table.minor); device_printf(sc->sc_dev, "%08X | hw version\n", table.hw_ver); device_printf(sc->sc_dev, "%08X | board version\n", table.brd_ver); device_printf(sc->sc_dev, "%08X | hcmd\n", table.hcmd); device_printf(sc->sc_dev, "%08X | isr0\n", table.isr0); device_printf(sc->sc_dev, "%08X | isr1\n", table.isr1); device_printf(sc->sc_dev, "%08X | isr2\n", table.isr2); device_printf(sc->sc_dev, "%08X | isr3\n", table.isr3); device_printf(sc->sc_dev, "%08X | isr4\n", table.isr4); device_printf(sc->sc_dev, "%08X | last cmd Id\n", table.last_cmd_id); device_printf(sc->sc_dev, "%08X | wait_event\n", table.wait_event); device_printf(sc->sc_dev, "%08X | l2p_control\n", table.l2p_control); device_printf(sc->sc_dev, "%08X | l2p_duration\n", table.l2p_duration); device_printf(sc->sc_dev, "%08X | l2p_mhvalid\n", table.l2p_mhvalid); device_printf(sc->sc_dev, "%08X | l2p_addr_match\n", table.l2p_addr_match); device_printf(sc->sc_dev, "%08X | lmpm_pmg_sel\n", table.lmpm_pmg_sel); device_printf(sc->sc_dev, "%08X | timestamp\n", table.u_timestamp); device_printf(sc->sc_dev, "%08X | flow_handler\n", table.flow_handler); if (sc->sc_uc.uc_umac_error_event_table) iwm_nic_umac_error(sc); } #endif #define SYNC_RESP_STRUCT(_var_, _pkt_) \ do { \ bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD);\ _var_ = (void *)((_pkt_)+1); \ } while (/*CONSTCOND*/0) #define SYNC_RESP_PTR(_ptr_, _len_, _pkt_) \ do { \ bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD);\ _ptr_ = (void *)((_pkt_)+1); \ } while (/*CONSTCOND*/0) #define ADVANCE_RXQ(sc) (sc->rxq.cur = (sc->rxq.cur + 1) % 
IWM_RX_RING_COUNT); /* * Process an IWM_CSR_INT_BIT_FH_RX or IWM_CSR_INT_BIT_SW_RX interrupt. * Basic structure from if_iwn */ static void iwm_notif_intr(struct iwm_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; uint16_t hw; bus_dmamap_sync(sc->rxq.stat_dma.tag, sc->rxq.stat_dma.map, BUS_DMASYNC_POSTREAD); hw = le16toh(sc->rxq.stat->closed_rb_num) & 0xfff; /* * Process responses */ while (sc->rxq.cur != hw) { struct iwm_rx_ring *ring = &sc->rxq; struct iwm_rx_data *data = &sc->rxq.data[sc->rxq.cur]; struct iwm_rx_packet *pkt; struct iwm_cmd_response *cresp; int qid, idx, code; bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); pkt = mtod(data->m, struct iwm_rx_packet *); qid = pkt->hdr.qid & ~0x80; idx = pkt->hdr.idx; code = IWM_WIDE_ID(pkt->hdr.flags, pkt->hdr.code); IWM_DPRINTF(sc, IWM_DEBUG_INTR, "rx packet qid=%d idx=%d type=%x %d %d\n", pkt->hdr.qid & ~0x80, pkt->hdr.idx, code, sc->rxq.cur, hw); /* * randomly get these from the firmware, no idea why. * they at least seem harmless, so just ignore them for now */ if (__predict_false((pkt->hdr.code == 0 && qid == 0 && idx == 0) || pkt->len_n_flags == htole32(0x55550000))) { ADVANCE_RXQ(sc); continue; } switch (code) { case IWM_REPLY_RX_PHY_CMD: iwm_mvm_rx_rx_phy_cmd(sc, pkt, data); break; case IWM_REPLY_RX_MPDU_CMD: iwm_mvm_rx_rx_mpdu(sc, pkt, data); break; case IWM_TX_CMD: iwm_mvm_rx_tx_cmd(sc, pkt, data); break; case IWM_MISSED_BEACONS_NOTIFICATION: { struct iwm_missed_beacons_notif *resp; int missed; /* XXX look at mac_id to determine interface ID */ struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); SYNC_RESP_STRUCT(resp, pkt); missed = le32toh(resp->consec_missed_beacons); IWM_DPRINTF(sc, IWM_DEBUG_BEACON | IWM_DEBUG_STATE, "%s: MISSED_BEACON: mac_id=%d, " "consec_since_last_rx=%d, consec=%d, num_expect=%d " "num_rx=%d\n", __func__, le32toh(resp->mac_id), le32toh(resp->consec_missed_beacons_since_last_rx), le32toh(resp->consec_missed_beacons), le32toh(resp->num_expected_beacons), le32toh(resp->num_recvd_beacons)); /* Be paranoid */ if (vap == NULL) break; /* XXX no net80211 locking? 
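 *
 * Also note the lock dance just below: ieee80211_beacon_miss() may
 * re-enter the driver, so IWM_LOCK is dropped around the call; the
 * XXX there already suggests deferring this to a task instead.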
*/ if (vap->iv_state == IEEE80211_S_RUN && (ic->ic_flags & IEEE80211_F_SCAN) == 0) { if (missed > vap->iv_bmissthreshold) { /* XXX bad locking; turn into task */ IWM_UNLOCK(sc); ieee80211_beacon_miss(ic); IWM_LOCK(sc); } } break; } case IWM_MFUART_LOAD_NOTIFICATION: break; case IWM_MVM_ALIVE: { struct iwm_mvm_alive_resp_v1 *resp1; struct iwm_mvm_alive_resp_v2 *resp2; struct iwm_mvm_alive_resp_v3 *resp3; if (iwm_rx_packet_payload_len(pkt) == sizeof(*resp1)) { SYNC_RESP_STRUCT(resp1, pkt); sc->sc_uc.uc_error_event_table = le32toh(resp1->error_event_table_ptr); sc->sc_uc.uc_log_event_table = le32toh(resp1->log_event_table_ptr); sc->sched_base = le32toh(resp1->scd_base_ptr); if (resp1->status == IWM_ALIVE_STATUS_OK) sc->sc_uc.uc_ok = 1; else sc->sc_uc.uc_ok = 0; } if (iwm_rx_packet_payload_len(pkt) == sizeof(*resp2)) { SYNC_RESP_STRUCT(resp2, pkt); sc->sc_uc.uc_error_event_table = le32toh(resp2->error_event_table_ptr); sc->sc_uc.uc_log_event_table = le32toh(resp2->log_event_table_ptr); sc->sched_base = le32toh(resp2->scd_base_ptr); sc->sc_uc.uc_umac_error_event_table = le32toh(resp2->error_info_addr); if (resp2->status == IWM_ALIVE_STATUS_OK) sc->sc_uc.uc_ok = 1; else sc->sc_uc.uc_ok = 0; } if (iwm_rx_packet_payload_len(pkt) == sizeof(*resp3)) { SYNC_RESP_STRUCT(resp3, pkt); sc->sc_uc.uc_error_event_table = le32toh(resp3->error_event_table_ptr); sc->sc_uc.uc_log_event_table = le32toh(resp3->log_event_table_ptr); sc->sched_base = le32toh(resp3->scd_base_ptr); sc->sc_uc.uc_umac_error_event_table = le32toh(resp3->error_info_addr); if (resp3->status == IWM_ALIVE_STATUS_OK) sc->sc_uc.uc_ok = 1; else sc->sc_uc.uc_ok = 0; } sc->sc_uc.uc_intr = 1; wakeup(&sc->sc_uc); break; } case IWM_CALIB_RES_NOTIF_PHY_DB: { struct iwm_calib_res_notif_phy_db *phy_db_notif; SYNC_RESP_STRUCT(phy_db_notif, pkt); iwm_phy_db_set_section(sc, phy_db_notif); break; } case IWM_STATISTICS_NOTIFICATION: { struct iwm_notif_statistics *stats; SYNC_RESP_STRUCT(stats, pkt); memcpy(&sc->sc_stats, stats, sizeof(sc->sc_stats)); sc->sc_noise = iwm_get_noise(&stats->rx.general); break; } case IWM_NVM_ACCESS_CMD: case IWM_MCC_UPDATE_CMD: if (sc->sc_wantresp == ((qid << 16) | idx)) { bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); memcpy(sc->sc_cmd_resp, pkt, sizeof(sc->sc_cmd_resp)); } break; case IWM_MCC_CHUB_UPDATE_CMD: { struct iwm_mcc_chub_notif *notif; SYNC_RESP_STRUCT(notif, pkt); sc->sc_fw_mcc[0] = (notif->mcc & 0xff00) >> 8; sc->sc_fw_mcc[1] = notif->mcc & 0xff; sc->sc_fw_mcc[2] = '\0'; IWM_DPRINTF(sc, IWM_DEBUG_RESET, "fw source %d sent CC '%s'\n", notif->source_id, sc->sc_fw_mcc); break; } case IWM_DTS_MEASUREMENT_NOTIFICATION: break; case IWM_PHY_CONFIGURATION_CMD: case IWM_TX_ANT_CONFIGURATION_CMD: case IWM_ADD_STA: case IWM_MAC_CONTEXT_CMD: case IWM_REPLY_SF_CFG_CMD: case IWM_POWER_TABLE_CMD: case IWM_PHY_CONTEXT_CMD: case IWM_BINDING_CONTEXT_CMD: case IWM_TIME_EVENT_CMD: case IWM_SCAN_REQUEST_CMD: case IWM_WIDE_ID(IWM_ALWAYS_LONG_GROUP, IWM_SCAN_CFG_CMD): case IWM_WIDE_ID(IWM_ALWAYS_LONG_GROUP, IWM_SCAN_REQ_UMAC): case IWM_SCAN_OFFLOAD_REQUEST_CMD: case IWM_REPLY_BEACON_FILTERING_CMD: case IWM_MAC_PM_POWER_TABLE: case IWM_TIME_QUOTA_CMD: case IWM_REMOVE_STA: case IWM_TXPATH_FLUSH: case IWM_LQ_CMD: case IWM_BT_CONFIG: case IWM_REPLY_THERMAL_MNG_BACKOFF: SYNC_RESP_STRUCT(cresp, pkt); if (sc->sc_wantresp == ((qid << 16) | idx)) { memcpy(sc->sc_cmd_resp, pkt, sizeof(*pkt)+sizeof(*cresp)); } break; /* ignore */ case 0x6c: /* IWM_PHY_DB_CMD, no idea why it's not in fw-api.h */ break; case 
IWM_INIT_COMPLETE_NOTIF: sc->sc_init_complete = 1; wakeup(&sc->sc_init_complete); break; case IWM_SCAN_OFFLOAD_COMPLETE: { struct iwm_periodic_scan_complete *notif; SYNC_RESP_STRUCT(notif, pkt); break; } case IWM_SCAN_ITERATION_COMPLETE: { struct iwm_lmac_scan_complete_notif *notif; SYNC_RESP_STRUCT(notif, pkt); ieee80211_runtask(&sc->sc_ic, &sc->sc_es_task); break; } case IWM_SCAN_COMPLETE_UMAC: { struct iwm_umac_scan_complete *notif; SYNC_RESP_STRUCT(notif, pkt); IWM_DPRINTF(sc, IWM_DEBUG_SCAN, "UMAC scan complete, status=0x%x\n", notif->status); #if 0 /* XXX This would be a duplicate scan end call */ taskqueue_enqueue(sc->sc_tq, &sc->sc_es_task); #endif break; } case IWM_SCAN_ITERATION_COMPLETE_UMAC: { struct iwm_umac_scan_iter_complete_notif *notif; SYNC_RESP_STRUCT(notif, pkt); IWM_DPRINTF(sc, IWM_DEBUG_SCAN, "UMAC scan iteration " "complete, status=0x%x, %d channels scanned\n", notif->status, notif->scanned_channels); ieee80211_runtask(&sc->sc_ic, &sc->sc_es_task); break; } case IWM_REPLY_ERROR: { struct iwm_error_resp *resp; SYNC_RESP_STRUCT(resp, pkt); device_printf(sc->sc_dev, "firmware error 0x%x, cmd 0x%x\n", le32toh(resp->error_type), resp->cmd_id); break; } case IWM_TIME_EVENT_NOTIFICATION: { struct iwm_time_event_notif *notif; SYNC_RESP_STRUCT(notif, pkt); IWM_DPRINTF(sc, IWM_DEBUG_INTR, "TE notif status = 0x%x action = 0x%x\n", notif->status, notif->action); break; } case IWM_MCAST_FILTER_CMD: break; case IWM_SCD_QUEUE_CFG: { struct iwm_scd_txq_cfg_rsp *rsp; SYNC_RESP_STRUCT(rsp, pkt); IWM_DPRINTF(sc, IWM_DEBUG_CMD, "queue cfg token=0x%x sta_id=%d " "tid=%d scd_queue=%d\n", rsp->token, rsp->sta_id, rsp->tid, rsp->scd_queue); break; } default: device_printf(sc->sc_dev, "frame %d/%d %x UNHANDLED (this should " "not happen)\n", qid, idx, pkt->len_n_flags); break; } /* * Why test bit 0x80? The Linux driver: * * There is one exception: uCode sets bit 15 when it * originates the response/notification, i.e. when the * response/notification is not a direct response to a * command sent by the driver. For example, uCode issues * IWM_REPLY_RX when it sends a received frame to the driver; * it is not a direct response to any driver command. * * Ok, so since when is 7 == 15? Well, the Linux driver * uses a slightly different format for pkt->hdr, and "qid" * is actually the upper byte of a two-byte field. */ if (!(pkt->hdr.qid & (1 << 7))) { iwm_cmd_done(sc, pkt); } ADVANCE_RXQ(sc); } IWM_CLRBITS(sc, IWM_CSR_GP_CNTRL, IWM_CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); /* * Tell the firmware what we have processed. * Seems like the hardware gets upset unless we align * the write by 8?? */ hw = (hw == 0) ? IWM_RX_RING_COUNT - 1 : hw - 1; IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_WPTR, hw & ~7); } static void iwm_intr(void *arg) { struct iwm_softc *sc = arg; int handled = 0; int r1, r2, rv = 0; int isperiodic = 0; IWM_LOCK(sc); IWM_WRITE(sc, IWM_CSR_INT_MASK, 0); if (sc->sc_flags & IWM_FLAG_USE_ICT) { uint32_t *ict = sc->ict_dma.vaddr; int tmp; tmp = htole32(ict[sc->ict_cur]); if (!tmp) goto out_ena; /* * ok, there was something. keep plowing until we have all. */ r1 = r2 = 0; while (tmp) { r1 |= tmp; ict[sc->ict_cur] = 0; sc->ict_cur = (sc->ict_cur+1) % IWM_ICT_COUNT; tmp = htole32(ict[sc->ict_cur]); } /* this is where the fun begins. don't ask */ if (r1 == 0xffffffff) r1 = 0; /* i am not expected to understand this */ if (r1 & 0xc0000) r1 |= 0x8000; r1 = (0xff & r1) | ((0xff00 & r1) << 16); } else { r1 = IWM_READ(sc, IWM_CSR_INT); /* "hardware gone" (where, fishing?) 
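 *
 * Reads of all-ones (or the 0xa5a5a5a. pattern) from the interrupt
 * register are taken to mean the device has disappeared from the
 * bus, so there is nothing to service and we just bail out.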
*/ if (r1 == 0xffffffff || (r1 & 0xfffffff0) == 0xa5a5a5a0) goto out; r2 = IWM_READ(sc, IWM_CSR_FH_INT_STATUS); } if (r1 == 0 && r2 == 0) { goto out_ena; } IWM_WRITE(sc, IWM_CSR_INT, r1 | ~sc->sc_intmask); /* ignored */ handled |= (r1 & (IWM_CSR_INT_BIT_ALIVE /*| IWM_CSR_INT_BIT_SCD*/)); if (r1 & IWM_CSR_INT_BIT_SW_ERR) { int i; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); #ifdef IWM_DEBUG iwm_nic_error(sc); #endif /* Dump driver status (TX and RX rings) while we're here. */ device_printf(sc->sc_dev, "driver status:\n"); for (i = 0; i < IWM_MVM_MAX_QUEUES; i++) { struct iwm_tx_ring *ring = &sc->txq[i]; device_printf(sc->sc_dev, " tx ring %2d: qid=%-2d cur=%-3d " "queued=%-3d\n", i, ring->qid, ring->cur, ring->queued); } device_printf(sc->sc_dev, " rx ring: cur=%d\n", sc->rxq.cur); device_printf(sc->sc_dev, " 802.11 state %d\n", (vap == NULL) ? -1 : vap->iv_state); /* Don't stop the device; just do a VAP restart */ IWM_UNLOCK(sc); if (vap == NULL) { printf("%s: null vap\n", __func__); return; } device_printf(sc->sc_dev, "%s: controller panicked, iv_state = %d; " "restarting\n", __func__, vap->iv_state); /* XXX TODO: turn this into a callout/taskqueue */ ieee80211_restart_all(ic); return; } if (r1 & IWM_CSR_INT_BIT_HW_ERR) { handled |= IWM_CSR_INT_BIT_HW_ERR; device_printf(sc->sc_dev, "hardware error, stopping device\n"); iwm_stop(sc); rv = 1; goto out; } /* firmware chunk loaded */ if (r1 & IWM_CSR_INT_BIT_FH_TX) { IWM_WRITE(sc, IWM_CSR_FH_INT_STATUS, IWM_CSR_FH_INT_TX_MASK); handled |= IWM_CSR_INT_BIT_FH_TX; sc->sc_fw_chunk_done = 1; wakeup(&sc->sc_fw); } if (r1 & IWM_CSR_INT_BIT_RF_KILL) { handled |= IWM_CSR_INT_BIT_RF_KILL; if (iwm_check_rfkill(sc)) { device_printf(sc->sc_dev, "%s: rfkill switch, disabling interface\n", __func__); iwm_stop(sc); } } /* * The Linux driver uses periodic interrupts to avoid races. * We cargo-cult like it's going out of fashion. 
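 *
 * The idea, as far as it can be reconstructed from iwlwifi: disable
 * the periodic interrupt while RX notifications are being drained
 * and re-enable it afterwards, so a frame that lands while the RX
 * bits are being acknowledged still raises an interrupt rather than
 * sitting in the ring unnoticed.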
*/ if (r1 & IWM_CSR_INT_BIT_RX_PERIODIC) { handled |= IWM_CSR_INT_BIT_RX_PERIODIC; IWM_WRITE(sc, IWM_CSR_INT, IWM_CSR_INT_BIT_RX_PERIODIC); if ((r1 & (IWM_CSR_INT_BIT_FH_RX | IWM_CSR_INT_BIT_SW_RX)) == 0) IWM_WRITE_1(sc, IWM_CSR_INT_PERIODIC_REG, IWM_CSR_INT_PERIODIC_DIS); isperiodic = 1; } if ((r1 & (IWM_CSR_INT_BIT_FH_RX | IWM_CSR_INT_BIT_SW_RX)) || isperiodic) { handled |= (IWM_CSR_INT_BIT_FH_RX | IWM_CSR_INT_BIT_SW_RX); IWM_WRITE(sc, IWM_CSR_FH_INT_STATUS, IWM_CSR_FH_INT_RX_MASK); iwm_notif_intr(sc); /* enable periodic interrupt, see above */ if (r1 & (IWM_CSR_INT_BIT_FH_RX | IWM_CSR_INT_BIT_SW_RX) && !isperiodic) IWM_WRITE_1(sc, IWM_CSR_INT_PERIODIC_REG, IWM_CSR_INT_PERIODIC_ENA); } if (__predict_false(r1 & ~handled)) IWM_DPRINTF(sc, IWM_DEBUG_INTR, "%s: unhandled interrupts: %x\n", __func__, r1); rv = 1; out_ena: iwm_restore_interrupts(sc); out: IWM_UNLOCK(sc); return; } /* * Autoconf glue-sniffing */ #define PCI_VENDOR_INTEL 0x8086 #define PCI_PRODUCT_INTEL_WL_3160_1 0x08b3 #define PCI_PRODUCT_INTEL_WL_3160_2 0x08b4 #define PCI_PRODUCT_INTEL_WL_3165_1 0x3165 #define PCI_PRODUCT_INTEL_WL_3165_2 0x3166 #define PCI_PRODUCT_INTEL_WL_7260_1 0x08b1 #define PCI_PRODUCT_INTEL_WL_7260_2 0x08b2 #define PCI_PRODUCT_INTEL_WL_7265_1 0x095a #define PCI_PRODUCT_INTEL_WL_7265_2 0x095b #define PCI_PRODUCT_INTEL_WL_8260_1 0x24f3 #define PCI_PRODUCT_INTEL_WL_8260_2 0x24f4 static const struct iwm_devices { uint16_t device; const char *name; } iwm_devices[] = { { PCI_PRODUCT_INTEL_WL_3160_1, "Intel Dual Band Wireless AC 3160" }, { PCI_PRODUCT_INTEL_WL_3160_2, "Intel Dual Band Wireless AC 3160" }, { PCI_PRODUCT_INTEL_WL_3165_1, "Intel Dual Band Wireless AC 3165" }, { PCI_PRODUCT_INTEL_WL_3165_2, "Intel Dual Band Wireless AC 3165" }, { PCI_PRODUCT_INTEL_WL_7260_1, "Intel Dual Band Wireless AC 7260" }, { PCI_PRODUCT_INTEL_WL_7260_2, "Intel Dual Band Wireless AC 7260" }, { PCI_PRODUCT_INTEL_WL_7265_1, "Intel Dual Band Wireless AC 7265" }, { PCI_PRODUCT_INTEL_WL_7265_2, "Intel Dual Band Wireless AC 7265" }, { PCI_PRODUCT_INTEL_WL_8260_1, "Intel Dual Band Wireless AC 8260" }, { PCI_PRODUCT_INTEL_WL_8260_2, "Intel Dual Band Wireless AC 8260" }, }; static int iwm_probe(device_t dev) { int i; for (i = 0; i < nitems(iwm_devices); i++) { if (pci_get_vendor(dev) == PCI_VENDOR_INTEL && pci_get_device(dev) == iwm_devices[i].device) { device_set_desc(dev, iwm_devices[i].name); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int iwm_dev_check(device_t dev) { struct iwm_softc *sc; sc = device_get_softc(dev); sc->sc_hw_rev = IWM_READ(sc, IWM_CSR_HW_REV); switch (pci_get_device(dev)) { case PCI_PRODUCT_INTEL_WL_3160_1: case PCI_PRODUCT_INTEL_WL_3160_2: sc->sc_fwname = "iwm3160fw"; sc->host_interrupt_operation_mode = 1; sc->sc_device_family = IWM_DEVICE_FAMILY_7000; sc->sc_fwdmasegsz = IWM_FWDMASEGSZ; return (0); case PCI_PRODUCT_INTEL_WL_3165_1: case PCI_PRODUCT_INTEL_WL_3165_2: sc->sc_fwname = "iwm7265fw"; sc->host_interrupt_operation_mode = 0; sc->sc_device_family = IWM_DEVICE_FAMILY_7000; sc->sc_fwdmasegsz = IWM_FWDMASEGSZ; return (0); case PCI_PRODUCT_INTEL_WL_7260_1: case PCI_PRODUCT_INTEL_WL_7260_2: sc->sc_fwname = "iwm7260fw"; sc->host_interrupt_operation_mode = 1; sc->sc_device_family = IWM_DEVICE_FAMILY_7000; sc->sc_fwdmasegsz = IWM_FWDMASEGSZ; return (0); case PCI_PRODUCT_INTEL_WL_7265_1: case PCI_PRODUCT_INTEL_WL_7265_2: sc->sc_fwname = "iwm7265fw"; sc->host_interrupt_operation_mode = 0; sc->sc_device_family = IWM_DEVICE_FAMILY_7000; sc->sc_fwdmasegsz = IWM_FWDMASEGSZ; return (0); case 
PCI_PRODUCT_INTEL_WL_8260_1: case PCI_PRODUCT_INTEL_WL_8260_2: sc->sc_fwname = "iwm8000Cfw"; sc->host_interrupt_operation_mode = 0; sc->sc_device_family = IWM_DEVICE_FAMILY_8000; sc->sc_fwdmasegsz = IWM_FWDMASEGSZ_8000; return (0); default: device_printf(dev, "unknown adapter type\n"); return ENXIO; } } static int iwm_pci_attach(device_t dev) { struct iwm_softc *sc; int count, error, rid; uint16_t reg; sc = device_get_softc(dev); /* Clear device-specific "PCI retry timeout" register (41h). */ reg = pci_read_config(dev, 0x40, sizeof(reg)); pci_write_config(dev, 0x40, reg & ~0xff00, sizeof(reg)); /* Enable bus-mastering and hardware bug workaround. */ pci_enable_busmaster(dev); reg = pci_read_config(dev, PCIR_STATUS, sizeof(reg)); /* if !MSI */ if (reg & PCIM_STATUS_INTxSTATE) { reg &= ~PCIM_STATUS_INTxSTATE; } pci_write_config(dev, PCIR_STATUS, reg, sizeof(reg)); rid = PCIR_BAR(0); sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->sc_mem == NULL) { device_printf(sc->sc_dev, "can't map mem space\n"); return (ENXIO); } sc->sc_st = rman_get_bustag(sc->sc_mem); sc->sc_sh = rman_get_bushandle(sc->sc_mem); /* Install interrupt handler. */ count = 1; rid = 0; if (pci_alloc_msi(dev, &count) == 0) rid = 1; sc->sc_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | (rid != 0 ? 0 : RF_SHAREABLE)); if (sc->sc_irq == NULL) { device_printf(dev, "can't map interrupt\n"); return (ENXIO); } error = bus_setup_intr(dev, sc->sc_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, iwm_intr, sc, &sc->sc_ih); if (sc->sc_ih == NULL) { device_printf(dev, "can't establish interrupt"); return (ENXIO); } sc->sc_dmat = bus_get_dma_tag(sc->sc_dev); return (0); } static void iwm_pci_detach(device_t dev) { struct iwm_softc *sc = device_get_softc(dev); if (sc->sc_irq != NULL) { bus_teardown_intr(dev, sc->sc_irq, sc->sc_ih); bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->sc_irq), sc->sc_irq); pci_release_msi(dev); } if (sc->sc_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->sc_mem), sc->sc_mem); } static int iwm_attach(device_t dev) { struct iwm_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; int error; int txq_i, i; sc->sc_dev = dev; IWM_LOCK_INIT(sc); mbufq_init(&sc->sc_snd, ifqmaxlen); callout_init_mtx(&sc->sc_watchdog_to, &sc->sc_mtx, 0); callout_init_mtx(&sc->sc_led_blink_to, &sc->sc_mtx, 0); TASK_INIT(&sc->sc_es_task, 0, iwm_endscan_cb, sc); /* PCI attach */ error = iwm_pci_attach(dev); if (error != 0) goto fail; sc->sc_wantresp = -1; /* Check device type */ error = iwm_dev_check(dev); if (error != 0) goto fail; /* * We now start fiddling with the hardware */ /* * In the 8000 HW family the format of the 4 bytes of CSR_HW_REV have * changed, and now the revision step also includes bit 0-1 (no more * "dash" value). To keep hw_rev backwards compatible - we'll store it * in the old format. */ if (sc->sc_device_family == IWM_DEVICE_FAMILY_8000) sc->sc_hw_rev = (sc->sc_hw_rev & 0xfff0) | (IWM_CSR_HW_REV_STEP(sc->sc_hw_rev << 2) << 2); if (iwm_prepare_card_hw(sc) != 0) { device_printf(dev, "could not initialize hardware\n"); goto fail; } if (sc->sc_device_family == IWM_DEVICE_FAMILY_8000) { int ret; uint32_t hw_step; /* * In order to recognize C step the driver should read the * chip version id located at the AUX bus MISC address. 
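 *
 * The sequence below: request INIT_DONE, poll for the MAC clock to
 * become ready, then under the NIC lock enable WFPM, read
 * IWM_AUX_MISC_REG through the periphery register interface and
 * extract the step bits; a value of 0x3 means a C-step part, and
 * the stored hw_rev is patched to reflect that.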
*/ IWM_SETBITS(sc, IWM_CSR_GP_CNTRL, IWM_CSR_GP_CNTRL_REG_FLAG_INIT_DONE); DELAY(2); ret = iwm_poll_bit(sc, IWM_CSR_GP_CNTRL, IWM_CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, IWM_CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, 25000); if (ret < 0) { device_printf(sc->sc_dev, "Failed to wake up the nic\n"); goto fail; } if (iwm_nic_lock(sc)) { hw_step = iwm_read_prph(sc, IWM_WFPM_CTRL_REG); hw_step |= IWM_ENABLE_WFPM; iwm_write_prph(sc, IWM_WFPM_CTRL_REG, hw_step); hw_step = iwm_read_prph(sc, IWM_AUX_MISC_REG); hw_step = (hw_step >> IWM_HW_STEP_LOCATION_BITS) & 0xF; if (hw_step == 0x3) sc->sc_hw_rev = (sc->sc_hw_rev & 0xFFFFFFF3) | (IWM_SILICON_C_STEP << 2); iwm_nic_unlock(sc); } else { device_printf(sc->sc_dev, "Failed to lock the nic\n"); goto fail; } } /* Allocate DMA memory for firmware transfers. */ if ((error = iwm_alloc_fwmem(sc)) != 0) { device_printf(dev, "could not allocate memory for firmware\n"); goto fail; } /* Allocate "Keep Warm" page. */ if ((error = iwm_alloc_kw(sc)) != 0) { device_printf(dev, "could not allocate keep warm page\n"); goto fail; } /* We use ICT interrupts */ if ((error = iwm_alloc_ict(sc)) != 0) { device_printf(dev, "could not allocate ICT table\n"); goto fail; } /* Allocate TX scheduler "rings". */ if ((error = iwm_alloc_sched(sc)) != 0) { device_printf(dev, "could not allocate TX scheduler rings\n"); goto fail; } /* Allocate TX rings */ for (txq_i = 0; txq_i < nitems(sc->txq); txq_i++) { if ((error = iwm_alloc_tx_ring(sc, &sc->txq[txq_i], txq_i)) != 0) { device_printf(dev, "could not allocate TX ring %d\n", txq_i); goto fail; } } /* Allocate RX ring. */ if ((error = iwm_alloc_rx_ring(sc, &sc->rxq)) != 0) { device_printf(dev, "could not allocate RX ring\n"); goto fail; } /* Clear pending interrupts. */ IWM_WRITE(sc, IWM_CSR_INT, 0xffffffff); ic->ic_softc = sc; ic->ic_name = device_get_nameunit(sc->sc_dev); ic->ic_phytype = IEEE80211_T_OFDM; /* not only, but not used */ ic->ic_opmode = IEEE80211_M_STA; /* default to BSS mode */ /* Set device capabilities. */ ic->ic_caps = IEEE80211_C_STA | IEEE80211_C_WPA | /* WPA/RSN */ IEEE80211_C_WME | IEEE80211_C_SHSLOT | /* short slot time supported */ IEEE80211_C_SHPREAMBLE /* short preamble supported */ // IEEE80211_C_BGSCAN /* capable of bg scanning */ ; for (i = 0; i < nitems(sc->sc_phyctxt); i++) { sc->sc_phyctxt[i].id = i; sc->sc_phyctxt[i].color = 0; sc->sc_phyctxt[i].ref = 0; sc->sc_phyctxt[i].channel = NULL; } /* Max RSSI */ sc->sc_max_rssi = IWM_MAX_DBM - IWM_MIN_DBM; sc->sc_preinit_hook.ich_func = iwm_preinit; sc->sc_preinit_hook.ich_arg = sc; if (config_intrhook_establish(&sc->sc_preinit_hook) != 0) { device_printf(dev, "config_intrhook_establish failed\n"); goto fail; } #ifdef IWM_DEBUG SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLFLAG_RW, &sc->sc_debug, 0, "control debugging"); #endif IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "<-%s\n", __func__); return 0; /* Free allocated memory if something failed during attachment. 
*/ fail: iwm_detach_local(sc, 0); return ENXIO; } static int iwm_is_valid_ether_addr(uint8_t *addr) { char zero_addr[IEEE80211_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || IEEE80211_ADDR_EQ(zero_addr, addr)) return (FALSE); return (TRUE); } static int iwm_update_edca(struct ieee80211com *ic) { struct iwm_softc *sc = ic->ic_softc; device_printf(sc->sc_dev, "%s: called\n", __func__); return (0); } static void iwm_preinit(void *arg) { struct iwm_softc *sc = arg; device_t dev = sc->sc_dev; struct ieee80211com *ic = &sc->sc_ic; int error; IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "->%s\n", __func__); IWM_LOCK(sc); if ((error = iwm_start_hw(sc)) != 0) { device_printf(dev, "could not initialize hardware\n"); IWM_UNLOCK(sc); goto fail; } error = iwm_run_init_mvm_ucode(sc, 1); iwm_stop_device(sc); if (error) { IWM_UNLOCK(sc); goto fail; } device_printf(dev, "hw rev 0x%x, fw ver %s, address %s\n", sc->sc_hw_rev & IWM_CSR_HW_REV_TYPE_MSK, sc->sc_fwver, ether_sprintf(sc->sc_nvm.hw_addr)); /* not all hardware can do 5GHz band */ if (!sc->sc_nvm.sku_cap_band_52GHz_enable) memset(&ic->ic_sup_rates[IEEE80211_MODE_11A], 0, sizeof(ic->ic_sup_rates[IEEE80211_MODE_11A])); IWM_UNLOCK(sc); iwm_init_channel_map(ic, IEEE80211_CHAN_MAX, &ic->ic_nchans, ic->ic_channels); /* * At this point we've committed - if we fail to do setup, * we now also have to tear down the net80211 state. */ ieee80211_ifattach(ic); ic->ic_vap_create = iwm_vap_create; ic->ic_vap_delete = iwm_vap_delete; ic->ic_raw_xmit = iwm_raw_xmit; ic->ic_node_alloc = iwm_node_alloc; ic->ic_scan_start = iwm_scan_start; ic->ic_scan_end = iwm_scan_end; ic->ic_update_mcast = iwm_update_mcast; ic->ic_getradiocaps = iwm_init_channel_map; ic->ic_set_channel = iwm_set_channel; ic->ic_scan_curchan = iwm_scan_curchan; ic->ic_scan_mindwell = iwm_scan_mindwell; ic->ic_wme.wme_update = iwm_update_edca; ic->ic_parent = iwm_parent; ic->ic_transmit = iwm_transmit; iwm_radiotap_attach(sc); if (bootverbose) ieee80211_announce(ic); IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "<-%s\n", __func__); config_intrhook_disestablish(&sc->sc_preinit_hook); return; fail: config_intrhook_disestablish(&sc->sc_preinit_hook); iwm_detach_local(sc, 0); } /* * Attach the interface to 802.11 radiotap. */ static void iwm_radiotap_attach(struct iwm_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "->%s begin\n", __func__); ieee80211_radiotap_attach(ic, &sc->sc_txtap.wt_ihdr, sizeof(sc->sc_txtap), IWM_TX_RADIOTAP_PRESENT, &sc->sc_rxtap.wr_ihdr, sizeof(sc->sc_rxtap), IWM_RX_RADIOTAP_PRESENT); IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "->%s end\n", __func__); } static struct ieee80211vap * iwm_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct iwm_vap *ivp; struct ieee80211vap *vap; if (!TAILQ_EMPTY(&ic->ic_vaps)) /* only one at a time */ return NULL; ivp = malloc(sizeof(struct iwm_vap), M_80211_VAP, M_WAITOK | M_ZERO); vap = &ivp->iv_vap; ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); vap->iv_bmissthreshold = 10; /* override default */ /* Override with driver methods. */ ivp->iv_newstate = vap->iv_newstate; vap->iv_newstate = iwm_newstate; ieee80211_ratectl_init(vap); /* Complete setup. 
*/ ieee80211_vap_attach(vap, iwm_media_change, ieee80211_media_status, mac); ic->ic_opmode = opmode; return vap; } static void iwm_vap_delete(struct ieee80211vap *vap) { struct iwm_vap *ivp = IWM_VAP(vap); ieee80211_ratectl_deinit(vap); ieee80211_vap_detach(vap); free(ivp, M_80211_VAP); } static void iwm_scan_start(struct ieee80211com *ic) { struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct iwm_softc *sc = ic->ic_softc; int error; IWM_LOCK(sc); if (isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_UMAC_SCAN)) error = iwm_mvm_umac_scan(sc); else error = iwm_mvm_lmac_scan(sc); if (error != 0) { device_printf(sc->sc_dev, "could not initiate 2 GHz scan\n"); IWM_UNLOCK(sc); ieee80211_cancel_scan(vap); } else { iwm_led_blink_start(sc); IWM_UNLOCK(sc); } } static void iwm_scan_end(struct ieee80211com *ic) { struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct iwm_softc *sc = ic->ic_softc; IWM_LOCK(sc); iwm_led_blink_stop(sc); if (vap->iv_state == IEEE80211_S_RUN) iwm_mvm_led_enable(sc); IWM_UNLOCK(sc); } static void iwm_update_mcast(struct ieee80211com *ic) { } static void iwm_set_channel(struct ieee80211com *ic) { } static void iwm_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell) { } static void iwm_scan_mindwell(struct ieee80211_scan_state *ss) { return; } void iwm_init_task(void *arg1) { struct iwm_softc *sc = arg1; IWM_LOCK(sc); while (sc->sc_flags & IWM_FLAG_BUSY) msleep(&sc->sc_flags, &sc->sc_mtx, 0, "iwmpwr", 0); sc->sc_flags |= IWM_FLAG_BUSY; iwm_stop(sc); if (sc->sc_ic.ic_nrunning > 0) iwm_init(sc); sc->sc_flags &= ~IWM_FLAG_BUSY; wakeup(&sc->sc_flags); IWM_UNLOCK(sc); } static int iwm_resume(device_t dev) { struct iwm_softc *sc = device_get_softc(dev); int do_reinit = 0; uint16_t reg; /* Clear device-specific "PCI retry timeout" register (41h). */ reg = pci_read_config(dev, 0x40, sizeof(reg)); pci_write_config(dev, 0x40, reg & ~0xff00, sizeof(reg)); iwm_init_task(device_get_softc(dev)); IWM_LOCK(sc); if (sc->sc_flags & IWM_FLAG_SCANNING) { sc->sc_flags &= ~IWM_FLAG_SCANNING; do_reinit = 1; } IWM_UNLOCK(sc); if (do_reinit) ieee80211_resume_all(&sc->sc_ic); return 0; } static int iwm_suspend(device_t dev) { int do_stop = 0; struct iwm_softc *sc = device_get_softc(dev); do_stop = !! 
(sc->sc_ic.ic_nrunning > 0); ieee80211_suspend_all(&sc->sc_ic); if (do_stop) { IWM_LOCK(sc); iwm_stop(sc); sc->sc_flags |= IWM_FLAG_SCANNING; IWM_UNLOCK(sc); } return (0); } static int iwm_detach_local(struct iwm_softc *sc, int do_net80211) { struct iwm_fw_info *fw = &sc->sc_fw; device_t dev = sc->sc_dev; int i; ieee80211_draintask(&sc->sc_ic, &sc->sc_es_task); callout_drain(&sc->sc_led_blink_to); callout_drain(&sc->sc_watchdog_to); iwm_stop_device(sc); if (do_net80211) { ieee80211_ifdetach(&sc->sc_ic); } iwm_phy_db_free(sc); /* Free descriptor rings */ iwm_free_rx_ring(sc, &sc->rxq); for (i = 0; i < nitems(sc->txq); i++) iwm_free_tx_ring(sc, &sc->txq[i]); /* Free firmware */ if (fw->fw_fp != NULL) iwm_fw_info_free(fw); /* Free scheduler */ iwm_free_sched(sc); if (sc->ict_dma.vaddr != NULL) iwm_free_ict(sc); if (sc->kw_dma.vaddr != NULL) iwm_free_kw(sc); if (sc->fw_dma.vaddr != NULL) iwm_free_fwmem(sc); /* Finished with the hardware - detach things */ iwm_pci_detach(dev); mbufq_drain(&sc->sc_snd); IWM_LOCK_DESTROY(sc); return (0); } static int iwm_detach(device_t dev) { struct iwm_softc *sc = device_get_softc(dev); return (iwm_detach_local(sc, 1)); } static device_method_t iwm_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, iwm_probe), DEVMETHOD(device_attach, iwm_attach), DEVMETHOD(device_detach, iwm_detach), DEVMETHOD(device_suspend, iwm_suspend), DEVMETHOD(device_resume, iwm_resume), DEVMETHOD_END }; static driver_t iwm_pci_driver = { "iwm", iwm_pci_methods, sizeof (struct iwm_softc) }; static devclass_t iwm_devclass; DRIVER_MODULE(iwm, pci, iwm_pci_driver, iwm_devclass, NULL, NULL); MODULE_DEPEND(iwm, firmware, 1, 1, 1); MODULE_DEPEND(iwm, pci, 1, 1, 1); MODULE_DEPEND(iwm, wlan, 1, 1, 1); Index: user/alc/PQ_LAUNDRY/sys/dev/ntb/if_ntb/if_ntb.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/ntb/if_ntb/if_ntb.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/ntb/if_ntb/if_ntb.c (revision 303642) @@ -1,517 +1,516 @@ /*- * Copyright (c) 2016 Alexander Motin * Copyright (C) 2013 Intel Corporation * Copyright (C) 2015 EMC Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ /* * The Non-Transparent Bridge (NTB) is a device that allows you to connect * two or more systems using a PCI-e links, providing remote memory access. * * This module contains a driver for simulated Ethernet device, using * underlying NTB Transport device. * * NOTE: Much of the code in this module is shared with Linux. Any patches may * be picked up and redistributed in Linux with a dual GPL/BSD license. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../ntb_transport.h" #define KTR_NTB KTR_SPARE3 #define NTB_MEDIATYPE (IFM_ETHER | IFM_AUTO | IFM_FDX) #define NTB_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP) #define NTB_CSUM_FEATURES6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6) #define NTB_CSUM_SET (CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \ CSUM_PSEUDO_HDR | \ CSUM_IP_CHECKED | CSUM_IP_VALID | \ CSUM_SCTP_VALID) static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW, 0, "if_ntb"); static unsigned g_if_ntb_num_queues = UINT_MAX; SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN, &g_if_ntb_num_queues, 0, "Number of queues per interface"); struct ntb_net_queue { struct ntb_net_ctx *sc; if_t ifp; struct ntb_transport_qp *qp; struct buf_ring *br; struct task tx_task; struct taskqueue *tx_tq; struct mtx tx_lock; struct callout queue_full; }; struct ntb_net_ctx { if_t ifp; struct ifmedia media; u_char eaddr[ETHER_ADDR_LEN]; int num_queues; struct ntb_net_queue *queues; int mtu; }; static int ntb_net_probe(device_t dev); static int ntb_net_attach(device_t dev); static int ntb_net_detach(device_t dev); static void ntb_net_init(void *arg); static int ntb_ifmedia_upd(struct ifnet *); static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *); static int ntb_ioctl(if_t ifp, u_long command, caddr_t data); static int ntb_transmit(if_t ifp, struct mbuf *m); static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len); static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len); static void ntb_net_event_handler(void *data, enum ntb_link_event status); static void ntb_handle_tx(void *arg, int pending); static void ntb_qp_full(void *arg); static void ntb_qflush(if_t ifp); static void create_random_local_eui48(u_char *eaddr); static int ntb_net_probe(device_t dev) { device_set_desc(dev, "NTB Network Interface"); return (0); } static int ntb_net_attach(device_t dev) { struct ntb_net_ctx *sc = device_get_softc(dev); struct ntb_net_queue *q; if_t ifp; struct ntb_queue_handlers handlers = { ntb_net_rx_handler, ntb_net_tx_handler, ntb_net_event_handler }; int i; ifp = sc->ifp = if_gethandle(IFT_ETHER); if (ifp == NULL) { printf("ntb: Cannot allocate ifnet structure\n"); return (ENOMEM); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setdev(ifp, dev); sc->num_queues = min(g_if_ntb_num_queues, ntb_transport_queue_count(dev)); sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue), M_DEVBUF, M_WAITOK | M_ZERO); sc->mtu = INT_MAX; for (i = 0; i < sc->num_queues; i++) { q = &sc->queues[i]; q->sc = sc; q->ifp = ifp; q->qp = ntb_transport_create_queue(dev, i, &handlers, q); if (q->qp == NULL) break; sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp)); mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF); q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock); TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q); q->tx_tq = 
taskqueue_create_fast("ntb_txq", M_NOWAIT, taskqueue_thread_enqueue, &q->tx_tq); taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d", device_get_nameunit(dev), i); callout_init(&q->queue_full, 1); } sc->num_queues = i; device_printf(dev, "%d queue(s)\n", sc->num_queues); if_setinitfn(ifp, ntb_net_init); if_setsoftc(ifp, sc); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); if_setioctlfn(ifp, ntb_ioctl); if_settransmitfn(ifp, ntb_transmit); if_setqflushfn(ifp, ntb_qflush); create_random_local_eui48(sc->eaddr); ether_ifattach(ifp, sc->eaddr); if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | IFCAP_JUMBO_MTU | IFCAP_LINKSTATE); if_setcapenable(ifp, IFCAP_JUMBO_MTU | IFCAP_LINKSTATE); if_setmtu(ifp, sc->mtu - ETHER_HDR_LEN); ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd, ntb_ifmedia_sts); ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL); ifmedia_set(&sc->media, NTB_MEDIATYPE); for (i = 0; i < sc->num_queues; i++) ntb_transport_link_up(sc->queues[i].qp); return (0); } static int ntb_net_detach(device_t dev) { struct ntb_net_ctx *sc = device_get_softc(dev); struct ntb_net_queue *q; int i; for (i = 0; i < sc->num_queues; i++) ntb_transport_link_down(sc->queues[i].qp); ether_ifdetach(sc->ifp); if_free(sc->ifp); ifmedia_removeall(&sc->media); for (i = 0; i < sc->num_queues; i++) { q = &sc->queues[i]; ntb_transport_free_queue(q->qp); buf_ring_free(q->br, M_DEVBUF); callout_drain(&q->queue_full); taskqueue_drain_all(q->tx_tq); mtx_destroy(&q->tx_lock); } free(sc->queues, M_DEVBUF); return (0); } /* Network device interface */ static void ntb_net_init(void *arg) { struct ntb_net_ctx *sc = arg; if_t ifp = sc->ifp; if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); if_link_state_change(ifp, ntb_transport_link_query(sc->queues[0].qp) ? 
LINK_STATE_UP : LINK_STATE_DOWN); } static int ntb_ioctl(if_t ifp, u_long command, caddr_t data) { struct ntb_net_ctx *sc = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; int error = 0; switch (command) { case SIOCSIFMTU: { if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) { error = EINVAL; break; } if_setmtu(ifp, ifr->ifr_mtu); break; } case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->media, command); break; case SIOCSIFCAP: if (ifr->ifr_reqcap & IFCAP_RXCSUM) if_setcapenablebit(ifp, IFCAP_RXCSUM, 0); else if_setcapenablebit(ifp, 0, IFCAP_RXCSUM); if (ifr->ifr_reqcap & IFCAP_TXCSUM) { if_setcapenablebit(ifp, IFCAP_TXCSUM, 0); if_sethwassistbits(ifp, NTB_CSUM_FEATURES, 0); } else { if_setcapenablebit(ifp, 0, IFCAP_TXCSUM); if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES); } if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6) if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0); else if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6); if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) { if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0); if_sethwassistbits(ifp, NTB_CSUM_FEATURES6, 0); } else { if_setcapenablebit(ifp, 0, IFCAP_TXCSUM_IPV6); if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES6); } break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int ntb_ifmedia_upd(struct ifnet *ifp) { struct ntb_net_ctx *sc = if_getsoftc(ifp); struct ifmedia *ifm = &sc->media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); return (0); } static void ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct ntb_net_ctx *sc = if_getsoftc(ifp); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = NTB_MEDIATYPE; if (ntb_transport_link_query(sc->queues[0].qp)) ifmr->ifm_status |= IFM_ACTIVE; } static void ntb_transmit_locked(struct ntb_net_queue *q) { if_t ifp = q->ifp; struct mbuf *m; int rc, len; short mflags; CTR0(KTR_NTB, "TX: ntb_transmit_locked"); while ((m = drbr_peek(ifp, q->br)) != NULL) { CTR1(KTR_NTB, "TX: start mbuf %p", m); if_etherbpfmtap(ifp, m); len = m->m_pkthdr.len; mflags = m->m_flags; rc = ntb_transport_tx_enqueue(q->qp, m, m, len); if (rc != 0) { CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc); if (rc == EAGAIN) { drbr_putback(ifp, q->br, m); callout_reset_sbt(&q->queue_full, SBT_1MS / 4, SBT_1MS / 4, ntb_qp_full, q, 0); } else { m_freem(m); drbr_advance(ifp, q->br); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } break; } drbr_advance(ifp, q->br); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if (mflags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } } static int ntb_transmit(if_t ifp, struct mbuf *m) { struct ntb_net_ctx *sc = if_getsoftc(ifp); struct ntb_net_queue *q; int error, i; CTR0(KTR_NTB, "TX: ntb_transmit"); if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % sc->num_queues; else i = curcpu % sc->num_queues; q = &sc->queues[i]; error = drbr_enqueue(ifp, q->br, m); if (error) return (error); if (mtx_trylock(&q->tx_lock)) { ntb_transmit_locked(q); mtx_unlock(&q->tx_lock); } else taskqueue_enqueue(q->tx_tq, &q->tx_task); return (0); } static void ntb_handle_tx(void *arg, int pending) { struct ntb_net_queue *q = arg; mtx_lock(&q->tx_lock); ntb_transmit_locked(q); mtx_unlock(&q->tx_lock); } static void ntb_qp_full(void *arg) { struct ntb_net_queue *q = arg; CTR0(KTR_NTB, "TX: qp_full callout"); if (ntb_transport_tx_free_entry(q->qp) > 0) taskqueue_enqueue(q->tx_tq, &q->tx_task); else callout_schedule_sbt(&q->queue_full, SBT_1MS / 4, SBT_1MS / 4, 0); } static void 
ntb_qflush(if_t ifp) { struct ntb_net_ctx *sc = if_getsoftc(ifp); struct ntb_net_queue *q; struct mbuf *m; int i; for (i = 0; i < sc->num_queues; i++) { q = &sc->queues[i]; mtx_lock(&q->tx_lock); while ((m = buf_ring_dequeue_sc(q->br)) != NULL) m_freem(m); mtx_unlock(&q->tx_lock); } if_qflush(ifp); } /* Network Device Callbacks */ static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len) { m_freem(data); CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data); } static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len) { struct ntb_net_queue *q = qp_data; struct ntb_net_ctx *sc = q->sc; struct mbuf *m = data; if_t ifp = q->ifp; uint16_t proto; CTR1(KTR_NTB, "RX: rx handler (%d)", len); if (len < 0) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } m->m_pkthdr.rcvif = ifp; if (sc->num_queues > 1) { m->m_pkthdr.flowid = q - sc->queues; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); } if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { m_copydata(m, 12, 2, (void *)&proto); switch (ntohs(proto)) { case ETHERTYPE_IP: if (if_getcapenable(ifp) & IFCAP_RXCSUM) { m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = NTB_CSUM_SET; } break; case ETHERTYPE_IPV6: if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) { m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = NTB_CSUM_SET; } break; } } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_input(ifp, m); } static void ntb_net_event_handler(void *data, enum ntb_link_event status) { struct ntb_net_queue *q = data; int new_state; switch (status) { case NTB_LINK_DOWN: new_state = LINK_STATE_DOWN; break; case NTB_LINK_UP: new_state = LINK_STATE_UP; break; default: new_state = LINK_STATE_UNKNOWN; break; } if_link_state_change(q->ifp, new_state); } /* Helper functions */ /* TODO: This too should really be part of the kernel */ #define EUI48_MULTICAST 1 << 0 #define EUI48_LOCALLY_ADMINISTERED 1 << 1 static void create_random_local_eui48(u_char *eaddr) { static uint8_t counter = 0; - uint32_t seed = ticks; eaddr[0] = EUI48_LOCALLY_ADMINISTERED; - memcpy(&eaddr[1], &seed, sizeof(uint32_t)); + arc4rand(&eaddr[1], 4, 0); eaddr[5] = counter++; } static device_method_t ntb_net_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ntb_net_probe), DEVMETHOD(device_attach, ntb_net_attach), DEVMETHOD(device_detach, ntb_net_detach), DEVMETHOD_END }; devclass_t ntb_net_devclass; static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods, sizeof(struct ntb_net_ctx)); DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, ntb_net_devclass, NULL, NULL); MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1); MODULE_VERSION(if_ntb, 1); Index: user/alc/PQ_LAUNDRY/sys/dev/ntb/ntb.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/ntb/ntb.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/ntb/ntb.c (revision 303642) @@ -1,462 +1,462 @@ /*- * Copyright (c) 2016 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "ntb.h" devclass_t ntb_hw_devclass; SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls"); struct ntb_child { device_t dev; int enabled; int mwoff; int mwcnt; int spadoff; int spadcnt; int dboff; int dbmask; void *ctx; const struct ntb_ctx_ops *ctx_ops; struct rmlock ctx_lock; struct ntb_child *next; }; int ntb_register_device(device_t dev) { struct ntb_child **cpp = device_get_softc(dev); struct ntb_child *nc; int i, mw, mwu, mwt, spad, spadu, spadt, db, dbu, dbt; char cfg[128] = ""; char buf[32]; char *n, *np, *c, *p, *name; mwu = 0; mwt = NTB_MW_COUNT(dev); spadu = 0; spadt = NTB_SPAD_COUNT(dev); dbu = 0; dbt = flsll(NTB_DB_VALID_MASK(dev)); device_printf(dev, "%d memory windows, %d scratchpads, " "%d doorbells\n", mwt, spadt, dbt); snprintf(buf, sizeof(buf), "hint.%s.%d.config", device_get_name(dev), device_get_unit(dev)); TUNABLE_STR_FETCH(buf, cfg, sizeof(cfg)); n = cfg; i = 0; while ((c = strsep(&n, ",")) != NULL) { np = c; name = strsep(&np, ":"); if (name != NULL && name[0] == 0) name = NULL; p = strsep(&np, ":"); mw = (p && p[0] != 0) ? strtol(p, NULL, 10) : mwt - mwu; p = strsep(&np, ":"); spad = (p && p[0] != 0) ? strtol(p, NULL, 10) : spadt - spadu; db = (np && np[0] != 0) ? strtol(np, NULL, 10) : dbt - dbu; if (mw > mwt - mwu || spad > spadt - spadu || db > dbt - dbu) { device_printf(dev, "Not enough resources for config\n"); break; } nc = malloc(sizeof(*nc), M_DEVBUF, M_WAITOK | M_ZERO); nc->mwoff = mwu; nc->mwcnt = mw; nc->spadoff = spadu; nc->spadcnt = spad; nc->dboff = dbu; nc->dbmask = (db == 0) ? 
0 : (0xffffffffffffffff >> (64 - db)); rm_init(&nc->ctx_lock, "ntb ctx"); nc->dev = device_add_child(dev, name, -1); if (nc->dev == NULL) { ntb_unregister_device(dev); return (ENOMEM); } device_set_ivars(nc->dev, nc); *cpp = nc; cpp = &nc->next; if (bootverbose) { device_printf(dev, "%d \"%s\":", i, name); if (mw > 0) { printf(" memory windows %d", mwu); if (mw > 1) printf("-%d", mwu + mw - 1); } if (spad > 0) { printf(" scratchpads %d", spadu); if (spad > 1) printf("-%d", spadu + spad - 1); } if (db > 0) { printf(" doorbells %d", dbu); if (db > 1) printf("-%d", dbu + db - 1); } printf("\n"); } mwu += mw; spadu += spad; dbu += db; i++; } bus_generic_attach(dev); return (0); } int ntb_unregister_device(device_t dev) { struct ntb_child **cpp = device_get_softc(dev); struct ntb_child *nc; int error = 0; while ((nc = *cpp) != NULL) { *cpp = (*cpp)->next; error = device_delete_child(dev, nc->dev); if (error) break; rm_destroy(&nc->ctx_lock); free(nc, M_DEVBUF); } return (error); } void ntb_link_event(device_t dev) { struct ntb_child **cpp = device_get_softc(dev); struct ntb_child *nc; struct rm_priotracker ctx_tracker; for (nc = *cpp; nc != NULL; nc = nc->next) { rm_rlock(&nc->ctx_lock, &ctx_tracker); if (nc->ctx_ops != NULL && nc->ctx_ops->link_event != NULL) nc->ctx_ops->link_event(nc->ctx); rm_runlock(&nc->ctx_lock, &ctx_tracker); } } void ntb_db_event(device_t dev, uint32_t vec) { struct ntb_child **cpp = device_get_softc(dev); struct ntb_child *nc; struct rm_priotracker ctx_tracker; for (nc = *cpp; nc != NULL; nc = nc->next) { rm_rlock(&nc->ctx_lock, &ctx_tracker); if (nc->ctx_ops != NULL && nc->ctx_ops->db_event != NULL) nc->ctx_ops->db_event(nc->ctx, vec); rm_runlock(&nc->ctx_lock, &ctx_tracker); } } bool ntb_link_is_up(device_t ntb, enum ntb_speed *speed, enum ntb_width *width) { return (NTB_LINK_IS_UP(device_get_parent(ntb), speed, width)); } int ntb_link_enable(device_t ntb, enum ntb_speed speed, enum ntb_width width) { struct ntb_child *nc = device_get_ivars(ntb); struct ntb_child **cpp = device_get_softc(device_get_parent(nc->dev)); struct ntb_child *nc1; - for (nc1 = *cpp; nc1 != NULL; nc1 = nc->next) { + for (nc1 = *cpp; nc1 != NULL; nc1 = nc1->next) { if (nc1->enabled) { nc->enabled = 1; return (0); } } nc->enabled = 1; return (NTB_LINK_ENABLE(device_get_parent(ntb), speed, width)); } int ntb_link_disable(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); struct ntb_child **cpp = device_get_softc(device_get_parent(nc->dev)); struct ntb_child *nc1; if (!nc->enabled) return (0); nc->enabled = 0; - for (nc1 = *cpp; nc1 != NULL; nc1 = nc->next) { + for (nc1 = *cpp; nc1 != NULL; nc1 = nc1->next) { if (nc1->enabled) return (0); } return (NTB_LINK_DISABLE(device_get_parent(ntb))); } bool ntb_link_enabled(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); return (nc->enabled && NTB_LINK_ENABLED(device_get_parent(ntb))); } int ntb_set_ctx(device_t ntb, void *ctx, const struct ntb_ctx_ops *ctx_ops) { struct ntb_child *nc = device_get_ivars(ntb); if (ctx == NULL || ctx_ops == NULL) return (EINVAL); rm_wlock(&nc->ctx_lock); if (nc->ctx_ops != NULL) { rm_wunlock(&nc->ctx_lock); return (EINVAL); } nc->ctx = ctx; nc->ctx_ops = ctx_ops; rm_wunlock(&nc->ctx_lock); return (0); } void * ntb_get_ctx(device_t ntb, const struct ntb_ctx_ops **ctx_ops) { struct ntb_child *nc = device_get_ivars(ntb); KASSERT(nc->ctx != NULL && nc->ctx_ops != NULL, ("bogus")); if (ctx_ops != NULL) *ctx_ops = nc->ctx_ops; return (nc->ctx); } void ntb_clear_ctx(device_t ntb) { struct ntb_child *nc = 
device_get_ivars(ntb); rm_wlock(&nc->ctx_lock); nc->ctx = NULL; nc->ctx_ops = NULL; rm_wunlock(&nc->ctx_lock); } uint8_t ntb_mw_count(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); return (nc->mwcnt); } int ntb_mw_get_range(device_t ntb, unsigned mw_idx, vm_paddr_t *base, caddr_t *vbase, size_t *size, size_t *align, size_t *align_size, bus_addr_t *plimit) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_MW_GET_RANGE(device_get_parent(ntb), mw_idx + nc->mwoff, base, vbase, size, align, align_size, plimit)); } int ntb_mw_set_trans(device_t ntb, unsigned mw_idx, bus_addr_t addr, size_t size) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_MW_SET_TRANS(device_get_parent(ntb), mw_idx + nc->mwoff, addr, size)); } int ntb_mw_clear_trans(device_t ntb, unsigned mw_idx) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_MW_CLEAR_TRANS(device_get_parent(ntb), mw_idx + nc->mwoff)); } int ntb_mw_get_wc(device_t ntb, unsigned mw_idx, vm_memattr_t *mode) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_MW_GET_WC(device_get_parent(ntb), mw_idx + nc->mwoff, mode)); } int ntb_mw_set_wc(device_t ntb, unsigned mw_idx, vm_memattr_t mode) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_MW_SET_WC(device_get_parent(ntb), mw_idx + nc->mwoff, mode)); } uint8_t ntb_spad_count(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); return (nc->spadcnt); } void ntb_spad_clear(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); unsigned i; for (i = 0; i < nc->spadcnt; i++) NTB_SPAD_WRITE(device_get_parent(ntb), i + nc->spadoff, 0); } int ntb_spad_write(device_t ntb, unsigned int idx, uint32_t val) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_SPAD_WRITE(device_get_parent(ntb), idx + nc->spadoff, val)); } int ntb_spad_read(device_t ntb, unsigned int idx, uint32_t *val) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_SPAD_READ(device_get_parent(ntb), idx + nc->spadoff, val)); } int ntb_peer_spad_write(device_t ntb, unsigned int idx, uint32_t val) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_PEER_SPAD_WRITE(device_get_parent(ntb), idx + nc->spadoff, val)); } int ntb_peer_spad_read(device_t ntb, unsigned int idx, uint32_t *val) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_PEER_SPAD_READ(device_get_parent(ntb), idx + nc->spadoff, val)); } uint64_t ntb_db_valid_mask(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); return (nc->dbmask); } int ntb_db_vector_count(device_t ntb) { return (NTB_DB_VECTOR_COUNT(device_get_parent(ntb))); } uint64_t ntb_db_vector_mask(device_t ntb, uint32_t vector) { struct ntb_child *nc = device_get_ivars(ntb); return ((NTB_DB_VECTOR_MASK(device_get_parent(ntb), vector) >> nc->dboff) & nc->dbmask); } int ntb_peer_db_addr(device_t ntb, bus_addr_t *db_addr, vm_size_t *db_size) { return (NTB_PEER_DB_ADDR(device_get_parent(ntb), db_addr, db_size)); } void ntb_db_clear(device_t ntb, uint64_t bits) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_DB_CLEAR(device_get_parent(ntb), bits << nc->dboff)); } void ntb_db_clear_mask(device_t ntb, uint64_t bits) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_DB_CLEAR_MASK(device_get_parent(ntb), bits << nc->dboff)); } uint64_t ntb_db_read(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); return ((NTB_DB_READ(device_get_parent(ntb)) >> nc->dboff) & nc->dbmask); } void ntb_db_set_mask(device_t ntb, uint64_t bits) { struct ntb_child *nc = device_get_ivars(ntb); return 
(NTB_DB_SET_MASK(device_get_parent(ntb), bits << nc->dboff)); } void ntb_peer_db_set(device_t ntb, uint64_t bits) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_PEER_DB_SET(device_get_parent(ntb), bits << nc->dboff)); } MODULE_VERSION(ntb, 1); Index: user/alc/PQ_LAUNDRY/sys/dev/ntb/ntb_hw/ntb_hw.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/ntb/ntb_hw/ntb_hw.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/ntb/ntb_hw/ntb_hw.c (revision 303642) @@ -1,3095 +1,3114 @@ /*- * Copyright (c) 2016 Alexander Motin * Copyright (C) 2013 Intel Corporation * Copyright (C) 2015 EMC Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * The Non-Transparent Bridge (NTB) is a device that allows you to connect * two or more systems using a PCI-e links, providing remote memory access. * * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs. * * NOTE: Much of the code in this module is shared with Linux. Any patches may * be picked up and redistributed in Linux with a dual GPL/BSD license. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ntb_regs.h" #include "../ntb.h" #define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT) #define NTB_HB_TIMEOUT 1 /* second */ #define ATOM_LINK_RECOVERY_TIME 500 /* ms */ #define BAR_HIGH_MASK (~((1ull << 12) - 1)) #define NTB_MSIX_VER_GUARD 0xaabbccdd #define NTB_MSIX_RECEIVED 0xe0f0e0f0 /* * PCI constants could be somewhere more generic, but aren't defined/used in * pci.c. */ #define PCI_MSIX_ENTRY_SIZE 16 #define PCI_MSIX_ENTRY_LOWER_ADDR 0 #define PCI_MSIX_ENTRY_UPPER_ADDR 4 #define PCI_MSIX_ENTRY_DATA 8 enum ntb_device_type { NTB_XEON, NTB_ATOM }; /* ntb_conn_type are hardware numbers, cannot change. 
*/ enum ntb_conn_type { NTB_CONN_TRANSPARENT = 0, NTB_CONN_B2B = 1, NTB_CONN_RP = 2, }; enum ntb_b2b_direction { NTB_DEV_USD = 0, NTB_DEV_DSD = 1, }; enum ntb_bar { NTB_CONFIG_BAR = 0, NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3, NTB_MAX_BARS }; enum { NTB_MSIX_GUARD = 0, NTB_MSIX_DATA0, NTB_MSIX_DATA1, NTB_MSIX_DATA2, NTB_MSIX_OFS0, NTB_MSIX_OFS1, NTB_MSIX_OFS2, NTB_MSIX_DONE, NTB_MAX_MSIX_SPAD }; /* Device features and workarounds */ #define HAS_FEATURE(ntb, feature) \ (((ntb)->features & (feature)) != 0) struct ntb_hw_info { uint32_t device_id; const char *desc; enum ntb_device_type type; uint32_t features; }; struct ntb_pci_bar_info { bus_space_tag_t pci_bus_tag; bus_space_handle_t pci_bus_handle; int pci_resource_id; struct resource *pci_resource; vm_paddr_t pbase; caddr_t vbase; vm_size_t size; vm_memattr_t map_mode; /* Configuration register offsets */ uint32_t psz_off; uint32_t ssz_off; uint32_t pbarxlat_off; }; struct ntb_int_info { struct resource *res; int rid; void *tag; }; struct ntb_vec { struct ntb_softc *ntb; uint32_t num; unsigned masked; }; struct ntb_reg { uint32_t ntb_ctl; uint32_t lnk_sta; uint8_t db_size; unsigned mw_bar[NTB_MAX_BARS]; }; struct ntb_alt_reg { uint32_t db_bell; uint32_t db_mask; uint32_t spad; }; struct ntb_xlat_reg { uint32_t bar0_base; uint32_t bar2_base; uint32_t bar4_base; uint32_t bar5_base; uint32_t bar2_xlat; uint32_t bar4_xlat; uint32_t bar5_xlat; uint32_t bar2_limit; uint32_t bar4_limit; uint32_t bar5_limit; }; struct ntb_b2b_addr { uint64_t bar0_addr; uint64_t bar2_addr64; uint64_t bar4_addr64; uint64_t bar4_addr32; uint64_t bar5_addr32; }; struct ntb_msix_data { uint32_t nmd_ofs; uint32_t nmd_data; }; struct ntb_softc { /* ntb.c context. Do not move! Must go first! */ void *ntb_store; device_t device; enum ntb_device_type type; uint32_t features; struct ntb_pci_bar_info bar_info[NTB_MAX_BARS]; struct ntb_int_info int_info[MAX_MSIX_INTERRUPTS]; uint32_t allocated_interrupts; struct ntb_msix_data peer_msix_data[XEON_NONLINK_DB_MSIX_BITS]; struct ntb_msix_data msix_data[XEON_NONLINK_DB_MSIX_BITS]; bool peer_msix_good; bool peer_msix_done; struct ntb_pci_bar_info *peer_lapic_bar; struct callout peer_msix_work; struct callout heartbeat_timer; struct callout lr_timer; struct ntb_vec *msix_vec; uint32_t ppd; enum ntb_conn_type conn_type; enum ntb_b2b_direction dev_type; /* Offset of peer bar0 in B2B BAR */ uint64_t b2b_off; /* Memory window used to access peer bar0 */ #define B2B_MW_DISABLED UINT8_MAX uint8_t b2b_mw_idx; uint32_t msix_xlat; uint8_t msix_mw_idx; uint8_t mw_count; uint8_t spad_count; uint8_t db_count; uint8_t db_vec_count; uint8_t db_vec_shift; /* Protects local db_mask. 
*/ #define DB_MASK_LOCK(sc) mtx_lock_spin(&(sc)->db_mask_lock) #define DB_MASK_UNLOCK(sc) mtx_unlock_spin(&(sc)->db_mask_lock) #define DB_MASK_ASSERT(sc,f) mtx_assert(&(sc)->db_mask_lock, (f)) struct mtx db_mask_lock; volatile uint32_t ntb_ctl; volatile uint32_t lnk_sta; uint64_t db_valid_mask; uint64_t db_link_mask; uint64_t db_mask; uint64_t fake_db_bell; /* NTB_SB01BASE_LOCKUP*/ int last_ts; /* ticks @ last irq */ const struct ntb_reg *reg; const struct ntb_alt_reg *self_reg; const struct ntb_alt_reg *peer_reg; const struct ntb_xlat_reg *xlat_reg; }; #ifdef __i386__ static __inline uint64_t bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { return (bus_space_read_4(tag, handle, offset) | ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32); } static __inline void bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset, uint64_t val) { bus_space_write_4(tag, handle, offset, val); bus_space_write_4(tag, handle, offset + 4, val >> 32); } #endif #define intel_ntb_bar_read(SIZE, bar, offset) \ bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \ ntb->bar_info[(bar)].pci_bus_handle, (offset)) #define intel_ntb_bar_write(SIZE, bar, offset, val) \ bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \ ntb->bar_info[(bar)].pci_bus_handle, (offset), (val)) #define intel_ntb_reg_read(SIZE, offset) \ intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset) #define intel_ntb_reg_write(SIZE, offset, val) \ intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val) #define intel_ntb_mw_read(SIZE, offset) \ intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \ offset) #define intel_ntb_mw_write(SIZE, offset, val) \ intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \ offset, val) static int intel_ntb_probe(device_t device); static int intel_ntb_attach(device_t device); static int intel_ntb_detach(device_t device); static uint64_t intel_ntb_db_valid_mask(device_t dev); static void intel_ntb_spad_clear(device_t dev); static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector); static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width); static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed, enum ntb_width width); static int intel_ntb_link_disable(device_t dev); static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val); static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val); static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx); static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw); static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar); static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar, uint32_t *base, uint32_t *xlat, uint32_t *lmt); static int intel_ntb_map_pci_bars(struct ntb_softc *ntb); static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx, vm_memattr_t); static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *, const char *); static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar); static int map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar); static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb); static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail); static int intel_ntb_init_isr(struct ntb_softc *ntb); static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb); static int 
intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors); static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb); static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector); static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec); static void ndev_vec_isr(void *arg); static void ndev_irq_isr(void *arg); static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff); static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t); static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t); static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors); static void intel_ntb_free_msix_vec(struct ntb_softc *ntb); static void intel_ntb_get_msix_info(struct ntb_softc *ntb); static void intel_ntb_exchange_msix(void *); static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id); static void intel_ntb_detect_max_mw(struct ntb_softc *ntb); static int intel_ntb_detect_xeon(struct ntb_softc *ntb); static int intel_ntb_detect_atom(struct ntb_softc *ntb); static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb); static int intel_ntb_atom_init_dev(struct ntb_softc *ntb); static void intel_ntb_teardown_xeon(struct ntb_softc *ntb); static void configure_atom_secondary_side_bars(struct ntb_softc *ntb); static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx, enum ntb_bar regbar); static void xeon_set_sbar_base_and_limit(struct ntb_softc *, uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar); static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr, enum ntb_bar idx); static int xeon_setup_b2b_mw(struct ntb_softc *, const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr); static inline bool link_is_up(struct ntb_softc *ntb); static inline bool _xeon_link_is_up(struct ntb_softc *ntb); static inline bool atom_link_is_err(struct ntb_softc *ntb); static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *); static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *); static void atom_link_hb(void *arg); static void recover_atom_link(void *arg); static bool intel_ntb_poll_link(struct ntb_softc *ntb); static void save_bar_parameters(struct ntb_pci_bar_info *bar); static void intel_ntb_sysctl_init(struct ntb_softc *); static int sysctl_handle_features(SYSCTL_HANDLER_ARGS); static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS); static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS); static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS); static int sysctl_handle_register(SYSCTL_HANDLER_ARGS); static unsigned g_ntb_hw_debug_level; SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN, &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose"); #define intel_ntb_printf(lvl, ...) 
do { \ if ((lvl) <= g_ntb_hw_debug_level) { \ device_printf(ntb->device, __VA_ARGS__); \ } \ } while (0) #define _NTB_PAT_UC 0 #define _NTB_PAT_WC 1 #define _NTB_PAT_WT 4 #define _NTB_PAT_WP 5 #define _NTB_PAT_WB 6 #define _NTB_PAT_UCM 7 static unsigned g_ntb_mw_pat = _NTB_PAT_UC; SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN, &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): " "UC: " __XSTRING(_NTB_PAT_UC) ", " "WC: " __XSTRING(_NTB_PAT_WC) ", " "WT: " __XSTRING(_NTB_PAT_WT) ", " "WP: " __XSTRING(_NTB_PAT_WP) ", " "WB: " __XSTRING(_NTB_PAT_WB) ", " "UC-: " __XSTRING(_NTB_PAT_UCM)); static inline vm_memattr_t intel_ntb_pat_flags(void) { switch (g_ntb_mw_pat) { case _NTB_PAT_WC: return (VM_MEMATTR_WRITE_COMBINING); case _NTB_PAT_WT: return (VM_MEMATTR_WRITE_THROUGH); case _NTB_PAT_WP: return (VM_MEMATTR_WRITE_PROTECTED); case _NTB_PAT_WB: return (VM_MEMATTR_WRITE_BACK); case _NTB_PAT_UCM: return (VM_MEMATTR_WEAK_UNCACHEABLE); case _NTB_PAT_UC: /* FALLTHROUGH */ default: return (VM_MEMATTR_UNCACHEABLE); } } /* * Well, this obviously doesn't belong here, but it doesn't seem to exist * anywhere better yet. */ static inline const char * intel_ntb_vm_memattr_to_str(vm_memattr_t pat) { switch (pat) { case VM_MEMATTR_WRITE_COMBINING: return ("WRITE_COMBINING"); case VM_MEMATTR_WRITE_THROUGH: return ("WRITE_THROUGH"); case VM_MEMATTR_WRITE_PROTECTED: return ("WRITE_PROTECTED"); case VM_MEMATTR_WRITE_BACK: return ("WRITE_BACK"); case VM_MEMATTR_WEAK_UNCACHEABLE: return ("UNCACHED"); case VM_MEMATTR_UNCACHEABLE: return ("UNCACHEABLE"); default: return ("UNKNOWN"); } } static int g_ntb_msix_idx = 1; SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx, 0, "Use this memory window to access the peer MSIX message complex on " "certain Xeon-based NTB systems, as a workaround for a hardware errata. " "Like b2b_mw_idx, negative values index from the last available memory " "window. (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)"); static int g_ntb_mw_idx = -1; SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx, 0, "Use this memory window to access the peer NTB registers. A " "non-negative value starts from the first MW index; a negative value " "starts from the last MW index. The default is -1, i.e., the last " "available memory window. Both sides of the NTB MUST set the same " "value here! (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)"); /* Hardware owns the low 16 bits of features. */ #define NTB_BAR_SIZE_4K (1 << 0) #define NTB_SDOORBELL_LOCKUP (1 << 1) #define NTB_SB01BASE_LOCKUP (1 << 2) #define NTB_B2BDOORBELL_BIT14 (1 << 3) /* Software/configuration owns the top 16 bits. */ #define NTB_SPLIT_BAR (1ull << 16) #define NTB_FEATURES_STR \ "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \ "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K" static struct ntb_hw_info pci_ids[] = { /* XXX: PS/SS IDs left out until they are supported. 
*/ { 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B", NTB_ATOM, 0 }, { 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 }, { 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 }, { 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K }, { 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP }, { 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP }, { 0x00000000, NULL, NTB_ATOM, 0 } }; static const struct ntb_reg atom_reg = { .ntb_ctl = ATOM_NTBCNTL_OFFSET, .lnk_sta = ATOM_LINK_STATUS_OFFSET, .db_size = sizeof(uint64_t), .mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 }, }; static const struct ntb_alt_reg atom_pri_reg = { .db_bell = ATOM_PDOORBELL_OFFSET, .db_mask = ATOM_PDBMSK_OFFSET, .spad = ATOM_SPAD_OFFSET, }; static const struct ntb_alt_reg atom_b2b_reg = { .db_bell = ATOM_B2B_DOORBELL_OFFSET, .spad = ATOM_B2B_SPAD_OFFSET, }; static const struct ntb_xlat_reg atom_sec_xlat = { #if 0 /* "FIXME" says the Linux driver. */ .bar0_base = ATOM_SBAR0BASE_OFFSET, .bar2_base = ATOM_SBAR2BASE_OFFSET, .bar4_base = ATOM_SBAR4BASE_OFFSET, .bar2_limit = ATOM_SBAR2LMT_OFFSET, .bar4_limit = ATOM_SBAR4LMT_OFFSET, #endif .bar2_xlat = ATOM_SBAR2XLAT_OFFSET, .bar4_xlat = ATOM_SBAR4XLAT_OFFSET, }; static const struct ntb_reg xeon_reg = { .ntb_ctl = XEON_NTBCNTL_OFFSET, .lnk_sta = XEON_LINK_STATUS_OFFSET, .db_size = sizeof(uint16_t), .mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 }, }; static const struct ntb_alt_reg xeon_pri_reg = { .db_bell = XEON_PDOORBELL_OFFSET, .db_mask = XEON_PDBMSK_OFFSET, .spad = XEON_SPAD_OFFSET, }; static const struct ntb_alt_reg xeon_b2b_reg = { .db_bell = XEON_B2B_DOORBELL_OFFSET, .spad = XEON_B2B_SPAD_OFFSET, }; static const struct ntb_xlat_reg xeon_sec_xlat = { .bar0_base = XEON_SBAR0BASE_OFFSET, .bar2_base = XEON_SBAR2BASE_OFFSET, .bar4_base = XEON_SBAR4BASE_OFFSET, .bar5_base = XEON_SBAR5BASE_OFFSET, .bar2_limit = XEON_SBAR2LMT_OFFSET, .bar4_limit = XEON_SBAR4LMT_OFFSET, .bar5_limit = XEON_SBAR5LMT_OFFSET, .bar2_xlat = XEON_SBAR2XLAT_OFFSET, .bar4_xlat = XEON_SBAR4XLAT_OFFSET, .bar5_xlat = XEON_SBAR5XLAT_OFFSET, }; static struct ntb_b2b_addr xeon_b2b_usd_addr = { .bar0_addr = XEON_B2B_BAR0_ADDR, .bar2_addr64 = XEON_B2B_BAR2_ADDR64, .bar4_addr64 = XEON_B2B_BAR4_ADDR64, .bar4_addr32 = XEON_B2B_BAR4_ADDR32, .bar5_addr32 = XEON_B2B_BAR5_ADDR32, }; static struct ntb_b2b_addr xeon_b2b_dsd_addr = { .bar0_addr = XEON_B2B_BAR0_ADDR, .bar2_addr64 = XEON_B2B_BAR2_ADDR64, .bar4_addr64 = XEON_B2B_BAR4_ADDR64, .bar4_addr32 = XEON_B2B_BAR4_ADDR32, .bar5_addr32 = XEON_B2B_BAR5_ADDR32, }; SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW, 0, "B2B MW segment overrides -- MUST be the same on both sides"); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN, &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon " "hardware, use this 64-bit address on the bus between the NTB devices for " "the window at BAR2, on the upstream side of the link. 
MUST be the same " "address on both sides."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN, &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN, &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 " "(split-BAR mode)."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN, &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 " "(split-BAR mode)."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN, &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon " "hardware, use this 64-bit address on the bus between the NTB devices for " "the window at BAR2, on the downstream side of the link. MUST be the same" " address on both sides."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN, &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN, &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 " "(split-BAR mode)."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN, &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 " "(split-BAR mode)."); /* * OS <-> Driver interface structures */ MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations"); /* * OS <-> Driver linkage functions */ static int intel_ntb_probe(device_t device) { struct ntb_hw_info *p; p = intel_ntb_get_device_info(pci_get_devid(device)); if (p == NULL) return (ENXIO); device_set_desc(device, p->desc); return (0); } static int intel_ntb_attach(device_t device) { struct ntb_softc *ntb; struct ntb_hw_info *p; int error; ntb = device_get_softc(device); p = intel_ntb_get_device_info(pci_get_devid(device)); ntb->device = device; ntb->type = p->type; ntb->features = p->features; ntb->b2b_mw_idx = B2B_MW_DISABLED; ntb->msix_mw_idx = B2B_MW_DISABLED; /* Heartbeat timer for NTB_ATOM since there is no link interrupt */ callout_init(&ntb->heartbeat_timer, 1); callout_init(&ntb->lr_timer, 1); callout_init(&ntb->peer_msix_work, 1); mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN); if (ntb->type == NTB_ATOM) error = intel_ntb_detect_atom(ntb); else error = intel_ntb_detect_xeon(ntb); if (error != 0) goto out; intel_ntb_detect_max_mw(ntb); pci_enable_busmaster(ntb->device); error = intel_ntb_map_pci_bars(ntb); if (error != 0) goto out; if (ntb->type == NTB_ATOM) error = intel_ntb_atom_init_dev(ntb); else error = intel_ntb_xeon_init_dev(ntb); if (error != 0) goto out; intel_ntb_spad_clear(device); intel_ntb_poll_link(ntb); intel_ntb_sysctl_init(ntb); /* Attach children to this controller */ error = ntb_register_device(device); out: if (error != 0) intel_ntb_detach(device); return (error); } static int intel_ntb_detach(device_t device) { struct ntb_softc *ntb; ntb = device_get_softc(device); /* Detach & delete all children */ ntb_unregister_device(device); if (ntb->self_reg != NULL) { DB_MASK_LOCK(ntb); db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask); DB_MASK_UNLOCK(ntb); } callout_drain(&ntb->heartbeat_timer); callout_drain(&ntb->lr_timer); callout_drain(&ntb->peer_msix_work); pci_disable_busmaster(ntb->device); if (ntb->type == NTB_XEON) intel_ntb_teardown_xeon(ntb); intel_ntb_teardown_interrupts(ntb); mtx_destroy(&ntb->db_mask_lock); intel_ntb_unmap_pci_bar(ntb); return (0); } /* * Driver internal routines */ static inline enum ntb_bar intel_ntb_mw_to_bar(struct 
ntb_softc *ntb, unsigned mw) { KASSERT(mw < ntb->mw_count, ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count)); KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw")); return (ntb->reg->mw_bar[mw]); } static inline bool bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar) { /* XXX This assertion could be stronger. */ KASSERT(bar < NTB_MAX_BARS, ("bogus bar")); return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR)); } static inline void bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base, uint32_t *xlat, uint32_t *lmt) { uint32_t basev, lmtv, xlatv; switch (bar) { case NTB_B2B_BAR_1: basev = ntb->xlat_reg->bar2_base; lmtv = ntb->xlat_reg->bar2_limit; xlatv = ntb->xlat_reg->bar2_xlat; break; case NTB_B2B_BAR_2: basev = ntb->xlat_reg->bar4_base; lmtv = ntb->xlat_reg->bar4_limit; xlatv = ntb->xlat_reg->bar4_xlat; break; case NTB_B2B_BAR_3: basev = ntb->xlat_reg->bar5_base; lmtv = ntb->xlat_reg->bar5_limit; xlatv = ntb->xlat_reg->bar5_xlat; break; default: KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS, ("bad bar")); basev = lmtv = xlatv = 0; break; } if (base != NULL) *base = basev; if (xlat != NULL) *xlat = xlatv; if (lmt != NULL) *lmt = lmtv; } static int intel_ntb_map_pci_bars(struct ntb_softc *ntb) { int rc; ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0); rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]); if (rc != 0) goto out; ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2); rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]); if (rc != 0) goto out; ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET; ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4); rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]); if (rc != 0) goto out; ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET; if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR)) goto out; ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5); rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]); ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET; out: if (rc != 0) device_printf(ntb->device, "unable to allocate pci resource\n"); return (rc); } static void print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar, const char *kind) { device_printf(ntb->device, "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n", PCI_RID2BAR(bar->pci_resource_id), bar->vbase, (char *)bar->vbase + bar->size - 1, (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1), (uintmax_t)bar->size, kind); } static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar) { bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY, &bar->pci_resource_id, RF_ACTIVE); if (bar->pci_resource == NULL) return (ENXIO); save_bar_parameters(bar); bar->map_mode = VM_MEMATTR_UNCACHEABLE; print_map_success(ntb, bar, "mmr"); return (0); } static int map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar) { int rc; vm_memattr_t mapmode; uint8_t bar_size_bits = 0; bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY, &bar->pci_resource_id, RF_ACTIVE); if (bar->pci_resource == NULL) return (ENXIO); 
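/* Record the mapped resource's parameters (base addresses, size) in bar_info; this is redone below if the Ivytown BAR-size workaround resizes the BAR. */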
save_bar_parameters(bar); /* * Ivytown NTB BAR sizes are misreported by the hardware due to a * hardware issue. To work around this, query the size it should be * configured to by the device and modify the resource to correspond to * this new size. The BIOS on systems with this problem is required to * provide enough address space to allow the driver to make this change * safely. * * Ideally I could have just specified the size when I allocated the * resource like: * bus_alloc_resource(ntb->device, * SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul, * 1ul << bar_size_bits, RF_ACTIVE); * but the PCI driver does not honor the size in this call, so we have * to modify it after the fact. */ if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) { if (bar->pci_resource_id == PCIR_BAR(2)) bar_size_bits = pci_read_config(ntb->device, XEON_PBAR23SZ_OFFSET, 1); else bar_size_bits = pci_read_config(ntb->device, XEON_PBAR45SZ_OFFSET, 1); rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY, bar->pci_resource, bar->pbase, bar->pbase + (1ul << bar_size_bits) - 1); if (rc != 0) { device_printf(ntb->device, "unable to resize bar\n"); return (rc); } save_bar_parameters(bar); } bar->map_mode = VM_MEMATTR_UNCACHEABLE; print_map_success(ntb, bar, "mw"); /* * Optionally, mark MW BARs as anything other than UC to improve * performance. */ mapmode = intel_ntb_pat_flags(); if (mapmode == bar->map_mode) return (0); rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode); if (rc == 0) { bar->map_mode = mapmode; device_printf(ntb->device, "Marked BAR%d v:[%p-%p] p:[%p-%p] as " "%s.\n", PCI_RID2BAR(bar->pci_resource_id), bar->vbase, (char *)bar->vbase + bar->size - 1, (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1), intel_ntb_vm_memattr_to_str(mapmode)); } else device_printf(ntb->device, "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as " "%s: %d\n", PCI_RID2BAR(bar->pci_resource_id), bar->vbase, (char *)bar->vbase + bar->size - 1, (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1), intel_ntb_vm_memattr_to_str(mapmode), rc); /* Proceed anyway */ return (0); } static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb) { struct ntb_pci_bar_info *current_bar; int i; for (i = 0; i < NTB_MAX_BARS; i++) { current_bar = &ntb->bar_info[i]; if (current_bar->pci_resource != NULL) bus_release_resource(ntb->device, SYS_RES_MEMORY, current_bar->pci_resource_id, current_bar->pci_resource); } } static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors) { uint32_t i; int rc; for (i = 0; i < num_vectors; i++) { ntb->int_info[i].rid = i + 1; ntb->int_info[i].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE); if (ntb->int_info[i].res == NULL) { device_printf(ntb->device, "bus_alloc_resource failed\n"); return (ENOMEM); } ntb->int_info[i].tag = NULL; ntb->allocated_interrupts++; rc = bus_setup_intr(ntb->device, ntb->int_info[i].res, INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr, &ntb->msix_vec[i], &ntb->int_info[i].tag); if (rc != 0) { device_printf(ntb->device, "bus_setup_intr failed\n"); return (ENXIO); } } return (0); } /* * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector * cannot be allocated for each MSI-X message. JHB seems to think remapping * should be okay. This tunable should enable us to test that hypothesis * when someone gets their hands on some Xeon hardware. 
*/ static int ntb_force_remap_mode; SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN, &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped" " to a smaller number of ithreads, even if the desired number are " "available"); /* * In case it is NOT ok, give consumers an abort button. */ static int ntb_prefer_intx; SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN, &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather " "than remapping MSI-X messages over available slots (match Linux driver " "behavior)"); /* * Remap the desired number of MSI-X messages to available ithreads in a simple * round-robin fashion. */ static int intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail) { u_int *vectors; uint32_t i; int rc; if (ntb_prefer_intx != 0) return (ENXIO); vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK); for (i = 0; i < desired; i++) vectors[i] = (i % avail) + 1; rc = pci_remap_msix(dev, desired, vectors); free(vectors, M_NTB); return (rc); } static int intel_ntb_init_isr(struct ntb_softc *ntb) { uint32_t desired_vectors, num_vectors; int rc; ntb->allocated_interrupts = 0; ntb->last_ts = ticks; /* * Mask all doorbell interrupts. (Except link events!) */ DB_MASK_LOCK(ntb); ntb->db_mask = ntb->db_valid_mask; db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask); DB_MASK_UNLOCK(ntb); num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device), ntb->db_count); if (desired_vectors >= 1) { rc = pci_alloc_msix(ntb->device, &num_vectors); if (ntb_force_remap_mode != 0 && rc == 0 && num_vectors == desired_vectors) num_vectors--; if (rc == 0 && num_vectors < desired_vectors) { rc = intel_ntb_remap_msix(ntb->device, desired_vectors, num_vectors); if (rc == 0) num_vectors = desired_vectors; else pci_release_msi(ntb->device); } if (rc != 0) num_vectors = 1; } else num_vectors = 1; if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) { if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { device_printf(ntb->device, "Errata workaround does not support MSI or INTX\n"); return (EINVAL); } ntb->db_vec_count = 1; ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT; rc = intel_ntb_setup_legacy_interrupt(ntb); } else { if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS && HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { device_printf(ntb->device, "Errata workaround expects %d doorbell bits\n", XEON_NONLINK_DB_MSIX_BITS); return (EINVAL); } intel_ntb_create_msix_vec(ntb, num_vectors); rc = intel_ntb_setup_msix(ntb, num_vectors); } if (rc != 0) { device_printf(ntb->device, "Error allocating interrupts: %d\n", rc); intel_ntb_free_msix_vec(ntb); } return (rc); } static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb) { int rc; ntb->int_info[0].rid = 0; ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ, &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE); if (ntb->int_info[0].res == NULL) { device_printf(ntb->device, "bus_alloc_resource failed\n"); return (ENOMEM); } ntb->int_info[0].tag = NULL; ntb->allocated_interrupts = 1; rc = bus_setup_intr(ntb->device, ntb->int_info[0].res, INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr, ntb, &ntb->int_info[0].tag); if (rc != 0) { device_printf(ntb->device, "bus_setup_intr failed\n"); return (ENXIO); } return (0); } static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb) { struct ntb_int_info *current_int; int i; for (i = 0; i < ntb->allocated_interrupts; i++) { current_int = &ntb->int_info[i]; if (current_int->tag != NULL) bus_teardown_intr(ntb->device, 
current_int->res, current_int->tag); if (current_int->res != NULL) bus_release_resource(ntb->device, SYS_RES_IRQ, rman_get_rid(current_int->res), current_int->res); } intel_ntb_free_msix_vec(ntb); pci_release_msi(ntb->device); } /* * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon. Abstract it * out to make code clearer. */ static inline uint64_t db_ioread(struct ntb_softc *ntb, uint64_t regoff) { if (ntb->type == NTB_ATOM) return (intel_ntb_reg_read(8, regoff)); KASSERT(ntb->type == NTB_XEON, ("bad ntb type")); return (intel_ntb_reg_read(2, regoff)); } static inline void db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val) { KASSERT((val & ~ntb->db_valid_mask) == 0, ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__, (uintmax_t)(val & ~ntb->db_valid_mask), (uintmax_t)ntb->db_valid_mask)); if (regoff == ntb->self_reg->db_mask) DB_MASK_ASSERT(ntb, MA_OWNED); db_iowrite_raw(ntb, regoff, val); } static inline void db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val) { if (ntb->type == NTB_ATOM) { intel_ntb_reg_write(8, regoff, val); return; } KASSERT(ntb->type == NTB_XEON, ("bad ntb type")); intel_ntb_reg_write(2, regoff, (uint16_t)val); } static void intel_ntb_db_set_mask(device_t dev, uint64_t bits) { struct ntb_softc *ntb = device_get_softc(dev); DB_MASK_LOCK(ntb); ntb->db_mask |= bits; if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask); DB_MASK_UNLOCK(ntb); } static void intel_ntb_db_clear_mask(device_t dev, uint64_t bits) { struct ntb_softc *ntb = device_get_softc(dev); uint64_t ibits; int i; KASSERT((bits & ~ntb->db_valid_mask) == 0, ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__, (uintmax_t)(bits & ~ntb->db_valid_mask), (uintmax_t)ntb->db_valid_mask)); DB_MASK_LOCK(ntb); ibits = ntb->fake_db_bell & ntb->db_mask & bits; ntb->db_mask &= ~bits; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { /* Simulate fake interrupts if unmasked DB bits are set. */ for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0) swi_sched(ntb->int_info[i].tag, 0); } } else { db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask); } DB_MASK_UNLOCK(ntb); } static uint64_t intel_ntb_db_read(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) return (ntb->fake_db_bell); return (db_ioread(ntb, ntb->self_reg->db_bell)); } static void intel_ntb_db_clear(device_t dev, uint64_t bits) { struct ntb_softc *ntb = device_get_softc(dev); KASSERT((bits & ~ntb->db_valid_mask) == 0, ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__, (uintmax_t)(bits & ~ntb->db_valid_mask), (uintmax_t)ntb->db_valid_mask)); if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { DB_MASK_LOCK(ntb); ntb->fake_db_bell &= ~bits; DB_MASK_UNLOCK(ntb); return; } db_iowrite(ntb, ntb->self_reg->db_bell, bits); } static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector) { uint64_t shift, mask; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { /* * Remap vectors in a custom way so that at least the first * three doorbells do not generate stray events. * This breaks Linux compatibility (if one existed) * when more than one DB is used (not by if_ntb).
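 * (Editorial illustration, the numbers are hypothetical and not part of the original comment: in the normal case handled below, vector v owns db_vec_shift consecutive doorbell bits, e.g. a shift of 4 would give vector 0 the mask 0x000f and vector 1 the mask 0x00f0; under this remap each of the first vectors owns a single doorbell bit, while the last remapped vector takes the wide mask 0x7ffc covering the remaining bits.)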
*/ if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1) return (1 << db_vector); if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1) return (0x7ffc); } shift = ntb->db_vec_shift; mask = (1ull << shift) - 1; return (mask << (shift * db_vector)); } static void intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec) { uint64_t vec_mask; ntb->last_ts = ticks; vec_mask = intel_ntb_vec_mask(ntb, vec); if ((vec_mask & ntb->db_link_mask) != 0) { if (intel_ntb_poll_link(ntb)) ntb_link_event(ntb->device); } if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) && (vec_mask & ntb->db_link_mask) == 0) { DB_MASK_LOCK(ntb); /* Do not report same DB events again if not cleared yet. */ vec_mask &= ~ntb->fake_db_bell; /* Update our internal doorbell register. */ ntb->fake_db_bell |= vec_mask; /* Do not report masked DB events. */ vec_mask &= ~ntb->db_mask; DB_MASK_UNLOCK(ntb); } if ((vec_mask & ntb->db_valid_mask) != 0) ntb_db_event(ntb->device, vec); } static void ndev_vec_isr(void *arg) { struct ntb_vec *nvec = arg; intel_ntb_interrupt(nvec->ntb, nvec->num); } static void ndev_irq_isr(void *arg) { /* If we couldn't set up MSI-X, we only have the one vector. */ intel_ntb_interrupt(arg, 0); } static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors) { uint32_t i; ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB, M_ZERO | M_WAITOK); for (i = 0; i < num_vectors; i++) { ntb->msix_vec[i].num = i; ntb->msix_vec[i].ntb = ntb; } return (0); } static void intel_ntb_free_msix_vec(struct ntb_softc *ntb) { if (ntb->msix_vec == NULL) return; free(ntb->msix_vec, M_NTB); ntb->msix_vec = NULL; } static void intel_ntb_get_msix_info(struct ntb_softc *ntb) { struct pci_devinfo *dinfo; struct pcicfg_msix *msix; uint32_t laddr, data, i, offset; dinfo = device_get_ivars(ntb->device); msix = &dinfo->cfg.msix; CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data)); for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE; laddr = bus_read_4(msix->msix_table_res, offset + PCI_MSIX_ENTRY_LOWER_ADDR); intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr); KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE, ("local MSIX addr 0x%x not in MSI base 0x%x", laddr, MSI_INTEL_ADDR_BASE)); ntb->msix_data[i].nmd_ofs = laddr; data = bus_read_4(msix->msix_table_res, offset + PCI_MSIX_ENTRY_DATA); intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data); ntb->msix_data[i].nmd_data = data; } } static struct ntb_hw_info * intel_ntb_get_device_info(uint32_t device_id) { struct ntb_hw_info *ep = pci_ids; while (ep->device_id) { if (ep->device_id == device_id) return (ep); ++ep; } return (NULL); } static void intel_ntb_teardown_xeon(struct ntb_softc *ntb) { if (ntb->reg != NULL) intel_ntb_link_disable(ntb->device); } static void intel_ntb_detect_max_mw(struct ntb_softc *ntb) { if (ntb->type == NTB_ATOM) { ntb->mw_count = ATOM_MW_COUNT; return; } if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT; else ntb->mw_count = XEON_SNB_MW_COUNT; } static int intel_ntb_detect_xeon(struct ntb_softc *ntb) { uint8_t ppd, conn_type; ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1); ntb->ppd = ppd; if ((ppd & XEON_PPD_DEV_TYPE) != 0) ntb->dev_type = NTB_DEV_DSD; else ntb->dev_type = NTB_DEV_USD; if ((ppd & XEON_PPD_SPLIT_BAR) != 0) ntb->features |= NTB_SPLIT_BAR; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) && !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { device_printf(ntb->device, "Can not apply SB01BASE_LOCKUP workaround " "with split BARs 
disabled!\n"); device_printf(ntb->device, "Expect system hangs under heavy NTB traffic!\n"); ntb->features &= ~NTB_SB01BASE_LOCKUP; } /* * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP * errata workaround; only do one at a time. */ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) ntb->features &= ~NTB_SDOORBELL_LOCKUP; conn_type = ppd & XEON_PPD_CONN_TYPE; switch (conn_type) { case NTB_CONN_B2B: ntb->conn_type = conn_type; break; case NTB_CONN_RP: case NTB_CONN_TRANSPARENT: default: device_printf(ntb->device, "Unsupported connection type: %u\n", (unsigned)conn_type); return (ENXIO); } return (0); } static int intel_ntb_detect_atom(struct ntb_softc *ntb) { uint32_t ppd, conn_type; ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4); ntb->ppd = ppd; if ((ppd & ATOM_PPD_DEV_TYPE) != 0) ntb->dev_type = NTB_DEV_DSD; else ntb->dev_type = NTB_DEV_USD; conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8; switch (conn_type) { case NTB_CONN_B2B: ntb->conn_type = conn_type; break; default: device_printf(ntb->device, "Unsupported NTB configuration\n"); return (ENXIO); } return (0); } static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb) { int rc; ntb->spad_count = XEON_SPAD_COUNT; ntb->db_count = XEON_DB_COUNT; ntb->db_link_mask = XEON_DB_LINK_BIT; ntb->db_vec_count = XEON_DB_MSIX_VECTOR_COUNT; ntb->db_vec_shift = XEON_DB_MSIX_VECTOR_SHIFT; if (ntb->conn_type != NTB_CONN_B2B) { device_printf(ntb->device, "Connection type %d not supported\n", ntb->conn_type); return (ENXIO); } ntb->reg = &xeon_reg; ntb->self_reg = &xeon_pri_reg; ntb->peer_reg = &xeon_b2b_reg; ntb->xlat_reg = &xeon_sec_xlat; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { ntb->fake_db_bell = 0; ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) % ntb->mw_count; intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n", g_ntb_msix_idx, ntb->msix_mw_idx); rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx, VM_MEMATTR_UNCACHEABLE); KASSERT(rc == 0, ("shouldn't fail")); } else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) { /* * There is a Xeon hardware erratum related to writes to SDOORBELL or * B2BDOORBELL in conjunction with inbound access to NTB MMIO space, * which may hang the system. To work around this, use a memory * window to access the interrupt and scratch pad registers on the * remote system. */ ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) % ntb->mw_count; intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n", g_ntb_mw_idx, ntb->b2b_mw_idx); rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx, VM_MEMATTR_UNCACHEABLE); KASSERT(rc == 0, ("shouldn't fail")); } else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14)) /* * HW erratum on bit 14 of the b2bdoorbell register. Writes will not be * mirrored to the remote system. Shrink the number of bits by one, * since bit 14 is the last bit. * * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register * anyway. Nor for non-B2B connection types. */ ntb->db_count = XEON_DB_COUNT - 1; ntb->db_valid_mask = (1ull << ntb->db_count) - 1; if (ntb->dev_type == NTB_DEV_USD) rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr, &xeon_b2b_usd_addr); else rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr, &xeon_b2b_dsd_addr); if (rc != 0) return (rc); /* Enable Bus Master and Memory Space on the secondary side */ intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET, PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); /* * Mask all doorbell interrupts.
*/ DB_MASK_LOCK(ntb); ntb->db_mask = ntb->db_valid_mask; db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask); DB_MASK_UNLOCK(ntb); rc = intel_ntb_init_isr(ntb); return (rc); } static int intel_ntb_atom_init_dev(struct ntb_softc *ntb) { int error; KASSERT(ntb->conn_type == NTB_CONN_B2B, ("Unsupported NTB configuration (%d)\n", ntb->conn_type)); ntb->spad_count = ATOM_SPAD_COUNT; ntb->db_count = ATOM_DB_COUNT; ntb->db_vec_count = ATOM_DB_MSIX_VECTOR_COUNT; ntb->db_vec_shift = ATOM_DB_MSIX_VECTOR_SHIFT; ntb->db_valid_mask = (1ull << ntb->db_count) - 1; ntb->reg = &atom_reg; ntb->self_reg = &atom_pri_reg; ntb->peer_reg = &atom_b2b_reg; ntb->xlat_reg = &atom_sec_xlat; /* * FIXME - MSI-X bug on early Atom HW, remove once internal issue is * resolved. Mask transaction layer internal parity errors. */ pci_write_config(ntb->device, 0xFC, 0x4, 4); configure_atom_secondary_side_bars(ntb); /* Enable Bus Master and Memory Space on the secondary side */ intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET, PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); error = intel_ntb_init_isr(ntb); if (error != 0) return (error); /* Initiate PCI-E link training */ intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb); return (0); } /* XXX: Linux driver doesn't seem to do any of this for Atom. */ static void configure_atom_secondary_side_bars(struct ntb_softc *ntb) { if (ntb->dev_type == NTB_DEV_USD) { intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET, XEON_B2B_BAR2_ADDR64); intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET, XEON_B2B_BAR4_ADDR64); intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64); intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64); } else { intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET, XEON_B2B_BAR2_ADDR64); intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET, XEON_B2B_BAR4_ADDR64); intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64); intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64); } } /* * When working around Xeon SDOORBELL errata by remapping remote registers in a * MW, limit the B2B MW to half a MW. By sharing a MW, half the shared MW * remains for use by a higher layer. * * Will only be used if working around SDOORBELL errata and the BIOS-configured * MW size is sufficiently large. */ static unsigned int ntb_b2b_mw_share; SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share, 0, "If enabled (non-zero), prefer to share half of the B2B peer register " "MW with higher level consumers. 
Both sides of the NTB MUST set the same " "value here."); static void xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx, enum ntb_bar regbar) { struct ntb_pci_bar_info *bar; uint8_t bar_sz; if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3) return; bar = &ntb->bar_info[idx]; bar_sz = pci_read_config(ntb->device, bar->psz_off, 1); if (idx == regbar) { if (ntb->b2b_off != 0) bar_sz--; else bar_sz = 0; } pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1); bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1); (void)bar_sz; } static void xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr, enum ntb_bar idx, enum ntb_bar regbar) { uint64_t reg_val; uint32_t base_reg, lmt_reg; bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg); if (idx == regbar) { if (ntb->b2b_off) bar_addr += ntb->b2b_off; else bar_addr = 0; } if (!bar_is_64bit(ntb, idx)) { intel_ntb_reg_write(4, base_reg, bar_addr); reg_val = intel_ntb_reg_read(4, base_reg); (void)reg_val; intel_ntb_reg_write(4, lmt_reg, bar_addr); reg_val = intel_ntb_reg_read(4, lmt_reg); (void)reg_val; } else { intel_ntb_reg_write(8, base_reg, bar_addr); reg_val = intel_ntb_reg_read(8, base_reg); (void)reg_val; intel_ntb_reg_write(8, lmt_reg, bar_addr); reg_val = intel_ntb_reg_read(8, lmt_reg); (void)reg_val; } } static void xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx) { struct ntb_pci_bar_info *bar; bar = &ntb->bar_info[idx]; if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) { intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr); base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off); } else { intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr); base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off); } (void)base_addr; } static int xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr) { struct ntb_pci_bar_info *b2b_bar; vm_size_t bar_size; uint64_t bar_addr; enum ntb_bar b2b_bar_num, i; if (ntb->b2b_mw_idx == B2B_MW_DISABLED) { b2b_bar = NULL; b2b_bar_num = NTB_CONFIG_BAR; ntb->b2b_off = 0; } else { b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx); KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS, ("invalid b2b mw bar")); b2b_bar = &ntb->bar_info[b2b_bar_num]; bar_size = b2b_bar->size; if (ntb_b2b_mw_share != 0 && (bar_size >> 1) >= XEON_B2B_MIN_SIZE) ntb->b2b_off = bar_size >> 1; else if (bar_size >= XEON_B2B_MIN_SIZE) { ntb->b2b_off = 0; } else { device_printf(ntb->device, "B2B bar size is too small!\n"); return (EIO); } } /* * Reset the secondary bar sizes to match the primary bar sizes. * (Except, disable or halve the size of the B2B secondary bar.) */ for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++) xeon_reset_sbar_size(ntb, i, b2b_bar_num); bar_addr = 0; if (b2b_bar_num == NTB_CONFIG_BAR) bar_addr = addr->bar0_addr; else if (b2b_bar_num == NTB_B2B_BAR_1) bar_addr = addr->bar2_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) bar_addr = addr->bar4_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2) bar_addr = addr->bar4_addr32; else if (b2b_bar_num == NTB_B2B_BAR_3) bar_addr = addr->bar5_addr32; else KASSERT(false, ("invalid bar")); intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr); /* * Other SBARs are normally hit by the PBAR xlat, except for the b2b * register BAR. The B2B BAR is either disabled above or configured * half-size. It starts at PBAR xlat + offset. * * Also set up incoming BAR limits == base (zero length window). 
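 * (Editorial clarification, not in the original comment: programming a secondary BAR limit equal to its base yields a zero-length window, so incoming accesses are rejected until intel_ntb_mw_set_trans() later installs a real translation address and limit for that memory window.)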
*/ xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1, b2b_bar_num); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32, NTB_B2B_BAR_2, b2b_bar_num); xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32, NTB_B2B_BAR_3, b2b_bar_num); } else xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64, NTB_B2B_BAR_2, b2b_bar_num); /* Zero incoming translation addrs */ intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0); intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0); if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { uint32_t xlat_reg, lmt_reg; enum ntb_bar bar_num; /* * We point the chosen MSIX MW BAR xlat to remote LAPIC for * workaround */ bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx); bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg); if (bar_is_64bit(ntb, bar_num)) { intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE); ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg); intel_ntb_reg_write(8, lmt_reg, 0); } else { intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE); ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg); intel_ntb_reg_write(4, lmt_reg, 0); } ntb->peer_lapic_bar = &ntb->bar_info[bar_num]; } (void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET); (void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET); /* Zero outgoing translation limits (whole bar size windows) */ intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0); intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0); /* Set outgoing translation offsets */ xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2); xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3); } else xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2); /* Set the translation offset for B2B registers */ bar_addr = 0; if (b2b_bar_num == NTB_CONFIG_BAR) bar_addr = peer_addr->bar0_addr; else if (b2b_bar_num == NTB_B2B_BAR_1) bar_addr = peer_addr->bar2_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) bar_addr = peer_addr->bar4_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2) bar_addr = peer_addr->bar4_addr32; else if (b2b_bar_num == NTB_B2B_BAR_3) bar_addr = peer_addr->bar5_addr32; else KASSERT(false, ("invalid bar")); /* * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits * at a time. 
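 * (Editorial illustration with a hypothetical value: a bar_addr of 0x8000000000 would be written as 0x00000000 to XEON_B2B_XLAT_OFFSETL and 0x00000080 to XEON_B2B_XLAT_OFFSETU.)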
*/ intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff); intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32); return (0); } static inline bool _xeon_link_is_up(struct ntb_softc *ntb) { if (ntb->conn_type == NTB_CONN_TRANSPARENT) return (true); return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0); } static inline bool link_is_up(struct ntb_softc *ntb) { if (ntb->type == NTB_XEON) return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good || !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))); KASSERT(ntb->type == NTB_ATOM, ("ntb type")); return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0); } static inline bool atom_link_is_err(struct ntb_softc *ntb) { uint32_t status; KASSERT(ntb->type == NTB_ATOM, ("ntb type")); status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET); if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0) return (true); status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET); return ((status & ATOM_IBIST_ERR_OFLOW) != 0); } /* Atom does not have link status interrupt, poll on that platform */ static void atom_link_hb(void *arg) { struct ntb_softc *ntb = arg; sbintime_t timo, poll_ts; timo = NTB_HB_TIMEOUT * hz; poll_ts = ntb->last_ts + timo; /* * Delay polling the link status if an interrupt was received, unless * the cached link status says the link is down. */ if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) { timo = poll_ts - ticks; goto out; } if (intel_ntb_poll_link(ntb)) ntb_link_event(ntb->device); if (!link_is_up(ntb) && atom_link_is_err(ntb)) { /* Link is down with error, proceed with recovery */ callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb); return; } out: callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb); } static void atom_perform_link_restart(struct ntb_softc *ntb) { uint32_t status; /* Driver resets the NTB ModPhy lanes - magic! 
*/ intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0); intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40); intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60); intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60); /* Driver waits 100ms to allow the NTB ModPhy to settle */ pause("ModPhy", hz / 10); /* Clear AER Errors, write to clear */ status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET); status &= PCIM_AER_COR_REPLAY_ROLLOVER; intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status); /* Clear unexpected electrical idle event in LTSSM, write to clear */ status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET); status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI; intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status); /* Clear DeSkew Buffer error, write to clear */ status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET); status |= ATOM_DESKEWSTS_DBERR; intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status); status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET); status &= ATOM_IBIST_ERR_OFLOW; intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status); /* Releases the NTB state machine to allow the link to retrain */ status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET); status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT; intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status); } static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused, enum ntb_width width __unused) { struct ntb_softc *ntb = device_get_softc(dev); uint32_t cntl; intel_ntb_printf(2, "%s\n", __func__); if (ntb->type == NTB_ATOM) { pci_write_config(ntb->device, NTB_PPD_OFFSET, ntb->ppd | ATOM_PPD_INIT_LINK, 4); return (0); } if (ntb->conn_type == NTB_CONN_TRANSPARENT) { ntb_link_event(dev); return (0); } cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK); cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP; cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP; if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP; intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl); return (0); } static int intel_ntb_link_disable(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); uint32_t cntl; intel_ntb_printf(2, "%s\n", __func__); if (ntb->conn_type == NTB_CONN_TRANSPARENT) { ntb_link_event(dev); return (0); } cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP); cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP); cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK; intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl); return (0); } static bool intel_ntb_link_enabled(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); uint32_t cntl; if (ntb->type == NTB_ATOM) { cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4); return ((cntl & ATOM_PPD_INIT_LINK) != 0); } if (ntb->conn_type == NTB_CONN_TRANSPARENT) return (true); cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); return ((cntl & NTB_CNTL_LINK_DISABLE) == 0); } static void recover_atom_link(void *arg) { struct ntb_softc *ntb = arg; unsigned speed, width, oldspeed, oldwidth; uint32_t status32; atom_perform_link_restart(ntb); /* * There is a potential race between the 2 NTB devices recovering at * the same time. If the times are the same, the link will not recover * and the driver will be stuck in this loop forever. Add a random * interval to the recovery time to prevent this race. 
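 * (Editorial note, assuming ATOM_LINK_RECOVERY_TIME is a millisecond value as the hz / 1000 conversion below suggests: each side then sleeps between one and two recovery intervals, so two peers that restarted at the same moment quickly drift apart on later retries.)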
*/ status32 = arc4random() % ATOM_LINK_RECOVERY_TIME; pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000); if (atom_link_is_err(ntb)) goto retry; status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); if ((status32 & ATOM_CNTL_LINK_DOWN) != 0) goto out; status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta); width = NTB_LNK_STA_WIDTH(status32); speed = status32 & NTB_LINK_SPEED_MASK; oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta); oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK; if (oldwidth != width || oldspeed != speed) goto retry; out: callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb, ntb); return; retry: callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link, ntb); } /* * Polls the HW link status register(s); returns true if something has changed. */ static bool intel_ntb_poll_link(struct ntb_softc *ntb) { uint32_t ntb_cntl; uint16_t reg_val; if (ntb->type == NTB_ATOM) { ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); if (ntb_cntl == ntb->ntb_ctl) return (false); ntb->ntb_ctl = ntb_cntl; ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta); } else { db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask); reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2); if (reg_val == ntb->lnk_sta) return (false); ntb->lnk_sta = reg_val; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { if (_xeon_link_is_up(ntb)) { if (!ntb->peer_msix_good) { callout_reset(&ntb->peer_msix_work, 0, intel_ntb_exchange_msix, ntb); return (false); } } else { ntb->peer_msix_good = false; ntb->peer_msix_done = false; } } } return (true); } static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *ntb) { if (!link_is_up(ntb)) return (NTB_SPEED_NONE); return (ntb->lnk_sta & NTB_LINK_SPEED_MASK); } static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *ntb) { if (!link_is_up(ntb)) return (NTB_WIDTH_NONE); return (NTB_LNK_STA_WIDTH(ntb->lnk_sta)); } SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0, "Driver state, statistics, and HW registers"); #define NTB_REGSZ_MASK (3ul << 30) #define NTB_REG_64 (1ul << 30) #define NTB_REG_32 (2ul << 30) #define NTB_REG_16 (3ul << 30) #define NTB_REG_8 (0ul << 30) #define NTB_DB_READ (1ul << 29) #define NTB_PCI_REG (1ul << 28) #define NTB_REGFLAGS_MASK (NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG) static void intel_ntb_sysctl_init(struct ntb_softc *ntb) { struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree, *tmptree; ctx = device_get_sysctl_ctx(ntb->device); globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device)); SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status", CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_link_status_human, "A", "Link status (human readable)"); SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active", CTLFLAG_RD | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_status, "IU", "Link status (1=active, 0=inactive)"); SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up", CTLFLAG_RW | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_admin, "IU", "Set/get interface status (1=UP, 0=DOWN)"); tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info", CTLFLAG_RD, NULL, "Driver state, statistics, and HW registers"); tree_par = SYSCTL_CHILDREN(tree); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD, &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port"); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD, &ntb->dev_type, 0, "0 - USD; 1 - DSD"); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, 
"ppd", CTLFLAG_RD, &ntb->ppd, 0, "Raw PPD register (cached)"); if (ntb->b2b_mw_idx != B2B_MW_DISABLED) { SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD, &ntb->b2b_mw_idx, 0, "Index of the MW used for B2B remote register access"); SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off", CTLFLAG_RD, &ntb->b2b_off, "If non-zero, offset of B2B register region in shared MW"); } SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features", CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_features, "A", "Features/errata of this NTB device"); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD, __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0, "NTB CTL register (cached)"); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD, __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0, "LNK STA register (cached)"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD, &ntb->mw_count, 0, "MW count"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD, &ntb->spad_count, 0, "Scratchpad count"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD, &ntb->db_count, 0, "Doorbell count"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD, &ntb->db_vec_count, 0, "Doorbell vector count"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD, &ntb->db_vec_shift, 0, "Doorbell vector shift"); SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD, &ntb->db_valid_mask, "Doorbell valid mask"); SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD, &ntb->db_link_mask, "Doorbell link mask"); SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD, &ntb->db_mask, "Doorbell mask (cached)"); tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers", CTLFLAG_RD, NULL, "Raw HW registers (big-endian)"); regpar = SYSCTL_CHILDREN(tmptree); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->reg->ntb_ctl, sysctl_handle_register, "IU", "NTB Control register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | 0x19c, sysctl_handle_register, "IU", "NTB Link Capabilities"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | 0x1a0, sysctl_handle_register, "IU", "NTB Link Control register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask, sysctl_handle_register, "QU", "Doorbell mask register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell, sysctl_handle_register, "QU", "Doorbell register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar2_xlat, sysctl_handle_register, "QU", "Incoming XLAT23 register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar4_xlat, sysctl_handle_register, "IU", "Incoming XLAT4 register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar5_xlat, sysctl_handle_register, "IU", "Incoming XLAT5 register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar4_xlat, sysctl_handle_register, "QU", "Incoming XLAT45 register"); } SYSCTL_ADD_PROC(ctx, regpar, 
OID_AUTO, "incoming_lmt23", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar2_limit, sysctl_handle_register, "QU", "Incoming LMT23 register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar4_limit, sysctl_handle_register, "IU", "Incoming LMT4 register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar5_limit, sysctl_handle_register, "IU", "Incoming LMT5 register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar4_limit, sysctl_handle_register, "QU", "Incoming LMT45 register"); } if (ntb->type == NTB_ATOM) return; tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats", CTLFLAG_RD, NULL, "Xeon HW statistics"); statpar = SYSCTL_CHILDREN(tmptree); SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_16 | XEON_USMEMMISS_OFFSET, sysctl_handle_register, "SU", "Upstream Memory Miss"); tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err", CTLFLAG_RD, NULL, "Xeon HW errors"); errpar = SYSCTL_CHILDREN(tmptree); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET, sysctl_handle_register, "CU", "PPD"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET, sysctl_handle_register, "CU", "PBAR23 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET, sysctl_handle_register, "CU", "PBAR4 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET, sysctl_handle_register, "CU", "PBAR5 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET, sysctl_handle_register, "CU", "SBAR23 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET, sysctl_handle_register, "CU", "SBAR4 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET, sysctl_handle_register, "CU", "SBAR5 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET, sysctl_handle_register, "SU", "DEVSTS"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET, sysctl_handle_register, "SU", "LNKSTS"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET, sysctl_handle_register, "SU", "SLNKSTS"); SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET, sysctl_handle_register, "IU", "UNCERRSTS"); SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET, sysctl_handle_register, "IU", "CORERRSTS"); if (ntb->conn_type != NTB_CONN_B2B) return; SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, 
NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off, sysctl_handle_register, "QU", "Outgoing XLAT23 register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off, sysctl_handle_register, "IU", "Outgoing XLAT4 register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off, sysctl_handle_register, "IU", "Outgoing XLAT5 register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off, sysctl_handle_register, "QU", "Outgoing XLAT45 register"); } SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | XEON_PBAR2LMT_OFFSET, sysctl_handle_register, "QU", "Outgoing LMT23 register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | XEON_PBAR4LMT_OFFSET, sysctl_handle_register, "IU", "Outgoing LMT4 register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | XEON_PBAR5LMT_OFFSET, sysctl_handle_register, "IU", "Outgoing LMT5 register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | XEON_PBAR4LMT_OFFSET, sysctl_handle_register, "QU", "Outgoing LMT45 register"); } SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar0_base, sysctl_handle_register, "QU", "Secondary BAR01 base register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar2_base, sysctl_handle_register, "QU", "Secondary BAR23 base register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar4_base, sysctl_handle_register, "IU", "Secondary BAR4 base register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar5_base, sysctl_handle_register, "IU", "Secondary BAR5 base register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar4_base, sysctl_handle_register, "QU", "Secondary BAR45 base register"); } } static int sysctl_handle_features(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb = arg1; struct sbuf sb; int error; sbuf_new_for_sysctl(&sb, NULL, 256, req); sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR); error = sbuf_finish(&sb); sbuf_delete(&sb); if (error || !req->newptr) return (error); return (EINVAL); } static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb = arg1; unsigned old, new; int error; old = intel_ntb_link_enabled(ntb->device); error = SYSCTL_OUT(req, &old, sizeof(old)); if (error != 0 || req->newptr == NULL) return (error); error = SYSCTL_IN(req, &new, sizeof(new)); if (error != 0) return (error); intel_ntb_printf(0, "Admin set interface state to '%sabled'\n", (new != 0)? 
"en" : "dis"); if (new != 0) error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); else error = intel_ntb_link_disable(ntb->device); return (error); } static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb = arg1; struct sbuf sb; enum ntb_speed speed; enum ntb_width width; int error; sbuf_new_for_sysctl(&sb, NULL, 32, req); if (intel_ntb_link_is_up(ntb->device, &speed, &width)) sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u", (unsigned)speed, (unsigned)width); else sbuf_printf(&sb, "down"); error = sbuf_finish(&sb); sbuf_delete(&sb); if (error || !req->newptr) return (error); return (EINVAL); } static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb = arg1; unsigned res; int error; res = intel_ntb_link_is_up(ntb->device, NULL, NULL); error = SYSCTL_OUT(req, &res, sizeof(res)); if (error || !req->newptr) return (error); return (EINVAL); } static int sysctl_handle_register(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb; const void *outp; uintptr_t sz; uint64_t umv; char be[sizeof(umv)]; size_t outsz; uint32_t reg; bool db, pci; int error; ntb = arg1; reg = arg2 & ~NTB_REGFLAGS_MASK; sz = arg2 & NTB_REGSZ_MASK; db = (arg2 & NTB_DB_READ) != 0; pci = (arg2 & NTB_PCI_REG) != 0; KASSERT(!(db && pci), ("bogus")); if (db) { KASSERT(sz == NTB_REG_64, ("bogus")); umv = db_ioread(ntb, reg); outsz = sizeof(uint64_t); } else { switch (sz) { case NTB_REG_64: if (pci) umv = pci_read_config(ntb->device, reg, 8); else umv = intel_ntb_reg_read(8, reg); outsz = sizeof(uint64_t); break; case NTB_REG_32: if (pci) umv = pci_read_config(ntb->device, reg, 4); else umv = intel_ntb_reg_read(4, reg); outsz = sizeof(uint32_t); break; case NTB_REG_16: if (pci) umv = pci_read_config(ntb->device, reg, 2); else umv = intel_ntb_reg_read(2, reg); outsz = sizeof(uint16_t); break; case NTB_REG_8: if (pci) umv = pci_read_config(ntb->device, reg, 1); else umv = intel_ntb_reg_read(1, reg); outsz = sizeof(uint8_t); break; default: panic("bogus"); break; } } /* Encode bigendian so that sysctl -x is legible. */ be64enc(be, umv); outp = ((char *)be) + sizeof(umv) - outsz; error = SYSCTL_OUT(req, outp, outsz); if (error || !req->newptr) return (error); return (EINVAL); } static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx) { if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 && uidx >= ntb->b2b_mw_idx) || (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx)) uidx++; if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 && uidx >= ntb->b2b_mw_idx) && (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx)) uidx++; return (uidx); } +#ifndef EARLY_AP_STARTUP +static int msix_ready; + static void +intel_ntb_msix_ready(void *arg __unused) +{ + + msix_ready = 1; +} +SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY, + intel_ntb_msix_ready, NULL); +#endif + +static void intel_ntb_exchange_msix(void *ctx) { struct ntb_softc *ntb; uint32_t val; unsigned i; ntb = ctx; if (ntb->peer_msix_good) goto msix_good; if (ntb->peer_msix_done) goto msix_done; + +#ifndef EARLY_AP_STARTUP + /* Block MSIX negotiation until SMP started and IRQ reshuffled. 
*/ + if (!msix_ready) + goto reschedule; +#endif intel_ntb_get_msix_info(ntb); for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i, ntb->msix_data[i].nmd_data); intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i, ntb->msix_data[i].nmd_ofs - ntb->msix_xlat); } intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD); intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val); if (val != NTB_MSIX_VER_GUARD) goto reschedule; for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val); intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val); ntb->peer_msix_data[i].nmd_data = val; intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val); intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val); ntb->peer_msix_data[i].nmd_ofs = val; } ntb->peer_msix_done = true; msix_done: intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE, NTB_MSIX_RECEIVED); intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val); if (val != NTB_MSIX_RECEIVED) goto reschedule; intel_ntb_spad_clear(ntb->device); ntb->peer_msix_good = true; /* Give peer time to see our NTB_MSIX_RECEIVED. */ goto reschedule; msix_good: intel_ntb_poll_link(ntb); ntb_link_event(ntb->device); return; reschedule: ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2); if (_xeon_link_is_up(ntb)) { callout_reset(&ntb->peer_msix_work, hz * (ntb->peer_msix_good ? 2 : 1) / 100, intel_ntb_exchange_msix, ntb); } else intel_ntb_spad_clear(ntb->device); } /* * Public API to the rest of the OS */ static uint8_t intel_ntb_spad_count(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); return (ntb->spad_count); } static uint8_t intel_ntb_mw_count(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); uint8_t res; res = ntb->mw_count; if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0) res--; if (ntb->msix_mw_idx != B2B_MW_DISABLED) res--; return (res); } static int intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val) { struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val); return (0); } /* * Zeros the local scratchpad. 
*/ static void intel_ntb_spad_clear(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); unsigned i; for (i = 0; i < ntb->spad_count; i++) intel_ntb_spad_write(dev, i, 0); } static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val) { struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); *val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4); return (0); } static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val) { struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val); else intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val); return (0); } static int intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val) { struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) *val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4); else *val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4); return (0); } static int intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base, caddr_t *vbase, size_t *size, size_t *align, size_t *align_size, bus_addr_t *plimit) { struct ntb_softc *ntb = device_get_softc(dev); struct ntb_pci_bar_info *bar; bus_addr_t limit; size_t bar_b2b_off; enum ntb_bar bar_num; if (mw_idx >= intel_ntb_mw_count(dev)) return (EINVAL); mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx); bar_num = intel_ntb_mw_to_bar(ntb, mw_idx); bar = &ntb->bar_info[bar_num]; bar_b2b_off = 0; if (mw_idx == ntb->b2b_mw_idx) { KASSERT(ntb->b2b_off != 0, ("user shouldn't get non-shared b2b mw")); bar_b2b_off = ntb->b2b_off; } if (bar_is_64bit(ntb, bar_num)) limit = BUS_SPACE_MAXADDR; else limit = BUS_SPACE_MAXADDR_32BIT; if (base != NULL) *base = bar->pbase + bar_b2b_off; if (vbase != NULL) *vbase = bar->vbase + bar_b2b_off; if (size != NULL) *size = bar->size - bar_b2b_off; if (align != NULL) *align = bar->size; if (align_size != NULL) *align_size = 1; if (plimit != NULL) *plimit = limit; return (0); } static int intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size) { struct ntb_softc *ntb = device_get_softc(dev); struct ntb_pci_bar_info *bar; uint64_t base, limit, reg_val; size_t bar_size, mw_size; uint32_t base_reg, xlat_reg, limit_reg; enum ntb_bar bar_num; if (idx >= intel_ntb_mw_count(dev)) return (EINVAL); idx = intel_ntb_user_mw_to_idx(ntb, idx); bar_num = intel_ntb_mw_to_bar(ntb, idx); bar = &ntb->bar_info[bar_num]; bar_size = bar->size; if (idx == ntb->b2b_mw_idx) mw_size = bar_size - ntb->b2b_off; else mw_size = bar_size; /* Hardware requires that addr is aligned to bar size */ if ((addr & (bar_size - 1)) != 0) return (EINVAL); if (size > mw_size) return (EINVAL); bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg); limit = 0; if (bar_is_64bit(ntb, bar_num)) { base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK; if (limit_reg != 0 && size != mw_size) limit = base + size; /* Set and verify translation address */ intel_ntb_reg_write(8, xlat_reg, addr); reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK; if (reg_val != addr) { intel_ntb_reg_write(8, xlat_reg, 0); return (EIO); } /* Set and verify the limit */ intel_ntb_reg_write(8, limit_reg, limit); reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK; if (reg_val != limit) { intel_ntb_reg_write(8, limit_reg, base); intel_ntb_reg_write(8, xlat_reg, 
0); return (EIO); } } else { /* Configure 32-bit (split) BAR MW */ if ((addr & UINT32_MAX) != addr) return (ERANGE); if (((addr + size) & UINT32_MAX) != (addr + size)) return (ERANGE); base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK; if (limit_reg != 0 && size != mw_size) limit = base + size; /* Set and verify translation address */ intel_ntb_reg_write(4, xlat_reg, addr); reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK; if (reg_val != addr) { intel_ntb_reg_write(4, xlat_reg, 0); return (EIO); } /* Set and verify the limit */ intel_ntb_reg_write(4, limit_reg, limit); reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK; if (reg_val != limit) { intel_ntb_reg_write(4, limit_reg, base); intel_ntb_reg_write(4, xlat_reg, 0); return (EIO); } } return (0); } static int intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx) { return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0)); } static int intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode) { struct ntb_softc *ntb = device_get_softc(dev); struct ntb_pci_bar_info *bar; if (idx >= intel_ntb_mw_count(dev)) return (EINVAL); idx = intel_ntb_user_mw_to_idx(ntb, idx); bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)]; *mode = bar->map_mode; return (0); } static int intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode) { struct ntb_softc *ntb = device_get_softc(dev); if (idx >= intel_ntb_mw_count(dev)) return (EINVAL); idx = intel_ntb_user_mw_to_idx(ntb, idx); return (intel_ntb_mw_set_wc_internal(ntb, idx, mode)); } static int intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode) { struct ntb_pci_bar_info *bar; int rc; bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)]; if (bar->map_mode == mode) return (0); rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode); if (rc == 0) bar->map_mode = mode; return (rc); } static void intel_ntb_peer_db_set(device_t dev, uint64_t bit) { struct ntb_softc *ntb = device_get_softc(dev); if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { struct ntb_pci_bar_info *lapic; unsigned i; lapic = ntb->peer_lapic_bar; for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { if ((bit & intel_ntb_db_vector_mask(dev, i)) != 0) bus_space_write_4(lapic->pci_bus_tag, lapic->pci_bus_handle, ntb->peer_msix_data[i].nmd_ofs, ntb->peer_msix_data[i].nmd_data); } return; } if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) { intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit); return; } db_iowrite(ntb, ntb->peer_reg->db_bell, bit); } static int intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size) { struct ntb_softc *ntb = device_get_softc(dev); struct ntb_pci_bar_info *bar; uint64_t regoff; KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL")); if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) { bar = &ntb->bar_info[NTB_CONFIG_BAR]; regoff = ntb->peer_reg->db_bell; } else { KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED, ("invalid b2b idx")); bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)]; regoff = XEON_PDOORBELL_OFFSET; } KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh")); /* HACK: Specific to current x86 bus implementation. 
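 * (Editorial note: this computation assumes the current x86 bus_space implementation, in which the resource handle can be treated as an address to which the register offset is simply added; portable consumers should not rely on this, which is why the comment is tagged HACK.)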
*/ *db_addr = ((uint64_t)bar->pci_bus_handle + regoff); *db_size = ntb->reg->db_size; return (0); } static uint64_t intel_ntb_db_valid_mask(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); return (ntb->db_valid_mask); } static int intel_ntb_db_vector_count(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); return (ntb->db_vec_count); } static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector) { struct ntb_softc *ntb = device_get_softc(dev); if (vector > ntb->db_vec_count) return (0); return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector)); } static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width) { struct ntb_softc *ntb = device_get_softc(dev); if (speed != NULL) *speed = intel_ntb_link_sta_speed(ntb); if (width != NULL) *width = intel_ntb_link_sta_width(ntb); return (link_is_up(ntb)); } static void save_bar_parameters(struct ntb_pci_bar_info *bar) { bar->pci_bus_tag = rman_get_bustag(bar->pci_resource); bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource); bar->pbase = rman_get_start(bar->pci_resource); bar->size = rman_get_size(bar->pci_resource); bar->vbase = rman_get_virtual(bar->pci_resource); } static device_method_t ntb_intel_methods[] = { /* Device interface */ DEVMETHOD(device_probe, intel_ntb_probe), DEVMETHOD(device_attach, intel_ntb_attach), DEVMETHOD(device_detach, intel_ntb_detach), /* NTB interface */ DEVMETHOD(ntb_link_is_up, intel_ntb_link_is_up), DEVMETHOD(ntb_link_enable, intel_ntb_link_enable), DEVMETHOD(ntb_link_disable, intel_ntb_link_disable), DEVMETHOD(ntb_link_enabled, intel_ntb_link_enabled), DEVMETHOD(ntb_mw_count, intel_ntb_mw_count), DEVMETHOD(ntb_mw_get_range, intel_ntb_mw_get_range), DEVMETHOD(ntb_mw_set_trans, intel_ntb_mw_set_trans), DEVMETHOD(ntb_mw_clear_trans, intel_ntb_mw_clear_trans), DEVMETHOD(ntb_mw_get_wc, intel_ntb_mw_get_wc), DEVMETHOD(ntb_mw_set_wc, intel_ntb_mw_set_wc), DEVMETHOD(ntb_spad_count, intel_ntb_spad_count), DEVMETHOD(ntb_spad_clear, intel_ntb_spad_clear), DEVMETHOD(ntb_spad_write, intel_ntb_spad_write), DEVMETHOD(ntb_spad_read, intel_ntb_spad_read), DEVMETHOD(ntb_peer_spad_write, intel_ntb_peer_spad_write), DEVMETHOD(ntb_peer_spad_read, intel_ntb_peer_spad_read), DEVMETHOD(ntb_db_valid_mask, intel_ntb_db_valid_mask), DEVMETHOD(ntb_db_vector_count, intel_ntb_db_vector_count), DEVMETHOD(ntb_db_vector_mask, intel_ntb_db_vector_mask), DEVMETHOD(ntb_db_clear, intel_ntb_db_clear), DEVMETHOD(ntb_db_clear_mask, intel_ntb_db_clear_mask), DEVMETHOD(ntb_db_read, intel_ntb_db_read), DEVMETHOD(ntb_db_set_mask, intel_ntb_db_set_mask), DEVMETHOD(ntb_peer_db_addr, intel_ntb_peer_db_addr), DEVMETHOD(ntb_peer_db_set, intel_ntb_peer_db_set), DEVMETHOD_END }; static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods, sizeof(struct ntb_softc)); DRIVER_MODULE(ntb_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL); MODULE_DEPEND(ntb_intel, ntb, 1, 1, 1); MODULE_VERSION(ntb_intel, 1); Index: user/alc/PQ_LAUNDRY/sys/dev/pci/pci_pci.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/pci/pci_pci.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/pci/pci_pci.c (revision 303642) @@ -1,2807 +1,2809 @@ /*- * Copyright (c) 1994,1995 Stefan Esser, Wolfgang StanglMeier * Copyright (c) 2000 Michael Smith * Copyright (c) 2000 BSDi * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * PCI:PCI bridge support. */ #include "opt_pci.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" static int pcib_probe(device_t dev); static int pcib_suspend(device_t dev); static int pcib_resume(device_t dev); static int pcib_power_for_sleep(device_t pcib, device_t dev, int *pstate); static int pcib_ari_get_id(device_t pcib, device_t dev, enum pci_id_type type, uintptr_t *id); static uint32_t pcib_read_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, int width); static void pcib_write_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, uint32_t val, int width); static int pcib_ari_maxslots(device_t dev); static int pcib_ari_maxfuncs(device_t dev); static int pcib_try_enable_ari(device_t pcib, device_t dev); static int pcib_ari_enabled(device_t pcib); static void pcib_ari_decode_rid(device_t pcib, uint16_t rid, int *bus, int *slot, int *func); #ifdef PCI_HP static void pcib_pcie_ab_timeout(void *arg); static void pcib_pcie_cc_timeout(void *arg); static void pcib_pcie_dll_timeout(void *arg); #endif static device_method_t pcib_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pcib_probe), DEVMETHOD(device_attach, pcib_attach), DEVMETHOD(device_detach, pcib_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, pcib_suspend), DEVMETHOD(device_resume, pcib_resume), /* Bus interface */ DEVMETHOD(bus_child_present, pcib_child_present), DEVMETHOD(bus_read_ivar, pcib_read_ivar), DEVMETHOD(bus_write_ivar, pcib_write_ivar), DEVMETHOD(bus_alloc_resource, pcib_alloc_resource), #ifdef NEW_PCIB DEVMETHOD(bus_adjust_resource, pcib_adjust_resource), DEVMETHOD(bus_release_resource, pcib_release_resource), #else DEVMETHOD(bus_adjust_resource, bus_generic_adjust_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), #endif DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), /* pcib interface */ DEVMETHOD(pcib_maxslots, pcib_ari_maxslots), DEVMETHOD(pcib_maxfuncs, pcib_ari_maxfuncs), 
DEVMETHOD(pcib_read_config, pcib_read_config), DEVMETHOD(pcib_write_config, pcib_write_config), DEVMETHOD(pcib_route_interrupt, pcib_route_interrupt), DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), DEVMETHOD(pcib_release_msi, pcib_release_msi), DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), DEVMETHOD(pcib_release_msix, pcib_release_msix), DEVMETHOD(pcib_map_msi, pcib_map_msi), DEVMETHOD(pcib_power_for_sleep, pcib_power_for_sleep), DEVMETHOD(pcib_get_id, pcib_ari_get_id), DEVMETHOD(pcib_try_enable_ari, pcib_try_enable_ari), DEVMETHOD(pcib_ari_enabled, pcib_ari_enabled), DEVMETHOD(pcib_decode_rid, pcib_ari_decode_rid), DEVMETHOD_END }; static devclass_t pcib_devclass; DEFINE_CLASS_0(pcib, pcib_driver, pcib_methods, sizeof(struct pcib_softc)); DRIVER_MODULE(pcib, pci, pcib_driver, pcib_devclass, NULL, NULL); -#ifdef NEW_PCIB +#if defined(NEW_PCIB) || defined(PCI_HP) SYSCTL_DECL(_hw_pci); +#endif +#ifdef NEW_PCIB static int pci_clear_pcib; SYSCTL_INT(_hw_pci, OID_AUTO, clear_pcib, CTLFLAG_RDTUN, &pci_clear_pcib, 0, "Clear firmware-assigned resources for PCI-PCI bridge I/O windows."); /* * Is a resource from a child device sub-allocated from one of our * resource managers? */ static int pcib_is_resource_managed(struct pcib_softc *sc, int type, struct resource *r) { switch (type) { #ifdef PCI_RES_BUS case PCI_RES_BUS: return (rman_is_region_manager(r, &sc->bus.rman)); #endif case SYS_RES_IOPORT: return (rman_is_region_manager(r, &sc->io.rman)); case SYS_RES_MEMORY: /* Prefetchable resources may live in either memory rman. */ if (rman_get_flags(r) & RF_PREFETCHABLE && rman_is_region_manager(r, &sc->pmem.rman)) return (1); return (rman_is_region_manager(r, &sc->mem.rman)); } return (0); } static int pcib_is_window_open(struct pcib_window *pw) { return (pw->valid && pw->base < pw->limit); } /* * XXX: If RF_ACTIVE did not also imply allocating a bus space tag and * handle for the resource, we could pass RF_ACTIVE up to the PCI bus * when allocating the resource windows and rely on the PCI bus driver * to do this for us. */ static void pcib_activate_window(struct pcib_softc *sc, int type) { PCI_ENABLE_IO(device_get_parent(sc->dev), sc->dev, type); } static void pcib_write_windows(struct pcib_softc *sc, int mask) { device_t dev; uint32_t val; dev = sc->dev; if (sc->io.valid && mask & WIN_IO) { val = pci_read_config(dev, PCIR_IOBASEL_1, 1); if ((val & PCIM_BRIO_MASK) == PCIM_BRIO_32) { pci_write_config(dev, PCIR_IOBASEH_1, sc->io.base >> 16, 2); pci_write_config(dev, PCIR_IOLIMITH_1, sc->io.limit >> 16, 2); } pci_write_config(dev, PCIR_IOBASEL_1, sc->io.base >> 8, 1); pci_write_config(dev, PCIR_IOLIMITL_1, sc->io.limit >> 8, 1); } if (mask & WIN_MEM) { pci_write_config(dev, PCIR_MEMBASE_1, sc->mem.base >> 16, 2); pci_write_config(dev, PCIR_MEMLIMIT_1, sc->mem.limit >> 16, 2); } if (sc->pmem.valid && mask & WIN_PMEM) { val = pci_read_config(dev, PCIR_PMBASEL_1, 2); if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) { pci_write_config(dev, PCIR_PMBASEH_1, sc->pmem.base >> 32, 4); pci_write_config(dev, PCIR_PMLIMITH_1, sc->pmem.limit >> 32, 4); } pci_write_config(dev, PCIR_PMBASEL_1, sc->pmem.base >> 16, 2); pci_write_config(dev, PCIR_PMLIMITL_1, sc->pmem.limit >> 16, 2); } } /* * This is used to reject I/O port allocations that conflict with an * ISA alias range. */ static int pcib_is_isa_range(struct pcib_softc *sc, rman_res_t start, rman_res_t end, rman_res_t count) { rman_res_t next_alias; if (!(sc->bridgectl & PCIB_BCR_ISA_ENABLE)) return (0); /* Only check fixed ranges for overlap. 
*/ if (start + count - 1 != end) return (0); /* ISA aliases are only in the lower 64KB of I/O space. */ if (start >= 65536) return (0); /* Check for overlap with 0x000 - 0x0ff as a special case. */ if (start < 0x100) goto alias; /* * If the start address is an alias, the range is an alias. * Otherwise, compute the start of the next alias range and * check if it is before the end of the candidate range. */ if ((start & 0x300) != 0) goto alias; next_alias = (start & ~0x3fful) | 0x100; if (next_alias <= end) goto alias; return (0); alias: if (bootverbose) device_printf(sc->dev, "I/O range %#jx-%#jx overlaps with an ISA alias\n", start, end); return (1); } static void pcib_add_window_resources(struct pcib_window *w, struct resource **res, int count) { struct resource **newarray; int error, i; newarray = malloc(sizeof(struct resource *) * (w->count + count), M_DEVBUF, M_WAITOK); if (w->res != NULL) bcopy(w->res, newarray, sizeof(struct resource *) * w->count); bcopy(res, newarray + w->count, sizeof(struct resource *) * count); free(w->res, M_DEVBUF); w->res = newarray; w->count += count; for (i = 0; i < count; i++) { error = rman_manage_region(&w->rman, rman_get_start(res[i]), rman_get_end(res[i])); if (error) panic("Failed to add resource to rman"); } } typedef void (nonisa_callback)(rman_res_t start, rman_res_t end, void *arg); static void pcib_walk_nonisa_ranges(rman_res_t start, rman_res_t end, nonisa_callback *cb, void *arg) { rman_res_t next_end; /* * If start is within an ISA alias range, move up to the start * of the next non-alias range. As a special case, addresses * in the range 0x000 - 0x0ff should also be skipped since * those are used for various system I/O devices in ISA * systems. */ if (start <= 65535) { if (start < 0x100 || (start & 0x300) != 0) { start &= ~0x3ff; start += 0x400; } } /* ISA aliases are only in the lower 64KB of I/O space. */ while (start <= MIN(end, 65535)) { next_end = MIN(start | 0xff, end); cb(start, next_end, arg); start += 0x400; } if (start <= end) cb(start, end, arg); } static void count_ranges(rman_res_t start, rman_res_t end, void *arg) { int *countp; countp = arg; (*countp)++; } struct alloc_state { struct resource **res; struct pcib_softc *sc; int count, error; }; static void alloc_ranges(rman_res_t start, rman_res_t end, void *arg) { struct alloc_state *as; struct pcib_window *w; int rid; as = arg; if (as->error != 0) return; w = &as->sc->io; rid = w->reg; if (bootverbose) device_printf(as->sc->dev, "allocating non-ISA range %#jx-%#jx\n", start, end); as->res[as->count] = bus_alloc_resource(as->sc->dev, SYS_RES_IOPORT, &rid, start, end, end - start + 1, 0); if (as->res[as->count] == NULL) as->error = ENXIO; else as->count++; } static int pcib_alloc_nonisa_ranges(struct pcib_softc *sc, rman_res_t start, rman_res_t end) { struct alloc_state as; int i, new_count; /* First, see how many ranges we need. */ new_count = 0; pcib_walk_nonisa_ranges(start, end, count_ranges, &new_count); /* Second, allocate the ranges. */ as.res = malloc(sizeof(struct resource *) * new_count, M_DEVBUF, M_WAITOK); as.sc = sc; as.count = 0; as.error = 0; pcib_walk_nonisa_ranges(start, end, alloc_ranges, &as); if (as.error != 0) { for (i = 0; i < as.count; i++) bus_release_resource(sc->dev, SYS_RES_IOPORT, sc->io.reg, as.res[i]); free(as.res, M_DEVBUF); return (as.error); } KASSERT(as.count == new_count, ("%s: count mismatch", __func__)); /* Third, add the ranges to the window. 
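[Editorial aside] The layout that pcib_is_isa_range() and pcib_walk_nonisa_ranges() encode is easy to misread in this flattened form: when the bridge has ISA decode enabled, only the first 0x100 bytes of every 0x400-byte block below 64K are forwarded as ordinary I/O ports (and 0x000-0x0ff is reserved for system devices); the remaining blocks alias legacy ISA ranges. A small userland-style sketch of the same walk, using a hypothetical walk_nonisa() helper rather than driver code:

#include <stdio.h>
#include <stdint.h>

/*
 * Report the usable (non-aliased) pieces of an I/O port range behind a
 * bridge with ISA decode enabled, mirroring pcib_walk_nonisa_ranges().
 */
static void
walk_nonisa(uint32_t start, uint32_t end)
{
        uint32_t next_end;

        /* Skip 0x000-0x0ff and any aliased block the range starts in. */
        if (start <= 0xffff && (start < 0x100 || (start & 0x300) != 0))
                start = (start & ~0x3ffu) + 0x400;
        while (start <= end && start <= 0xffff) {
                next_end = (start | 0xff) < end ? (start | 0xff) : end;
                printf("  usable: 0x%x-0x%x\n", start, next_end);
                start += 0x400;
        }
        if (start <= end)       /* everything at or above 64K is usable */
                printf("  usable: 0x%x-0x%x\n", start, end);
}

int
main(void)
{
        /* A request for 0x200-0xfff yields 0x400-0x4ff, 0x800-0x8ff, 0xc00-0xcff. */
        walk_nonisa(0x200, 0xfff);
        return (0);
}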
*/ pcib_add_window_resources(&sc->io, as.res, as.count); free(as.res, M_DEVBUF); return (0); } static void pcib_alloc_window(struct pcib_softc *sc, struct pcib_window *w, int type, int flags, pci_addr_t max_address) { struct resource *res; char buf[64]; int error, rid; if (max_address != (rman_res_t)max_address) max_address = ~0; w->rman.rm_start = 0; w->rman.rm_end = max_address; w->rman.rm_type = RMAN_ARRAY; snprintf(buf, sizeof(buf), "%s %s window", device_get_nameunit(sc->dev), w->name); w->rman.rm_descr = strdup(buf, M_DEVBUF); error = rman_init(&w->rman); if (error) panic("Failed to initialize %s %s rman", device_get_nameunit(sc->dev), w->name); if (!pcib_is_window_open(w)) return; if (w->base > max_address || w->limit > max_address) { device_printf(sc->dev, "initial %s window has too many bits, ignoring\n", w->name); return; } if (type == SYS_RES_IOPORT && sc->bridgectl & PCIB_BCR_ISA_ENABLE) (void)pcib_alloc_nonisa_ranges(sc, w->base, w->limit); else { rid = w->reg; res = bus_alloc_resource(sc->dev, type, &rid, w->base, w->limit, w->limit - w->base + 1, flags); if (res != NULL) pcib_add_window_resources(w, &res, 1); } if (w->res == NULL) { device_printf(sc->dev, "failed to allocate initial %s window: %#jx-%#jx\n", w->name, (uintmax_t)w->base, (uintmax_t)w->limit); w->base = max_address; w->limit = 0; pcib_write_windows(sc, w->mask); return; } pcib_activate_window(sc, type); } /* * Initialize I/O windows. */ static void pcib_probe_windows(struct pcib_softc *sc) { pci_addr_t max; device_t dev; uint32_t val; dev = sc->dev; if (pci_clear_pcib) { pcib_bridge_init(dev); } /* Determine if the I/O port window is implemented. */ val = pci_read_config(dev, PCIR_IOBASEL_1, 1); if (val == 0) { /* * If 'val' is zero, then only 16-bits of I/O space * are supported. */ pci_write_config(dev, PCIR_IOBASEL_1, 0xff, 1); if (pci_read_config(dev, PCIR_IOBASEL_1, 1) != 0) { sc->io.valid = 1; pci_write_config(dev, PCIR_IOBASEL_1, 0, 1); } } else sc->io.valid = 1; /* Read the existing I/O port window. */ if (sc->io.valid) { sc->io.reg = PCIR_IOBASEL_1; sc->io.step = 12; sc->io.mask = WIN_IO; sc->io.name = "I/O port"; if ((val & PCIM_BRIO_MASK) == PCIM_BRIO_32) { sc->io.base = PCI_PPBIOBASE( pci_read_config(dev, PCIR_IOBASEH_1, 2), val); sc->io.limit = PCI_PPBIOLIMIT( pci_read_config(dev, PCIR_IOLIMITH_1, 2), pci_read_config(dev, PCIR_IOLIMITL_1, 1)); max = 0xffffffff; } else { sc->io.base = PCI_PPBIOBASE(0, val); sc->io.limit = PCI_PPBIOLIMIT(0, pci_read_config(dev, PCIR_IOLIMITL_1, 1)); max = 0xffff; } pcib_alloc_window(sc, &sc->io, SYS_RES_IOPORT, 0, max); } /* Read the existing memory window. */ sc->mem.valid = 1; sc->mem.reg = PCIR_MEMBASE_1; sc->mem.step = 20; sc->mem.mask = WIN_MEM; sc->mem.name = "memory"; sc->mem.base = PCI_PPBMEMBASE(0, pci_read_config(dev, PCIR_MEMBASE_1, 2)); sc->mem.limit = PCI_PPBMEMLIMIT(0, pci_read_config(dev, PCIR_MEMLIMIT_1, 2)); pcib_alloc_window(sc, &sc->mem, SYS_RES_MEMORY, 0, 0xffffffff); /* Determine if the prefetchable memory window is implemented. */ val = pci_read_config(dev, PCIR_PMBASEL_1, 2); if (val == 0) { /* * If 'val' is zero, then only 32-bits of memory space * are supported. */ pci_write_config(dev, PCIR_PMBASEL_1, 0xffff, 2); if (pci_read_config(dev, PCIR_PMBASEL_1, 2) != 0) { sc->pmem.valid = 1; pci_write_config(dev, PCIR_PMBASEL_1, 0, 2); } } else sc->pmem.valid = 1; /* Read the existing prefetchable memory window. 
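[Editorial aside] The window probing below reads base/limit pairs that pack addresses into their upper bits: the 8-bit I/O base and limit registers carry address bits 15:12 (with the low nibble flagging 32-bit capability), and the 16-bit memory registers carry address bits 31:20. A hedged arithmetic sketch of that decode, using local DEMO_* macros rather than the pcireg.h PCI_PPB* macros the driver actually uses:

#include <stdio.h>
#include <stdint.h>

/* Local illustrations of the bridge window decode; values are examples. */
#define DEMO_IOBASE(lo)         (((uint32_t)(lo) & 0xf0) << 8)
#define DEMO_IOLIMIT(lo)        ((((uint32_t)(lo) & 0xf0) << 8) | 0xfff)
#define DEMO_MEMBASE(reg)       (((uint32_t)(reg) & 0xfff0) << 16)
#define DEMO_MEMLIMIT(reg)      ((((uint32_t)(reg) & 0xfff0) << 16) | 0xfffff)

int
main(void)
{
        /* I/O base 0x40, limit 0x50: a 16-bit window decoding 0x4000-0x5fff. */
        printf("I/O window 0x%x-0x%x\n", DEMO_IOBASE(0x40), DEMO_IOLIMIT(0x50));
        /* Memory base 0xc000, limit 0xc010: window 0xc0000000-0xc01fffff. */
        printf("mem window 0x%x-0x%x\n", DEMO_MEMBASE(0xc000), DEMO_MEMLIMIT(0xc010));
        return (0);
}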
*/ if (sc->pmem.valid) { sc->pmem.reg = PCIR_PMBASEL_1; sc->pmem.step = 20; sc->pmem.mask = WIN_PMEM; sc->pmem.name = "prefetch"; if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) { sc->pmem.base = PCI_PPBMEMBASE( pci_read_config(dev, PCIR_PMBASEH_1, 4), val); sc->pmem.limit = PCI_PPBMEMLIMIT( pci_read_config(dev, PCIR_PMLIMITH_1, 4), pci_read_config(dev, PCIR_PMLIMITL_1, 2)); max = 0xffffffffffffffff; } else { sc->pmem.base = PCI_PPBMEMBASE(0, val); sc->pmem.limit = PCI_PPBMEMLIMIT(0, pci_read_config(dev, PCIR_PMLIMITL_1, 2)); max = 0xffffffff; } pcib_alloc_window(sc, &sc->pmem, SYS_RES_MEMORY, RF_PREFETCHABLE, max); } } static void pcib_release_window(struct pcib_softc *sc, struct pcib_window *w, int type) { device_t dev; int error, i; if (!w->valid) return; dev = sc->dev; error = rman_fini(&w->rman); if (error) { device_printf(dev, "failed to release %s rman\n", w->name); return; } free(__DECONST(char *, w->rman.rm_descr), M_DEVBUF); for (i = 0; i < w->count; i++) { error = bus_free_resource(dev, type, w->res[i]); if (error) device_printf(dev, "failed to release %s resource: %d\n", w->name, error); } free(w->res, M_DEVBUF); } static void pcib_free_windows(struct pcib_softc *sc) { pcib_release_window(sc, &sc->pmem, SYS_RES_MEMORY); pcib_release_window(sc, &sc->mem, SYS_RES_MEMORY); pcib_release_window(sc, &sc->io, SYS_RES_IOPORT); } #ifdef PCI_RES_BUS /* * Allocate a suitable secondary bus for this bridge if needed and * initialize the resource manager for the secondary bus range. Note * that the minimum count is a desired value and this may allocate a * smaller range. */ void pcib_setup_secbus(device_t dev, struct pcib_secbus *bus, int min_count) { char buf[64]; int error, rid, sec_reg; switch (pci_read_config(dev, PCIR_HDRTYPE, 1) & PCIM_HDRTYPE) { case PCIM_HDRTYPE_BRIDGE: sec_reg = PCIR_SECBUS_1; bus->sub_reg = PCIR_SUBBUS_1; break; case PCIM_HDRTYPE_CARDBUS: sec_reg = PCIR_SECBUS_2; bus->sub_reg = PCIR_SUBBUS_2; break; default: panic("not a PCI bridge"); } bus->sec = pci_read_config(dev, sec_reg, 1); bus->sub = pci_read_config(dev, bus->sub_reg, 1); bus->dev = dev; bus->rman.rm_start = 0; bus->rman.rm_end = PCI_BUSMAX; bus->rman.rm_type = RMAN_ARRAY; snprintf(buf, sizeof(buf), "%s bus numbers", device_get_nameunit(dev)); bus->rman.rm_descr = strdup(buf, M_DEVBUF); error = rman_init(&bus->rman); if (error) panic("Failed to initialize %s bus number rman", device_get_nameunit(dev)); /* * Allocate a bus range. This will return an existing bus range * if one exists, or a new bus range if one does not. */ rid = 0; bus->res = bus_alloc_resource_anywhere(dev, PCI_RES_BUS, &rid, min_count, 0); if (bus->res == NULL) { /* * Fall back to just allocating a range of a single bus * number. */ bus->res = bus_alloc_resource_anywhere(dev, PCI_RES_BUS, &rid, 1, 0); } else if (rman_get_size(bus->res) < min_count) /* * Attempt to grow the existing range to satisfy the * minimum desired count. */ (void)bus_adjust_resource(dev, PCI_RES_BUS, bus->res, rman_get_start(bus->res), rman_get_start(bus->res) + min_count - 1); /* * Add the initial resource to the rman. 
*/ if (bus->res != NULL) { error = rman_manage_region(&bus->rman, rman_get_start(bus->res), rman_get_end(bus->res)); if (error) panic("Failed to add resource to rman"); bus->sec = rman_get_start(bus->res); bus->sub = rman_get_end(bus->res); } } void pcib_free_secbus(device_t dev, struct pcib_secbus *bus) { int error; error = rman_fini(&bus->rman); if (error) { device_printf(dev, "failed to release bus number rman\n"); return; } free(__DECONST(char *, bus->rman.rm_descr), M_DEVBUF); error = bus_free_resource(dev, PCI_RES_BUS, bus->res); if (error) device_printf(dev, "failed to release bus numbers resource: %d\n", error); } static struct resource * pcib_suballoc_bus(struct pcib_secbus *bus, device_t child, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct resource *res; res = rman_reserve_resource(&bus->rman, start, end, count, flags, child); if (res == NULL) return (NULL); if (bootverbose) device_printf(bus->dev, "allocated bus range (%ju-%ju) for rid %d of %s\n", rman_get_start(res), rman_get_end(res), *rid, pcib_child_name(child)); rman_set_rid(res, *rid); return (res); } /* * Attempt to grow the secondary bus range. This is much simpler than * for I/O windows as the range can only be grown by increasing * subbus. */ static int pcib_grow_subbus(struct pcib_secbus *bus, rman_res_t new_end) { rman_res_t old_end; int error; old_end = rman_get_end(bus->res); KASSERT(new_end > old_end, ("attempt to shrink subbus")); error = bus_adjust_resource(bus->dev, PCI_RES_BUS, bus->res, rman_get_start(bus->res), new_end); if (error) return (error); if (bootverbose) device_printf(bus->dev, "grew bus range to %ju-%ju\n", rman_get_start(bus->res), rman_get_end(bus->res)); error = rman_manage_region(&bus->rman, old_end + 1, rman_get_end(bus->res)); if (error) panic("Failed to add resource to rman"); bus->sub = rman_get_end(bus->res); pci_write_config(bus->dev, bus->sub_reg, bus->sub, 1); return (0); } struct resource * pcib_alloc_subbus(struct pcib_secbus *bus, device_t child, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct resource *res; rman_res_t start_free, end_free, new_end; /* * First, see if the request can be satisified by the existing * bus range. */ res = pcib_suballoc_bus(bus, child, rid, start, end, count, flags); if (res != NULL) return (res); /* * Figure out a range to grow the bus range. First, find the * first bus number after the last allocated bus in the rman and * enforce that as a minimum starting point for the range. */ if (rman_last_free_region(&bus->rman, &start_free, &end_free) != 0 || end_free != bus->sub) start_free = bus->sub + 1; if (start_free < start) start_free = start; new_end = start_free + count - 1; /* * See if this new range would satisfy the request if it * succeeds. */ if (new_end > end) return (NULL); /* Finally, attempt to grow the existing resource. */ if (bootverbose) { device_printf(bus->dev, "attempting to grow bus range for %ju buses\n", count); printf("\tback candidate range: %ju-%ju\n", start_free, new_end); } if (pcib_grow_subbus(bus, new_end) == 0) return (pcib_suballoc_bus(bus, child, rid, start, end, count, flags)); return (NULL); } #endif #else /* * Is the prefetch window open (eg, can we allocate memory in it?) */ static int pcib_is_prefetch_open(struct pcib_softc *sc) { return (sc->pmembase > 0 && sc->pmembase < sc->pmemlimit); } /* * Is the nonprefetch window open (eg, can we allocate memory in it?) 
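[Editorial aside] The secondary bus growth above is intentionally one-directional: the range can only be extended by raising the subordinate bus number, so pcib_alloc_subbus() looks for the first bus number after the last allocated one, clamps it to the caller's window, and gives up if the grown range would exceed the request. A hypothetical numeric walk-through of that arithmetic (not driver code):

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
        /* Bridge currently decodes buses 4-4; a child wants one bus in 0-255. */
        uint64_t sub = 4, start = 0, end = 255, count = 1;
        uint64_t start_free, new_end;

        start_free = sub + 1;           /* first number past the current range */
        if (start_free < start)
                start_free = start;
        new_end = start_free + count - 1;
        if (new_end > end) {
                printf("cannot grow the bus range\n");
                return (1);
        }
        /* Grow the subordinate bus to 5 and hand bus 5 to the child. */
        printf("grow subordinate bus to %ju, allocate bus %ju\n",
            (uintmax_t)new_end, (uintmax_t)start_free);
        return (0);
}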
*/ static int pcib_is_nonprefetch_open(struct pcib_softc *sc) { return (sc->membase > 0 && sc->membase < sc->memlimit); } /* * Is the io window open (eg, can we allocate ports in it?) */ static int pcib_is_io_open(struct pcib_softc *sc) { return (sc->iobase > 0 && sc->iobase < sc->iolimit); } /* * Get current I/O decode. */ static void pcib_get_io_decode(struct pcib_softc *sc) { device_t dev; uint32_t iolow; dev = sc->dev; iolow = pci_read_config(dev, PCIR_IOBASEL_1, 1); if ((iolow & PCIM_BRIO_MASK) == PCIM_BRIO_32) sc->iobase = PCI_PPBIOBASE( pci_read_config(dev, PCIR_IOBASEH_1, 2), iolow); else sc->iobase = PCI_PPBIOBASE(0, iolow); iolow = pci_read_config(dev, PCIR_IOLIMITL_1, 1); if ((iolow & PCIM_BRIO_MASK) == PCIM_BRIO_32) sc->iolimit = PCI_PPBIOLIMIT( pci_read_config(dev, PCIR_IOLIMITH_1, 2), iolow); else sc->iolimit = PCI_PPBIOLIMIT(0, iolow); } /* * Get current memory decode. */ static void pcib_get_mem_decode(struct pcib_softc *sc) { device_t dev; pci_addr_t pmemlow; dev = sc->dev; sc->membase = PCI_PPBMEMBASE(0, pci_read_config(dev, PCIR_MEMBASE_1, 2)); sc->memlimit = PCI_PPBMEMLIMIT(0, pci_read_config(dev, PCIR_MEMLIMIT_1, 2)); pmemlow = pci_read_config(dev, PCIR_PMBASEL_1, 2); if ((pmemlow & PCIM_BRPM_MASK) == PCIM_BRPM_64) sc->pmembase = PCI_PPBMEMBASE( pci_read_config(dev, PCIR_PMBASEH_1, 4), pmemlow); else sc->pmembase = PCI_PPBMEMBASE(0, pmemlow); pmemlow = pci_read_config(dev, PCIR_PMLIMITL_1, 2); if ((pmemlow & PCIM_BRPM_MASK) == PCIM_BRPM_64) sc->pmemlimit = PCI_PPBMEMLIMIT( pci_read_config(dev, PCIR_PMLIMITH_1, 4), pmemlow); else sc->pmemlimit = PCI_PPBMEMLIMIT(0, pmemlow); } /* * Restore previous I/O decode. */ static void pcib_set_io_decode(struct pcib_softc *sc) { device_t dev; uint32_t iohi; dev = sc->dev; iohi = sc->iobase >> 16; if (iohi > 0) pci_write_config(dev, PCIR_IOBASEH_1, iohi, 2); pci_write_config(dev, PCIR_IOBASEL_1, sc->iobase >> 8, 1); iohi = sc->iolimit >> 16; if (iohi > 0) pci_write_config(dev, PCIR_IOLIMITH_1, iohi, 2); pci_write_config(dev, PCIR_IOLIMITL_1, sc->iolimit >> 8, 1); } /* * Restore previous memory decode. */ static void pcib_set_mem_decode(struct pcib_softc *sc) { device_t dev; pci_addr_t pmemhi; dev = sc->dev; pci_write_config(dev, PCIR_MEMBASE_1, sc->membase >> 16, 2); pci_write_config(dev, PCIR_MEMLIMIT_1, sc->memlimit >> 16, 2); pmemhi = sc->pmembase >> 32; if (pmemhi > 0) pci_write_config(dev, PCIR_PMBASEH_1, pmemhi, 4); pci_write_config(dev, PCIR_PMBASEL_1, sc->pmembase >> 16, 2); pmemhi = sc->pmemlimit >> 32; if (pmemhi > 0) pci_write_config(dev, PCIR_PMLIMITH_1, pmemhi, 4); pci_write_config(dev, PCIR_PMLIMITL_1, sc->pmemlimit >> 16, 2); } #endif #ifdef PCI_HP /* * PCI-express HotPlug support. */ static int pci_enable_pcie_hp = 1; SYSCTL_INT(_hw_pci, OID_AUTO, enable_pcie_hp, CTLFLAG_RDTUN, &pci_enable_pcie_hp, 0, "Enable support for native PCI-express HotPlug."); static void pcib_probe_hotplug(struct pcib_softc *sc) { device_t dev; if (!pci_enable_pcie_hp) return; dev = sc->dev; if (pci_find_cap(dev, PCIY_EXPRESS, NULL) != 0) return; if (!(pcie_read_config(dev, PCIER_FLAGS, 2) & PCIEM_FLAGS_SLOT)) return; sc->pcie_link_cap = pcie_read_config(dev, PCIER_LINK_CAP, 4); sc->pcie_slot_cap = pcie_read_config(dev, PCIER_SLOT_CAP, 4); if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_HPC) sc->flags |= PCIB_HOTPLUG; } /* * Send a HotPlug command to the slot control register. If this slot * uses command completion interrupts and a previous command is still * in progress, then the command is dropped. 
Once the previous * command completes or times out, pcib_pcie_hotplug_update() will be * invoked to post a new command based on the slot's state at that * time. */ static void pcib_pcie_hotplug_command(struct pcib_softc *sc, uint16_t val, uint16_t mask) { device_t dev; uint16_t ctl, new; dev = sc->dev; if (sc->flags & PCIB_HOTPLUG_CMD_PENDING) return; ctl = pcie_read_config(dev, PCIER_SLOT_CTL, 2); new = (ctl & ~mask) | val; if (new == ctl) return; pcie_write_config(dev, PCIER_SLOT_CTL, new, 2); if (!(sc->pcie_slot_cap & PCIEM_SLOT_CAP_NCCS) && (ctl & new) & PCIEM_SLOT_CTL_CCIE) { sc->flags |= PCIB_HOTPLUG_CMD_PENDING; if (!cold) callout_reset(&sc->pcie_cc_timer, hz, pcib_pcie_cc_timeout, sc); } } static void pcib_pcie_hotplug_command_completed(struct pcib_softc *sc) { device_t dev; dev = sc->dev; if (bootverbose) device_printf(dev, "Command Completed\n"); if (!(sc->flags & PCIB_HOTPLUG_CMD_PENDING)) return; callout_stop(&sc->pcie_cc_timer); sc->flags &= ~PCIB_HOTPLUG_CMD_PENDING; wakeup(sc); } /* * Returns true if a card is fully inserted from the user's * perspective. It may not yet be ready for access, but the driver * can now start enabling access if necessary. */ static bool pcib_hotplug_inserted(struct pcib_softc *sc) { /* Pretend the card isn't present if a detach is forced. */ if (sc->flags & PCIB_DETACHING) return (false); /* Card must be present in the slot. */ if ((sc->pcie_slot_sta & PCIEM_SLOT_STA_PDS) == 0) return (false); /* A power fault implicitly turns off power to the slot. */ if (sc->pcie_slot_sta & PCIEM_SLOT_STA_PFD) return (false); /* If the MRL is disengaged, the slot is powered off. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_MRLSP && (sc->pcie_slot_sta & PCIEM_SLOT_STA_MRLSS) != 0) return (false); return (true); } /* * Returns -1 if the card is fully inserted, powered, and ready for * access. Otherwise, returns 0. */ static int pcib_hotplug_present(struct pcib_softc *sc) { device_t dev; dev = sc->dev; /* Card must be inserted. */ if (!pcib_hotplug_inserted(sc)) return (0); /* * Require the Electromechanical Interlock to be engaged if * present. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_EIP && (sc->pcie_slot_sta & PCIEM_SLOT_STA_EIS) == 0) return (0); /* Require the Data Link Layer to be active. */ if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) { if (!(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE)) return (0); } return (-1); } static void pcib_pcie_hotplug_update(struct pcib_softc *sc, uint16_t val, uint16_t mask, bool schedule_task) { bool card_inserted; /* Clear DETACHING if Present Detect has cleared. */ if ((sc->pcie_slot_sta & (PCIEM_SLOT_STA_PDC | PCIEM_SLOT_STA_PDS)) == PCIEM_SLOT_STA_PDC) sc->flags &= ~PCIB_DETACHING; card_inserted = pcib_hotplug_inserted(sc); /* Turn the power indicator on if a card is inserted. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_PIP) { mask |= PCIEM_SLOT_CTL_PIC; if (card_inserted) val |= PCIEM_SLOT_CTL_PI_ON; else if (sc->flags & PCIB_DETACH_PENDING) val |= PCIEM_SLOT_CTL_PI_BLINK; else val |= PCIEM_SLOT_CTL_PI_OFF; } /* Turn the power on via the Power Controller if a card is inserted. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_PCP) { mask |= PCIEM_SLOT_CTL_PCC; if (card_inserted) val |= PCIEM_SLOT_CTL_PC_ON; else val |= PCIEM_SLOT_CTL_PC_OFF; } /* * If a card is inserted, enable the Electromechanical * Interlock. If a card is not inserted (or we are in the * process of detaching), disable the Electromechanical * Interlock. 
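[Editorial aside] The (val, mask) pair threaded through pcib_pcie_hotplug_command() and pcib_pcie_hotplug_update() follows a read-modify-write convention: mask names the Slot Control fields being changed and val carries their new contents, so unrelated fields survive the update. A minimal sketch of that convention; the DEMO_* constants mirror the PCIe Slot Control Power Indicator field layout but are local stand-ins, not the pcireg.h definitions:

#include <stdio.h>
#include <stdint.h>

#define DEMO_SLOT_CTL_PIC       0x0300  /* Power Indicator Control field */
#define DEMO_SLOT_CTL_PI_ON     0x0100  /* ... encoded as "on" */

static uint16_t
slot_ctl_apply(uint16_t ctl, uint16_t val, uint16_t mask)
{
        /* Keep everything outside the mask, substitute the new field value. */
        return ((ctl & ~mask) | val);
}

int
main(void)
{
        uint16_t ctl = 0x1028;  /* some existing Slot Control contents */

        /* Turn the Power Indicator on without disturbing other fields. */
        ctl = slot_ctl_apply(ctl, DEMO_SLOT_CTL_PI_ON, DEMO_SLOT_CTL_PIC);
        printf("new slot control: 0x%04x\n", ctl);
        return (0);
}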
*/ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_EIP) { mask |= PCIEM_SLOT_CTL_EIC; if (card_inserted != !(sc->pcie_slot_sta & PCIEM_SLOT_STA_EIS)) val |= PCIEM_SLOT_CTL_EIC; } /* * Start a timer to see if the Data Link Layer times out. * Note that we only start the timer if Presence Detect * changed on this interrupt. Stop any scheduled timer if * the Data Link Layer is active. */ if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) { if (card_inserted && !(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE) && sc->pcie_slot_sta & PCIEM_SLOT_STA_PDC) { if (cold) device_printf(sc->dev, "Data Link Layer inactive\n"); else callout_reset(&sc->pcie_dll_timer, hz, pcib_pcie_dll_timeout, sc); } else if (sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE) callout_stop(&sc->pcie_dll_timer); } pcib_pcie_hotplug_command(sc, val, mask); /* * During attach the child "pci" device is added sychronously; * otherwise, the task is scheduled to manage the child * device. */ if (schedule_task && (pcib_hotplug_present(sc) != 0) != (sc->child != NULL)) taskqueue_enqueue(taskqueue_thread, &sc->pcie_hp_task); } static void pcib_pcie_intr(void *arg) { struct pcib_softc *sc; device_t dev; sc = arg; dev = sc->dev; sc->pcie_slot_sta = pcie_read_config(dev, PCIER_SLOT_STA, 2); /* Clear the events just reported. */ pcie_write_config(dev, PCIER_SLOT_STA, sc->pcie_slot_sta, 2); if (sc->pcie_slot_sta & PCIEM_SLOT_STA_ABP) { if (sc->flags & PCIB_DETACH_PENDING) { device_printf(dev, "Attention Button Pressed: Detach Cancelled\n"); sc->flags &= ~PCIB_DETACH_PENDING; callout_stop(&sc->pcie_ab_timer); } else { device_printf(dev, "Attention Button Pressed: Detaching in 5 seconds\n"); sc->flags |= PCIB_DETACH_PENDING; callout_reset(&sc->pcie_ab_timer, 5 * hz, pcib_pcie_ab_timeout, sc); } } if (sc->pcie_slot_sta & PCIEM_SLOT_STA_PFD) device_printf(dev, "Power Fault Detected\n"); if (sc->pcie_slot_sta & PCIEM_SLOT_STA_MRLSC) device_printf(dev, "MRL Sensor Changed to %s\n", sc->pcie_slot_sta & PCIEM_SLOT_STA_MRLSS ? "open" : "closed"); if (bootverbose && sc->pcie_slot_sta & PCIEM_SLOT_STA_PDC) device_printf(dev, "Present Detect Changed to %s\n", sc->pcie_slot_sta & PCIEM_SLOT_STA_PDS ? "card present" : "empty"); if (sc->pcie_slot_sta & PCIEM_SLOT_STA_CC) pcib_pcie_hotplug_command_completed(sc); if (sc->pcie_slot_sta & PCIEM_SLOT_STA_DLLSC) { sc->pcie_link_sta = pcie_read_config(dev, PCIER_LINK_STA, 2); if (bootverbose) device_printf(dev, "Data Link Layer State Changed to %s\n", sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE ? 
"active" : "inactive"); } pcib_pcie_hotplug_update(sc, 0, 0, true); } static void pcib_pcie_hotplug_task(void *context, int pending) { struct pcib_softc *sc; device_t dev; sc = context; mtx_lock(&Giant); dev = sc->dev; if (pcib_hotplug_present(sc) != 0) { if (sc->child == NULL) { sc->child = device_add_child(dev, "pci", -1); bus_generic_attach(dev); } } else { if (sc->child != NULL) { if (device_delete_child(dev, sc->child) == 0) sc->child = NULL; } } mtx_unlock(&Giant); } static void pcib_pcie_ab_timeout(void *arg) { struct pcib_softc *sc; device_t dev; sc = arg; dev = sc->dev; mtx_assert(&Giant, MA_OWNED); if (sc->flags & PCIB_DETACH_PENDING) { sc->flags |= PCIB_DETACHING; sc->flags &= ~PCIB_DETACH_PENDING; pcib_pcie_hotplug_update(sc, 0, 0, true); } } static void pcib_pcie_cc_timeout(void *arg) { struct pcib_softc *sc; device_t dev; uint16_t sta; sc = arg; dev = sc->dev; mtx_assert(&Giant, MA_OWNED); sta = pcie_read_config(dev, PCIER_SLOT_STA, 2); if (!(sta & PCIEM_SLOT_STA_CC)) { device_printf(dev, "Hotplug Command Timed Out - forcing detach\n"); sc->flags &= ~(PCIB_HOTPLUG_CMD_PENDING | PCIB_DETACH_PENDING); sc->flags |= PCIB_DETACHING; pcib_pcie_hotplug_update(sc, 0, 0, true); } else { device_printf(dev, "Missed HotPlug interrupt waiting for Command Completion\n"); pcib_pcie_intr(sc); } } static void pcib_pcie_dll_timeout(void *arg) { struct pcib_softc *sc; device_t dev; uint16_t sta; sc = arg; dev = sc->dev; mtx_assert(&Giant, MA_OWNED); sta = pcie_read_config(dev, PCIER_LINK_STA, 2); if (!(sta & PCIEM_LINK_STA_DL_ACTIVE)) { device_printf(dev, "Timed out waiting for Data Link Layer Active\n"); sc->flags |= PCIB_DETACHING; pcib_pcie_hotplug_update(sc, 0, 0, true); } else if (sta != sc->pcie_link_sta) { device_printf(dev, "Missed HotPlug interrupt waiting for DLL Active\n"); pcib_pcie_intr(sc); } } static int pcib_alloc_pcie_irq(struct pcib_softc *sc) { device_t dev; int count, error, rid; rid = -1; dev = sc->dev; /* * For simplicity, only use MSI-X if there is a single message. * To support a device with multiple messages we would have to * use remap intr if the MSI number is not 0. */ count = pci_msix_count(dev); if (count == 1) { error = pci_alloc_msix(dev, &count); if (error == 0) rid = 1; } if (rid < 0 && pci_msi_count(dev) > 0) { count = 1; error = pci_alloc_msi(dev, &count); if (error == 0) rid = 1; } if (rid < 0) rid = 0; sc->pcie_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->pcie_irq == NULL) { device_printf(dev, "Failed to allocate interrupt for PCI-e events\n"); if (rid > 0) pci_release_msi(dev); return (ENXIO); } error = bus_setup_intr(dev, sc->pcie_irq, INTR_TYPE_MISC, NULL, pcib_pcie_intr, sc, &sc->pcie_ihand); if (error) { device_printf(dev, "Failed to setup PCI-e interrupt handler\n"); bus_release_resource(dev, SYS_RES_IRQ, rid, sc->pcie_irq); if (rid > 0) pci_release_msi(dev); return (error); } return (0); } static int pcib_release_pcie_irq(struct pcib_softc *sc) { device_t dev; int error; dev = sc->dev; error = bus_teardown_intr(dev, sc->pcie_irq, sc->pcie_ihand); if (error) return (error); error = bus_free_resource(dev, SYS_RES_IRQ, sc->pcie_irq); if (error) return (error); return (pci_release_msi(dev)); } static void pcib_setup_hotplug(struct pcib_softc *sc) { device_t dev; uint16_t mask, val; dev = sc->dev; callout_init(&sc->pcie_ab_timer, 0); callout_init(&sc->pcie_cc_timer, 0); callout_init(&sc->pcie_dll_timer, 0); TASK_INIT(&sc->pcie_hp_task, 0, pcib_pcie_hotplug_task, sc); /* Allocate IRQ. 
*/ if (pcib_alloc_pcie_irq(sc) != 0) return; sc->pcie_link_sta = pcie_read_config(dev, PCIER_LINK_STA, 2); sc->pcie_slot_sta = pcie_read_config(dev, PCIER_SLOT_STA, 2); /* Clear any events previously pending. */ pcie_write_config(dev, PCIER_SLOT_STA, sc->pcie_slot_sta, 2); /* Enable HotPlug events. */ mask = PCIEM_SLOT_CTL_DLLSCE | PCIEM_SLOT_CTL_HPIE | PCIEM_SLOT_CTL_CCIE | PCIEM_SLOT_CTL_PDCE | PCIEM_SLOT_CTL_MRLSCE | PCIEM_SLOT_CTL_PFDE | PCIEM_SLOT_CTL_ABPE; val = PCIEM_SLOT_CTL_PDCE | PCIEM_SLOT_CTL_HPIE; if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_APB) val |= PCIEM_SLOT_CTL_ABPE; if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_PCP) val |= PCIEM_SLOT_CTL_PFDE; if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_MRLSP) val |= PCIEM_SLOT_CTL_MRLSCE; if (!(sc->pcie_slot_cap & PCIEM_SLOT_CAP_NCCS)) val |= PCIEM_SLOT_CTL_CCIE; if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) val |= PCIEM_SLOT_CTL_DLLSCE; /* Turn the attention indicator off. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_AIP) { mask |= PCIEM_SLOT_CTL_AIC; val |= PCIEM_SLOT_CTL_AI_OFF; } pcib_pcie_hotplug_update(sc, val, mask, false); } static int pcib_detach_hotplug(struct pcib_softc *sc) { uint16_t mask, val; int error; /* Disable the card in the slot and force it to detach. */ if (sc->flags & PCIB_DETACH_PENDING) { sc->flags &= ~PCIB_DETACH_PENDING; callout_stop(&sc->pcie_ab_timer); } sc->flags |= PCIB_DETACHING; if (sc->flags & PCIB_HOTPLUG_CMD_PENDING) { callout_stop(&sc->pcie_cc_timer); tsleep(sc, 0, "hpcmd", hz); sc->flags &= ~PCIB_HOTPLUG_CMD_PENDING; } /* Disable HotPlug events. */ mask = PCIEM_SLOT_CTL_DLLSCE | PCIEM_SLOT_CTL_HPIE | PCIEM_SLOT_CTL_CCIE | PCIEM_SLOT_CTL_PDCE | PCIEM_SLOT_CTL_MRLSCE | PCIEM_SLOT_CTL_PFDE | PCIEM_SLOT_CTL_ABPE; val = 0; /* Turn the attention indicator off. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_AIP) { mask |= PCIEM_SLOT_CTL_AIC; val |= PCIEM_SLOT_CTL_AI_OFF; } pcib_pcie_hotplug_update(sc, val, mask, false); error = pcib_release_pcie_irq(sc); if (error) return (error); taskqueue_drain(taskqueue_thread, &sc->pcie_hp_task); callout_drain(&sc->pcie_ab_timer); callout_drain(&sc->pcie_cc_timer); callout_drain(&sc->pcie_dll_timer); return (0); } #endif /* * Get current bridge configuration. */ static void pcib_cfg_save(struct pcib_softc *sc) { #ifndef NEW_PCIB device_t dev; uint16_t command; dev = sc->dev; command = pci_read_config(dev, PCIR_COMMAND, 2); if (command & PCIM_CMD_PORTEN) pcib_get_io_decode(sc); if (command & PCIM_CMD_MEMEN) pcib_get_mem_decode(sc); #endif } /* * Restore previous bridge configuration. */ static void pcib_cfg_restore(struct pcib_softc *sc) { device_t dev; #ifndef NEW_PCIB uint16_t command; #endif dev = sc->dev; #ifdef NEW_PCIB pcib_write_windows(sc, WIN_IO | WIN_MEM | WIN_PMEM); #else command = pci_read_config(dev, PCIR_COMMAND, 2); if (command & PCIM_CMD_PORTEN) pcib_set_io_decode(sc); if (command & PCIM_CMD_MEMEN) pcib_set_mem_decode(sc); #endif } /* * Generic device interface */ static int pcib_probe(device_t dev) { if ((pci_get_class(dev) == PCIC_BRIDGE) && (pci_get_subclass(dev) == PCIS_BRIDGE_PCI)) { device_set_desc(dev, "PCI-PCI bridge"); return(-10000); } return(ENXIO); } void pcib_attach_common(device_t dev) { struct pcib_softc *sc; struct sysctl_ctx_list *sctx; struct sysctl_oid *soid; int comma; sc = device_get_softc(dev); sc->dev = dev; /* * Get current bridge configuration. 
*/ sc->domain = pci_get_domain(dev); #if !(defined(NEW_PCIB) && defined(PCI_RES_BUS)) sc->bus.sec = pci_read_config(dev, PCIR_SECBUS_1, 1); sc->bus.sub = pci_read_config(dev, PCIR_SUBBUS_1, 1); #endif sc->bridgectl = pci_read_config(dev, PCIR_BRIDGECTL_1, 2); pcib_cfg_save(sc); /* * The primary bus register should always be the bus of the * parent. */ sc->pribus = pci_get_bus(dev); pci_write_config(dev, PCIR_PRIBUS_1, sc->pribus, 1); /* * Setup sysctl reporting nodes */ sctx = device_get_sysctl_ctx(dev); soid = device_get_sysctl_tree(dev); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "domain", CTLFLAG_RD, &sc->domain, 0, "Domain number"); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "pribus", CTLFLAG_RD, &sc->pribus, 0, "Primary bus number"); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "secbus", CTLFLAG_RD, &sc->bus.sec, 0, "Secondary bus number"); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "subbus", CTLFLAG_RD, &sc->bus.sub, 0, "Subordinate bus number"); /* * Quirk handling. */ switch (pci_get_devid(dev)) { #if !(defined(NEW_PCIB) && defined(PCI_RES_BUS)) case 0x12258086: /* Intel 82454KX/GX (Orion) */ { uint8_t supbus; supbus = pci_read_config(dev, 0x41, 1); if (supbus != 0xff) { sc->bus.sec = supbus + 1; sc->bus.sub = supbus + 1; } break; } #endif /* * The i82380FB mobile docking controller is a PCI-PCI bridge, * and it is a subtractive bridge. However, the ProgIf is wrong * so the normal setting of PCIB_SUBTRACTIVE bit doesn't * happen. There are also Toshiba and Cavium ThunderX bridges * that behave this way. */ case 0xa002177d: /* Cavium ThunderX */ case 0x124b8086: /* Intel 82380FB Mobile */ case 0x060513d7: /* Toshiba ???? */ sc->flags |= PCIB_SUBTRACTIVE; break; #if !(defined(NEW_PCIB) && defined(PCI_RES_BUS)) /* Compaq R3000 BIOS sets wrong subordinate bus number. */ case 0x00dd10de: { char *cp; if ((cp = kern_getenv("smbios.planar.maker")) == NULL) break; if (strncmp(cp, "Compal", 6) != 0) { freeenv(cp); break; } freeenv(cp); if ((cp = kern_getenv("smbios.planar.product")) == NULL) break; if (strncmp(cp, "08A0", 4) != 0) { freeenv(cp); break; } freeenv(cp); if (sc->bus.sub < 0xa) { pci_write_config(dev, PCIR_SUBBUS_1, 0xa, 1); sc->bus.sub = pci_read_config(dev, PCIR_SUBBUS_1, 1); } break; } #endif } if (pci_msi_device_blacklisted(dev)) sc->flags |= PCIB_DISABLE_MSI; if (pci_msix_device_blacklisted(dev)) sc->flags |= PCIB_DISABLE_MSIX; /* * Intel 815, 845 and other chipsets say they are PCI-PCI bridges, * but have a ProgIF of 0x80. The 82801 family (AA, AB, BAM/CAM, * BA/CA/DB and E) PCI bridges are HUB-PCI bridges, in Intelese. * This means they act as if they were subtractively decoding * bridges and pass all transactions. Mark them and real ProgIf 1 * parts as subtractive. 
*/ if ((pci_get_devid(dev) & 0xff00ffff) == 0x24008086 || pci_read_config(dev, PCIR_PROGIF, 1) == PCIP_BRIDGE_PCI_SUBTRACTIVE) sc->flags |= PCIB_SUBTRACTIVE; #ifdef PCI_HP pcib_probe_hotplug(sc); #endif #ifdef NEW_PCIB #ifdef PCI_RES_BUS pcib_setup_secbus(dev, &sc->bus, 1); #endif pcib_probe_windows(sc); #endif #ifdef PCI_HP if (sc->flags & PCIB_HOTPLUG) pcib_setup_hotplug(sc); #endif if (bootverbose) { device_printf(dev, " domain %d\n", sc->domain); device_printf(dev, " secondary bus %d\n", sc->bus.sec); device_printf(dev, " subordinate bus %d\n", sc->bus.sub); #ifdef NEW_PCIB if (pcib_is_window_open(&sc->io)) device_printf(dev, " I/O decode 0x%jx-0x%jx\n", (uintmax_t)sc->io.base, (uintmax_t)sc->io.limit); if (pcib_is_window_open(&sc->mem)) device_printf(dev, " memory decode 0x%jx-0x%jx\n", (uintmax_t)sc->mem.base, (uintmax_t)sc->mem.limit); if (pcib_is_window_open(&sc->pmem)) device_printf(dev, " prefetched decode 0x%jx-0x%jx\n", (uintmax_t)sc->pmem.base, (uintmax_t)sc->pmem.limit); #else if (pcib_is_io_open(sc)) device_printf(dev, " I/O decode 0x%x-0x%x\n", sc->iobase, sc->iolimit); if (pcib_is_nonprefetch_open(sc)) device_printf(dev, " memory decode 0x%jx-0x%jx\n", (uintmax_t)sc->membase, (uintmax_t)sc->memlimit); if (pcib_is_prefetch_open(sc)) device_printf(dev, " prefetched decode 0x%jx-0x%jx\n", (uintmax_t)sc->pmembase, (uintmax_t)sc->pmemlimit); #endif if (sc->bridgectl & (PCIB_BCR_ISA_ENABLE | PCIB_BCR_VGA_ENABLE) || sc->flags & PCIB_SUBTRACTIVE) { device_printf(dev, " special decode "); comma = 0; if (sc->bridgectl & PCIB_BCR_ISA_ENABLE) { printf("ISA"); comma = 1; } if (sc->bridgectl & PCIB_BCR_VGA_ENABLE) { printf("%sVGA", comma ? ", " : ""); comma = 1; } if (sc->flags & PCIB_SUBTRACTIVE) printf("%ssubtractive", comma ? ", " : ""); printf("\n"); } } /* * Always enable busmastering on bridges so that transactions * initiated on the secondary bus are passed through to the * primary bus. */ pci_enable_busmaster(dev); } #ifdef PCI_HP static int pcib_present(struct pcib_softc *sc) { if (sc->flags & PCIB_HOTPLUG) return (pcib_hotplug_present(sc) != 0); return (1); } #endif int pcib_attach_child(device_t dev) { struct pcib_softc *sc; sc = device_get_softc(dev); if (sc->bus.sec == 0) { /* no secondary bus; we should have fixed this */ return(0); } #ifdef PCI_HP if (!pcib_present(sc)) { /* An empty HotPlug slot, so don't add a PCI bus yet. 
*/ return (0); } #endif sc->child = device_add_child(dev, "pci", -1); return (bus_generic_attach(dev)); } int pcib_attach(device_t dev) { pcib_attach_common(dev); return (pcib_attach_child(dev)); } int pcib_detach(device_t dev) { #if defined(PCI_HP) || defined(NEW_PCIB) struct pcib_softc *sc; #endif int error; #if defined(PCI_HP) || defined(NEW_PCIB) sc = device_get_softc(dev); #endif error = bus_generic_detach(dev); if (error) return (error); #ifdef PCI_HP if (sc->flags & PCIB_HOTPLUG) { error = pcib_detach_hotplug(sc); if (error) return (error); } #endif error = device_delete_children(dev); if (error) return (error); #ifdef NEW_PCIB pcib_free_windows(sc); #ifdef PCI_RES_BUS pcib_free_secbus(dev, &sc->bus); #endif #endif return (0); } int pcib_suspend(device_t dev) { pcib_cfg_save(device_get_softc(dev)); return (bus_generic_suspend(dev)); } int pcib_resume(device_t dev) { pcib_cfg_restore(device_get_softc(dev)); return (bus_generic_resume(dev)); } void pcib_bridge_init(device_t dev) { pci_write_config(dev, PCIR_IOBASEL_1, 0xff, 1); pci_write_config(dev, PCIR_IOBASEH_1, 0xffff, 2); pci_write_config(dev, PCIR_IOLIMITL_1, 0, 1); pci_write_config(dev, PCIR_IOLIMITH_1, 0, 2); pci_write_config(dev, PCIR_MEMBASE_1, 0xffff, 2); pci_write_config(dev, PCIR_MEMLIMIT_1, 0, 2); pci_write_config(dev, PCIR_PMBASEL_1, 0xffff, 2); pci_write_config(dev, PCIR_PMBASEH_1, 0xffffffff, 4); pci_write_config(dev, PCIR_PMLIMITL_1, 0, 2); pci_write_config(dev, PCIR_PMLIMITH_1, 0, 4); } int pcib_child_present(device_t dev, device_t child) { #ifdef PCI_HP struct pcib_softc *sc = device_get_softc(dev); int retval; retval = bus_child_present(dev); if (retval != 0 && sc->flags & PCIB_HOTPLUG) retval = pcib_hotplug_present(sc); return (retval); #else return (bus_child_present(dev)); #endif } int pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct pcib_softc *sc = device_get_softc(dev); switch (which) { case PCIB_IVAR_DOMAIN: *result = sc->domain; return(0); case PCIB_IVAR_BUS: *result = sc->bus.sec; return(0); } return(ENOENT); } int pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t value) { switch (which) { case PCIB_IVAR_DOMAIN: return(EINVAL); case PCIB_IVAR_BUS: return(EINVAL); } return(ENOENT); } #ifdef NEW_PCIB /* * Attempt to allocate a resource from the existing resources assigned * to a window. */ static struct resource * pcib_suballoc_resource(struct pcib_softc *sc, struct pcib_window *w, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct resource *res; if (!pcib_is_window_open(w)) return (NULL); res = rman_reserve_resource(&w->rman, start, end, count, flags & ~RF_ACTIVE, child); if (res == NULL) return (NULL); if (bootverbose) device_printf(sc->dev, "allocated %s range (%#jx-%#jx) for rid %x of %s\n", w->name, rman_get_start(res), rman_get_end(res), *rid, pcib_child_name(child)); rman_set_rid(res, *rid); /* * If the resource should be active, pass that request up the * tree. This assumes the parent drivers can handle * activating sub-allocated resources. */ if (flags & RF_ACTIVE) { if (bus_activate_resource(child, type, *rid, res) != 0) { rman_release_resource(res); return (NULL); } } return (res); } /* Allocate a fresh resource range for an unconfigured window. 
*/ static int pcib_alloc_new_window(struct pcib_softc *sc, struct pcib_window *w, int type, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct resource *res; rman_res_t base, limit, wmask; int rid; /* * If this is an I/O window on a bridge with ISA enable set * and the start address is below 64k, then try to allocate an * initial window of 0x1000 bytes long starting at address * 0xf000 and walking down. Note that if the original request * was larger than the non-aliased range size of 0x100 our * caller would have raised the start address up to 64k * already. */ if (type == SYS_RES_IOPORT && sc->bridgectl & PCIB_BCR_ISA_ENABLE && start < 65536) { for (base = 0xf000; (long)base >= 0; base -= 0x1000) { limit = base + 0xfff; /* * Skip ranges that wouldn't work for the * original request. Note that the actual * window that overlaps are the non-alias * ranges within [base, limit], so this isn't * quite a simple comparison. */ if (start + count > limit - 0x400) continue; if (base == 0) { /* * The first open region for the window at * 0 is 0x400-0x4ff. */ if (end - count + 1 < 0x400) continue; } else { if (end - count + 1 < base) continue; } if (pcib_alloc_nonisa_ranges(sc, base, limit) == 0) { w->base = base; w->limit = limit; return (0); } } return (ENOSPC); } wmask = ((rman_res_t)1 << w->step) - 1; if (RF_ALIGNMENT(flags) < w->step) { flags &= ~RF_ALIGNMENT_MASK; flags |= RF_ALIGNMENT_LOG2(w->step); } start &= ~wmask; end |= wmask; count = roundup2(count, (rman_res_t)1 << w->step); rid = w->reg; res = bus_alloc_resource(sc->dev, type, &rid, start, end, count, flags & ~RF_ACTIVE); if (res == NULL) return (ENOSPC); pcib_add_window_resources(w, &res, 1); pcib_activate_window(sc, type); w->base = rman_get_start(res); w->limit = rman_get_end(res); return (0); } /* Try to expand an existing window to the requested base and limit. */ static int pcib_expand_window(struct pcib_softc *sc, struct pcib_window *w, int type, rman_res_t base, rman_res_t limit) { struct resource *res; int error, i, force_64k_base; KASSERT(base <= w->base && limit >= w->limit, ("attempting to shrink window")); /* * XXX: pcib_grow_window() doesn't try to do this anyway and * the error handling for all the edge cases would be tedious. */ KASSERT(limit == w->limit || base == w->base, ("attempting to grow both ends of a window")); /* * Yet more special handling for requests to expand an I/O * window behind an ISA-enabled bridge. Since I/O windows * have to grow in 0x1000 increments and the end of the 0xffff * range is an alias, growing a window below 64k will always * result in allocating new resources and never adjusting an * existing resource. */ if (type == SYS_RES_IOPORT && sc->bridgectl & PCIB_BCR_ISA_ENABLE && (limit <= 65535 || (base <= 65535 && base != w->base))) { KASSERT(limit == w->limit || limit <= 65535, ("attempting to grow both ends across 64k ISA alias")); if (base != w->base) error = pcib_alloc_nonisa_ranges(sc, base, w->base - 1); else error = pcib_alloc_nonisa_ranges(sc, w->limit + 1, limit); if (error == 0) { w->base = base; w->limit = limit; } return (error); } /* * Find the existing resource to adjust. Usually there is only one, * but for an ISA-enabled bridge we might be growing the I/O window * above 64k and need to find the existing resource that maps all * of the area above 64k. 
*/ for (i = 0; i < w->count; i++) { if (rman_get_end(w->res[i]) == w->limit) break; } KASSERT(i != w->count, ("did not find existing resource")); res = w->res[i]; /* * Usually the resource we found should match the window's * existing range. The one exception is the ISA-enabled case * mentioned above in which case the resource should start at * 64k. */ if (type == SYS_RES_IOPORT && sc->bridgectl & PCIB_BCR_ISA_ENABLE && w->base <= 65535) { KASSERT(rman_get_start(res) == 65536, ("existing resource mismatch")); force_64k_base = 1; } else { KASSERT(w->base == rman_get_start(res), ("existing resource mismatch")); force_64k_base = 0; } error = bus_adjust_resource(sc->dev, type, res, force_64k_base ? rman_get_start(res) : base, limit); if (error) return (error); /* Add the newly allocated region to the resource manager. */ if (w->base != base) { error = rman_manage_region(&w->rman, base, w->base - 1); w->base = base; } else { error = rman_manage_region(&w->rman, w->limit + 1, limit); w->limit = limit; } if (error) { if (bootverbose) device_printf(sc->dev, "failed to expand %s resource manager\n", w->name); (void)bus_adjust_resource(sc->dev, type, res, force_64k_base ? rman_get_start(res) : w->base, w->limit); } return (error); } /* * Attempt to grow a window to make room for a given resource request. */ static int pcib_grow_window(struct pcib_softc *sc, struct pcib_window *w, int type, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { rman_res_t align, start_free, end_free, front, back, wmask; int error; /* * Clamp the desired resource range to the maximum address * this window supports. Reject impossible requests. * * For I/O port requests behind a bridge with the ISA enable * bit set, force large allocations to start above 64k. */ if (!w->valid) return (EINVAL); if (sc->bridgectl & PCIB_BCR_ISA_ENABLE && count > 0x100 && start < 65536) start = 65536; if (end > w->rman.rm_end) end = w->rman.rm_end; if (start + count - 1 > end || start + count < start) return (EINVAL); wmask = ((rman_res_t)1 << w->step) - 1; /* * If there is no resource at all, just try to allocate enough * aligned space for this resource. */ if (w->res == NULL) { error = pcib_alloc_new_window(sc, w, type, start, end, count, flags); if (error) { if (bootverbose) device_printf(sc->dev, "failed to allocate initial %s window (%#jx-%#jx,%#jx)\n", w->name, start, end, count); return (error); } if (bootverbose) device_printf(sc->dev, "allocated initial %s window of %#jx-%#jx\n", w->name, (uintmax_t)w->base, (uintmax_t)w->limit); goto updatewin; } /* * See if growing the window would help. Compute the minimum * amount of address space needed on both the front and back * ends of the existing window to satisfy the allocation. * * For each end, build a candidate region adjusting for the * required alignment, etc. If there is a free region at the * edge of the window, grow from the inner edge of the free * region. Otherwise grow from the window boundary. * * Growing an I/O window below 64k for a bridge with the ISA * enable bit doesn't require any special magic as the step * size of an I/O window (1k) always includes multiple * non-alias ranges when it is grown in either direction. * * XXX: Special case: if w->res is completely empty and the * request size is larger than w->res, we should find the * optimal aligned buffer containing w->res and allocate that. 
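[Editorial aside] The "front" and "back" candidates described above are just two bounded growth amounts: how far the window would have to extend downward (front) or upward (back) to fit the request once alignment and the window's step size are applied. A hypothetical worked example of the front computation, using made-up numbers for a 1MB-step memory window rather than driver state:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
        /* Window 0xc0000000-0xc00fffff; request: 2MB, 2MB-aligned, anywhere. */
        uint64_t wbase = 0xc0000000, wstep = 20, count = 0x200000;
        uint64_t align = 0x200000, start = 0, end = 0xffffffff;
        uint64_t wmask = ((uint64_t)1 << wstep) - 1;
        uint64_t end_free, front;

        end_free = wbase;               /* no free space left at the window edge */
        if (end_free > end)
                end_free = end + 1;
        end_free &= ~(align - 1);       /* align the candidate's end ... */
        end_free--;
        front = end_free - (count - 1); /* ... and place the request below it */
        if (front >= start && front <= end_free) {
                front &= ~wmask;        /* the new base must be step-aligned */
                printf("grow front by 0x%jx to new base 0x%jx\n",
                    (uintmax_t)(wbase - front), (uintmax_t)front);
        } else
                printf("cannot grow at the front\n");
        return (0);
}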
*/ if (bootverbose) device_printf(sc->dev, "attempting to grow %s window for (%#jx-%#jx,%#jx)\n", w->name, start, end, count); align = (rman_res_t)1 << RF_ALIGNMENT(flags); if (start < w->base) { if (rman_first_free_region(&w->rman, &start_free, &end_free) != 0 || start_free != w->base) end_free = w->base; if (end_free > end) end_free = end + 1; /* Move end_free down until it is properly aligned. */ end_free &= ~(align - 1); end_free--; front = end_free - (count - 1); /* * The resource would now be allocated at (front, * end_free). Ensure that fits in the (start, end) * bounds. end_free is checked above. If 'front' is * ok, ensure it is properly aligned for this window. * Also check for underflow. */ if (front >= start && front <= end_free) { if (bootverbose) printf("\tfront candidate range: %#jx-%#jx\n", front, end_free); front &= ~wmask; front = w->base - front; } else front = 0; } else front = 0; if (end > w->limit) { if (rman_last_free_region(&w->rman, &start_free, &end_free) != 0 || end_free != w->limit) start_free = w->limit + 1; if (start_free < start) start_free = start; /* Move start_free up until it is properly aligned. */ start_free = roundup2(start_free, align); back = start_free + count - 1; /* * The resource would now be allocated at (start_free, * back). Ensure that fits in the (start, end) * bounds. start_free is checked above. If 'back' is * ok, ensure it is properly aligned for this window. * Also check for overflow. */ if (back <= end && start_free <= back) { if (bootverbose) printf("\tback candidate range: %#jx-%#jx\n", start_free, back); back |= wmask; back -= w->limit; } else back = 0; } else back = 0; /* * Try to allocate the smallest needed region first. * If that fails, fall back to the other region. */ error = ENOSPC; while (front != 0 || back != 0) { if (front != 0 && (front <= back || back == 0)) { error = pcib_expand_window(sc, w, type, w->base - front, w->limit); if (error == 0) break; front = 0; } else { error = pcib_expand_window(sc, w, type, w->base, w->limit + back); if (error == 0) break; back = 0; } } if (error) return (error); if (bootverbose) device_printf(sc->dev, "grew %s window to %#jx-%#jx\n", w->name, (uintmax_t)w->base, (uintmax_t)w->limit); updatewin: /* Write the new window. */ KASSERT((w->base & wmask) == 0, ("start address is not aligned")); KASSERT((w->limit & wmask) == wmask, ("end address is not aligned")); pcib_write_windows(sc, w->mask); return (0); } /* * We have to trap resource allocation requests and ensure that the bridge * is set up to, or capable of handling them. */ struct resource * pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct pcib_softc *sc; struct resource *r; sc = device_get_softc(dev); /* * VGA resources are decoded iff the VGA enable bit is set in * the bridge control register. VGA resources do not fall into * the resource windows and are passed up to the parent. 
*/ if ((type == SYS_RES_IOPORT && pci_is_vga_ioport_range(start, end)) || (type == SYS_RES_MEMORY && pci_is_vga_memory_range(start, end))) { if (sc->bridgectl & PCIB_BCR_VGA_ENABLE) return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); else return (NULL); } switch (type) { #ifdef PCI_RES_BUS case PCI_RES_BUS: return (pcib_alloc_subbus(&sc->bus, child, rid, start, end, count, flags)); #endif case SYS_RES_IOPORT: if (pcib_is_isa_range(sc, start, end, count)) return (NULL); r = pcib_suballoc_resource(sc, &sc->io, child, type, rid, start, end, count, flags); if (r != NULL || (sc->flags & PCIB_SUBTRACTIVE) != 0) break; if (pcib_grow_window(sc, &sc->io, type, start, end, count, flags) == 0) r = pcib_suballoc_resource(sc, &sc->io, child, type, rid, start, end, count, flags); break; case SYS_RES_MEMORY: /* * For prefetchable resources, prefer the prefetchable * memory window, but fall back to the regular memory * window if that fails. Try both windows before * attempting to grow a window in case the firmware * has used a range in the regular memory window to * map a prefetchable BAR. */ if (flags & RF_PREFETCHABLE) { r = pcib_suballoc_resource(sc, &sc->pmem, child, type, rid, start, end, count, flags); if (r != NULL) break; } r = pcib_suballoc_resource(sc, &sc->mem, child, type, rid, start, end, count, flags); if (r != NULL || (sc->flags & PCIB_SUBTRACTIVE) != 0) break; if (flags & RF_PREFETCHABLE) { if (pcib_grow_window(sc, &sc->pmem, type, start, end, count, flags) == 0) { r = pcib_suballoc_resource(sc, &sc->pmem, child, type, rid, start, end, count, flags); if (r != NULL) break; } } if (pcib_grow_window(sc, &sc->mem, type, start, end, count, flags & ~RF_PREFETCHABLE) == 0) r = pcib_suballoc_resource(sc, &sc->mem, child, type, rid, start, end, count, flags); break; default: return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); } /* * If attempts to suballocate from the window fail but this is a * subtractive bridge, pass the request up the tree. */ if (sc->flags & PCIB_SUBTRACTIVE && r == NULL) return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); return (r); } int pcib_adjust_resource(device_t bus, device_t child, int type, struct resource *r, rman_res_t start, rman_res_t end) { struct pcib_softc *sc; sc = device_get_softc(bus); if (pcib_is_resource_managed(sc, type, r)) return (rman_adjust_resource(r, start, end)); return (bus_generic_adjust_resource(bus, child, type, r, start, end)); } int pcib_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { struct pcib_softc *sc; int error; sc = device_get_softc(dev); if (pcib_is_resource_managed(sc, type, r)) { if (rman_get_flags(r) & RF_ACTIVE) { error = bus_deactivate_resource(child, type, rid, r); if (error) return (error); } return (rman_release_resource(r)); } return (bus_generic_release_resource(dev, child, type, rid, r)); } #else /* * We have to trap resource allocation requests and ensure that the bridge * is set up to, or capable of handling them. */ struct resource * pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct pcib_softc *sc = device_get_softc(dev); const char *name, *suffix; int ok; /* * Fail the allocation for this range if it's not supported. 
*/ name = device_get_nameunit(child); if (name == NULL) { name = ""; suffix = ""; } else suffix = " "; switch (type) { case SYS_RES_IOPORT: ok = 0; if (!pcib_is_io_open(sc)) break; ok = (start >= sc->iobase && end <= sc->iolimit); /* * Make sure we allow access to VGA I/O addresses when the * bridge has the "VGA Enable" bit set. */ if (!ok && pci_is_vga_ioport_range(start, end)) ok = (sc->bridgectl & PCIB_BCR_VGA_ENABLE) ? 1 : 0; if ((sc->flags & PCIB_SUBTRACTIVE) == 0) { if (!ok) { if (start < sc->iobase) start = sc->iobase; if (end > sc->iolimit) end = sc->iolimit; if (start < end) ok = 1; } } else { ok = 1; #if 0 /* * If we overlap with the subtractive range, then * pick the upper range to use. */ if (start < sc->iolimit && end > sc->iobase) start = sc->iolimit + 1; #endif } if (end < start) { device_printf(dev, "ioport: end (%jx) < start (%jx)\n", end, start); start = 0; end = 0; ok = 0; } if (!ok) { device_printf(dev, "%s%srequested unsupported I/O " "range 0x%jx-0x%jx (decoding 0x%x-0x%x)\n", name, suffix, start, end, sc->iobase, sc->iolimit); return (NULL); } if (bootverbose) device_printf(dev, "%s%srequested I/O range 0x%jx-0x%jx: in range\n", name, suffix, start, end); break; case SYS_RES_MEMORY: ok = 0; if (pcib_is_nonprefetch_open(sc)) ok = ok || (start >= sc->membase && end <= sc->memlimit); if (pcib_is_prefetch_open(sc)) ok = ok || (start >= sc->pmembase && end <= sc->pmemlimit); /* * Make sure we allow access to VGA memory addresses when the * bridge has the "VGA Enable" bit set. */ if (!ok && pci_is_vga_memory_range(start, end)) ok = (sc->bridgectl & PCIB_BCR_VGA_ENABLE) ? 1 : 0; if ((sc->flags & PCIB_SUBTRACTIVE) == 0) { if (!ok) { ok = 1; if (flags & RF_PREFETCHABLE) { if (pcib_is_prefetch_open(sc)) { if (start < sc->pmembase) start = sc->pmembase; if (end > sc->pmemlimit) end = sc->pmemlimit; } else { ok = 0; } } else { /* non-prefetchable */ if (pcib_is_nonprefetch_open(sc)) { if (start < sc->membase) start = sc->membase; if (end > sc->memlimit) end = sc->memlimit; } else { ok = 0; } } } } else if (!ok) { ok = 1; /* subtractive bridge: always ok */ #if 0 if (pcib_is_nonprefetch_open(sc)) { if (start < sc->memlimit && end > sc->membase) start = sc->memlimit + 1; } if (pcib_is_prefetch_open(sc)) { if (start < sc->pmemlimit && end > sc->pmembase) start = sc->pmemlimit + 1; } #endif } if (end < start) { device_printf(dev, "memory: end (%jx) < start (%jx)\n", end, start); start = 0; end = 0; ok = 0; } if (!ok && bootverbose) device_printf(dev, "%s%srequested unsupported memory range %#jx-%#jx " "(decoding %#jx-%#jx, %#jx-%#jx)\n", name, suffix, start, end, (uintmax_t)sc->membase, (uintmax_t)sc->memlimit, (uintmax_t)sc->pmembase, (uintmax_t)sc->pmemlimit); if (!ok) return (NULL); if (bootverbose) device_printf(dev,"%s%srequested memory range " "0x%jx-0x%jx: good\n", name, suffix, start, end); break; default: break; } /* * Bridge is OK decoding this resource, so pass it up. */ return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); } #endif /* * If ARI is enabled on this downstream port, translate the function number * to the non-ARI slot/function. The downstream port will convert it back in * hardware. If ARI is not enabled slot and func are not modified. 
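 * As an illustration, assuming the conventional ARI encoding in which the
 * upper five bits of the 8-bit ARI function number are presented as the slot
 * and the low three bits as the function, an ARI function of 69 (0b01000101)
 * would be handed to the parent as slot 8, function 5.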
*/ static __inline void pcib_xlate_ari(device_t pcib, int bus, int *slot, int *func) { struct pcib_softc *sc; int ari_func; sc = device_get_softc(pcib); ari_func = *func; if (sc->flags & PCIB_ENABLE_ARI) { KASSERT(*slot == 0, ("Non-zero slot number with ARI enabled!")); *slot = PCIE_ARI_SLOT(ari_func); *func = PCIE_ARI_FUNC(ari_func); } } static void pcib_enable_ari(struct pcib_softc *sc, uint32_t pcie_pos) { uint32_t ctl2; ctl2 = pci_read_config(sc->dev, pcie_pos + PCIER_DEVICE_CTL2, 4); ctl2 |= PCIEM_CTL2_ARI; pci_write_config(sc->dev, pcie_pos + PCIER_DEVICE_CTL2, ctl2, 4); sc->flags |= PCIB_ENABLE_ARI; } /* * PCIB interface. */ int pcib_maxslots(device_t dev) { return (PCI_SLOTMAX); } static int pcib_ari_maxslots(device_t dev) { struct pcib_softc *sc; sc = device_get_softc(dev); if (sc->flags & PCIB_ENABLE_ARI) return (PCIE_ARI_SLOTMAX); else return (PCI_SLOTMAX); } static int pcib_ari_maxfuncs(device_t dev) { struct pcib_softc *sc; sc = device_get_softc(dev); if (sc->flags & PCIB_ENABLE_ARI) return (PCIE_ARI_FUNCMAX); else return (PCI_FUNCMAX); } static void pcib_ari_decode_rid(device_t pcib, uint16_t rid, int *bus, int *slot, int *func) { struct pcib_softc *sc; sc = device_get_softc(pcib); *bus = PCI_RID2BUS(rid); if (sc->flags & PCIB_ENABLE_ARI) { *slot = PCIE_ARI_RID2SLOT(rid); *func = PCIE_ARI_RID2FUNC(rid); } else { *slot = PCI_RID2SLOT(rid); *func = PCI_RID2FUNC(rid); } } /* * Since we are a child of a PCI bus, its parent must support the pcib interface. */ static uint32_t pcib_read_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, int width) { #ifdef PCI_HP struct pcib_softc *sc; sc = device_get_softc(dev); if (!pcib_present(sc)) { switch (width) { case 2: return (0xffff); case 1: return (0xff); default: return (0xffffffff); } } #endif pcib_xlate_ari(dev, b, &s, &f); return(PCIB_READ_CONFIG(device_get_parent(device_get_parent(dev)), b, s, f, reg, width)); } static void pcib_write_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, uint32_t val, int width) { #ifdef PCI_HP struct pcib_softc *sc; sc = device_get_softc(dev); if (!pcib_present(sc)) return; #endif pcib_xlate_ari(dev, b, &s, &f); PCIB_WRITE_CONFIG(device_get_parent(device_get_parent(dev)), b, s, f, reg, val, width); } /* * Route an interrupt across a PCI bridge. */ int pcib_route_interrupt(device_t pcib, device_t dev, int pin) { device_t bus; int parent_intpin; int intnum; /* * * The PCI standard defines a swizzle of the child-side device/intpin to * the parent-side intpin as follows. * * device = device on child bus * child_intpin = intpin on child bus slot (0-3) * parent_intpin = intpin on parent bus slot (0-3) * * parent_intpin = (device + child_intpin) % 4 */ parent_intpin = (pci_get_slot(dev) + (pin - 1)) % 4; /* * Our parent is a PCI bus. Its parent must export the pcib interface * which includes the ability to route interrupts. */ bus = device_get_parent(pcib); intnum = PCIB_ROUTE_INTERRUPT(device_get_parent(bus), pcib, parent_intpin + 1); if (PCI_INTERRUPT_VALID(intnum) && bootverbose) { device_printf(pcib, "slot %d INT%c is routed to irq %d\n", pci_get_slot(dev), 'A' + pin - 1, intnum); } return(intnum); } /* Pass request to alloc MSI/MSI-X messages up to the parent bridge. 
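 * The bridge does not allocate the messages itself; it forwards the request to
 * its grandparent's pcib interface, and only refuses (with ENXIO) when MSI has
 * been administratively disabled on this bridge via PCIB_DISABLE_MSI.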
*/ int pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs) { struct pcib_softc *sc = device_get_softc(pcib); device_t bus; if (sc->flags & PCIB_DISABLE_MSI) return (ENXIO); bus = device_get_parent(pcib); return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount, irqs)); } /* Pass request to release MSI/MSI-X messages up to the parent bridge. */ int pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs) { device_t bus; bus = device_get_parent(pcib); return (PCIB_RELEASE_MSI(device_get_parent(bus), dev, count, irqs)); } /* Pass request to alloc an MSI-X message up to the parent bridge. */ int pcib_alloc_msix(device_t pcib, device_t dev, int *irq) { struct pcib_softc *sc = device_get_softc(pcib); device_t bus; if (sc->flags & PCIB_DISABLE_MSIX) return (ENXIO); bus = device_get_parent(pcib); return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq)); } /* Pass request to release an MSI-X message up to the parent bridge. */ int pcib_release_msix(device_t pcib, device_t dev, int irq) { device_t bus; bus = device_get_parent(pcib); return (PCIB_RELEASE_MSIX(device_get_parent(bus), dev, irq)); } /* Pass request to map MSI/MSI-X message up to parent bridge. */ int pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data) { device_t bus; int error; bus = device_get_parent(pcib); error = PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data); if (error) return (error); pci_ht_map_msi(pcib, *addr); return (0); } /* Pass request for device power state up to parent bridge. */ int pcib_power_for_sleep(device_t pcib, device_t dev, int *pstate) { device_t bus; bus = device_get_parent(pcib); return (PCIB_POWER_FOR_SLEEP(bus, dev, pstate)); } static int pcib_ari_enabled(device_t pcib) { struct pcib_softc *sc; sc = device_get_softc(pcib); return ((sc->flags & PCIB_ENABLE_ARI) != 0); } static int pcib_ari_get_id(device_t pcib, device_t dev, enum pci_id_type type, uintptr_t *id) { struct pcib_softc *sc; device_t bus_dev; uint8_t bus, slot, func; if (type != PCI_ID_RID) { bus_dev = device_get_parent(pcib); return (PCIB_GET_ID(device_get_parent(bus_dev), dev, type, id)); } sc = device_get_softc(pcib); if (sc->flags & PCIB_ENABLE_ARI) { bus = pci_get_bus(dev); func = pci_get_function(dev); *id = (PCI_ARI_RID(bus, func)); } else { bus = pci_get_bus(dev); slot = pci_get_slot(dev); func = pci_get_function(dev); *id = (PCI_RID(bus, slot, func)); } return (0); } /* * Check that the downstream port (pcib) and the endpoint device (dev) both * support ARI. If so, enable it and return 0, otherwise return an error. */ static int pcib_try_enable_ari(device_t pcib, device_t dev) { struct pcib_softc *sc; int error; uint32_t cap2; int ari_cap_off; uint32_t ari_ver; uint32_t pcie_pos; sc = device_get_softc(pcib); /* * ARI is controlled in a register in the PCIe capability structure. * If the downstream port does not have the PCIe capability structure * then it does not support ARI. */ error = pci_find_cap(pcib, PCIY_EXPRESS, &pcie_pos); if (error != 0) return (ENODEV); /* Check that the PCIe port advertises ARI support. */ cap2 = pci_read_config(pcib, pcie_pos + PCIER_DEVICE_CAP2, 4); if (!(cap2 & PCIEM_CAP2_ARI)) return (ENODEV); /* * Check that the endpoint device advertises ARI support via the ARI * extended capability structure. */ error = pci_find_extcap(dev, PCIZ_ARI, &ari_cap_off); if (error != 0) return (ENODEV); /* * Finally, check that the endpoint device supports the same version * of ARI that we do. 
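 * The version is taken from the header dword of the ARI extended capability;
 * anything other than PCIB_SUPPORTED_ARI_VER is reported (under bootverbose)
 * and the enable attempt is rejected with ENXIO.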
*/ ari_ver = pci_read_config(dev, ari_cap_off, 4); if (PCI_EXTCAP_VER(ari_ver) != PCIB_SUPPORTED_ARI_VER) { if (bootverbose) device_printf(pcib, "Unsupported version of ARI (%d) detected\n", PCI_EXTCAP_VER(ari_ver)); return (ENXIO); } pcib_enable_ari(sc, pcie_pos); return (0); } Index: user/alc/PQ_LAUNDRY/sys/geom/geom_disk.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/geom/geom_disk.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/geom/geom_disk.c (revision 303642) @@ -1,1003 +1,1010 @@ /*- * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_geom.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct g_disk_softc { struct mtx done_mtx; struct disk *dp; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; char led[64]; uint32_t state; struct mtx start_mtx; }; static g_access_t g_disk_access; static g_start_t g_disk_start; static g_ioctl_t g_disk_ioctl; static g_dumpconf_t g_disk_dumpconf; static g_provgone_t g_disk_providergone; static struct g_class g_disk_class = { .name = G_DISK_CLASS_NAME, .version = G_VERSION, .start = g_disk_start, .access = g_disk_access, .ioctl = g_disk_ioctl, .providergone = g_disk_providergone, .dumpconf = g_disk_dumpconf, }; SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, disk, CTLFLAG_RW, 0, "GEOM_DISK stuff"); DECLARE_GEOM_CLASS(g_disk_class, g_disk); static int g_disk_access(struct g_provider *pp, int r, int w, int e) { struct disk *dp; struct g_disk_softc *sc; int error; g_trace(G_T_ACCESS, "g_disk_access(%s, %d, %d, %d)", pp->name, r, w, e); g_topology_assert(); sc = pp->private; if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) { /* * Allow decreasing access count even if disk is not * available anymore. */ if (r <= 0 && w <= 0 && e <= 0) return (0); return (ENXIO); } r += pp->acr; w += pp->acw; e += pp->ace; error = 0; if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) { if (dp->d_open != NULL) { error = dp->d_open(dp); if (bootverbose && error != 0) printf("Opened disk %s -> %d\n", pp->name, error); if (error != 0) return (error); } pp->sectorsize = dp->d_sectorsize; if (dp->d_maxsize == 0) { printf("WARNING: Disk drive %s%d has no d_maxsize\n", dp->d_name, dp->d_unit); dp->d_maxsize = DFLTPHYS; } if (dp->d_delmaxsize == 0) { if (bootverbose && dp->d_flags & DISKFLAG_CANDELETE) { printf("WARNING: Disk drive %s%d has no " "d_delmaxsize\n", dp->d_name, dp->d_unit); } dp->d_delmaxsize = dp->d_maxsize; } pp->stripeoffset = dp->d_stripeoffset; pp->stripesize = dp->d_stripesize; dp->d_flags |= DISKFLAG_OPEN; - g_resize_provider(pp, dp->d_mediasize); + /* + * Do not invoke resize event when initial size was zero. + * Some disks report its size only after first opening. 
+ */ + if (pp->mediasize == 0) + pp->mediasize = dp->d_mediasize; + else + g_resize_provider(pp, dp->d_mediasize); } else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) { if (dp->d_close != NULL) { error = dp->d_close(dp); if (error != 0) printf("Closed disk %s -> %d\n", pp->name, error); } sc->state = G_STATE_ACTIVE; if (sc->led[0] != 0) led_set(sc->led, "0"); dp->d_flags &= ~DISKFLAG_OPEN; } return (error); } static void g_disk_kerneldump(struct bio *bp, struct disk *dp) { struct g_kerneldump *gkd; struct g_geom *gp; gkd = (struct g_kerneldump*)bp->bio_data; gp = bp->bio_to->geom; g_trace(G_T_TOPOLOGY, "g_disk_kerneldump(%s, %jd, %jd)", gp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length); if (dp->d_dump == NULL) { g_io_deliver(bp, ENODEV); return; } gkd->di.dumper = dp->d_dump; gkd->di.priv = dp; gkd->di.blocksize = dp->d_sectorsize; gkd->di.maxiosize = dp->d_maxsize; gkd->di.mediaoffset = gkd->offset; if ((gkd->offset + gkd->length) > dp->d_mediasize) gkd->length = dp->d_mediasize - gkd->offset; gkd->di.mediasize = gkd->length; g_io_deliver(bp, 0); } static void g_disk_setstate(struct bio *bp, struct g_disk_softc *sc) { const char *cmd; memcpy(&sc->state, bp->bio_data, sizeof(sc->state)); if (sc->led[0] != 0) { switch (sc->state) { case G_STATE_FAILED: cmd = "1"; break; case G_STATE_REBUILD: cmd = "f5"; break; case G_STATE_RESYNC: cmd = "f1"; break; default: cmd = "0"; break; } led_set(sc->led, cmd); } g_io_deliver(bp, 0); } static void g_disk_done(struct bio *bp) { struct bintime now; struct bio *bp2; struct g_disk_softc *sc; /* See "notes" for why we need a mutex here */ /* XXX: will witness accept a mix of Giant/unGiant drivers here ? */ bp2 = bp->bio_parent; sc = bp2->bio_to->private; bp->bio_completed = bp->bio_length - bp->bio_resid; binuptime(&now); mtx_lock(&sc->done_mtx); if (bp2->bio_error == 0) bp2->bio_error = bp->bio_error; bp2->bio_completed += bp->bio_completed; switch (bp->bio_cmd) { case BIO_ZONE: bcopy(&bp->bio_zone, &bp2->bio_zone, sizeof(bp->bio_zone)); /*FALLTHROUGH*/ case BIO_READ: case BIO_WRITE: case BIO_DELETE: case BIO_FLUSH: devstat_end_transaction_bio_bt(sc->dp->d_devstat, bp, &now); break; default: break; } bp2->bio_inbed++; if (bp2->bio_children == bp2->bio_inbed) { mtx_unlock(&sc->done_mtx); bp2->bio_resid = bp2->bio_bcount - bp2->bio_completed; g_io_deliver(bp2, bp2->bio_error); } else mtx_unlock(&sc->done_mtx); g_destroy_bio(bp); } static int g_disk_ioctl(struct g_provider *pp, u_long cmd, void * data, int fflag, struct thread *td) { struct disk *dp; struct g_disk_softc *sc; int error; sc = pp->private; dp = sc->dp; if (dp->d_ioctl == NULL) return (ENOIOCTL); error = dp->d_ioctl(dp, cmd, data, fflag, td); return (error); } static off_t g_disk_maxsize(struct disk *dp, struct bio *bp) { if (bp->bio_cmd == BIO_DELETE) return (dp->d_delmaxsize); return (dp->d_maxsize); } static int g_disk_maxsegs(struct disk *dp, struct bio *bp) { return ((g_disk_maxsize(dp, bp) / PAGE_SIZE) + 1); } static void g_disk_advance(struct disk *dp, struct bio *bp, off_t off) { bp->bio_offset += off; bp->bio_length -= off; if ((bp->bio_flags & BIO_VLIST) != 0) { bus_dma_segment_t *seg, *end; seg = (bus_dma_segment_t *)bp->bio_data; end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n; off += bp->bio_ma_offset; while (off >= seg->ds_len) { KASSERT((seg != end), ("vlist request runs off the end")); off -= seg->ds_len; seg++; } bp->bio_ma_offset = off; bp->bio_ma_n = end - seg; bp->bio_data = (void *)seg; } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma 
+= off / PAGE_SIZE; bp->bio_ma_offset += off; bp->bio_ma_offset %= PAGE_SIZE; bp->bio_ma_n -= off / PAGE_SIZE; } else { bp->bio_data += off; } } static void g_disk_seg_limit(bus_dma_segment_t *seg, off_t *poffset, off_t *plength, int *ppages) { uintptr_t seg_page_base; uintptr_t seg_page_end; off_t offset; off_t length; int seg_pages; offset = *poffset; length = *plength; if (length > seg->ds_len - offset) length = seg->ds_len - offset; seg_page_base = trunc_page(seg->ds_addr + offset); seg_page_end = round_page(seg->ds_addr + offset + length); seg_pages = (seg_page_end - seg_page_base) >> PAGE_SHIFT; if (seg_pages > *ppages) { seg_pages = *ppages; length = (seg_page_base + (seg_pages << PAGE_SHIFT)) - (seg->ds_addr + offset); } *poffset = 0; *plength -= length; *ppages -= seg_pages; } static off_t g_disk_vlist_limit(struct disk *dp, struct bio *bp, bus_dma_segment_t **pendseg) { bus_dma_segment_t *seg, *end; off_t residual; off_t offset; int pages; seg = (bus_dma_segment_t *)bp->bio_data; end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n; residual = bp->bio_length; offset = bp->bio_ma_offset; pages = g_disk_maxsegs(dp, bp); while (residual != 0 && pages != 0) { KASSERT((seg != end), ("vlist limit runs off the end")); g_disk_seg_limit(seg, &offset, &residual, &pages); seg++; } if (pendseg != NULL) *pendseg = seg; return (residual); } static bool g_disk_limit(struct disk *dp, struct bio *bp) { bool limited = false; off_t maxsz; maxsz = g_disk_maxsize(dp, bp); /* * XXX: If we have a stripesize we should really use it here. * Care should be taken in the delete case if this is done * as deletes can be very sensitive to size given how they * are processed. */ if (bp->bio_length > maxsz) { bp->bio_length = maxsz; limited = true; } if ((bp->bio_flags & BIO_VLIST) != 0) { bus_dma_segment_t *firstseg, *endseg; off_t residual; firstseg = (bus_dma_segment_t*)bp->bio_data; residual = g_disk_vlist_limit(dp, bp, &endseg); if (residual != 0) { bp->bio_ma_n = endseg - firstseg; bp->bio_length -= residual; limited = true; } } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma_n = howmany(bp->bio_ma_offset + bp->bio_length, PAGE_SIZE); } return (limited); } static void g_disk_start(struct bio *bp) { struct bio *bp2, *bp3; struct disk *dp; struct g_disk_softc *sc; int error; off_t off; sc = bp->bio_to->private; if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) { g_io_deliver(bp, ENXIO); return; } error = EJUSTRETURN; switch(bp->bio_cmd) { case BIO_DELETE: if (!(dp->d_flags & DISKFLAG_CANDELETE)) { error = EOPNOTSUPP; break; } /* fall-through */ case BIO_READ: case BIO_WRITE: KASSERT((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0 || (bp->bio_flags & BIO_UNMAPPED) == 0, ("unmapped bio not supported by disk %s", dp->d_name)); off = 0; bp3 = NULL; bp2 = g_clone_bio(bp); if (bp2 == NULL) { error = ENOMEM; break; } for (;;) { if (g_disk_limit(dp, bp2)) { off += bp2->bio_length; /* * To avoid a race, we need to grab the next bio * before we schedule this one. See "notes". 
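 * Once bp2 has been handed to d_strategy() it can complete at any time; unless
 * the parent's bio_children count has already been bumped by cloning the next
 * chunk, g_disk_done() could see bio_children == bio_inbed and deliver the
 * parent bio before the split is finished.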
*/ bp3 = g_clone_bio(bp); if (bp3 == NULL) bp->bio_error = ENOMEM; } bp2->bio_done = g_disk_done; bp2->bio_pblkno = bp2->bio_offset / dp->d_sectorsize; bp2->bio_bcount = bp2->bio_length; bp2->bio_disk = dp; mtx_lock(&sc->start_mtx); devstat_start_transaction_bio(dp->d_devstat, bp2); mtx_unlock(&sc->start_mtx); dp->d_strategy(bp2); if (bp3 == NULL) break; bp2 = bp3; bp3 = NULL; g_disk_advance(dp, bp2, off); } break; case BIO_GETATTR: /* Give the driver a chance to override */ if (dp->d_getattr != NULL) { if (bp->bio_disk == NULL) bp->bio_disk = dp; error = dp->d_getattr(bp); if (error != -1) break; error = EJUSTRETURN; } if (g_handleattr_int(bp, "GEOM::candelete", (dp->d_flags & DISKFLAG_CANDELETE) != 0)) break; else if (g_handleattr_int(bp, "GEOM::fwsectors", dp->d_fwsectors)) break; else if (g_handleattr_int(bp, "GEOM::fwheads", dp->d_fwheads)) break; else if (g_handleattr_off_t(bp, "GEOM::frontstuff", 0)) break; else if (g_handleattr_str(bp, "GEOM::ident", dp->d_ident)) break; else if (g_handleattr_uint16_t(bp, "GEOM::hba_vendor", dp->d_hba_vendor)) break; else if (g_handleattr_uint16_t(bp, "GEOM::hba_device", dp->d_hba_device)) break; else if (g_handleattr_uint16_t(bp, "GEOM::hba_subvendor", dp->d_hba_subvendor)) break; else if (g_handleattr_uint16_t(bp, "GEOM::hba_subdevice", dp->d_hba_subdevice)) break; else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump")) g_disk_kerneldump(bp, dp); else if (!strcmp(bp->bio_attribute, "GEOM::setstate")) g_disk_setstate(bp, sc); else if (g_handleattr_uint16_t(bp, "GEOM::rotation_rate", dp->d_rotation_rate)) break; else error = ENOIOCTL; break; case BIO_FLUSH: g_trace(G_T_BIO, "g_disk_flushcache(%s)", bp->bio_to->name); if (!(dp->d_flags & DISKFLAG_CANFLUSHCACHE)) { error = EOPNOTSUPP; break; } /*FALLTHROUGH*/ case BIO_ZONE: if (bp->bio_cmd == BIO_ZONE) { if (!(dp->d_flags & DISKFLAG_CANZONE)) { error = EOPNOTSUPP; break; } g_trace(G_T_BIO, "g_disk_zone(%s)", bp->bio_to->name); } bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return; } bp2->bio_done = g_disk_done; bp2->bio_disk = dp; mtx_lock(&sc->start_mtx); devstat_start_transaction_bio(dp->d_devstat, bp2); mtx_unlock(&sc->start_mtx); dp->d_strategy(bp2); break; default: error = EOPNOTSUPP; break; } if (error != EJUSTRETURN) g_io_deliver(bp, error); return; } static void g_disk_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct bio *bp; struct disk *dp; struct g_disk_softc *sc; char *buf; int res = 0; sc = gp->softc; if (sc == NULL || (dp = sc->dp) == NULL) return; if (indent == NULL) { sbuf_printf(sb, " hd %u", dp->d_fwheads); sbuf_printf(sb, " sc %u", dp->d_fwsectors); return; } if (pp != NULL) { sbuf_printf(sb, "%s%u\n", indent, dp->d_fwheads); sbuf_printf(sb, "%s%u\n", indent, dp->d_fwsectors); /* * "rotationrate" is a little complicated, because the value * returned by the drive might not be the RPM; 0 and 1 are * special cases, and there's also a valid range. */ sbuf_printf(sb, "%s", indent); if (dp->d_rotation_rate == 0) /* Old drives don't */ sbuf_printf(sb, "unknown"); /* report RPM. */ else if (dp->d_rotation_rate == 1) /* Since 0 is used */ sbuf_printf(sb, "0"); /* above, SSDs use 1. 
*/ else if ((dp->d_rotation_rate >= 0x041) && (dp->d_rotation_rate <= 0xfffe)) sbuf_printf(sb, "%u", dp->d_rotation_rate); else sbuf_printf(sb, "invalid"); sbuf_printf(sb, "\n"); if (dp->d_getattr != NULL) { buf = g_malloc(DISK_IDENT_SIZE, M_WAITOK); bp = g_alloc_bio(); bp->bio_disk = dp; bp->bio_attribute = "GEOM::ident"; bp->bio_length = DISK_IDENT_SIZE; bp->bio_data = buf; res = dp->d_getattr(bp); sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", res == 0 ? buf: dp->d_ident); sbuf_printf(sb, "\n"); bp->bio_attribute = "GEOM::lunid"; bp->bio_length = DISK_IDENT_SIZE; bp->bio_data = buf; if (dp->d_getattr(bp) == 0) { sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", buf); sbuf_printf(sb, "\n"); } bp->bio_attribute = "GEOM::lunname"; bp->bio_length = DISK_IDENT_SIZE; bp->bio_data = buf; if (dp->d_getattr(bp) == 0) { sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", buf); sbuf_printf(sb, "\n"); } g_destroy_bio(bp); g_free(buf); } else { sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", dp->d_ident); sbuf_printf(sb, "\n"); } sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", dp->d_descr); sbuf_printf(sb, "\n"); } } static void g_disk_resize(void *ptr, int flag) { struct disk *dp; struct g_geom *gp; struct g_provider *pp; if (flag == EV_CANCEL) return; g_topology_assert(); dp = ptr; gp = dp->d_geom; if (dp->d_destroyed || gp == NULL) return; LIST_FOREACH(pp, &gp->provider, provider) { if (pp->sectorsize != 0 && pp->sectorsize != dp->d_sectorsize) g_wither_provider(pp, ENXIO); else g_resize_provider(pp, dp->d_mediasize); } } static void g_disk_create(void *arg, int flag) { struct g_geom *gp; struct g_provider *pp; struct disk *dp; struct g_disk_softc *sc; char tmpstr[80]; if (flag == EV_CANCEL) return; g_topology_assert(); dp = arg; mtx_pool_lock(mtxpool_sleep, dp); dp->d_init_level = DISK_INIT_START; /* * If the disk has already gone away, we can just stop here and * call the user's callback to tell him we've cleaned things up. 
*/ if (dp->d_goneflag != 0) { mtx_pool_unlock(mtxpool_sleep, dp); if (dp->d_gone != NULL) dp->d_gone(dp); return; } mtx_pool_unlock(mtxpool_sleep, dp); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->start_mtx, "g_disk_start", NULL, MTX_DEF); mtx_init(&sc->done_mtx, "g_disk_done", NULL, MTX_DEF); sc->dp = dp; gp = g_new_geomf(&g_disk_class, "%s%d", dp->d_name, dp->d_unit); gp->softc = sc; pp = g_new_providerf(gp, "%s", gp->name); devstat_remove_entry(pp->stat); pp->stat = NULL; dp->d_devstat->id = pp; pp->mediasize = dp->d_mediasize; pp->sectorsize = dp->d_sectorsize; pp->stripeoffset = dp->d_stripeoffset; pp->stripesize = dp->d_stripesize; if ((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0) pp->flags |= G_PF_ACCEPT_UNMAPPED; if ((dp->d_flags & DISKFLAG_DIRECT_COMPLETION) != 0) pp->flags |= G_PF_DIRECT_SEND; pp->flags |= G_PF_DIRECT_RECEIVE; if (bootverbose) printf("GEOM: new disk %s\n", gp->name); sysctl_ctx_init(&sc->sysctl_ctx); snprintf(tmpstr, sizeof(tmpstr), "GEOM disk %s", gp->name); sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_kern_geom_disk), OID_AUTO, gp->name, CTLFLAG_RD, 0, tmpstr); if (sc->sysctl_tree != NULL) { SYSCTL_ADD_STRING(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "led", CTLFLAG_RWTUN, sc->led, sizeof(sc->led), "LED name"); } pp->private = sc; dp->d_geom = gp; g_error_provider(pp, 0); mtx_pool_lock(mtxpool_sleep, dp); dp->d_init_level = DISK_INIT_DONE; /* * If the disk has gone away at this stage, start the withering * process for it. */ if (dp->d_goneflag != 0) { mtx_pool_unlock(mtxpool_sleep, dp); g_wither_provider(pp, ENXIO); return; } mtx_pool_unlock(mtxpool_sleep, dp); } /* * We get this callback after all of the consumers have gone away, and just * before the provider is freed. If the disk driver provided a d_gone * callback, let them know that it is okay to free resources -- they won't * be getting any more accesses from GEOM. */ static void g_disk_providergone(struct g_provider *pp) { struct disk *dp; struct g_disk_softc *sc; sc = (struct g_disk_softc *)pp->private; dp = sc->dp; if (dp != NULL && dp->d_gone != NULL) dp->d_gone(dp); if (sc->sysctl_tree != NULL) { sysctl_ctx_free(&sc->sysctl_ctx); sc->sysctl_tree = NULL; } if (sc->led[0] != 0) { led_set(sc->led, "0"); sc->led[0] = 0; } pp->private = NULL; pp->geom->softc = NULL; mtx_destroy(&sc->done_mtx); mtx_destroy(&sc->start_mtx); g_free(sc); } static void g_disk_destroy(void *ptr, int flag) { struct disk *dp; struct g_geom *gp; struct g_disk_softc *sc; g_topology_assert(); dp = ptr; gp = dp->d_geom; if (gp != NULL) { sc = gp->softc; if (sc != NULL) sc->dp = NULL; dp->d_geom = NULL; g_wither_geom(gp, ENXIO); } g_free(dp); } /* * We only allow printable characters in disk ident, * the rest is converted to 'x'. 
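 * More precisely, each non-printable byte is expanded to an "x" followed by
 * its two-digit hex value (0x07 becomes the three characters "x07"), printable
 * bytes are copied through unchanged, and the result is clamped to the size of
 * the identifier buffer.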
*/ static void g_disk_ident_adjust(char *ident, size_t size) { char *p, tmp[4], newid[DISK_IDENT_SIZE]; newid[0] = '\0'; for (p = ident; *p != '\0'; p++) { if (isprint(*p)) { tmp[0] = *p; tmp[1] = '\0'; } else { snprintf(tmp, sizeof(tmp), "x%02hhx", *(unsigned char *)p); } if (strlcat(newid, tmp, sizeof(newid)) >= sizeof(newid)) break; } bzero(ident, size); strlcpy(ident, newid, size); } struct disk * disk_alloc(void) { return (g_malloc(sizeof(struct disk), M_WAITOK | M_ZERO)); } void disk_create(struct disk *dp, int version) { if (version != DISK_VERSION) { printf("WARNING: Attempt to add disk %s%d %s", dp->d_name, dp->d_unit, " using incompatible ABI version of disk(9)\n"); printf("WARNING: Ignoring disk %s%d\n", dp->d_name, dp->d_unit); return; } if (dp->d_flags & DISKFLAG_RESERVED) { printf("WARNING: Attempt to add non-MPSAFE disk %s%d\n", dp->d_name, dp->d_unit); printf("WARNING: Ignoring disk %s%d\n", dp->d_name, dp->d_unit); return; } KASSERT(dp->d_strategy != NULL, ("disk_create need d_strategy")); KASSERT(dp->d_name != NULL, ("disk_create need d_name")); KASSERT(*dp->d_name != 0, ("disk_create need d_name")); KASSERT(strlen(dp->d_name) < SPECNAMELEN - 4, ("disk name too long")); if (dp->d_devstat == NULL) dp->d_devstat = devstat_new_entry(dp->d_name, dp->d_unit, dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); dp->d_geom = NULL; dp->d_init_level = DISK_INIT_NONE; g_disk_ident_adjust(dp->d_ident, sizeof(dp->d_ident)); g_post_event(g_disk_create, dp, M_WAITOK, dp, NULL); } void disk_destroy(struct disk *dp) { g_cancel_event(dp); dp->d_destroyed = 1; if (dp->d_devstat != NULL) devstat_remove_entry(dp->d_devstat); g_post_event(g_disk_destroy, dp, M_WAITOK, NULL); } void disk_gone(struct disk *dp) { struct g_geom *gp; struct g_provider *pp; mtx_pool_lock(mtxpool_sleep, dp); dp->d_goneflag = 1; /* * If we're still in the process of creating this disk (the * g_disk_create() function is still queued, or is in * progress), the init level will not yet be DISK_INIT_DONE. * * If that is the case, g_disk_create() will see d_goneflag * and take care of cleaning things up. * * If the disk has already been created, we default to * withering the provider as usual below. * * If the caller has not set a d_gone() callback, he will * not be any worse off by returning here, because the geom * has not been fully setup in any case. 
*/ if (dp->d_init_level < DISK_INIT_DONE) { mtx_pool_unlock(mtxpool_sleep, dp); return; } mtx_pool_unlock(mtxpool_sleep, dp); gp = dp->d_geom; if (gp != NULL) { pp = LIST_FIRST(&gp->provider); if (pp != NULL) { KASSERT(LIST_NEXT(pp, provider) == NULL, ("geom %p has more than one provider", gp)); g_wither_provider(pp, ENXIO); } } } void disk_attr_changed(struct disk *dp, const char *attr, int flag) { struct g_geom *gp; struct g_provider *pp; char devnamebuf[128]; gp = dp->d_geom; if (gp != NULL) LIST_FOREACH(pp, &gp->provider, provider) (void)g_attr_changed(pp, attr, flag); snprintf(devnamebuf, sizeof(devnamebuf), "devname=%s%d", dp->d_name, dp->d_unit); devctl_notify("GEOM", "disk", attr, devnamebuf); } void disk_media_changed(struct disk *dp, int flag) { struct g_geom *gp; struct g_provider *pp; gp = dp->d_geom; if (gp != NULL) { pp = LIST_FIRST(&gp->provider); if (pp != NULL) { KASSERT(LIST_NEXT(pp, provider) == NULL, ("geom %p has more than one provider", gp)); g_media_changed(pp, flag); } } } void disk_media_gone(struct disk *dp, int flag) { struct g_geom *gp; struct g_provider *pp; gp = dp->d_geom; if (gp != NULL) { pp = LIST_FIRST(&gp->provider); if (pp != NULL) { KASSERT(LIST_NEXT(pp, provider) == NULL, ("geom %p has more than one provider", gp)); g_media_gone(pp, flag); } } } int disk_resize(struct disk *dp, int flag) { if (dp->d_destroyed || dp->d_geom == NULL) return (0); return (g_post_event(g_disk_resize, dp, flag, NULL)); } static void g_kern_disks(void *p, int flag __unused) { struct sbuf *sb; struct g_geom *gp; char *sp; sb = p; sp = ""; g_topology_assert(); LIST_FOREACH(gp, &g_disk_class.geom, geom) { sbuf_printf(sb, "%s%s", sp, gp->name); sp = " "; } sbuf_finish(sb); } static int sysctl_disks(SYSCTL_HANDLER_ARGS) { int error; struct sbuf *sb; sb = sbuf_new_auto(); g_waitfor_event(g_kern_disks, sb, M_WAITOK, NULL); error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); return error; } SYSCTL_PROC(_kern, OID_AUTO, disks, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_disks, "A", "names of available disks"); Index: user/alc/PQ_LAUNDRY/sys/kern/kern_mutex.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/kern/kern_mutex.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/kern/kern_mutex.c (revision 303642) @@ -1,1047 +1,1047 @@ /*- * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Berkeley Software Design Inc's name may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ */ /* * Machine independent bits of mutex implementation. */ #include __FBSDID("$FreeBSD$"); #include "opt_adaptive_mutexes.h" #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES) #define ADAPTIVE_MUTEXES #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , lock, failed); #endif /* * Return the mutex address when the lock cookie address is provided. * This functionality assumes that struct mtx* have a member named mtx_lock. */ #define mtxlock2mtx(c) (__containerof(c, struct mtx, mtx_lock)) /* * Internal utility macros. */ #define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) #define mtx_destroyed(m) ((m)->mtx_lock == MTX_DESTROYED) #define mtx_owner(m) ((struct thread *)((m)->mtx_lock & ~MTX_FLAGMASK)) static void assert_mtx(const struct lock_object *lock, int what); #ifdef DDB static void db_show_mtx(const struct lock_object *lock); #endif static void lock_mtx(struct lock_object *lock, uintptr_t how); static void lock_spin(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_mtx(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_mtx(struct lock_object *lock); static uintptr_t unlock_spin(struct lock_object *lock); /* * Lock classes for sleep and spin mutexes. 
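 * Both classes are marked LC_RECURSABLE; they differ in LC_SLEEPLOCK versus
 * LC_SPINLOCK and in the lock/unlock handlers wired up below (the spin-class
 * handlers simply panic, since spin mutexes may only be used with
 * msleep_spin()).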
*/ struct lock_class lock_class_mtx_sleep = { .lc_name = "sleep mutex", .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE, .lc_assert = assert_mtx, #ifdef DDB .lc_ddb_show = db_show_mtx, #endif .lc_lock = lock_mtx, .lc_unlock = unlock_mtx, #ifdef KDTRACE_HOOKS .lc_owner = owner_mtx, #endif }; struct lock_class lock_class_mtx_spin = { .lc_name = "spin mutex", .lc_flags = LC_SPINLOCK | LC_RECURSABLE, .lc_assert = assert_mtx, #ifdef DDB .lc_ddb_show = db_show_mtx, #endif .lc_lock = lock_spin, .lc_unlock = unlock_spin, #ifdef KDTRACE_HOOKS .lc_owner = owner_mtx, #endif }; /* * System-wide mutexes */ struct mtx blocked_lock; struct mtx Giant; void assert_mtx(const struct lock_object *lock, int what) { mtx_assert((const struct mtx *)lock, what); } void lock_mtx(struct lock_object *lock, uintptr_t how) { mtx_lock((struct mtx *)lock); } void lock_spin(struct lock_object *lock, uintptr_t how) { panic("spin locks can only use msleep_spin"); } uintptr_t unlock_mtx(struct lock_object *lock) { struct mtx *m; m = (struct mtx *)lock; mtx_assert(m, MA_OWNED | MA_NOTRECURSED); mtx_unlock(m); return (0); } uintptr_t unlock_spin(struct lock_object *lock) { panic("spin locks can only use msleep_spin"); } #ifdef KDTRACE_HOOKS int owner_mtx(const struct lock_object *lock, struct thread **owner) { const struct mtx *m = (const struct mtx *)lock; *owner = mtx_owner(m); return (mtx_unowned(m) == 0); } #endif /* * Function versions of the inlined __mtx_* macros. These are used by * modules and can also be called from assembly language if needed. */ void __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("mtx_lock() by idle thread %p on sleep mutex %s @ %s:%d", curthread, m->lock_object.lo_name, file, line)); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_lock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep, ("mtx_lock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); __mtx_lock(m, curthread, opts, file, line); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE, file, line); TD_LOCKS_INC(curthread); } void __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_unlock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep, ("mtx_unlock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file, line); mtx_assert(m, MA_OWNED); __mtx_unlock(m, curthread, opts, file, line); TD_LOCKS_DEC(curthread); } void __mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_lock_spin() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("mtx_lock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); if (mtx_owned(m)) KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & 
MTX_RECURSE) != 0, ("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); opts &= ~MTX_RECURSE; WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); __mtx_lock_spin(m, curthread, opts, file, line); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); } int __mtx_trylock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return (1); m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_trylock_spin() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("mtx_trylock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); KASSERT((opts & MTX_RECURSE) == 0, ("mtx_trylock_spin: unsupp. opt MTX_RECURSE on mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); if (__mtx_trylock_spin(m, curthread, opts, file, line)) { LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 1, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); return (1); } LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 0, file, line); return (0); } void __mtx_unlock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_unlock_spin() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("mtx_unlock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file, line); mtx_assert(m, MA_OWNED); __mtx_unlock_spin(m); } /* * The important part of mtx_trylock{,_flags}() * Tries to acquire lock `m.' If this function is called on a mutex that * is already owned, it will recursively acquire the lock. */ int _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif int rval; if (SCHEDULER_STOPPED()) return (1); m = mtxlock2mtx(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("mtx_trylock() by idle thread %p on sleep mutex %s @ %s:%d", curthread, m->lock_object.lo_name, file, line)); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_trylock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep, ("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); if (mtx_owned(m) && ((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0)) { m->mtx_recurse++; atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); rval = 1; } else rval = _mtx_obtain_lock(m, (uintptr_t)curthread); opts &= ~MTX_RECURSE; LOCK_LOG_TRY("LOCK", &m->lock_object, opts, rval, file, line); if (rval) { WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); TD_LOCKS_INC(curthread); if (m->mtx_recurse == 0) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested, waittime, file, line); } return (rval); } /* * __mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock. * * We call this if the lock is either contested (i.e. we need to go to * sleep waiting for it), or if we need to recurse on it. 
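 * When ADAPTIVE_MUTEXES is enabled, the contested path first spins with
 * cpu_spinwait() for as long as the current owner is running on another CPU,
 * and only queues on a turnstile once the owner has gone off-CPU or the lock
 * state changes.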
*/ void __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, const char *file, int line) { struct mtx *m; struct turnstile *ts; uintptr_t v; #ifdef ADAPTIVE_MUTEXES volatile struct thread *owner; #endif #ifdef KTR int cont_logged = 0; #endif #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #ifdef KDTRACE_HOOKS - uint64_t spin_cnt = 0; - uint64_t sleep_cnt = 0; + u_int spin_cnt = 0; + u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); if (mtx_owned(m)) { KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0, ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); opts &= ~MTX_RECURSE; m->mtx_recurse++; atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m); return; } opts &= ~MTX_RECURSE; #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR4(KTR_LOCK, "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d", m->lock_object.lo_name, (void *)m->mtx_lock, file, line); #ifdef KDTRACE_HOOKS all_time -= lockstat_nsecs(&m->lock_object); #endif for (;;) { if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid)) break; #ifdef KDTRACE_HOOKS spin_cnt++; #endif #ifdef ADAPTIVE_MUTEXES /* * If the owner is running on another CPU, spin until the * owner stops running or the state of the lock changes. */ v = m->mtx_lock; if (v != MTX_UNOWNED) { owner = (struct thread *)(v & ~MTX_FLAGMASK); if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&m->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, m, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "spinning", "lockname:\"%s\"", m->lock_object.lo_name); while (mtx_owner(m) == owner && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "running"); continue; } } #endif ts = turnstile_trywait(&m->lock_object); v = m->mtx_lock; /* * Check if the lock has been released while spinning for * the turnstile chain lock. */ if (v == MTX_UNOWNED) { turnstile_cancel(ts); continue; } #ifdef ADAPTIVE_MUTEXES /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ owner = (struct thread *)(v & ~MTX_FLAGMASK); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } #endif /* * If the mutex isn't already contested and a failure occurs * setting the contested bit, the mutex was either released * or the state of the MTX_RECURSED bit changed. */ if ((v & MTX_CONTESTED) == 0 && !atomic_cmpset_ptr(&m->mtx_lock, v, v | MTX_CONTESTED)) { turnstile_cancel(ts); continue; } /* * We definitely must sleep for this lock. */ mtx_assert(m, MA_NOTOWNED); #ifdef KTR if (!cont_logged) { CTR6(KTR_CONTENTION, "contention: %p at %s:%d wants %s, taken by %s:%d", (void *)tid, file, line, m->lock_object.lo_name, WITNESS_FILE(&m->lock_object), WITNESS_LINE(&m->lock_object)); cont_logged = 1; } #endif /* * Block on the turnstile. 
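 * turnstile_wait() places the thread on the turnstile's exclusive queue, lends
 * its priority to the current owner, and sleeps until the owner releases the
 * mutex and wakes the queue.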
*/ #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&m->lock_object); #endif turnstile_wait(ts, mtx_owner(m), TS_EXCLUSIVE_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&m->lock_object); sleep_cnt++; #endif } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&m->lock_object); #endif #ifdef KTR if (cont_logged) { CTR4(KTR_CONTENTION, "contention end: %s acquired by %p at %s:%d", m->lock_object.lo_name, (void *)tid, file, line); } #endif LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested, waittime, file, line); #ifdef KDTRACE_HOOKS if (sleep_time) LOCKSTAT_RECORD1(adaptive__block, m, sleep_time); /* * Only record the loops spinning and not sleeping. */ if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD1(adaptive__spin, m, all_time - sleep_time); #endif } static void _mtx_lock_spin_failed(struct mtx *m) { struct thread *td; td = mtx_owner(m); /* If the mutex is unlocked, try again. */ if (td == NULL) return; printf( "spin lock %p (%s) held by %p (tid %d) too long\n", m, m->lock_object.lo_name, td, td->td_tid); #ifdef WITNESS witness_display_spinlock(&m->lock_object, td, printf); #endif panic("spin lock held too long"); } #ifdef SMP /* * _mtx_lock_spin_cookie: the tougher part of acquiring an MTX_SPIN lock. * * This is only called if we need to actually spin for the lock. Recursion * is handled inline. */ void _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts, const char *file, int line) { struct mtx *m; int i = 0; #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #ifdef KDTRACE_HOOKS int64_t spin_time = 0; #endif if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m); KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "spinning", "lockname:\"%s\"", m->lock_object.lo_name); #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime); #ifdef KDTRACE_HOOKS spin_time -= lockstat_nsecs(&m->lock_object); #endif for (;;) { if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid)) break; /* Give interrupts a chance while we spin. */ spinlock_exit(); while (m->mtx_lock != MTX_UNOWNED) { if (i++ < 10000000) { cpu_spinwait(); continue; } if (i < 60000000 || kdb_active || panicstr != NULL) DELAY(1); else _mtx_lock_spin_failed(m); cpu_spinwait(); } spinlock_enter(); } #ifdef KDTRACE_HOOKS spin_time += lockstat_nsecs(&m->lock_object); #endif if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m); KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "running"); #ifdef KDTRACE_HOOKS LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m, contested, waittime, file, line); if (spin_time != 0) LOCKSTAT_RECORD1(spin__spin, m, spin_time); #endif } #endif /* SMP */ void thread_lock_flags_(struct thread *td, int opts, const char *file, int line) { struct mtx *m; uintptr_t tid; int i; #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #ifdef KDTRACE_HOOKS int64_t spin_time = 0; #endif i = 0; tid = (uintptr_t)curthread; if (SCHEDULER_STOPPED()) { /* * Ensure that spinlock sections are balanced even when the * scheduler is stopped, since we may otherwise inadvertently * re-enable interrupts while dumping core. 
*/ spinlock_enter(); return; } #ifdef KDTRACE_HOOKS spin_time -= lockstat_nsecs(&td->td_lock->lock_object); #endif for (;;) { retry: spinlock_enter(); m = td->td_lock; KASSERT(m->mtx_lock != MTX_DESTROYED, ("thread_lock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("thread_lock() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); if (mtx_owned(m)) KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0, ("thread_lock: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); for (;;) { if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid)) break; if (m->mtx_lock == tid) { m->mtx_recurse++; break; } #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime); /* Give interrupts a chance while we spin. */ spinlock_exit(); while (m->mtx_lock != MTX_UNOWNED) { if (i++ < 10000000) cpu_spinwait(); else if (i < 60000000 || kdb_active || panicstr != NULL) DELAY(1); else _mtx_lock_spin_failed(m); cpu_spinwait(); if (m != td->td_lock) goto retry; } spinlock_enter(); } if (m == td->td_lock) break; __mtx_unlock_spin(m); /* does spinlock_exit() */ } #ifdef KDTRACE_HOOKS spin_time += lockstat_nsecs(&m->lock_object); #endif if (m->mtx_recurse == 0) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m, contested, waittime, file, line); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); #ifdef KDTRACE_HOOKS if (spin_time != 0) LOCKSTAT_RECORD1(thread__spin, m, spin_time); #endif } struct mtx * thread_lock_block(struct thread *td) { struct mtx *lock; THREAD_LOCK_ASSERT(td, MA_OWNED); lock = td->td_lock; td->td_lock = &blocked_lock; mtx_unlock_spin(lock); return (lock); } void thread_lock_unblock(struct thread *td, struct mtx *new) { mtx_assert(new, MA_OWNED); MPASS(td->td_lock == &blocked_lock); atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new); } void thread_lock_set(struct thread *td, struct mtx *new) { struct mtx *lock; mtx_assert(new, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); lock = td->td_lock; td->td_lock = new; mtx_unlock_spin(lock); } /* * __mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock. * * We are only called here if the lock is recursed or contested (i.e. we * need to wake up a blocked thread). */ void __mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; struct turnstile *ts; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); if (mtx_recursed(m)) { if (--(m->mtx_recurse) == 0) atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m); return; } /* * We have to lock the chain before the turnstile so this turnstile * can be removed from the hash list if it is empty. */ turnstile_chain_lock(&m->lock_object); ts = turnstile_lookup(&m->lock_object); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m); MPASS(ts != NULL); turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE); _mtx_release_lock_quick(m); /* * This turnstile is now no longer associated with the mutex. We can * unlock the chain lock so a new turnstile may take it's place. 
*/ turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); turnstile_chain_unlock(&m->lock_object); } /* * All the unlocking of MTX_SPIN locks is done inline. * See the __mtx_unlock_spin() macro for the details. */ /* * The backing function for the INVARIANTS-enabled mtx_assert() */ #ifdef INVARIANT_SUPPORT void __mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line) { const struct mtx *m; if (panicstr != NULL || dumping) return; m = mtxlock2mtx(c); switch (what) { case MA_OWNED: case MA_OWNED | MA_RECURSED: case MA_OWNED | MA_NOTRECURSED: if (!mtx_owned(m)) panic("mutex %s not owned at %s:%d", m->lock_object.lo_name, file, line); if (mtx_recursed(m)) { if ((what & MA_NOTRECURSED) != 0) panic("mutex %s recursed at %s:%d", m->lock_object.lo_name, file, line); } else if ((what & MA_RECURSED) != 0) { panic("mutex %s unrecursed at %s:%d", m->lock_object.lo_name, file, line); } break; case MA_NOTOWNED: if (mtx_owned(m)) panic("mutex %s owned at %s:%d", m->lock_object.lo_name, file, line); break; default: panic("unknown mtx_assert at %s:%d", file, line); } } #endif /* * General init routine used by the MTX_SYSINIT() macro. */ void mtx_sysinit(void *arg) { struct mtx_args *margs = arg; mtx_init((struct mtx *)margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts); } /* * Mutex initialization routine; initialize lock `m' of type contained in * `opts' with options contained in `opts' and name `name.' The optional * lock type `type' is used as a general lock category name for use with * witness. */ void _mtx_init(volatile uintptr_t *c, const char *name, const char *type, int opts) { struct mtx *m; struct lock_class *class; int flags; m = mtxlock2mtx(c); MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE | MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE | MTX_NEW)) == 0); ASSERT_ATOMIC_LOAD_PTR(m->mtx_lock, ("%s: mtx_lock not aligned for %s: %p", __func__, name, &m->mtx_lock)); /* Determine lock class and lock flags. */ if (opts & MTX_SPIN) class = &lock_class_mtx_spin; else class = &lock_class_mtx_sleep; flags = 0; if (opts & MTX_QUIET) flags |= LO_QUIET; if (opts & MTX_RECURSE) flags |= LO_RECURSABLE; if ((opts & MTX_NOWITNESS) == 0) flags |= LO_WITNESS; if (opts & MTX_DUPOK) flags |= LO_DUPOK; if (opts & MTX_NOPROFILE) flags |= LO_NOPROFILE; if (opts & MTX_NEW) flags |= LO_NEW; /* Initialize mutex. */ lock_init(&m->lock_object, class, name, type, flags); m->mtx_lock = MTX_UNOWNED; m->mtx_recurse = 0; } /* * Remove lock `m' from all_mtx queue. We don't allow MTX_QUIET to be * passed in as a flag here because if the corresponding mtx_init() was * called with MTX_QUIET set, then it will already be set in the mutex's * flags. */ void _mtx_destroy(volatile uintptr_t *c) { struct mtx *m; m = mtxlock2mtx(c); if (!mtx_owned(m)) MPASS(mtx_unowned(m)); else { MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0); /* Perform the non-mtx related part of mtx_unlock_spin(). */ if (LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin) spinlock_exit(); else TD_LOCKS_DEC(curthread); lock_profile_release_lock(&m->lock_object); /* Tell witness this isn't locked to make it happy. */ WITNESS_UNLOCK(&m->lock_object, LOP_EXCLUSIVE, __FILE__, __LINE__); } m->mtx_lock = MTX_DESTROYED; lock_destroy(&m->lock_object); } /* * Intialize the mutex code and system mutexes. This is called from the MD * startup code prior to mi_startup(). The per-CPU data space needs to be * setup before this is called. */ void mutex_init(void) { /* Setup turnstiles so that sleep mutexes work. */ init_turnstiles(); /* * Initialize mutexes. 
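 * Giant is created as a recursable sleep mutex (MTX_DEF | MTX_RECURSE), while
 * blocked_lock is a spin mutex whose lock word is immediately poisoned with
 * 0xdeadc0de so that it always appears owned.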
*/ mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE); mtx_init(&blocked_lock, "blocked lock", NULL, MTX_SPIN); blocked_lock.mtx_lock = 0xdeadc0de; /* Always blocked. */ mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN); mtx_init(&proc0.p_statmtx, "pstatl", NULL, MTX_SPIN); mtx_init(&proc0.p_itimmtx, "pitiml", NULL, MTX_SPIN); mtx_init(&proc0.p_profmtx, "pprofl", NULL, MTX_SPIN); mtx_init(&devmtx, "cdev", NULL, MTX_DEF); mtx_lock(&Giant); } #ifdef DDB void db_show_mtx(const struct lock_object *lock) { struct thread *td; const struct mtx *m; m = (const struct mtx *)lock; db_printf(" flags: {"); if (LOCK_CLASS(lock) == &lock_class_mtx_spin) db_printf("SPIN"); else db_printf("DEF"); if (m->lock_object.lo_flags & LO_RECURSABLE) db_printf(", RECURSE"); if (m->lock_object.lo_flags & LO_DUPOK) db_printf(", DUPOK"); db_printf("}\n"); db_printf(" state: {"); if (mtx_unowned(m)) db_printf("UNOWNED"); else if (mtx_destroyed(m)) db_printf("DESTROYED"); else { db_printf("OWNED"); if (m->mtx_lock & MTX_CONTESTED) db_printf(", CONTESTED"); if (m->mtx_lock & MTX_RECURSED) db_printf(", RECURSED"); } db_printf("}\n"); if (!mtx_unowned(m) && !mtx_destroyed(m)) { td = mtx_owner(m); db_printf(" owner: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (mtx_recursed(m)) db_printf(" recursed: %d\n", m->mtx_recurse); } } #endif Index: user/alc/PQ_LAUNDRY/sys/kern/kern_rwlock.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/kern/kern_rwlock.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/kern/kern_rwlock.c (revision 303642) @@ -1,1277 +1,1277 @@ /*- * Copyright (c) 2006 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Machine independent bits of reader/writer lock implementation. 
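 *
 * Illustrative usage sketch (not part of this change; bar_lock is a
 * hypothetical name).  Readers take the shared side and may run
 * concurrently, while a writer takes the exclusive side:
 *
 *	static struct rwlock bar_lock;
 *
 *	rw_init(&bar_lock, "bar data");
 *
 *	rw_rlock(&bar_lock);
 *	... read shared data ...
 *	rw_runlock(&bar_lock);
 *
 *	rw_wlock(&bar_lock);
 *	... modify shared data ...
 *	rw_wunlock(&bar_lock);
 *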
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_no_adaptive_rwlocks.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS) #define ADAPTIVE_RWLOCKS #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DECLARE( , , lock, failed); #endif /* * Return the rwlock address when the lock cookie address is provided. * This functionality assumes that struct rwlock* have a member named rw_lock. */ #define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock)) #ifdef ADAPTIVE_RWLOCKS static int rowner_retries = 10; static int rowner_loops = 10000; static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL, "rwlock debugging"); SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, ""); SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, ""); #endif #ifdef DDB #include static void db_show_rwlock(const struct lock_object *lock); #endif static void assert_rw(const struct lock_object *lock, int what); static void lock_rw(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_rw(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_rw(struct lock_object *lock); struct lock_class lock_class_rw = { .lc_name = "rw", .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE, .lc_assert = assert_rw, #ifdef DDB .lc_ddb_show = db_show_rwlock, #endif .lc_lock = lock_rw, .lc_unlock = unlock_rw, #ifdef KDTRACE_HOOKS .lc_owner = owner_rw, #endif }; /* * Return a pointer to the owning thread if the lock is write-locked or * NULL if the lock is unlocked or read-locked. */ #define rw_wowner(rw) \ ((rw)->rw_lock & RW_LOCK_READ ? NULL : \ (struct thread *)RW_OWNER((rw)->rw_lock)) /* * Returns if a write owner is recursed. Write ownership is not assured * here and should be previously checked. */ #define rw_recursed(rw) ((rw)->rw_recurse != 0) /* * Return true if curthread helds the lock. */ #define rw_wlocked(rw) (rw_wowner((rw)) == curthread) /* * Return a pointer to the owning thread for this lock who should receive * any priority lent by threads that block on this lock. Currently this * is identical to rw_wowner(). */ #define rw_owner(rw) rw_wowner(rw) #ifndef INVARIANTS #define __rw_assert(c, what, file, line) #endif void assert_rw(const struct lock_object *lock, int what) { rw_assert((const struct rwlock *)lock, what); } void lock_rw(struct lock_object *lock, uintptr_t how) { struct rwlock *rw; rw = (struct rwlock *)lock; if (how) rw_rlock(rw); else rw_wlock(rw); } uintptr_t unlock_rw(struct lock_object *lock) { struct rwlock *rw; rw = (struct rwlock *)lock; rw_assert(rw, RA_LOCKED | LA_NOTRECURSED); if (rw->rw_lock & RW_LOCK_READ) { rw_runlock(rw); return (1); } else { rw_wunlock(rw); return (0); } } #ifdef KDTRACE_HOOKS int owner_rw(const struct lock_object *lock, struct thread **owner) { const struct rwlock *rw = (const struct rwlock *)lock; uintptr_t x = rw->rw_lock; *owner = rw_wowner(rw); return ((x & RW_LOCK_READ) != 0 ? 
(RW_READERS(x) != 0) : (*owner != NULL)); } #endif void _rw_init_flags(volatile uintptr_t *c, const char *name, int opts) { struct rwlock *rw; int flags; rw = rwlock2rw(c); MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET | RW_RECURSE | RW_NEW)) == 0); ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock, ("%s: rw_lock not aligned for %s: %p", __func__, name, &rw->rw_lock)); flags = LO_UPGRADABLE; if (opts & RW_DUPOK) flags |= LO_DUPOK; if (opts & RW_NOPROFILE) flags |= LO_NOPROFILE; if (!(opts & RW_NOWITNESS)) flags |= LO_WITNESS; if (opts & RW_RECURSE) flags |= LO_RECURSABLE; if (opts & RW_QUIET) flags |= LO_QUIET; if (opts & RW_NEW) flags |= LO_NEW; lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags); rw->rw_lock = RW_UNLOCKED; rw->rw_recurse = 0; } void _rw_destroy(volatile uintptr_t *c) { struct rwlock *rw; rw = rwlock2rw(c); KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw)); KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw)); rw->rw_lock = RW_DESTROYED; lock_destroy(&rw->lock_object); } void rw_sysinit(void *arg) { struct rw_args *args = arg; rw_init((struct rwlock *)args->ra_rw, args->ra_desc); } void rw_sysinit_flags(void *arg) { struct rw_args_flags *args = arg; rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc, args->ra_flags); } int _rw_wowned(const volatile uintptr_t *c) { return (rw_wowner(rwlock2rw(c)) == curthread); } void _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_wlock() of destroyed rwlock @ %s:%d", file, line)); WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); __rw_wlock(rw, curthread, file, line); LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_INC(curthread); } int __rw_try_wlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; int rval; if (SCHEDULER_STOPPED()) return (1); rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line)); if (rw_wlocked(rw) && (rw->lock_object.lo_flags & LO_RECURSABLE) != 0) { rw->rw_recurse++; rval = 1; } else rval = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED, (uintptr_t)curthread); LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line); if (rval) { WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); if (!rw_recursed(rw)) LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, 0, 0, file, line, LOCKSTAT_WRITER); TD_LOCKS_INC(curthread); } return (rval); } void _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_WLOCKED, file, line); WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); __rw_wunlock(rw, curthread, file, line); TD_LOCKS_DEC(curthread); } /* * Determines whether a new reader can acquire a lock. 
Succeeds if the * reader already owns a read lock and the lock is locked for read to * prevent deadlock from reader recursion. Also succeeds if the lock * is unlocked and has no writer waiters or spinners. Failing otherwise * prioritizes writers before readers. */ #define RW_CAN_READ(_rw) \ ((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) & \ (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) == \ RW_LOCK_READ) void __rw_rlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; #ifdef ADAPTIVE_RWLOCKS volatile struct thread *owner; int spintries = 0; int i; #endif #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif uintptr_t v; #ifdef KDTRACE_HOOKS uintptr_t state; - uint64_t spin_cnt = 0; - uint64_t sleep_cnt = 0; + u_int spin_cnt = 0; + u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_rlock() of destroyed rwlock @ %s:%d", file, line)); KASSERT(rw_wowner(rw) != curthread, ("rw_rlock: wlock already held for %s @ %s:%d", rw->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL); #ifdef KDTRACE_HOOKS all_time -= lockstat_nsecs(&rw->lock_object); state = rw->rw_lock; #endif for (;;) { /* * Handle the easy case. If no other thread has a write * lock, then try to bump up the count of read locks. Note * that we have to preserve the current state of the * RW_LOCK_WRITE_WAITERS flag. If we fail to acquire a * read lock, then rw_lock must have changed, so restart * the loop. Note that this handles the case of a * completely unlocked rwlock since such a lock is encoded * as a read lock with no waiters. */ v = rw->rw_lock; if (RW_CAN_READ(v)) { /* * The RW_LOCK_READ_WAITERS flag should only be set * if the lock has been unlocked and write waiters * were present. */ if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, v + RW_ONE_READER)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, rw, (void *)v, (void *)(v + RW_ONE_READER)); break; } continue; } #ifdef KDTRACE_HOOKS spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&rw->lock_object, &contested, &waittime); #ifdef ADAPTIVE_RWLOCKS /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. 
*/ if ((v & RW_LOCK_READ) == 0) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } } else if (spintries < rowner_retries) { spintries++; KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); for (i = 0; i < rowner_loops; i++) { v = rw->rw_lock; if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v)) break; cpu_spinwait(); } #ifdef KDTRACE_HOOKS spin_cnt += rowner_loops - i; #endif KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); if (i != rowner_loops) continue; } #endif /* * Okay, now it's the hard case. Some other thread already * has a write lock or there are write waiters present, * acquire the turnstile lock so we can begin the process * of blocking. */ ts = turnstile_trywait(&rw->lock_object); /* * The lock might have been released while we spun, so * recheck its state and restart the loop if needed. */ v = rw->rw_lock; if (RW_CAN_READ(v)) { turnstile_cancel(ts); continue; } #ifdef ADAPTIVE_RWLOCKS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if ((v & RW_LOCK_READ) == 0) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } } #endif /* * The lock is held in write mode or it already has waiters. */ MPASS(!RW_CAN_READ(v)); /* * If the RW_LOCK_READ_WAITERS flag is already set, then * we can go ahead and block. If it is not set then try * to set it. If we fail to set it drop the turnstile * lock and restart the loop. */ if (!(v & RW_LOCK_READ_WAITERS)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_READ_WAITERS)) { turnstile_cancel(ts); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set read waiters flag", __func__, rw); } /* * We were unable to acquire the lock and the read waiters * flag is set, so we must block on the turnstile. */ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&rw->lock_object); #endif turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&rw->lock_object); sleep_cnt++; #endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&rw->lock_object); if (sleep_time) LOCKSTAT_RECORD4(rw__block, rw, sleep_time, LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. */ if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); #endif /* * TODO: acquire "owner of record" here. Here be turnstile dragons * however. turnstiles don't like owners changing between calls to * turnstile_wait() currently. 
*/ LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested, waittime, file, line, LOCKSTAT_READER); LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line); WITNESS_LOCK(&rw->lock_object, 0, file, line); TD_LOCKS_INC(curthread); curthread->td_rw_rlocks++; } int __rw_try_rlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; uintptr_t x; if (SCHEDULER_STOPPED()) return (1); rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); for (;;) { x = rw->rw_lock; KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line)); if (!(x & RW_LOCK_READ)) break; if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) { LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file, line); WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line); LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, 0, 0, file, line, LOCKSTAT_READER); TD_LOCKS_INC(curthread); curthread->td_rw_rlocks++; return (1); } } LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line); return (0); } void _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; uintptr_t x, v, queue; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_runlock() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_RLOCKED, file, line); WITNESS_UNLOCK(&rw->lock_object, 0, file, line); LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line); /* TODO: drop "owner of record" here. */ for (;;) { /* * See if there is more than one read lock held. If so, * just drop one and return. */ x = rw->rw_lock; if (RW_READERS(x) > 1) { if (atomic_cmpset_rel_ptr(&rw->rw_lock, x, x - RW_ONE_READER)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeeded %p -> %p", __func__, rw, (void *)x, (void *)(x - RW_ONE_READER)); break; } continue; } /* * If there aren't any waiters for a write lock, then try * to drop it quickly. */ if (!(x & RW_LOCK_WAITERS)) { MPASS((x & ~RW_LOCK_WRITE_SPINNER) == RW_READERS_LOCK(1)); if (atomic_cmpset_rel_ptr(&rw->rw_lock, x, RW_UNLOCKED)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded", __func__, rw); break; } continue; } /* * Ok, we know we have waiters and we think we are the * last reader, so grab the turnstile lock. */ turnstile_chain_lock(&rw->lock_object); v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER); MPASS(v & RW_LOCK_WAITERS); /* * Try to drop our lock leaving the lock in a unlocked * state. * * If you wanted to do explicit lock handoff you'd have to * do it here. You'd also want to use turnstile_signal() * and you'd have to handle the race where a higher * priority thread blocks on the write lock before the * thread you wakeup actually runs and have the new thread * "steal" the lock. For now it's a lot simpler to just * wakeup all of the waiters. * * As above, if we fail, then another thread might have * acquired a read lock, so drop the turnstile lock and * restart. */ x = RW_UNLOCKED; if (v & RW_LOCK_WRITE_WAITERS) { queue = TS_EXCLUSIVE_QUEUE; x |= (v & RW_LOCK_READ_WAITERS); } else queue = TS_SHARED_QUEUE; if (!atomic_cmpset_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v, x)) { turnstile_chain_unlock(&rw->lock_object); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded with waiters", __func__, rw); /* * Ok. 
The lock is released and all that's left is to * wake up the waiters. Note that the lock might not be * free anymore, but in that case the writers will just * block again if they run before the new lock holder(s) * release the lock. */ ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); turnstile_broadcast(ts, queue); turnstile_unpend(ts, TS_SHARED_LOCK); turnstile_chain_unlock(&rw->lock_object); break; } LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER); TD_LOCKS_DEC(curthread); curthread->td_rw_rlocks--; } /* * This function is called when we are unable to obtain a write lock on the * first try. This means that at least one other thread holds either a * read or write lock. */ void __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; #ifdef ADAPTIVE_RWLOCKS volatile struct thread *owner; int spintries = 0; int i; #endif uintptr_t v, x; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif #ifdef KDTRACE_HOOKS uintptr_t state; - uint64_t spin_cnt = 0; - uint64_t sleep_cnt = 0; + u_int spin_cnt = 0; + u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); if (rw_wlocked(rw)) { KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE, ("%s: recursing but non-recursive rw %s @ %s:%d\n", __func__, rw->lock_object.lo_name, file, line)); rw->rw_recurse++; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw); return; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, rw->lock_object.lo_name, (void *)rw->rw_lock, file, line); #ifdef KDTRACE_HOOKS all_time -= lockstat_nsecs(&rw->lock_object); state = rw->rw_lock; #endif for (;;) { if (rw->rw_lock == RW_UNLOCKED && _rw_write_lock(rw, tid)) break; #ifdef KDTRACE_HOOKS spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&rw->lock_object, &contested, &waittime); #ifdef ADAPTIVE_RWLOCKS /* * If the lock is write locked and the owner is * running on another CPU, spin until the owner stops * running or the state of the lock changes. 
*/ v = rw->rw_lock; owner = (struct thread *)RW_OWNER(v); if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } if ((v & RW_LOCK_READ) && RW_READERS(v) && spintries < rowner_retries) { if (!(v & RW_LOCK_WRITE_SPINNER)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_WRITE_SPINNER)) { continue; } } spintries++; KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); for (i = 0; i < rowner_loops; i++) { if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0) break; cpu_spinwait(); } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); #ifdef KDTRACE_HOOKS spin_cnt += rowner_loops - i; #endif if (i != rowner_loops) continue; } #endif ts = turnstile_trywait(&rw->lock_object); v = rw->rw_lock; #ifdef ADAPTIVE_RWLOCKS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if (!(v & RW_LOCK_READ)) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } } #endif /* * Check for the waiters flags about this rwlock. * If the lock was released, without maintain any pending * waiters queue, simply try to acquire it. * If a pending waiters queue is present, claim the lock * ownership and maintain the pending queue. */ x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER); if ((v & ~x) == RW_UNLOCKED) { x &= ~RW_LOCK_WRITE_SPINNER; if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) { if (x) turnstile_claim(ts); else turnstile_cancel(ts); break; } turnstile_cancel(ts); continue; } /* * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to * set it. If we fail to set it, then loop back and try * again. */ if (!(v & RW_LOCK_WRITE_WAITERS)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_WRITE_WAITERS)) { turnstile_cancel(ts); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set write waiters flag", __func__, rw); } /* * We were unable to acquire the lock and the write waiters * flag is set, so we must block on the turnstile. */ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&rw->lock_object); #endif turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&rw->lock_object); sleep_cnt++; #endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); #ifdef ADAPTIVE_RWLOCKS spintries = 0; #endif } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&rw->lock_object); if (sleep_time) LOCKSTAT_RECORD4(rw__block, rw, sleep_time, LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. 
*/ if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, - LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, + LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); #endif LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested, waittime, file, line, LOCKSTAT_WRITER); } /* * This function is called if the first try at releasing a write lock failed. * This means that one of the 2 waiter bits must be set indicating that at * least one thread is waiting on this lock. */ void __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; uintptr_t v; int queue; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); if (rw_wlocked(rw) && rw_recursed(rw)) { rw->rw_recurse--; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw); return; } KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS), ("%s: neither of the waiter flags are set", __func__)); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p contested", __func__, rw); turnstile_chain_lock(&rw->lock_object); ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); /* * Use the same algo as sx locks for now. Prefer waking up shared * waiters if we have any over writers. This is probably not ideal. * * 'v' is the value we are going to write back to rw_lock. If we * have waiters on both queues, we need to preserve the state of * the waiter flag for the queue we don't wake up. For now this is * hardcoded for the algorithm mentioned above. * * In the case of both readers and writers waiting we wakeup the * readers but leave the RW_LOCK_WRITE_WAITERS flag set. If a * new writer comes in before a reader it will claim the lock up * above. There is probably a potential priority inversion in * there that could be worked around either by waking both queues * of waiters or doing some complicated lock handoff gymnastics. */ v = RW_UNLOCKED; if (rw->rw_lock & RW_LOCK_WRITE_WAITERS) { queue = TS_EXCLUSIVE_QUEUE; v |= (rw->rw_lock & RW_LOCK_READ_WAITERS); } else queue = TS_SHARED_QUEUE; /* Wake up all waiters for the specific queue. */ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw, queue == TS_SHARED_QUEUE ? "read" : "write"); turnstile_broadcast(ts, queue); atomic_store_rel_ptr(&rw->rw_lock, v); turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); turnstile_chain_unlock(&rw->lock_object); } /* * Attempt to do a non-blocking upgrade from a read lock to a write * lock. This will only succeed if this thread holds a single read * lock. Returns true if the upgrade succeeded and false otherwise. */ int __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; uintptr_t v, x, tid; struct turnstile *ts; int success; if (SCHEDULER_STOPPED()) return (1); rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_RLOCKED, file, line); /* * Attempt to switch from one reader to a writer. If there * are any write waiters, then we will have to lock the * turnstile first to prevent races with another writer * calling turnstile_wait() before we have claimed this * turnstile. So, do the simple case of no waiters first. 
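 *
 * Illustrative caller pattern (hypothetical names, not part of this
 * change): because the upgrade can fail, callers must be prepared to drop
 * the read lock, take the write lock and revalidate their state:
 *
 *	rw_rlock(&bar_lock);
 *	if (needs_update(obj) && !rw_try_upgrade(&bar_lock)) {
 *		rw_runlock(&bar_lock);
 *		rw_wlock(&bar_lock);
 *		... re-check obj, it may have changed while unlocked ...
 *	}
 *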
*/ tid = (uintptr_t)curthread; success = 0; for (;;) { v = rw->rw_lock; if (RW_READERS(v) > 1) break; if (!(v & RW_LOCK_WAITERS)) { success = atomic_cmpset_ptr(&rw->rw_lock, v, tid); if (!success) continue; break; } /* * Ok, we think we have waiters, so lock the turnstile. */ ts = turnstile_trywait(&rw->lock_object); v = rw->rw_lock; if (RW_READERS(v) > 1) { turnstile_cancel(ts); break; } /* * Try to switch from one reader to a writer again. This time * we honor the current state of the waiters flags. * If we obtain the lock with the flags set, then claim * ownership of the turnstile. */ x = rw->rw_lock & RW_LOCK_WAITERS; success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x); if (success) { if (x) turnstile_claim(ts); else turnstile_cancel(ts); break; } turnstile_cancel(ts); } LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line); if (success) { curthread->td_rw_rlocks--; WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); LOCKSTAT_RECORD0(rw__upgrade, rw); } return (success); } /* * Downgrade a write lock into a single read lock. */ void __rw_downgrade(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; uintptr_t tid, v; int rwait, wwait; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_WLOCKED | RA_NOTRECURSED, file, line); #ifndef INVARIANTS if (rw_recursed(rw)) panic("downgrade of a recursed lock"); #endif WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line); /* * Convert from a writer to a single reader. First we handle * the easy case with no waiters. If there are any waiters, we * lock the turnstile and "disown" the lock. */ tid = (uintptr_t)curthread; if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1))) goto out; /* * Ok, we think we have waiters, so lock the turnstile so we can * read the waiter flags without any races. */ turnstile_chain_lock(&rw->lock_object); v = rw->rw_lock & RW_LOCK_WAITERS; rwait = v & RW_LOCK_READ_WAITERS; wwait = v & RW_LOCK_WRITE_WAITERS; MPASS(rwait | wwait); /* * Downgrade from a write lock while preserving waiters flag * and give up ownership of the turnstile. */ ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); if (!wwait) v &= ~RW_LOCK_READ_WAITERS; atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v); /* * Wake other readers if there are no writers pending. Otherwise they * won't be able to acquire the lock anyway. */ if (rwait && !wwait) { turnstile_broadcast(ts, TS_SHARED_QUEUE); turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); } else turnstile_disown(ts); turnstile_chain_unlock(&rw->lock_object); out: curthread->td_rw_rlocks++; LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line); LOCKSTAT_RECORD0(rw__downgrade, rw); } #ifdef INVARIANT_SUPPORT #ifndef INVARIANTS #undef __rw_assert #endif /* * In the non-WITNESS case, rw_assert() can only detect that at least * *some* thread owns an rlock, but it cannot guarantee that *this* * thread owns an rlock. 
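 *
 * Illustrative use (hypothetical names, not part of this change): a
 * function that requires its caller to hold the lock simply asserts the
 * expectation at entry:
 *
 *	static void
 *	bar_update(struct bar *b)
 *	{
 *		rw_assert(&bar_lock, RA_WLOCKED);
 *		b->dirty = 1;
 *	}
 *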
*/ void __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line) { const struct rwlock *rw; if (panicstr != NULL) return; rw = rwlock2rw(c); switch (what) { case RA_LOCKED: case RA_LOCKED | RA_RECURSED: case RA_LOCKED | RA_NOTRECURSED: case RA_RLOCKED: case RA_RLOCKED | RA_RECURSED: case RA_RLOCKED | RA_NOTRECURSED: #ifdef WITNESS witness_assert(&rw->lock_object, what, file, line); #else /* * If some other thread has a write lock or we have one * and are asserting a read lock, fail. Also, if no one * has a lock at all, fail. */ if (rw->rw_lock == RW_UNLOCKED || (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED || rw_wowner(rw) != curthread))) panic("Lock %s not %slocked @ %s:%d\n", rw->lock_object.lo_name, (what & RA_RLOCKED) ? "read " : "", file, line); if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) { if (rw_recursed(rw)) { if (what & RA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } else if (what & RA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } #endif break; case RA_WLOCKED: case RA_WLOCKED | RA_RECURSED: case RA_WLOCKED | RA_NOTRECURSED: if (rw_wowner(rw) != curthread) panic("Lock %s not exclusively locked @ %s:%d\n", rw->lock_object.lo_name, file, line); if (rw_recursed(rw)) { if (what & RA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } else if (what & RA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); break; case RA_UNLOCKED: #ifdef WITNESS witness_assert(&rw->lock_object, what, file, line); #else /* * If we hold a write lock fail. We can't reliably check * to see if we hold a read lock or not. */ if (rw_wowner(rw) == curthread) panic("Lock %s exclusively locked @ %s:%d\n", rw->lock_object.lo_name, file, line); #endif break; default: panic("Unknown rw lock assertion: %d @ %s:%d", what, file, line); } } #endif /* INVARIANT_SUPPORT */ #ifdef DDB void db_show_rwlock(const struct lock_object *lock) { const struct rwlock *rw; struct thread *td; rw = (const struct rwlock *)lock; db_printf(" state: "); if (rw->rw_lock == RW_UNLOCKED) db_printf("UNLOCKED\n"); else if (rw->rw_lock == RW_DESTROYED) { db_printf("DESTROYED\n"); return; } else if (rw->rw_lock & RW_LOCK_READ) db_printf("RLOCK: %ju locks\n", (uintmax_t)(RW_READERS(rw->rw_lock))); else { td = rw_wowner(rw); db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (rw_recursed(rw)) db_printf(" recursed: %u\n", rw->rw_recurse); } db_printf(" waiters: "); switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) { case RW_LOCK_READ_WAITERS: db_printf("readers\n"); break; case RW_LOCK_WRITE_WAITERS: db_printf("writers\n"); break; case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS: db_printf("readers and writers\n"); break; default: db_printf("none\n"); break; } } #endif Index: user/alc/PQ_LAUNDRY/sys/kern/kern_sx.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/kern/kern_sx.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/kern/kern_sx.c (revision 303642) @@ -1,1258 +1,1258 @@ /*- * Copyright (c) 2007 Attilio Rao * Copyright (c) 2001 Jason Evans * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice(s), this list of conditions and the following disclaimer as * the first lines of this file unmodified other than the possible * addition of one or more copyright notices. * 2. Redistributions in binary form must reproduce the above copyright * notice(s), this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ /* * Shared/exclusive locks. This implementation attempts to ensure * deterministic lock granting behavior, so that slocks and xlocks are * interleaved. * * Priority propagation will not generally raise the priority of lock holders, * so should not be relied upon in combination with sx locks. */ #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_no_adaptive_sx.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_SX) #include #endif #ifdef DDB #include #endif #if defined(SMP) && !defined(NO_ADAPTIVE_SX) #define ADAPTIVE_SX #endif CTASSERT((SX_NOADAPTIVE & LO_CLASSFLAGS) == SX_NOADAPTIVE); #ifdef HWPMC_HOOKS #include PMC_SOFT_DECLARE( , , lock, failed); #endif /* Handy macros for sleep queues. */ #define SQ_EXCLUSIVE_QUEUE 0 #define SQ_SHARED_QUEUE 1 /* * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file. We * drop Giant anytime we have to sleep or if we adaptively spin. */ #define GIANT_DECLARE \ int _giantcnt = 0; \ WITNESS_SAVE_DECL(Giant) \ #define GIANT_SAVE() do { \ if (mtx_owned(&Giant)) { \ WITNESS_SAVE(&Giant.lock_object, Giant); \ while (mtx_owned(&Giant)) { \ _giantcnt++; \ mtx_unlock(&Giant); \ } \ } \ } while (0) #define GIANT_RESTORE() do { \ if (_giantcnt > 0) { \ mtx_assert(&Giant, MA_NOTOWNED); \ while (_giantcnt--) \ mtx_lock(&Giant); \ WITNESS_RESTORE(&Giant.lock_object, Giant); \ } \ } while (0) /* * Returns true if an exclusive lock is recursed. It assumes * curthread currently has an exclusive lock. 
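 *
 * Illustrative usage sketch (hypothetical names, not part of this change).
 * Unlike mutexes and rwlocks, sx locks are sleepable, so a holder may
 * block, e.g. on a memory allocation:
 *
 *	static struct sx baz_sx;
 *
 *	sx_init(&baz_sx, "baz config");
 *
 *	sx_slock(&baz_sx);
 *	... read the configuration ...
 *	sx_sunlock(&baz_sx);
 *
 *	sx_xlock(&baz_sx);
 *	... replace the configuration, possibly sleeping for memory ...
 *	sx_xunlock(&baz_sx);
 *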
*/ #define sx_recursed(sx) ((sx)->sx_recurse != 0) static void assert_sx(const struct lock_object *lock, int what); #ifdef DDB static void db_show_sx(const struct lock_object *lock); #endif static void lock_sx(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_sx(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_sx(struct lock_object *lock); struct lock_class lock_class_sx = { .lc_name = "sx", .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE, .lc_assert = assert_sx, #ifdef DDB .lc_ddb_show = db_show_sx, #endif .lc_lock = lock_sx, .lc_unlock = unlock_sx, #ifdef KDTRACE_HOOKS .lc_owner = owner_sx, #endif }; #ifndef INVARIANTS #define _sx_assert(sx, what, file, line) #endif #ifdef ADAPTIVE_SX static u_int asx_retries = 10; static u_int asx_loops = 10000; static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging"); SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, ""); SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, ""); #endif void assert_sx(const struct lock_object *lock, int what) { sx_assert((const struct sx *)lock, what); } void lock_sx(struct lock_object *lock, uintptr_t how) { struct sx *sx; sx = (struct sx *)lock; if (how) sx_slock(sx); else sx_xlock(sx); } uintptr_t unlock_sx(struct lock_object *lock) { struct sx *sx; sx = (struct sx *)lock; sx_assert(sx, SA_LOCKED | SA_NOTRECURSED); if (sx_xlocked(sx)) { sx_xunlock(sx); return (0); } else { sx_sunlock(sx); return (1); } } #ifdef KDTRACE_HOOKS int owner_sx(const struct lock_object *lock, struct thread **owner) { const struct sx *sx = (const struct sx *)lock; uintptr_t x = sx->sx_lock; *owner = (struct thread *)SX_OWNER(x); return ((x & SX_LOCK_SHARED) != 0 ? 
(SX_SHARERS(x) != 0) : (*owner != NULL)); } #endif void sx_sysinit(void *arg) { struct sx_args *sargs = arg; sx_init_flags(sargs->sa_sx, sargs->sa_desc, sargs->sa_flags); } void sx_init_flags(struct sx *sx, const char *description, int opts) { int flags; MPASS((opts & ~(SX_QUIET | SX_RECURSE | SX_NOWITNESS | SX_DUPOK | SX_NOPROFILE | SX_NOADAPTIVE | SX_NEW)) == 0); ASSERT_ATOMIC_LOAD_PTR(sx->sx_lock, ("%s: sx_lock not aligned for %s: %p", __func__, description, &sx->sx_lock)); flags = LO_SLEEPABLE | LO_UPGRADABLE; if (opts & SX_DUPOK) flags |= LO_DUPOK; if (opts & SX_NOPROFILE) flags |= LO_NOPROFILE; if (!(opts & SX_NOWITNESS)) flags |= LO_WITNESS; if (opts & SX_RECURSE) flags |= LO_RECURSABLE; if (opts & SX_QUIET) flags |= LO_QUIET; if (opts & SX_NEW) flags |= LO_NEW; flags |= opts & SX_NOADAPTIVE; lock_init(&sx->lock_object, &lock_class_sx, description, NULL, flags); sx->sx_lock = SX_LOCK_UNLOCKED; sx->sx_recurse = 0; } void sx_destroy(struct sx *sx) { KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held")); KASSERT(sx->sx_recurse == 0, ("sx lock still recursed")); sx->sx_lock = SX_LOCK_DESTROYED; lock_destroy(&sx->lock_object); } int _sx_slock(struct sx *sx, int opts, const char *file, int line) { int error = 0; if (SCHEDULER_STOPPED()) return (0); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_slock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_slock() of destroyed sx @ %s:%d", file, line)); WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL); error = __sx_slock(sx, opts, file, line); if (!error) { LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line); WITNESS_LOCK(&sx->lock_object, 0, file, line); TD_LOCKS_INC(curthread); } return (error); } int sx_try_slock_(struct sx *sx, const char *file, int line) { uintptr_t x; if (SCHEDULER_STOPPED()) return (1); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_try_slock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); for (;;) { x = sx->sx_lock; KASSERT(x != SX_LOCK_DESTROYED, ("sx_try_slock() of destroyed sx @ %s:%d", file, line)); if (!(x & SX_LOCK_SHARED)) break; if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) { LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line); WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line); LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, 0, 0, file, line, LOCKSTAT_READER); TD_LOCKS_INC(curthread); return (1); } } LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line); return (0); } int _sx_xlock(struct sx *sx, int opts, const char *file, int line) { int error = 0; if (SCHEDULER_STOPPED()) return (0); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_xlock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_xlock() of destroyed sx @ %s:%d", file, line)); WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); error = __sx_xlock(sx, curthread, opts, file, line); if (!error) { LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse, file, line); WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_INC(curthread); } return (error); } int sx_try_xlock_(struct sx *sx, const char *file, int line) { int rval; if (SCHEDULER_STOPPED()) return (1); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_try_xlock() by idle thread %p on sx %s @ %s:%d", 
curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_try_xlock() of destroyed sx @ %s:%d", file, line)); if (sx_xlocked(sx) && (sx->lock_object.lo_flags & LO_RECURSABLE) != 0) { sx->sx_recurse++; atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED); rval = 1; } else rval = atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, (uintptr_t)curthread); LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line); if (rval) { WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); if (!sx_recursed(sx)) LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, 0, 0, file, line, LOCKSTAT_WRITER); TD_LOCKS_INC(curthread); } return (rval); } void _sx_sunlock(struct sx *sx, const char *file, int line) { if (SCHEDULER_STOPPED()) return; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_sunlock() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_SLOCKED, file, line); WITNESS_UNLOCK(&sx->lock_object, 0, file, line); LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line); __sx_sunlock(sx, file, line); TD_LOCKS_DEC(curthread); } void _sx_xunlock(struct sx *sx, const char *file, int line) { if (SCHEDULER_STOPPED()) return; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_xunlock() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_XLOCKED, file, line); WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file, line); __sx_xunlock(sx, curthread, file, line); TD_LOCKS_DEC(curthread); } /* * Try to do a non-blocking upgrade from a shared lock to an exclusive lock. * This will only succeed if this thread holds a single shared lock. * Return 1 if if the upgrade succeed, 0 otherwise. */ int sx_try_upgrade_(struct sx *sx, const char *file, int line) { uintptr_t x; int success; if (SCHEDULER_STOPPED()) return (1); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_SLOCKED, file, line); /* * Try to switch from one shared lock to an exclusive lock. We need * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that * we will wake up the exclusive waiters when we drop the lock. */ x = sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS; success = atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x, (uintptr_t)curthread | x); LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line); if (success) { WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); LOCKSTAT_RECORD0(sx__upgrade, sx); } return (success); } /* * Downgrade an unrecursed exclusive lock into a single shared lock. */ void sx_downgrade_(struct sx *sx, const char *file, int line) { uintptr_t x; int wakeup_swapper; if (SCHEDULER_STOPPED()) return; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_downgrade() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line); #ifndef INVARIANTS if (sx_recursed(sx)) panic("downgrade of a recursed lock"); #endif WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line); /* * Try to switch from an exclusive lock with no shared waiters * to one sharer with no shared waiters. If there are * exclusive waiters, we don't need to lock the sleep queue so * long as we preserve the flag. We do one quick try and if * that fails we grab the sleepq lock to keep the flags from * changing and do it the slow way. * * We have to lock the sleep queue if there are shared waiters * so we can wake them up. 
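 *
 * Illustrative caller pattern (hypothetical names, not part of this
 * change): a writer that has finished mutating can downgrade so that
 * other readers may proceed while it completes a read-only pass:
 *
 *	sx_xlock(&baz_sx);
 *	... perform the update ...
 *	sx_downgrade(&baz_sx);
 *	... continue read-only, concurrently with other readers ...
 *	sx_sunlock(&baz_sx);
 *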
*/ x = sx->sx_lock; if (!(x & SX_LOCK_SHARED_WAITERS) && atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) | (x & SX_LOCK_EXCLUSIVE_WAITERS))) { LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line); return; } /* * Lock the sleep queue so we can read the waiters bits * without any races and wakeup any shared waiters. */ sleepq_lock(&sx->lock_object); /* * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single * shared lock. If there are any shared waiters, wake them up. */ wakeup_swapper = 0; x = sx->sx_lock; atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | (x & SX_LOCK_EXCLUSIVE_WAITERS)); if (x & SX_LOCK_SHARED_WAITERS) wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, SQ_SHARED_QUEUE); sleepq_release(&sx->lock_object); LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line); LOCKSTAT_RECORD0(sx__downgrade, sx); if (wakeup_swapper) kick_proc0(); } /* * This function represents the so-called 'hard case' for sx_xlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ int _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, int line) { GIANT_DECLARE; #ifdef ADAPTIVE_SX volatile struct thread *owner; u_int i, spintries = 0; #endif uintptr_t x; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif int error = 0; #ifdef KDTRACE_HOOKS uintptr_t state; - uint64_t spin_cnt = 0; - uint64_t sleep_cnt = 0; + u_int spin_cnt = 0; + u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return (0); /* If we already hold an exclusive lock, then recurse. */ if (sx_xlocked(sx)) { KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0, ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n", sx->lock_object.lo_name, file, line)); sx->sx_recurse++; atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx); return (0); } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, sx->lock_object.lo_name, (void *)sx->sx_lock, file, line); #ifdef KDTRACE_HOOKS all_time -= lockstat_nsecs(&sx->lock_object); state = sx->sx_lock; #endif for (;;) { if (sx->sx_lock == SX_LOCK_UNLOCKED && atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) break; #ifdef KDTRACE_HOOKS spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&sx->lock_object, &contested, &waittime); #ifdef ADAPTIVE_SX /* * If the lock is write locked and the owner is * running on another CPU, spin until the owner stops * running or the state of the lock changes. 
*/ x = sx->sx_lock; if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { if ((x & SX_LOCK_SHARED) == 0) { x = SX_OWNER(x); owner = (struct thread *)x; if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, sx, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", sx->lock_object.lo_name); GIANT_SAVE(); while (SX_OWNER(sx->sx_lock) == x && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } } else if (SX_SHARERS(x) && spintries < asx_retries) { KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", sx->lock_object.lo_name); GIANT_SAVE(); spintries++; for (i = 0; i < asx_loops; i++) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: shared spinning on %p with %u and %u", __func__, sx, spintries, i); x = sx->sx_lock; if ((x & SX_LOCK_SHARED) == 0 || SX_SHARERS(x) == 0) break; cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); if (i != asx_loops) continue; } } #endif sleepq_lock(&sx->lock_object); x = sx->sx_lock; /* * If the lock was released while spinning on the * sleep queue chain lock, try again. */ if (x == SX_LOCK_UNLOCKED) { sleepq_release(&sx->lock_object); continue; } #ifdef ADAPTIVE_SX /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the sleep queue * chain lock. If so, drop the sleep queue lock and try * again. */ if (!(x & SX_LOCK_SHARED) && (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { owner = (struct thread *)SX_OWNER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&sx->lock_object); continue; } } #endif /* * If an exclusive lock was released with both shared * and exclusive waiters and a shared waiter hasn't * woken up and acquired the lock yet, sx_lock will be * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS. * If we see that value, try to acquire it once. Note * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS * as there are other exclusive waiters still. If we * fail, restart the loop. */ if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) { if (atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS, tid | SX_LOCK_EXCLUSIVE_WAITERS)) { sleepq_release(&sx->lock_object); CTR2(KTR_LOCK, "%s: %p claimed by new writer", __func__, sx); break; } sleepq_release(&sx->lock_object); continue; } /* * Try to set the SX_LOCK_EXCLUSIVE_WAITERS. If we fail, * than loop back and retry. */ if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) { if (!atomic_cmpset_ptr(&sx->sx_lock, x, x | SX_LOCK_EXCLUSIVE_WAITERS)) { sleepq_release(&sx->lock_object); continue; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set excl waiters flag", __func__, sx); } /* * Since we have been unable to acquire the exclusive * lock and the exclusive waiters flag is set, we have * to sleep. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on sleep queue", __func__, sx); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&sx->lock_object); #endif GIANT_SAVE(); sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? 
SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE); if (!(opts & SX_INTERRUPTIBLE)) sleepq_wait(&sx->lock_object, 0); else error = sleepq_wait_sig(&sx->lock_object, 0); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&sx->lock_object); sleep_cnt++; #endif if (error) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: interruptible sleep by %p suspended by signal", __func__, sx); break; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&sx->lock_object); if (sleep_time) LOCKSTAT_RECORD4(sx__block, sx, sleep_time, LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time, LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); #endif if (!error) LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, contested, waittime, file, line, LOCKSTAT_WRITER); GIANT_RESTORE(); return (error); } /* * This function represents the so-called 'hard case' for sx_xunlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ void _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line) { uintptr_t x; int queue, wakeup_swapper; if (SCHEDULER_STOPPED()) return; MPASS(!(sx->sx_lock & SX_LOCK_SHARED)); /* If the lock is recursed, then unrecurse one level. */ if (sx_xlocked(sx) && sx_recursed(sx)) { if ((--sx->sx_recurse) == 0) atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx); return; } MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p contested", __func__, sx); sleepq_lock(&sx->lock_object); x = SX_LOCK_UNLOCKED; /* * The wake up algorithm here is quite simple and probably not * ideal. It gives precedence to shared waiters if they are * present. For this condition, we have to preserve the * state of the exclusive waiters flag. * If interruptible sleeps left the shared queue empty avoid a * starvation for the threads sleeping on the exclusive queue by giving * them precedence and cleaning up the shared waiters bit anyway. */ if ((sx->sx_lock & SX_LOCK_SHARED_WAITERS) != 0 && sleepq_sleepcnt(&sx->lock_object, SQ_SHARED_QUEUE) != 0) { queue = SQ_SHARED_QUEUE; x |= (sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS); } else queue = SQ_EXCLUSIVE_QUEUE; /* Wake up all the waiters for the specific queue. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue", __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" : "exclusive"); atomic_store_rel_ptr(&sx->sx_lock, x); wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, queue); sleepq_release(&sx->lock_object); if (wakeup_swapper) kick_proc0(); } /* * This function represents the so-called 'hard case' for sx_slock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. 
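 *
 * Illustrative note (not part of this change): the SX_INTERRUPTIBLE
 * behaviour handled below is what the sx_slock_sig()/sx_xlock_sig()
 * wrappers request, so callers using them must check for failure:
 *
 *	error = sx_slock_sig(&baz_sx);
 *	if (error != 0)
 *		return (error);		sleep interrupted; lock not held
 *	... use the shared data ...
 *	sx_sunlock(&baz_sx);
 *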
*/ int _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) { GIANT_DECLARE; #ifdef ADAPTIVE_SX volatile struct thread *owner; #endif #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif uintptr_t x; int error = 0; #ifdef KDTRACE_HOOKS uintptr_t state; - uint64_t spin_cnt = 0; - uint64_t sleep_cnt = 0; + u_int spin_cnt = 0; + u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return (0); #ifdef KDTRACE_HOOKS state = sx->sx_lock; all_time -= lockstat_nsecs(&sx->lock_object); #endif /* * As with rwlocks, we don't make any attempt to try to block * shared locks once there is an exclusive waiter. */ for (;;) { #ifdef KDTRACE_HOOKS spin_cnt++; #endif x = sx->sx_lock; /* * If no other thread has an exclusive lock then try to bump up * the count of sharers. Since we have to preserve the state * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the * shared lock loop back and retry. */ if (x & SX_LOCK_SHARED) { MPASS(!(x & SX_LOCK_SHARED_WAITERS)); if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, sx, (void *)x, (void *)(x + SX_ONE_SHARER)); break; } continue; } #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&sx->lock_object, &contested, &waittime); #ifdef ADAPTIVE_SX /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { x = SX_OWNER(x); owner = (struct thread *)x; if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, sx, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", sx->lock_object.lo_name); GIANT_SAVE(); while (SX_OWNER(sx->sx_lock) == x && TD_IS_RUNNING(owner)) { + cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif - cpu_spinwait(); } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } } #endif /* * Some other thread already has an exclusive lock, so * start the process of blocking. */ sleepq_lock(&sx->lock_object); x = sx->sx_lock; /* * The lock could have been released while we spun. * In this case loop back and retry. */ if (x & SX_LOCK_SHARED) { sleepq_release(&sx->lock_object); continue; } #ifdef ADAPTIVE_SX /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ if (!(x & SX_LOCK_SHARED) && (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { owner = (struct thread *)SX_OWNER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&sx->lock_object); continue; } } #endif /* * Try to set the SX_LOCK_SHARED_WAITERS flag. If we * fail to set it drop the sleep queue lock and loop * back. */ if (!(x & SX_LOCK_SHARED_WAITERS)) { if (!atomic_cmpset_ptr(&sx->sx_lock, x, x | SX_LOCK_SHARED_WAITERS)) { sleepq_release(&sx->lock_object); continue; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set shared waiters flag", __func__, sx); } /* * Since we have been unable to acquire the shared lock, * we have to sleep. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on sleep queue", __func__, sx); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&sx->lock_object); #endif GIANT_SAVE(); sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? 
SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE); if (!(opts & SX_INTERRUPTIBLE)) sleepq_wait(&sx->lock_object, 0); else error = sleepq_wait_sig(&sx->lock_object, 0); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&sx->lock_object); sleep_cnt++; #endif if (error) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: interruptible sleep by %p suspended by signal", __func__, sx); break; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&sx->lock_object); if (sleep_time) LOCKSTAT_RECORD4(sx__block, sx, sleep_time, LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time, LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); #endif if (error == 0) LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, contested, waittime, file, line, LOCKSTAT_READER); GIANT_RESTORE(); return (error); } /* * This function represents the so-called 'hard case' for sx_sunlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ void _sx_sunlock_hard(struct sx *sx, const char *file, int line) { uintptr_t x; int wakeup_swapper; if (SCHEDULER_STOPPED()) return; for (;;) { x = sx->sx_lock; /* * We should never have sharers while at least one thread * holds a shared lock. */ KASSERT(!(x & SX_LOCK_SHARED_WAITERS), ("%s: waiting sharers", __func__)); /* * See if there is more than one shared lock held. If * so, just drop one and return. */ if (SX_SHARERS(x) > 1) { if (atomic_cmpset_rel_ptr(&sx->sx_lock, x, x - SX_ONE_SHARER)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeeded %p -> %p", __func__, sx, (void *)x, (void *)(x - SX_ONE_SHARER)); break; } continue; } /* * If there aren't any waiters for an exclusive lock, * then try to drop it quickly. */ if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) { MPASS(x == SX_SHARERS_LOCK(1)); if (atomic_cmpset_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1), SX_LOCK_UNLOCKED)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded", __func__, sx); break; } continue; } /* * At this point, there should just be one sharer with * exclusive waiters. */ MPASS(x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS)); sleepq_lock(&sx->lock_object); /* * Wake up semantic here is quite simple: * Just wake up all the exclusive waiters. * Note that the state of the lock could have changed, * so if it fails loop back and retry. */ if (!atomic_cmpset_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS, SX_LOCK_UNLOCKED)) { sleepq_release(&sx->lock_object); continue; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p waking up all thread on" "exclusive queue", __func__, sx); wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, SQ_EXCLUSIVE_QUEUE); sleepq_release(&sx->lock_object); if (wakeup_swapper) kick_proc0(); break; } } #ifdef INVARIANT_SUPPORT #ifndef INVARIANTS #undef _sx_assert #endif /* * In the non-WITNESS case, sx_assert() can only detect that at least * *some* thread owns an slock, but it cannot guarantee that *this* * thread owns an slock. 
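For readers who only know the public sx(9) interface, the slow paths above and the assertion code below sit behind calls like the ones in this minimal sketch; the foo_* names, the lock description string and the SYSINIT hook are illustrative and not taken from the tree:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/sx.h>

static struct sx foo_lock;
static int foo_value;
SX_SYSINIT(foo_lock_init, &foo_lock, "foo lock");

static int
foo_fetch(void)
{
	int v;

	sx_slock(&foo_lock);			/* may enter _sx_slock_hard() */
	sx_assert(&foo_lock, SA_SLOCKED);	/* checked under INVARIANTS/WITNESS */
	v = foo_value;				/* state protected by the lock */
	sx_sunlock(&foo_lock);			/* may enter _sx_sunlock_hard() */
	return (v);
}

The assertion compiles away in kernels built without INVARIANTS, which is why _sx_assert itself is only provided under INVARIANT_SUPPORT.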
*/ void _sx_assert(const struct sx *sx, int what, const char *file, int line) { #ifndef WITNESS int slocked = 0; #endif if (panicstr != NULL) return; switch (what) { case SA_SLOCKED: case SA_SLOCKED | SA_NOTRECURSED: case SA_SLOCKED | SA_RECURSED: #ifndef WITNESS slocked = 1; /* FALLTHROUGH */ #endif case SA_LOCKED: case SA_LOCKED | SA_NOTRECURSED: case SA_LOCKED | SA_RECURSED: #ifdef WITNESS witness_assert(&sx->lock_object, what, file, line); #else /* * If some other thread has an exclusive lock or we * have one and are asserting a shared lock, fail. * Also, if no one has a lock at all, fail. */ if (sx->sx_lock == SX_LOCK_UNLOCKED || (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked || sx_xholder(sx) != curthread))) panic("Lock %s not %slocked @ %s:%d\n", sx->lock_object.lo_name, slocked ? "share " : "", file, line); if (!(sx->sx_lock & SX_LOCK_SHARED)) { if (sx_recursed(sx)) { if (what & SA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } else if (what & SA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } #endif break; case SA_XLOCKED: case SA_XLOCKED | SA_NOTRECURSED: case SA_XLOCKED | SA_RECURSED: if (sx_xholder(sx) != curthread) panic("Lock %s not exclusively locked @ %s:%d\n", sx->lock_object.lo_name, file, line); if (sx_recursed(sx)) { if (what & SA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } else if (what & SA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); break; case SA_UNLOCKED: #ifdef WITNESS witness_assert(&sx->lock_object, what, file, line); #else /* * If we hold an exclusve lock fail. We can't * reliably check to see if we hold a shared lock or * not. */ if (sx_xholder(sx) == curthread) panic("Lock %s exclusively locked @ %s:%d\n", sx->lock_object.lo_name, file, line); #endif break; default: panic("Unknown sx lock assertion: %d @ %s:%d", what, file, line); } } #endif /* INVARIANT_SUPPORT */ #ifdef DDB static void db_show_sx(const struct lock_object *lock) { struct thread *td; const struct sx *sx; sx = (const struct sx *)lock; db_printf(" state: "); if (sx->sx_lock == SX_LOCK_UNLOCKED) db_printf("UNLOCKED\n"); else if (sx->sx_lock == SX_LOCK_DESTROYED) { db_printf("DESTROYED\n"); return; } else if (sx->sx_lock & SX_LOCK_SHARED) db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock)); else { td = sx_xholder(sx); db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (sx_recursed(sx)) db_printf(" recursed: %d\n", sx->sx_recurse); } db_printf(" waiters: "); switch(sx->sx_lock & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) { case SX_LOCK_SHARED_WAITERS: db_printf("shared\n"); break; case SX_LOCK_EXCLUSIVE_WAITERS: db_printf("exclusive\n"); break; case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS: db_printf("exclusive and shared\n"); break; default: db_printf("none\n"); } } /* * Check to see if a thread that is blocked on a sleep queue is actually * blocked on an sx lock. If so, output some details and return true. * If the lock has an exclusive owner, return that in *ownerp. */ int sx_chain(struct thread *td, struct thread **ownerp) { struct sx *sx; /* * Check to see if this thread is blocked on an sx lock. * First, we check the lock class. If that is ok, then we * compare the lock name against the wait message. 
*/ sx = td->td_wchan; if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx || sx->lock_object.lo_name != td->td_wmesg) return (0); /* We think we have an sx lock, so output some details. */ db_printf("blocked on sx \"%s\" ", td->td_wmesg); *ownerp = sx_xholder(sx); if (sx->sx_lock & SX_LOCK_SHARED) db_printf("SLOCK (count %ju)\n", (uintmax_t)SX_SHARERS(sx->sx_lock)); else db_printf("XLOCK\n"); return (1); } #endif Index: user/alc/PQ_LAUNDRY/sys/kern/kern_tc.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/kern/kern_tc.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/kern/kern_tc.c (revision 303642) @@ -1,2168 +1,2166 @@ /*- * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Julien Ridoux at the University * of Melbourne under sponsorship from the FreeBSD Foundation. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ntp.h" #include "opt_ffclock.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * A large step happens on boot. This constant detects such steps. * It is relatively small so that ntp_update_second gets called enough * in the typical 'missed a couple of seconds' case, but doesn't loop * forever when the time step is large. */ #define LARGE_STEP 200 /* * Implement a dummy timecounter which we can use until we get a real one * in the air. This allows the console and other early stuff to use * time services. */ static u_int dummy_get_timecount(struct timecounter *tc) { static u_int now; return (++now); } static struct timecounter dummy_timecounter = { dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000 }; struct timehands { /* These fields must be initialized by the driver. */ struct timecounter *th_counter; int64_t th_adjustment; uint64_t th_scale; u_int th_offset_count; struct bintime th_offset; + struct bintime th_bintime; struct timeval th_microtime; struct timespec th_nanotime; struct bintime th_boottime; /* Fields not to be copied in tc_windup start with th_generation. 
*/ u_int th_generation; struct timehands *th_next; }; static struct timehands th0; static struct timehands th1 = { .th_next = &th0 }; static struct timehands th0 = { .th_counter = &dummy_timecounter, .th_scale = (uint64_t)-1 / 1000000, .th_offset = { .sec = 1 }, .th_generation = 1, .th_next = &th1 }; static struct timehands *volatile timehands = &th0; struct timecounter *timecounter = &dummy_timecounter; static struct timecounter *timecounters = &dummy_timecounter; int tc_min_ticktock_freq = 1; volatile time_t time_second = 1; volatile time_t time_uptime = 1; static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime, CTLTYPE_STRUCT|CTLFLAG_RD, NULL, 0, sysctl_kern_boottime, "S,timeval", "System boottime"); SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, ""); static SYSCTL_NODE(_kern_timecounter, OID_AUTO, tc, CTLFLAG_RW, 0, ""); static int timestepwarnings; SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW, ×tepwarnings, 0, "Log time steps"); struct bintime bt_timethreshold; struct bintime bt_tickthreshold; sbintime_t sbt_timethreshold; sbintime_t sbt_tickthreshold; struct bintime tc_tick_bt; sbintime_t tc_tick_sbt; int tc_precexp; int tc_timepercentage = TC_DEFAULTPERC; static int sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_kern_timecounter, OID_AUTO, alloweddeviation, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_timecounter_adjprecision, "I", "Allowed time interval deviation in percents"); static int tc_chosen; /* Non-zero if a specific tc was chosen via sysctl. */ static void tc_windup(struct bintime *new_boottimebin); static void cpu_tick_calibrate(int); void dtrace_getnanotime(struct timespec *tsp); static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS) { struct timeval boottime; getboottime(&boottime); #ifndef __mips__ #ifdef SCTL_MASK32 int tv[2]; if (req->flags & SCTL_MASK32) { tv[0] = boottime.tv_sec; tv[1] = boottime.tv_usec; return (SYSCTL_OUT(req, tv, sizeof(tv))); } #endif #endif return (SYSCTL_OUT(req, &boottime, sizeof(boottime))); } static int sysctl_kern_timecounter_get(SYSCTL_HANDLER_ARGS) { u_int ncount; struct timecounter *tc = arg1; ncount = tc->tc_get_timecount(tc); return (sysctl_handle_int(oidp, &ncount, 0, req)); } static int sysctl_kern_timecounter_freq(SYSCTL_HANDLER_ARGS) { uint64_t freq; struct timecounter *tc = arg1; freq = tc->tc_frequency; return (sysctl_handle_64(oidp, &freq, 0, req)); } /* * Return the difference between the timehands' counter value now and what * was when we copied it to the timehands' offset_count. */ static __inline u_int tc_delta(struct timehands *th) { struct timecounter *tc; tc = th->th_counter; return ((tc->tc_get_timecount(tc) - th->th_offset_count) & tc->tc_counter_mask); } /* * Functions for reading the time. We have to loop until we are sure that * the timehands that we operated on was not updated under our feet. See * the comment in for a description of these 12 functions. 
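The read loops in the functions that follow all use the same generation (sequence-lock style) protocol as tc_windup(): the writer publishes generation zero while it updates the timehands, and a reader retries until it observes the same non-zero generation before and after copying the data. A stand-alone sketch of the idea, using C11 atomics instead of the kernel's atomic_*_acq/rel primitives and with made-up names, might look like this:

#include <stdatomic.h>
#include <stdint.h>

struct snap {
	_Atomic unsigned gen;	/* zero while an update is in progress */
	uint64_t value;		/* payload guarded by the generation */
};

static uint64_t
snap_read(struct snap *s)
{
	unsigned gen;
	uint64_t v;

	do {
		gen = atomic_load_explicit(&s->gen, memory_order_acquire);
		v = s->value;
		atomic_thread_fence(memory_order_acquire);
	} while (gen == 0 ||
	    gen != atomic_load_explicit(&s->gen, memory_order_relaxed));
	return (v);
}

static void
snap_write(struct snap *s, uint64_t v)
{
	unsigned gen;

	gen = atomic_load_explicit(&s->gen, memory_order_relaxed);
	atomic_store_explicit(&s->gen, 0, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);
	s->value = v;
	if (++gen == 0)
		gen = 1;	/* never publish generation zero */
	atomic_store_explicit(&s->gen, gen, memory_order_release);
}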
*/ #ifdef FFCLOCK void fbclock_binuptime(struct bintime *bt) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_offset; bintime_addx(bt, th->th_scale * tc_delta(th)); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_nanouptime(struct timespec *tsp) { struct bintime bt; fbclock_binuptime(&bt); bintime2timespec(&bt, tsp); } void fbclock_microuptime(struct timeval *tvp) { struct bintime bt; fbclock_binuptime(&bt); bintime2timeval(&bt, tvp); } void fbclock_bintime(struct bintime *bt) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); - *bt = th->th_offset; + *bt = th->th_bintime; bintime_addx(bt, th->th_scale * tc_delta(th)); - bintime_add(bt, &th->th_boottime); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_nanotime(struct timespec *tsp) { struct bintime bt; fbclock_bintime(&bt); bintime2timespec(&bt, tsp); } void fbclock_microtime(struct timeval *tvp) { struct bintime bt; fbclock_bintime(&bt); bintime2timeval(&bt, tvp); } void fbclock_getbinuptime(struct bintime *bt) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_offset; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_getnanouptime(struct timespec *tsp) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); bintime2timespec(&th->th_offset, tsp); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_getmicrouptime(struct timeval *tvp) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); bintime2timeval(&th->th_offset, tvp); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_getbintime(struct bintime *bt) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); - *bt = th->th_offset; - bintime_add(bt, &th->th_boottime); + *bt = th->th_bintime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_getnanotime(struct timespec *tsp) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *tsp = th->th_nanotime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_getmicrotime(struct timeval *tvp) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *tvp = th->th_microtime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } #else /* !FFCLOCK */ void binuptime(struct bintime *bt) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_offset; bintime_addx(bt, th->th_scale * tc_delta(th)); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void nanouptime(struct timespec *tsp) { struct bintime bt; binuptime(&bt); bintime2timespec(&bt, tsp); } void microuptime(struct timeval *tvp) { struct bintime bt; binuptime(&bt); bintime2timeval(&bt, tvp); } void bintime(struct bintime *bt) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); - *bt = th->th_offset; + *bt = th->th_bintime; bintime_addx(bt, th->th_scale * tc_delta(th)); - bintime_add(bt, 
&th->th_boottime); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void nanotime(struct timespec *tsp) { struct bintime bt; bintime(&bt); bintime2timespec(&bt, tsp); } void microtime(struct timeval *tvp) { struct bintime bt; bintime(&bt); bintime2timeval(&bt, tvp); } void getbinuptime(struct bintime *bt) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_offset; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void getnanouptime(struct timespec *tsp) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); bintime2timespec(&th->th_offset, tsp); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void getmicrouptime(struct timeval *tvp) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); bintime2timeval(&th->th_offset, tvp); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void getbintime(struct bintime *bt) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); - *bt = th->th_offset; - bintime_add(bt, &th->th_boottime); + *bt = th->th_bintime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void getnanotime(struct timespec *tsp) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *tsp = th->th_nanotime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void getmicrotime(struct timeval *tvp) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *tvp = th->th_microtime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } #endif /* FFCLOCK */ void getboottime(struct timeval *boottime) { struct bintime boottimebin; getboottimebin(&boottimebin); bintime2timeval(&boottimebin, boottime); } void getboottimebin(struct bintime *boottimebin) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *boottimebin = th->th_boottime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } #ifdef FFCLOCK /* * Support for feed-forward synchronization algorithms. This is heavily inspired * by the timehands mechanism but kept independent from it. *_windup() functions * have some connection to avoid accessing the timecounter hardware more than * necessary. */ /* Feed-forward clock estimates kept updated by the synchronization daemon. */ struct ffclock_estimate ffclock_estimate; struct bintime ffclock_boottime; /* Feed-forward boot time estimate. */ uint32_t ffclock_status; /* Feed-forward clock status. */ int8_t ffclock_updated; /* New estimates are available. */ struct mtx ffclock_mtx; /* Mutex on ffclock_estimate. 
*/ struct fftimehands { struct ffclock_estimate cest; struct bintime tick_time; struct bintime tick_time_lerp; ffcounter tick_ffcount; uint64_t period_lerp; volatile uint8_t gen; struct fftimehands *next; }; #define NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x)) static struct fftimehands ffth[10]; static struct fftimehands *volatile fftimehands = ffth; static void ffclock_init(void) { struct fftimehands *cur; struct fftimehands *last; memset(ffth, 0, sizeof(ffth)); last = ffth + NUM_ELEMENTS(ffth) - 1; for (cur = ffth; cur < last; cur++) cur->next = cur + 1; last->next = ffth; ffclock_updated = 0; ffclock_status = FFCLOCK_STA_UNSYNC; mtx_init(&ffclock_mtx, "ffclock lock", NULL, MTX_DEF); } /* * Reset the feed-forward clock estimates. Called from inittodr() to get things * kick started and uses the timecounter nominal frequency as a first period * estimate. Note: this function may be called several time just after boot. * Note: this is the only function that sets the value of boot time for the * monotonic (i.e. uptime) version of the feed-forward clock. */ void ffclock_reset_clock(struct timespec *ts) { struct timecounter *tc; struct ffclock_estimate cest; tc = timehands->th_counter; memset(&cest, 0, sizeof(struct ffclock_estimate)); timespec2bintime(ts, &ffclock_boottime); timespec2bintime(ts, &(cest.update_time)); ffclock_read_counter(&cest.update_ffcount); cest.leapsec_next = 0; cest.period = ((1ULL << 63) / tc->tc_frequency) << 1; cest.errb_abs = 0; cest.errb_rate = 0; cest.status = FFCLOCK_STA_UNSYNC; cest.leapsec_total = 0; cest.leapsec = 0; mtx_lock(&ffclock_mtx); bcopy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate)); ffclock_updated = INT8_MAX; mtx_unlock(&ffclock_mtx); printf("ffclock reset: %s (%llu Hz), time = %ld.%09lu\n", tc->tc_name, (unsigned long long)tc->tc_frequency, (long)ts->tv_sec, (unsigned long)ts->tv_nsec); } /* * Sub-routine to convert a time interval measured in RAW counter units to time * in seconds stored in bintime format. * NOTE: bintime_mul requires u_int, but the value of the ffcounter may be * larger than the max value of u_int (on 32 bit architecture). Loop to consume * extra cycles. */ static void ffclock_convert_delta(ffcounter ffdelta, uint64_t period, struct bintime *bt) { struct bintime bt2; ffcounter delta, delta_max; delta_max = (1ULL << (8 * sizeof(unsigned int))) - 1; bintime_clear(bt); do { if (ffdelta > delta_max) delta = delta_max; else delta = ffdelta; bt2.sec = 0; bt2.frac = period; bintime_mul(&bt2, (unsigned int)delta); bintime_add(bt, &bt2); ffdelta -= delta; } while (ffdelta > 0); } /* * Update the fftimehands. * Push the tick ffcount and time(s) forward based on current clock estimate. * The conversion from ffcounter to bintime relies on the difference clock * principle, whose accuracy relies on computing small time intervals. If a new * clock estimate has been passed by the synchronisation daemon, make it * current, and compute the linear interpolation for monotonic time if needed. */ static void ffclock_windup(unsigned int delta) { struct ffclock_estimate *cest; struct fftimehands *ffth; struct bintime bt, gap_lerp; ffcounter ffdelta; uint64_t frac; unsigned int polling; uint8_t forward_jump, ogen; /* * Pick the next timehand, copy current ffclock estimates and move tick * times and counter forward. 
*/ forward_jump = 0; ffth = fftimehands->next; ogen = ffth->gen; ffth->gen = 0; cest = &ffth->cest; bcopy(&fftimehands->cest, cest, sizeof(struct ffclock_estimate)); ffdelta = (ffcounter)delta; ffth->period_lerp = fftimehands->period_lerp; ffth->tick_time = fftimehands->tick_time; ffclock_convert_delta(ffdelta, cest->period, &bt); bintime_add(&ffth->tick_time, &bt); ffth->tick_time_lerp = fftimehands->tick_time_lerp; ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt); bintime_add(&ffth->tick_time_lerp, &bt); ffth->tick_ffcount = fftimehands->tick_ffcount + ffdelta; /* * Assess the status of the clock, if the last update is too old, it is * likely the synchronisation daemon is dead and the clock is free * running. */ if (ffclock_updated == 0) { ffdelta = ffth->tick_ffcount - cest->update_ffcount; ffclock_convert_delta(ffdelta, cest->period, &bt); if (bt.sec > 2 * FFCLOCK_SKM_SCALE) ffclock_status |= FFCLOCK_STA_UNSYNC; } /* * If available, grab updated clock estimates and make them current. * Recompute time at this tick using the updated estimates. The clock * estimates passed the feed-forward synchronisation daemon may result * in time conversion that is not monotonically increasing (just after * the update). time_lerp is a particular linear interpolation over the * synchronisation algo polling period that ensures monotonicity for the * clock ids requesting it. */ if (ffclock_updated > 0) { bcopy(&ffclock_estimate, cest, sizeof(struct ffclock_estimate)); ffdelta = ffth->tick_ffcount - cest->update_ffcount; ffth->tick_time = cest->update_time; ffclock_convert_delta(ffdelta, cest->period, &bt); bintime_add(&ffth->tick_time, &bt); /* ffclock_reset sets ffclock_updated to INT8_MAX */ if (ffclock_updated == INT8_MAX) ffth->tick_time_lerp = ffth->tick_time; if (bintime_cmp(&ffth->tick_time, &ffth->tick_time_lerp, >)) forward_jump = 1; else forward_jump = 0; bintime_clear(&gap_lerp); if (forward_jump) { gap_lerp = ffth->tick_time; bintime_sub(&gap_lerp, &ffth->tick_time_lerp); } else { gap_lerp = ffth->tick_time_lerp; bintime_sub(&gap_lerp, &ffth->tick_time); } /* * The reset from the RTC clock may be far from accurate, and * reducing the gap between real time and interpolated time * could take a very long time if the interpolated clock insists * on strict monotonicity. The clock is reset under very strict * conditions (kernel time is known to be wrong and * synchronization daemon has been restarted recently. * ffclock_boottime absorbs the jump to ensure boot time is * correct and uptime functions stay consistent. */ if (((ffclock_status & FFCLOCK_STA_UNSYNC) == FFCLOCK_STA_UNSYNC) && ((cest->status & FFCLOCK_STA_UNSYNC) == 0) && ((cest->status & FFCLOCK_STA_WARMUP) == FFCLOCK_STA_WARMUP)) { if (forward_jump) bintime_add(&ffclock_boottime, &gap_lerp); else bintime_sub(&ffclock_boottime, &gap_lerp); ffth->tick_time_lerp = ffth->tick_time; bintime_clear(&gap_lerp); } ffclock_status = cest->status; ffth->period_lerp = cest->period; /* * Compute corrected period used for the linear interpolation of * time. The rate of linear interpolation is capped to 5000PPM * (5ms/s). 
*/ if (bintime_isset(&gap_lerp)) { ffdelta = cest->update_ffcount; ffdelta -= fftimehands->cest.update_ffcount; ffclock_convert_delta(ffdelta, cest->period, &bt); polling = bt.sec; bt.sec = 0; bt.frac = 5000000 * (uint64_t)18446744073LL; bintime_mul(&bt, polling); if (bintime_cmp(&gap_lerp, &bt, >)) gap_lerp = bt; /* Approximate 1 sec by 1-(1/2^64) to ease arithmetic */ frac = 0; if (gap_lerp.sec > 0) { frac -= 1; frac /= ffdelta / gap_lerp.sec; } frac += gap_lerp.frac / ffdelta; if (forward_jump) ffth->period_lerp += frac; else ffth->period_lerp -= frac; } ffclock_updated = 0; } if (++ogen == 0) ogen = 1; ffth->gen = ogen; fftimehands = ffth; } /* * Adjust the fftimehands when the timecounter is changed. Stating the obvious, * the old and new hardware counter cannot be read simultaneously. tc_windup() * does read the two counters 'back to back', but a few cycles are effectively * lost, and not accumulated in tick_ffcount. This is a fairly radical * operation for a feed-forward synchronization daemon, and it is its job to not * pushing irrelevant data to the kernel. Because there is no locking here, * simply force to ignore pending or next update to give daemon a chance to * realize the counter has changed. */ static void ffclock_change_tc(struct timehands *th) { struct fftimehands *ffth; struct ffclock_estimate *cest; struct timecounter *tc; uint8_t ogen; tc = th->th_counter; ffth = fftimehands->next; ogen = ffth->gen; ffth->gen = 0; cest = &ffth->cest; bcopy(&(fftimehands->cest), cest, sizeof(struct ffclock_estimate)); cest->period = ((1ULL << 63) / tc->tc_frequency ) << 1; cest->errb_abs = 0; cest->errb_rate = 0; cest->status |= FFCLOCK_STA_UNSYNC; ffth->tick_ffcount = fftimehands->tick_ffcount; ffth->tick_time_lerp = fftimehands->tick_time_lerp; ffth->tick_time = fftimehands->tick_time; ffth->period_lerp = cest->period; /* Do not lock but ignore next update from synchronization daemon. */ ffclock_updated--; if (++ogen == 0) ogen = 1; ffth->gen = ogen; fftimehands = ffth; } /* * Retrieve feed-forward counter and time of last kernel tick. */ void ffclock_last_tick(ffcounter *ffcount, struct bintime *bt, uint32_t flags) { struct fftimehands *ffth; uint8_t gen; /* * No locking but check generation has not changed. Also need to make * sure ffdelta is positive, i.e. ffcount > tick_ffcount. */ do { ffth = fftimehands; gen = ffth->gen; if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP) *bt = ffth->tick_time_lerp; else *bt = ffth->tick_time; *ffcount = ffth->tick_ffcount; } while (gen == 0 || gen != ffth->gen); } /* * Absolute clock conversion. Low level function to convert ffcounter to * bintime. The ffcounter is converted using the current ffclock period estimate * or the "interpolated period" to ensure monotonicity. * NOTE: this conversion may have been deferred, and the clock updated since the * hardware counter has been read. */ void ffclock_convert_abs(ffcounter ffcount, struct bintime *bt, uint32_t flags) { struct fftimehands *ffth; struct bintime bt2; ffcounter ffdelta; uint8_t gen; /* * No locking but check generation has not changed. Also need to make * sure ffdelta is positive, i.e. ffcount > tick_ffcount. 
*/ do { ffth = fftimehands; gen = ffth->gen; if (ffcount > ffth->tick_ffcount) ffdelta = ffcount - ffth->tick_ffcount; else ffdelta = ffth->tick_ffcount - ffcount; if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP) { *bt = ffth->tick_time_lerp; ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt2); } else { *bt = ffth->tick_time; ffclock_convert_delta(ffdelta, ffth->cest.period, &bt2); } if (ffcount > ffth->tick_ffcount) bintime_add(bt, &bt2); else bintime_sub(bt, &bt2); } while (gen == 0 || gen != ffth->gen); } /* * Difference clock conversion. * Low level function to Convert a time interval measured in RAW counter units * into bintime. The difference clock allows measuring small intervals much more * reliably than the absolute clock. */ void ffclock_convert_diff(ffcounter ffdelta, struct bintime *bt) { struct fftimehands *ffth; uint8_t gen; /* No locking but check generation has not changed. */ do { ffth = fftimehands; gen = ffth->gen; ffclock_convert_delta(ffdelta, ffth->cest.period, bt); } while (gen == 0 || gen != ffth->gen); } /* * Access to current ffcounter value. */ void ffclock_read_counter(ffcounter *ffcount) { struct timehands *th; struct fftimehands *ffth; unsigned int gen, delta; /* * ffclock_windup() called from tc_windup(), safe to rely on * th->th_generation only, for correct delta and ffcounter. */ do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); ffth = fftimehands; delta = tc_delta(th); *ffcount = ffth->tick_ffcount; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); *ffcount += delta; } void binuptime(struct bintime *bt) { binuptime_fromclock(bt, sysclock_active); } void nanouptime(struct timespec *tsp) { nanouptime_fromclock(tsp, sysclock_active); } void microuptime(struct timeval *tvp) { microuptime_fromclock(tvp, sysclock_active); } void bintime(struct bintime *bt) { bintime_fromclock(bt, sysclock_active); } void nanotime(struct timespec *tsp) { nanotime_fromclock(tsp, sysclock_active); } void microtime(struct timeval *tvp) { microtime_fromclock(tvp, sysclock_active); } void getbinuptime(struct bintime *bt) { getbinuptime_fromclock(bt, sysclock_active); } void getnanouptime(struct timespec *tsp) { getnanouptime_fromclock(tsp, sysclock_active); } void getmicrouptime(struct timeval *tvp) { getmicrouptime_fromclock(tvp, sysclock_active); } void getbintime(struct bintime *bt) { getbintime_fromclock(bt, sysclock_active); } void getnanotime(struct timespec *tsp) { getnanotime_fromclock(tsp, sysclock_active); } void getmicrotime(struct timeval *tvp) { getmicrouptime_fromclock(tvp, sysclock_active); } #endif /* FFCLOCK */ /* * This is a clone of getnanotime and used for walltimestamps. * The dtrace_ prefix prevents fbt from creating probes for * it so walltimestamp can be safely used in all fbt probes. */ void dtrace_getnanotime(struct timespec *tsp) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *tsp = th->th_nanotime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } /* * System clock currently providing time to the system. Modifiable via sysctl * when the FFCLOCK option is defined. */ int sysclock_active = SYSCLOCK_FBCK; /* Internal NTP status and error estimates. */ extern int time_status; extern long time_esterror; /* * Take a snapshot of sysclock data which can be used to compare system clocks * and generate timestamps after the fact. 
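Before the implementation, here is a sketch of how a consumer is expected to use the snapshot interface, for example a packet-timestamping path that wants to defer the conversion out of its hot path. The stamp_packet() name is made up, and the sketch assumes the declarations from sys/timeffc.h:

static void
stamp_packet(struct bintime *btp)
{
	struct sysclock_snap cs;

	/* Hot path: latch clock state and a counter delta (fast == 0). */
	sysclock_getsnapshot(&cs, 0);

	/*
	 * Possibly much later: convert the snapshot using whichever system
	 * clock was active when it was taken.  Flags such as FFCLOCK_LERP
	 * or FBCLOCK_UPTIME select the flavour of timestamp.
	 */
	(void)sysclock_snap2bintime(&cs, btp, cs.sysclock_active, 0);
}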
*/ void sysclock_getsnapshot(struct sysclock_snap *clock_snap, int fast) { struct fbclock_info *fbi; struct timehands *th; struct bintime bt; unsigned int delta, gen; #ifdef FFCLOCK ffcounter ffcount; struct fftimehands *ffth; struct ffclock_info *ffi; struct ffclock_estimate cest; ffi = &clock_snap->ff_info; #endif fbi = &clock_snap->fb_info; delta = 0; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); fbi->th_scale = th->th_scale; fbi->tick_time = th->th_offset; #ifdef FFCLOCK ffth = fftimehands; ffi->tick_time = ffth->tick_time_lerp; ffi->tick_time_lerp = ffth->tick_time_lerp; ffi->period = ffth->cest.period; ffi->period_lerp = ffth->period_lerp; clock_snap->ffcount = ffth->tick_ffcount; cest = ffth->cest; #endif if (!fast) delta = tc_delta(th); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); clock_snap->delta = delta; clock_snap->sysclock_active = sysclock_active; /* Record feedback clock status and error. */ clock_snap->fb_info.status = time_status; /* XXX: Very crude estimate of feedback clock error. */ bt.sec = time_esterror / 1000000; bt.frac = ((time_esterror - bt.sec) * 1000000) * (uint64_t)18446744073709ULL; clock_snap->fb_info.error = bt; #ifdef FFCLOCK if (!fast) clock_snap->ffcount += delta; /* Record feed-forward clock leap second adjustment. */ ffi->leapsec_adjustment = cest.leapsec_total; if (clock_snap->ffcount > cest.leapsec_next) ffi->leapsec_adjustment -= cest.leapsec; /* Record feed-forward clock status and error. */ clock_snap->ff_info.status = cest.status; ffcount = clock_snap->ffcount - cest.update_ffcount; ffclock_convert_delta(ffcount, cest.period, &bt); /* 18446744073709 = int(2^64/1e12), err_bound_rate in [ps/s]. */ bintime_mul(&bt, cest.errb_rate * (uint64_t)18446744073709ULL); /* 18446744073 = int(2^64 / 1e9), since err_abs in [ns]. */ bintime_addx(&bt, cest.errb_abs * (uint64_t)18446744073ULL); clock_snap->ff_info.error = bt; #endif } /* * Convert a sysclock snapshot into a struct bintime based on the specified * clock source and flags. */ int sysclock_snap2bintime(struct sysclock_snap *cs, struct bintime *bt, int whichclock, uint32_t flags) { struct bintime boottimebin; #ifdef FFCLOCK struct bintime bt2; uint64_t period; #endif switch (whichclock) { case SYSCLOCK_FBCK: *bt = cs->fb_info.tick_time; /* If snapshot was created with !fast, delta will be >0. */ if (cs->delta > 0) bintime_addx(bt, cs->fb_info.th_scale * cs->delta); if ((flags & FBCLOCK_UPTIME) == 0) { getboottimebin(&boottimebin); bintime_add(bt, &boottimebin); } break; #ifdef FFCLOCK case SYSCLOCK_FFWD: if (flags & FFCLOCK_LERP) { *bt = cs->ff_info.tick_time_lerp; period = cs->ff_info.period_lerp; } else { *bt = cs->ff_info.tick_time; period = cs->ff_info.period; } /* If snapshot was created with !fast, delta will be >0. */ if (cs->delta > 0) { ffclock_convert_delta(cs->delta, period, &bt2); bintime_add(bt, &bt2); } /* Leap second adjustment. */ if (flags & FFCLOCK_LEAPSEC) bt->sec -= cs->ff_info.leapsec_adjustment; /* Boot time adjustment, for uptime/monotonic clocks. */ if (flags & FFCLOCK_UPTIME) bintime_sub(bt, &ffclock_boottime); break; #endif default: return (EINVAL); break; } return (0); } /* * Initialize a new timecounter and possibly use it. 
*/ void tc_init(struct timecounter *tc) { u_int u; struct sysctl_oid *tc_root; u = tc->tc_frequency / tc->tc_counter_mask; /* XXX: We need some margin here, 10% is a guess */ u *= 11; u /= 10; if (u > hz && tc->tc_quality >= 0) { tc->tc_quality = -2000; if (bootverbose) { printf("Timecounter \"%s\" frequency %ju Hz", tc->tc_name, (uintmax_t)tc->tc_frequency); printf(" -- Insufficient hz, needs at least %u\n", u); } } else if (tc->tc_quality >= 0 || bootverbose) { printf("Timecounter \"%s\" frequency %ju Hz quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency, tc->tc_quality); } tc->tc_next = timecounters; timecounters = tc; /* * Set up sysctl tree for this counter. */ tc_root = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_kern_timecounter_tc), OID_AUTO, tc->tc_name, CTLFLAG_RW, 0, "timecounter description"); SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, "mask", CTLFLAG_RD, &(tc->tc_counter_mask), 0, "mask for implemented bits"); SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, "counter", CTLTYPE_UINT | CTLFLAG_RD, tc, sizeof(*tc), sysctl_kern_timecounter_get, "IU", "current timecounter value"); SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, "frequency", CTLTYPE_U64 | CTLFLAG_RD, tc, sizeof(*tc), sysctl_kern_timecounter_freq, "QU", "timecounter frequency"); SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, "quality", CTLFLAG_RD, &(tc->tc_quality), 0, "goodness of time counter"); /* * Do not automatically switch if the current tc was specifically * chosen. Never automatically use a timecounter with negative quality. * Even though we run on the dummy counter, switching here may be * worse since this timecounter may not be monotonic. */ if (tc_chosen) return; if (tc->tc_quality < 0) return; if (tc->tc_quality < timecounter->tc_quality) return; if (tc->tc_quality == timecounter->tc_quality && tc->tc_frequency < timecounter->tc_frequency) return; (void)tc->tc_get_timecount(tc); (void)tc->tc_get_timecount(tc); timecounter = tc; } /* Report the frequency of the current timecounter. */ uint64_t tc_getfrequency(void) { return (timehands->th_counter->tc_frequency); } static struct mtx tc_setclock_mtx; MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN); /* * Step our concept of UTC. This is done by modifying our estimate of * when we booted. */ void tc_setclock(struct timespec *ts) { struct timespec tbef, taft; struct bintime bt, bt2; timespec2bintime(ts, &bt); nanotime(&tbef); mtx_lock_spin(&tc_setclock_mtx); cpu_tick_calibrate(1); binuptime(&bt2); bintime_sub(&bt, &bt2); /* XXX fiddle all the little crinkly bits around the fiords... */ tc_windup(&bt); mtx_unlock_spin(&tc_setclock_mtx); if (timestepwarnings) { nanotime(&taft); log(LOG_INFO, "Time stepped from %jd.%09ld to %jd.%09ld (%jd.%09ld)\n", (intmax_t)tbef.tv_sec, tbef.tv_nsec, (intmax_t)taft.tv_sec, taft.tv_nsec, (intmax_t)ts->tv_sec, ts->tv_nsec); } } /* * Initialize the next struct timehands in the ring and make * it the active timehands. Along the way we might switch to a different * timecounter and/or do seconds processing in NTP. Slightly magic. */ static void tc_windup(struct bintime *new_boottimebin) { struct bintime bt; struct timehands *th, *tho; uint64_t scale; u_int delta, ncount, ogen; int i; time_t t; /* * Make the next timehands a copy of the current one, but do * not overwrite the generation or next pointer. While we * update the contents, the generation must be zero. 
We need * to ensure that the zero generation is visible before the * data updates become visible, which requires release fence. * For similar reasons, re-reading of the generation after the * data is read should use acquire fence. */ tho = timehands; th = tho->th_next; ogen = th->th_generation; th->th_generation = 0; atomic_thread_fence_rel(); bcopy(tho, th, offsetof(struct timehands, th_generation)); if (new_boottimebin != NULL) th->th_boottime = *new_boottimebin; /* * Capture a timecounter delta on the current timecounter and if * changing timecounters, a counter value from the new timecounter. * Update the offset fields accordingly. */ delta = tc_delta(th); if (th->th_counter != timecounter) ncount = timecounter->tc_get_timecount(timecounter); else ncount = 0; #ifdef FFCLOCK ffclock_windup(delta); #endif th->th_offset_count += delta; th->th_offset_count &= th->th_counter->tc_counter_mask; while (delta > th->th_counter->tc_frequency) { /* Eat complete unadjusted seconds. */ delta -= th->th_counter->tc_frequency; th->th_offset.sec++; } if ((delta > th->th_counter->tc_frequency / 2) && (th->th_scale * delta < ((uint64_t)1 << 63))) { /* The product th_scale * delta just barely overflows. */ th->th_offset.sec++; } bintime_addx(&th->th_offset, th->th_scale * delta); /* * Hardware latching timecounters may not generate interrupts on * PPS events, so instead we poll them. There is a finite risk that * the hardware might capture a count which is later than the one we * got above, and therefore possibly in the next NTP second which might * have a different rate than the current NTP second. It doesn't * matter in practice. */ if (tho->th_counter->tc_poll_pps) tho->th_counter->tc_poll_pps(tho->th_counter); /* * Deal with NTP second processing. The for loop normally * iterates at most once, but in extreme situations it might * keep NTP sane if timeouts are not run for several seconds. * At boot, the time step can be large when the TOD hardware * has been read, so on really large steps, we call * ntp_update_second only twice. We need to call it twice in * case we missed a leap second. */ bt = th->th_offset; bintime_add(&bt, &th->th_boottime); i = bt.sec - tho->th_microtime.tv_sec; if (i > LARGE_STEP) i = 2; for (; i > 0; i--) { t = bt.sec; ntp_update_second(&th->th_adjustment, &bt.sec); if (bt.sec != t) th->th_boottime.sec += bt.sec - t; } + th->th_bintime = th->th_offset; + bintime_add(&th->th_bintime, &th->th_boottime); /* Update the UTC timestamps used by the get*() functions. */ /* XXX shouldn't do this here. Should force non-`get' versions. */ bintime2timeval(&bt, &th->th_microtime); bintime2timespec(&bt, &th->th_nanotime); /* Now is a good time to change timecounters. */ if (th->th_counter != timecounter) { #ifndef __arm__ if ((timecounter->tc_flags & TC_FLAGS_C2STOP) != 0) cpu_disable_c2_sleep++; if ((th->th_counter->tc_flags & TC_FLAGS_C2STOP) != 0) cpu_disable_c2_sleep--; #endif th->th_counter = timecounter; th->th_offset_count = ncount; tc_min_ticktock_freq = max(1, timecounter->tc_frequency / (((uint64_t)timecounter->tc_counter_mask + 1) / 3)); #ifdef FFCLOCK ffclock_change_tc(th); #endif } /*- * Recalculate the scaling factor. We want the number of 1/2^64 * fractions of a second per period of the hardware counter, taking * into account the th_adjustment factor which the NTP PLL/adjtime(2) * processing provides us with. 
* * The th_adjustment is nanoseconds per second with 32 bit binary * fraction and we want 64 bit binary fraction of second: * * x = a * 2^32 / 10^9 = a * 4.294967296 * * The range of th_adjustment is +/- 5000PPM so inside a 64bit int * we can only multiply by about 850 without overflowing, that * leaves no suitably precise fractions for multiply before divide. * * Divide before multiply with a fraction of 2199/512 results in a * systematic undercompensation of 10PPM of th_adjustment. On a * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. * * We happily sacrifice the lowest of the 64 bits of our result * to the goddess of code clarity. * */ scale = (uint64_t)1 << 63; scale += (th->th_adjustment / 1024) * 2199; scale /= th->th_counter->tc_frequency; th->th_scale = scale * 2; /* * Now that the struct timehands is again consistent, set the new * generation number, making sure to not make it zero. */ if (++ogen == 0) ogen = 1; atomic_store_rel_int(&th->th_generation, ogen); /* Go live with the new struct timehands. */ #ifdef FFCLOCK switch (sysclock_active) { case SYSCLOCK_FBCK: #endif time_second = th->th_microtime.tv_sec; time_uptime = th->th_offset.sec; #ifdef FFCLOCK break; case SYSCLOCK_FFWD: time_second = fftimehands->tick_time_lerp.sec; time_uptime = fftimehands->tick_time_lerp.sec - ffclock_boottime.sec; break; } #endif timehands = th; timekeep_push_vdso(); } /* Report or change the active timecounter hardware. */ static int sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS) { char newname[32]; struct timecounter *newtc, *tc; int error; tc = timecounter; strlcpy(newname, tc->tc_name, sizeof(newname)); error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req); if (error != 0 || req->newptr == NULL) return (error); /* Record that the tc in use now was specifically chosen. */ tc_chosen = 1; if (strcmp(newname, tc->tc_name) == 0) return (0); for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) { if (strcmp(newname, newtc->tc_name) != 0) continue; /* Warm up new timecounter. */ (void)newtc->tc_get_timecount(newtc); (void)newtc->tc_get_timecount(newtc); timecounter = newtc; /* * The vdso timehands update is deferred until the next * 'tc_windup()'. * * This is prudent given that 'timekeep_push_vdso()' does not * use any locking and that it can be called in hard interrupt * context via 'tc_windup()'. */ return (0); } return (EINVAL); } SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, sysctl_kern_timecounter_hardware, "A", "Timecounter hardware selected"); /* Report the available timecounter hardware. */ static int sysctl_kern_timecounter_choice(SYSCTL_HANDLER_ARGS) { struct sbuf sb; struct timecounter *tc; int error; sbuf_new_for_sysctl(&sb, NULL, 0, req); for (tc = timecounters; tc != NULL; tc = tc->tc_next) { if (tc != timecounters) sbuf_putc(&sb, ' '); sbuf_printf(&sb, "%s(%d)", tc->tc_name, tc->tc_quality); } error = sbuf_finish(&sb); sbuf_delete(&sb); return (error); } SYSCTL_PROC(_kern_timecounter, OID_AUTO, choice, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, sysctl_kern_timecounter_choice, "A", "Timecounter hardware detected"); /* * RFC 2783 PPS-API implementation. */ /* * Return true if the driver is aware of the abi version extensions in the * pps_state structure, and it supports at least the given abi version number. 
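For orientation, the driver-facing half of this PPS API is small: a driver exposing a PPS signal initializes a pps_state at attach time, then latches and processes each pulse from its interrupt path. The sc_pps variable and the mydev_* functions below are placeholders; real drivers embed the pps_state in their softc:

static struct pps_state sc_pps;

static void
mydev_pps_attach(void)
{

	sc_pps.ppscap = PPS_CAPTUREASSERT;	/* edges the hardware can latch */
	pps_init(&sc_pps);		/* or pps_init_abi() for new drivers */
}

static void
mydev_pps_intr(void)
{

	pps_capture(&sc_pps);			/* latch the timecounter ASAP */
	pps_event(&sc_pps, PPS_CAPTUREASSERT);	/* convert and publish the event */
}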
*/ static inline int abi_aware(struct pps_state *pps, int vers) { return ((pps->kcmode & KCMODE_ABIFLAG) && pps->driver_abi >= vers); } static int pps_fetch(struct pps_fetch_args *fapi, struct pps_state *pps) { int err, timo; pps_seq_t aseq, cseq; struct timeval tv; if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC) return (EINVAL); /* * If no timeout is requested, immediately return whatever values were * most recently captured. If timeout seconds is -1, that's a request * to block without a timeout. WITNESS won't let us sleep forever * without a lock (we really don't need a lock), so just repeatedly * sleep a long time. */ if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) { if (fapi->timeout.tv_sec == -1) timo = 0x7fffffff; else { tv.tv_sec = fapi->timeout.tv_sec; tv.tv_usec = fapi->timeout.tv_nsec / 1000; timo = tvtohz(&tv); } aseq = pps->ppsinfo.assert_sequence; cseq = pps->ppsinfo.clear_sequence; while (aseq == pps->ppsinfo.assert_sequence && cseq == pps->ppsinfo.clear_sequence) { if (abi_aware(pps, 1) && pps->driver_mtx != NULL) { if (pps->flags & PPSFLAG_MTX_SPIN) { err = msleep_spin(pps, pps->driver_mtx, "ppsfch", timo); } else { err = msleep(pps, pps->driver_mtx, PCATCH, "ppsfch", timo); } } else { err = tsleep(pps, PCATCH, "ppsfch", timo); } if (err == EWOULDBLOCK) { if (fapi->timeout.tv_sec == -1) { continue; } else { return (ETIMEDOUT); } } else if (err != 0) { return (err); } } } pps->ppsinfo.current_mode = pps->ppsparam.mode; fapi->pps_info_buf = pps->ppsinfo; return (0); } int pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps) { pps_params_t *app; struct pps_fetch_args *fapi; #ifdef FFCLOCK struct pps_fetch_ffc_args *fapi_ffc; #endif #ifdef PPS_SYNC struct pps_kcbind_args *kapi; #endif KASSERT(pps != NULL, ("NULL pps pointer in pps_ioctl")); switch (cmd) { case PPS_IOC_CREATE: return (0); case PPS_IOC_DESTROY: return (0); case PPS_IOC_SETPARAMS: app = (pps_params_t *)data; if (app->mode & ~pps->ppscap) return (EINVAL); #ifdef FFCLOCK /* Ensure only a single clock is selected for ffc timestamp. */ if ((app->mode & PPS_TSCLK_MASK) == PPS_TSCLK_MASK) return (EINVAL); #endif pps->ppsparam = *app; return (0); case PPS_IOC_GETPARAMS: app = (pps_params_t *)data; *app = pps->ppsparam; app->api_version = PPS_API_VERS_1; return (0); case PPS_IOC_GETCAP: *(int*)data = pps->ppscap; return (0); case PPS_IOC_FETCH: fapi = (struct pps_fetch_args *)data; return (pps_fetch(fapi, pps)); #ifdef FFCLOCK case PPS_IOC_FETCH_FFCOUNTER: fapi_ffc = (struct pps_fetch_ffc_args *)data; if (fapi_ffc->tsformat && fapi_ffc->tsformat != PPS_TSFMT_TSPEC) return (EINVAL); if (fapi_ffc->timeout.tv_sec || fapi_ffc->timeout.tv_nsec) return (EOPNOTSUPP); pps->ppsinfo_ffc.current_mode = pps->ppsparam.mode; fapi_ffc->pps_info_buf_ffc = pps->ppsinfo_ffc; /* Overwrite timestamps if feedback clock selected. 
*/ switch (pps->ppsparam.mode & PPS_TSCLK_MASK) { case PPS_TSCLK_FBCK: fapi_ffc->pps_info_buf_ffc.assert_timestamp = pps->ppsinfo.assert_timestamp; fapi_ffc->pps_info_buf_ffc.clear_timestamp = pps->ppsinfo.clear_timestamp; break; case PPS_TSCLK_FFWD: break; default: break; } return (0); #endif /* FFCLOCK */ case PPS_IOC_KCBIND: #ifdef PPS_SYNC kapi = (struct pps_kcbind_args *)data; /* XXX Only root should be able to do this */ if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC) return (EINVAL); if (kapi->kernel_consumer != PPS_KC_HARDPPS) return (EINVAL); if (kapi->edge & ~pps->ppscap) return (EINVAL); pps->kcmode = (kapi->edge & KCMODE_EDGEMASK) | (pps->kcmode & KCMODE_ABIFLAG); return (0); #else return (EOPNOTSUPP); #endif default: return (ENOIOCTL); } } void pps_init(struct pps_state *pps) { pps->ppscap |= PPS_TSFMT_TSPEC | PPS_CANWAIT; if (pps->ppscap & PPS_CAPTUREASSERT) pps->ppscap |= PPS_OFFSETASSERT; if (pps->ppscap & PPS_CAPTURECLEAR) pps->ppscap |= PPS_OFFSETCLEAR; #ifdef FFCLOCK pps->ppscap |= PPS_TSCLK_MASK; #endif pps->kcmode &= ~KCMODE_ABIFLAG; } void pps_init_abi(struct pps_state *pps) { pps_init(pps); if (pps->driver_abi > 0) { pps->kcmode |= KCMODE_ABIFLAG; pps->kernel_abi = PPS_ABI_VERSION; } } void pps_capture(struct pps_state *pps) { struct timehands *th; KASSERT(pps != NULL, ("NULL pps pointer in pps_capture")); th = timehands; pps->capgen = atomic_load_acq_int(&th->th_generation); pps->capth = th; #ifdef FFCLOCK pps->capffth = fftimehands; #endif pps->capcount = th->th_counter->tc_get_timecount(th->th_counter); atomic_thread_fence_acq(); if (pps->capgen != th->th_generation) pps->capgen = 0; } void pps_event(struct pps_state *pps, int event) { struct bintime bt; struct timespec ts, *tsp, *osp; u_int tcount, *pcount; int foff; pps_seq_t *pseq; #ifdef FFCLOCK struct timespec *tsp_ffc; pps_seq_t *pseq_ffc; ffcounter *ffcount; #endif #ifdef PPS_SYNC int fhard; #endif KASSERT(pps != NULL, ("NULL pps pointer in pps_event")); /* Nothing to do if not currently set to capture this event type. */ if ((event & pps->ppsparam.mode) == 0) return; /* If the timecounter was wound up underneath us, bail out. */ if (pps->capgen == 0 || pps->capgen != atomic_load_acq_int(&pps->capth->th_generation)) return; /* Things would be easier with arrays. */ if (event == PPS_CAPTUREASSERT) { tsp = &pps->ppsinfo.assert_timestamp; osp = &pps->ppsparam.assert_offset; foff = pps->ppsparam.mode & PPS_OFFSETASSERT; #ifdef PPS_SYNC fhard = pps->kcmode & PPS_CAPTUREASSERT; #endif pcount = &pps->ppscount[0]; pseq = &pps->ppsinfo.assert_sequence; #ifdef FFCLOCK ffcount = &pps->ppsinfo_ffc.assert_ffcount; tsp_ffc = &pps->ppsinfo_ffc.assert_timestamp; pseq_ffc = &pps->ppsinfo_ffc.assert_sequence; #endif } else { tsp = &pps->ppsinfo.clear_timestamp; osp = &pps->ppsparam.clear_offset; foff = pps->ppsparam.mode & PPS_OFFSETCLEAR; #ifdef PPS_SYNC fhard = pps->kcmode & PPS_CAPTURECLEAR; #endif pcount = &pps->ppscount[1]; pseq = &pps->ppsinfo.clear_sequence; #ifdef FFCLOCK ffcount = &pps->ppsinfo_ffc.clear_ffcount; tsp_ffc = &pps->ppsinfo_ffc.clear_timestamp; pseq_ffc = &pps->ppsinfo_ffc.clear_sequence; #endif } /* * If the timecounter changed, we cannot compare the count values, so * we have to drop the rest of the PPS-stuff until the next event. */ if (pps->ppstc != pps->capth->th_counter) { pps->ppstc = pps->capth->th_counter; *pcount = pps->capcount; pps->ppscount[2] = pps->capcount; return; } /* Convert the count to a timespec. 
*/ tcount = pps->capcount - pps->capth->th_offset_count; tcount &= pps->capth->th_counter->tc_counter_mask; - bt = pps->capth->th_offset; + bt = pps->capth->th_bintime; bintime_addx(&bt, pps->capth->th_scale * tcount); - bintime_add(&bt, &pps->capth->th_boottime); bintime2timespec(&bt, &ts); /* If the timecounter was wound up underneath us, bail out. */ atomic_thread_fence_acq(); if (pps->capgen != pps->capth->th_generation) return; *pcount = pps->capcount; (*pseq)++; *tsp = ts; if (foff) { timespecadd(tsp, osp); if (tsp->tv_nsec < 0) { tsp->tv_nsec += 1000000000; tsp->tv_sec -= 1; } } #ifdef FFCLOCK *ffcount = pps->capffth->tick_ffcount + tcount; bt = pps->capffth->tick_time; ffclock_convert_delta(tcount, pps->capffth->cest.period, &bt); bintime_add(&bt, &pps->capffth->tick_time); bintime2timespec(&bt, &ts); (*pseq_ffc)++; *tsp_ffc = ts; #endif #ifdef PPS_SYNC if (fhard) { uint64_t scale; /* * Feed the NTP PLL/FLL. * The FLL wants to know how many (hardware) nanoseconds * elapsed since the previous event. */ tcount = pps->capcount - pps->ppscount[2]; pps->ppscount[2] = pps->capcount; tcount &= pps->capth->th_counter->tc_counter_mask; scale = (uint64_t)1 << 63; scale /= pps->capth->th_counter->tc_frequency; scale *= 2; bt.sec = 0; bt.frac = 0; bintime_addx(&bt, scale * tcount); bintime2timespec(&bt, &ts); hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec); } #endif /* Wakeup anyone sleeping in pps_fetch(). */ wakeup(pps); } /* * Timecounters need to be updated every so often to prevent the hardware * counter from overflowing. Updating also recalculates the cached values * used by the get*() family of functions, so their precision depends on * the update frequency. */ static int tc_tick; SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tc_tick, 0, "Approximate number of hardclock ticks in a millisecond"); void tc_ticktock(int cnt) { static int count; if (mtx_trylock_spin(&tc_setclock_mtx)) { count += cnt; if (count >= tc_tick) { count = 0; tc_windup(NULL); } mtx_unlock_spin(&tc_setclock_mtx); } } static void __inline tc_adjprecision(void) { int t; if (tc_timepercentage > 0) { t = (99 + tc_timepercentage) / tc_timepercentage; tc_precexp = fls(t + (t >> 1)) - 1; FREQ2BT(hz / tc_tick, &bt_timethreshold); FREQ2BT(hz, &bt_tickthreshold); bintime_shift(&bt_timethreshold, tc_precexp); bintime_shift(&bt_tickthreshold, tc_precexp); } else { tc_precexp = 31; bt_timethreshold.sec = INT_MAX; bt_timethreshold.frac = ~(uint64_t)0; bt_tickthreshold = bt_timethreshold; } sbt_timethreshold = bttosbt(bt_timethreshold); sbt_tickthreshold = bttosbt(bt_tickthreshold); } static int sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS) { int error, val; val = tc_timepercentage; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); tc_timepercentage = val; if (cold) goto done; tc_adjprecision(); done: return (0); } static void inittimecounter(void *dummy) { u_int p; int tick_rate; /* * Set the initial timeout to * max(1, ). * People should probably not use the sysctl to set the timeout * to smaller than its initial value, since that value is the * smallest reasonable one. If they want better timestamps they * should use the non-"get"* functions. 
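In caller terms, the trade-off described here is between the cached get*() interfaces, refreshed by tc_windup(), and the precise interfaces that read the timecounter hardware on every call; for example:

	struct timespec ts;

	getnanouptime(&ts);	/* cached: last tc_windup() snapshot, at most ~1 ms stale by default */
	nanouptime(&ts);	/* precise: reads the hardware counter on every call */
	getnanotime(&ts);	/* cached UTC wall clock */
	nanotime(&ts);		/* precise UTC wall clock */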
*/ if (hz > 1000) tc_tick = (hz + 500) / 1000; else tc_tick = 1; tc_adjprecision(); FREQ2BT(hz, &tick_bt); tick_sbt = bttosbt(tick_bt); tick_rate = hz / tc_tick; FREQ2BT(tick_rate, &tc_tick_bt); tc_tick_sbt = bttosbt(tc_tick_bt); p = (tc_tick * 1000000) / hz; printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000); #ifdef FFCLOCK ffclock_init(); #endif /* warm up new timecounter (again) and get rolling. */ (void)timecounter->tc_get_timecount(timecounter); (void)timecounter->tc_get_timecount(timecounter); mtx_lock_spin(&tc_setclock_mtx); tc_windup(NULL); mtx_unlock_spin(&tc_setclock_mtx); } SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL); /* Cpu tick handling -------------------------------------------------*/ static int cpu_tick_variable; static uint64_t cpu_tick_frequency; static DPCPU_DEFINE(uint64_t, tc_cpu_ticks_base); static DPCPU_DEFINE(unsigned, tc_cpu_ticks_last); static uint64_t tc_cpu_ticks(void) { struct timecounter *tc; uint64_t res, *base; unsigned u, *last; critical_enter(); base = DPCPU_PTR(tc_cpu_ticks_base); last = DPCPU_PTR(tc_cpu_ticks_last); tc = timehands->th_counter; u = tc->tc_get_timecount(tc) & tc->tc_counter_mask; if (u < *last) *base += (uint64_t)tc->tc_counter_mask + 1; *last = u; res = u + *base; critical_exit(); return (res); } void cpu_tick_calibration(void) { static time_t last_calib; if (time_uptime != last_calib && !(time_uptime & 0xf)) { cpu_tick_calibrate(0); last_calib = time_uptime; } } /* * This function gets called every 16 seconds on only one designated * CPU in the system from hardclock() via cpu_tick_calibration()(). * * Whenever the real time clock is stepped we get called with reset=1 * to make sure we handle suspend/resume and similar events correctly. */ static void cpu_tick_calibrate(int reset) { static uint64_t c_last; uint64_t c_this, c_delta; static struct bintime t_last; struct bintime t_this, t_delta; uint32_t divi; if (reset) { /* The clock was stepped, abort & reset */ t_last.sec = 0; return; } /* we don't calibrate fixed rate cputicks */ if (!cpu_tick_variable) return; getbinuptime(&t_this); c_this = cpu_ticks(); if (t_last.sec != 0) { c_delta = c_this - c_last; t_delta = t_this; bintime_sub(&t_delta, &t_last); /* * Headroom: * 2^(64-20) / 16[s] = * 2^(44) / 16[s] = * 17.592.186.044.416 / 16 = * 1.099.511.627.776 [Hz] */ divi = t_delta.sec << 20; divi |= t_delta.frac >> (64 - 20); c_delta <<= 20; c_delta /= divi; if (c_delta > cpu_tick_frequency) { if (0 && bootverbose) printf("cpu_tick increased to %ju Hz\n", c_delta); cpu_tick_frequency = c_delta; } } c_last = c_this; t_last = t_this; } void set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var) { if (func == NULL) { cpu_ticks = tc_cpu_ticks; } else { cpu_tick_frequency = freq; cpu_tick_variable = var; cpu_ticks = func; } } uint64_t cpu_tickrate(void) { if (cpu_ticks == tc_cpu_ticks) return (tc_getfrequency()); return (cpu_tick_frequency); } /* * We need to be slightly careful converting cputicks to microseconds. * There is plenty of margin in 64 bits of microseconds (half a million * years) and in 64 bits at 4 GHz (146 years), but if we do a multiply * before divide conversion (to retain precision) we find that the * margin shrinks to 1.5 hours (one millionth of 146y). * With a three prong approach we never lose significant bits, no * matter what the cputick rate and length of timeinterval is. 
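To put numbers on that margin: at a 4 GHz cputick rate, a naive multiply-before-divide (tick * 1000000 / rate) overflows 64 bits once roughly 2^64 / 10^6, about 1.8 * 10^13, ticks have accumulated, which is only a bit over an hour of CPU time. Callers therefore just use the helper defined below, for instance:

	uint64_t ticks, usecs;

	ticks = cpu_ticks();		/* current per-CPU tick counter */
	usecs = cputick2usec(ticks);	/* safe across the whole 64-bit range */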
*/ uint64_t cputick2usec(uint64_t tick) { if (tick > 18446744073709551LL) /* floor(2^64 / 1000) */ return (tick / (cpu_tickrate() / 1000000LL)); else if (tick > 18446744073709LL) /* floor(2^64 / 1000000) */ return ((tick * 1000LL) / (cpu_tickrate() / 1000LL)); else return ((tick * 1000000LL) / cpu_tickrate()); } cpu_tick_f *cpu_ticks = tc_cpu_ticks; static int vdso_th_enable = 1; static int sysctl_fast_gettime(SYSCTL_HANDLER_ARGS) { int old_vdso_th_enable, error; old_vdso_th_enable = vdso_th_enable; error = sysctl_handle_int(oidp, &old_vdso_th_enable, 0, req); if (error != 0) return (error); vdso_th_enable = old_vdso_th_enable; return (0); } SYSCTL_PROC(_kern_timecounter, OID_AUTO, fast_gettime, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_fast_gettime, "I", "Enable fast time of day"); uint32_t tc_fill_vdso_timehands(struct vdso_timehands *vdso_th) { struct timehands *th; uint32_t enabled; th = timehands; vdso_th->th_algo = VDSO_TH_ALGO_1; vdso_th->th_scale = th->th_scale; vdso_th->th_offset_count = th->th_offset_count; vdso_th->th_counter_mask = th->th_counter->tc_counter_mask; vdso_th->th_offset = th->th_offset; vdso_th->th_boottime = th->th_boottime; enabled = cpu_fill_vdso_timehands(vdso_th, th->th_counter); if (!vdso_th_enable) enabled = 0; return (enabled); } #ifdef COMPAT_FREEBSD32 uint32_t tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32) { struct timehands *th; uint32_t enabled; th = timehands; vdso_th32->th_algo = VDSO_TH_ALGO_1; *(uint64_t *)&vdso_th32->th_scale[0] = th->th_scale; vdso_th32->th_offset_count = th->th_offset_count; vdso_th32->th_counter_mask = th->th_counter->tc_counter_mask; vdso_th32->th_offset.sec = th->th_offset.sec; *(uint64_t *)&vdso_th32->th_offset.frac[0] = th->th_offset.frac; vdso_th32->th_boottime.sec = th->th_boottime.sec; *(uint64_t *)&vdso_th32->th_boottime.frac[0] = th->th_boottime.frac; enabled = cpu_fill_vdso_timehands32(vdso_th32, th->th_counter); if (!vdso_th_enable) enabled = 0; return (enabled); } #endif Index: user/alc/PQ_LAUNDRY/sys/mips/atheros/ar71xx_gpio.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/mips/atheros/ar71xx_gpio.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/mips/atheros/ar71xx_gpio.c (revision 303642) @@ -1,589 +1,637 @@ /*- * Copyright (c) 2009, Oleksandr Tymoshenko * Copyright (c) 2009, Luiz Otavio O Souza. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * GPIO driver for AR71xx */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "gpio_if.h" #define DEFAULT_CAPS (GPIO_PIN_INPUT | GPIO_PIN_OUTPUT) /* * Helpers */ static void ar71xx_gpio_function_enable(struct ar71xx_gpio_softc *sc, uint32_t mask); static void ar71xx_gpio_function_disable(struct ar71xx_gpio_softc *sc, uint32_t mask); static void ar71xx_gpio_pin_configure(struct ar71xx_gpio_softc *sc, struct gpio_pin *pin, uint32_t flags); /* * Driver stuff */ static int ar71xx_gpio_probe(device_t dev); static int ar71xx_gpio_attach(device_t dev); static int ar71xx_gpio_detach(device_t dev); static int ar71xx_gpio_filter(void *arg); static void ar71xx_gpio_intr(void *arg); /* * GPIO interface */ static device_t ar71xx_gpio_get_bus(device_t); static int ar71xx_gpio_pin_max(device_t dev, int *maxpin); static int ar71xx_gpio_pin_getcaps(device_t dev, uint32_t pin, uint32_t *caps); static int ar71xx_gpio_pin_getflags(device_t dev, uint32_t pin, uint32_t *flags); static int ar71xx_gpio_pin_getname(device_t dev, uint32_t pin, char *name); static int ar71xx_gpio_pin_setflags(device_t dev, uint32_t pin, uint32_t flags); static int ar71xx_gpio_pin_set(device_t dev, uint32_t pin, unsigned int value); static int ar71xx_gpio_pin_get(device_t dev, uint32_t pin, unsigned int *val); static int ar71xx_gpio_pin_toggle(device_t dev, uint32_t pin); /* * Enable/disable the GPIO function control space. * * This is primarily for the AR71xx, which has SPI CS1/CS2, UART, SLIC, I2S * as GPIO pin options. */ static void ar71xx_gpio_function_enable(struct ar71xx_gpio_softc *sc, uint32_t mask) { /* * XXX TODO: refactor this out into a per-chipset method. */ if (ar71xx_soc == AR71XX_SOC_AR9341 || ar71xx_soc == AR71XX_SOC_AR9342 || ar71xx_soc == AR71XX_SOC_AR9344 || ar71xx_soc == AR71XX_SOC_QCA9533 || ar71xx_soc == AR71XX_SOC_QCA9533_V2 || ar71xx_soc == AR71XX_SOC_QCA9556 || ar71xx_soc == AR71XX_SOC_QCA9558) GPIO_SET_BITS(sc, AR934X_GPIO_REG_FUNC, mask); else GPIO_SET_BITS(sc, AR71XX_GPIO_FUNCTION, mask); } static void ar71xx_gpio_function_disable(struct ar71xx_gpio_softc *sc, uint32_t mask) { /* * XXX TODO: refactor this out into a per-chipset method. */ if (ar71xx_soc == AR71XX_SOC_AR9341 || ar71xx_soc == AR71XX_SOC_AR9342 || ar71xx_soc == AR71XX_SOC_AR9344 || ar71xx_soc == AR71XX_SOC_QCA9533 || ar71xx_soc == AR71XX_SOC_QCA9533_V2 || ar71xx_soc == AR71XX_SOC_QCA9556 || ar71xx_soc == AR71XX_SOC_QCA9558) GPIO_CLEAR_BITS(sc, AR934X_GPIO_REG_FUNC, mask); else GPIO_CLEAR_BITS(sc, AR71XX_GPIO_FUNCTION, mask); } +/* + * On most platforms, GPIO_OE is a bitmap where the bit set + * means "enable output." + * + * On AR934x and QCA953x, it's the opposite - the bit set means + * "input enable". 
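+ *
+ * For example, to drive pin N as an output most SoCs need bit N set
+ * in GPIO_OE, while the AR934x/QCA953x parts need it cleared.  The
+ * ar71xx_gpio_oe_set_output() / ar71xx_gpio_oe_set_input() helpers
+ * below hide that polarity difference from the rest of the driver.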
+ */ +static int +ar71xx_gpio_oe_is_high(void) +{ + switch (ar71xx_soc) { + case AR71XX_SOC_AR9344: + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: + return 0; + default: + return 1; + } +} + static void -ar71xx_gpio_pin_configure(struct ar71xx_gpio_softc *sc, struct gpio_pin *pin, - unsigned int flags) +ar71xx_gpio_oe_set_output(struct ar71xx_gpio_softc *sc, int b) { uint32_t mask; - mask = 1 << pin->gp_pin; + mask = 1 << b; + if (ar71xx_gpio_oe_is_high()) + GPIO_SET_BITS(sc, AR71XX_GPIO_OE, mask); + else + GPIO_CLEAR_BITS(sc, AR71XX_GPIO_OE, mask); +} + +static void +ar71xx_gpio_oe_set_input(struct ar71xx_gpio_softc *sc, int b) +{ + uint32_t mask; + + mask = 1 << b; + + if (ar71xx_gpio_oe_is_high()) + GPIO_CLEAR_BITS(sc, AR71XX_GPIO_OE, mask); + else + GPIO_SET_BITS(sc, AR71XX_GPIO_OE, mask); +} + +static void +ar71xx_gpio_pin_configure(struct ar71xx_gpio_softc *sc, struct gpio_pin *pin, + unsigned int flags) +{ + /* * Manage input/output */ if (flags & (GPIO_PIN_INPUT|GPIO_PIN_OUTPUT)) { pin->gp_flags &= ~(GPIO_PIN_INPUT|GPIO_PIN_OUTPUT); if (flags & GPIO_PIN_OUTPUT) { pin->gp_flags |= GPIO_PIN_OUTPUT; - GPIO_SET_BITS(sc, AR71XX_GPIO_OE, mask); - } - else { + ar71xx_gpio_oe_set_output(sc, pin->gp_pin); + } else { pin->gp_flags |= GPIO_PIN_INPUT; - GPIO_CLEAR_BITS(sc, AR71XX_GPIO_OE, mask); + ar71xx_gpio_oe_set_input(sc, pin->gp_pin); } } } static device_t ar71xx_gpio_get_bus(device_t dev) { struct ar71xx_gpio_softc *sc; sc = device_get_softc(dev); return (sc->busdev); } static int ar71xx_gpio_pin_max(device_t dev, int *maxpin) { switch (ar71xx_soc) { case AR71XX_SOC_AR9130: case AR71XX_SOC_AR9132: *maxpin = AR91XX_GPIO_PINS - 1; break; case AR71XX_SOC_AR7240: case AR71XX_SOC_AR7241: case AR71XX_SOC_AR7242: *maxpin = AR724X_GPIO_PINS - 1; break; case AR71XX_SOC_AR9330: case AR71XX_SOC_AR9331: *maxpin = AR933X_GPIO_COUNT - 1; break; case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: *maxpin = AR934X_GPIO_COUNT - 1; break; case AR71XX_SOC_QCA9533: case AR71XX_SOC_QCA9533_V2: *maxpin = QCA953X_GPIO_COUNT - 1; break; case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: *maxpin = QCA955X_GPIO_COUNT - 1; break; default: *maxpin = AR71XX_GPIO_PINS - 1; } return (0); } static int ar71xx_gpio_pin_getcaps(device_t dev, uint32_t pin, uint32_t *caps) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); GPIO_LOCK(sc); *caps = sc->gpio_pins[i].gp_caps; GPIO_UNLOCK(sc); return (0); } static int ar71xx_gpio_pin_getflags(device_t dev, uint32_t pin, uint32_t *flags) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); GPIO_LOCK(sc); *flags = sc->gpio_pins[i].gp_flags; GPIO_UNLOCK(sc); return (0); } static int ar71xx_gpio_pin_getname(device_t dev, uint32_t pin, char *name) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); GPIO_LOCK(sc); memcpy(name, sc->gpio_pins[i].gp_name, GPIOMAXNAME); GPIO_UNLOCK(sc); return (0); } static int ar71xx_gpio_pin_setflags(device_t dev, uint32_t pin, uint32_t flags) { int i; struct ar71xx_gpio_softc *sc = device_get_softc(dev); for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return 
(EINVAL); ar71xx_gpio_pin_configure(sc, &sc->gpio_pins[i], flags); return (0); } static int ar71xx_gpio_pin_set(device_t dev, uint32_t pin, unsigned int value) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); if (value) GPIO_WRITE(sc, AR71XX_GPIO_SET, (1 << pin)); else GPIO_WRITE(sc, AR71XX_GPIO_CLEAR, (1 << pin)); return (0); } static int ar71xx_gpio_pin_get(device_t dev, uint32_t pin, unsigned int *val) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); *val = (GPIO_READ(sc, AR71XX_GPIO_IN) & (1 << pin)) ? 1 : 0; return (0); } static int ar71xx_gpio_pin_toggle(device_t dev, uint32_t pin) { int res, i; struct ar71xx_gpio_softc *sc = device_get_softc(dev); for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); res = (GPIO_READ(sc, AR71XX_GPIO_IN) & (1 << pin)) ? 1 : 0; if (res) GPIO_WRITE(sc, AR71XX_GPIO_CLEAR, (1 << pin)); else GPIO_WRITE(sc, AR71XX_GPIO_SET, (1 << pin)); return (0); } static int ar71xx_gpio_filter(void *arg) { /* TODO: something useful */ return (FILTER_STRAY); } static void ar71xx_gpio_intr(void *arg) { struct ar71xx_gpio_softc *sc = arg; GPIO_LOCK(sc); /* TODO: something useful */ GPIO_UNLOCK(sc); } static int ar71xx_gpio_probe(device_t dev) { device_set_desc(dev, "Atheros AR71XX GPIO driver"); return (0); } static int ar71xx_gpio_attach(device_t dev) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i, j, maxpin; int mask, pinon; uint32_t oe; KASSERT((device_get_unit(dev) == 0), ("ar71xx_gpio: Only one gpio module supported")); mtx_init(&sc->gpio_mtx, device_get_nameunit(dev), NULL, MTX_DEF); /* Map control/status registers. */ sc->gpio_mem_rid = 0; sc->gpio_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->gpio_mem_rid, RF_ACTIVE); if (sc->gpio_mem_res == NULL) { device_printf(dev, "couldn't map memory\n"); ar71xx_gpio_detach(dev); return (ENXIO); } if ((sc->gpio_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->gpio_irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) { device_printf(dev, "unable to allocate IRQ resource\n"); ar71xx_gpio_detach(dev); return (ENXIO); } if ((bus_setup_intr(dev, sc->gpio_irq_res, INTR_TYPE_MISC, ar71xx_gpio_filter, ar71xx_gpio_intr, sc, &sc->gpio_ih))) { device_printf(dev, "WARNING: unable to register interrupt handler\n"); ar71xx_gpio_detach(dev); return (ENXIO); } sc->dev = dev; /* Enable function bits that are required */ if (resource_int_value(device_get_name(dev), device_get_unit(dev), "function_set", &mask) == 0) { device_printf(dev, "function_set: 0x%x\n", mask); ar71xx_gpio_function_enable(sc, mask); } /* Disable function bits that are required */ if (resource_int_value(device_get_name(dev), device_get_unit(dev), "function_clear", &mask) == 0) { device_printf(dev, "function_clear: 0x%x\n", mask); ar71xx_gpio_function_disable(sc, mask); } /* Disable interrupts for all pins. 
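 * The filter and interrupt handlers in this driver are still stubs
 * (ar71xx_gpio_filter() just returns FILTER_STRAY), so all pins stay
 * masked in AR71XX_GPIO_INT_MASK here.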
*/ GPIO_WRITE(sc, AR71XX_GPIO_INT_MASK, 0); /* Initialise all pins specified in the mask, up to the pin count */ (void) ar71xx_gpio_pin_max(dev, &maxpin); if (resource_int_value(device_get_name(dev), device_get_unit(dev), "pinmask", &mask) != 0) mask = 0; if (resource_int_value(device_get_name(dev), device_get_unit(dev), "pinon", &pinon) != 0) pinon = 0; device_printf(dev, "gpio pinmask=0x%x\n", mask); for (j = 0; j <= maxpin; j++) { if ((mask & (1 << j)) == 0) continue; sc->gpio_npins++; } /* Iniatilize the GPIO pins, keep the loader settings. */ oe = GPIO_READ(sc, AR71XX_GPIO_OE); + /* + * For AR934x and QCA953x, the meaning of oe is inverted; + * so flip it the right way around so we can parse the GPIO + * state. + */ + if (!ar71xx_gpio_oe_is_high()) + oe = ~oe; + sc->gpio_pins = malloc(sizeof(*sc->gpio_pins) * sc->gpio_npins, M_DEVBUF, M_WAITOK | M_ZERO); for (i = 0, j = 0; j <= maxpin; j++) { if ((mask & (1 << j)) == 0) continue; snprintf(sc->gpio_pins[i].gp_name, GPIOMAXNAME, "pin %d", j); sc->gpio_pins[i].gp_pin = j; sc->gpio_pins[i].gp_caps = DEFAULT_CAPS; if (oe & (1 << j)) sc->gpio_pins[i].gp_flags = GPIO_PIN_OUTPUT; else sc->gpio_pins[i].gp_flags = GPIO_PIN_INPUT; i++; } /* Turn on the hinted pins. */ for (i = 0; i < sc->gpio_npins; i++) { j = sc->gpio_pins[i].gp_pin; if ((pinon & (1 << j)) != 0) { ar71xx_gpio_pin_setflags(dev, j, GPIO_PIN_OUTPUT); ar71xx_gpio_pin_set(dev, j, 1); } } /* * Search through the function hints, in case there's some * overrides such as LNA control. * * hint.gpio.X.func..gpiofunc= * hint.gpio.X.func..gpiomode=1 (for output, default low) */ for (i = 0; i <= maxpin; i++) { char buf[32]; int gpiofunc, gpiomode; snprintf(buf, 32, "func.%d.gpiofunc", i); if (resource_int_value(device_get_name(dev), device_get_unit(dev), buf, &gpiofunc) != 0) continue; /* Get the mode too */ snprintf(buf, 32, "func.%d.gpiomode", i); if (resource_int_value(device_get_name(dev), device_get_unit(dev), buf, &gpiomode) != 0) continue; /* We only handle mode=1 for now */ if (gpiomode != 1) continue; device_printf(dev, "%s: GPIO %d: func=%d, mode=%d\n", __func__, i, gpiofunc, gpiomode); - /* Set output (bit == 0) */ - oe = GPIO_READ(sc, AR71XX_GPIO_OE); - oe &= ~ (1 << i); - GPIO_WRITE(sc, AR71XX_GPIO_OE, oe); - /* Set pin value = 0, so it stays low by default */ oe = GPIO_READ(sc, AR71XX_GPIO_OUT); oe &= ~ (1 << i); GPIO_WRITE(sc, AR71XX_GPIO_OUT, oe); + + /* Set output */ + ar71xx_gpio_oe_set_output(sc, i); /* Finally: Set the output config */ ar71xx_gpio_ouput_configure(i, gpiofunc); } sc->busdev = gpiobus_attach_bus(dev); if (sc->busdev == NULL) { ar71xx_gpio_detach(dev); return (ENXIO); } return (0); } static int ar71xx_gpio_detach(device_t dev) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); KASSERT(mtx_initialized(&sc->gpio_mtx), ("gpio mutex not initialized")); gpiobus_detach_bus(dev); if (sc->gpio_ih) bus_teardown_intr(dev, sc->gpio_irq_res, sc->gpio_ih); if (sc->gpio_irq_res) bus_release_resource(dev, SYS_RES_IRQ, sc->gpio_irq_rid, sc->gpio_irq_res); if (sc->gpio_mem_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->gpio_mem_rid, sc->gpio_mem_res); if (sc->gpio_pins) free(sc->gpio_pins, M_DEVBUF); mtx_destroy(&sc->gpio_mtx); return(0); } static device_method_t ar71xx_gpio_methods[] = { DEVMETHOD(device_probe, ar71xx_gpio_probe), DEVMETHOD(device_attach, ar71xx_gpio_attach), DEVMETHOD(device_detach, ar71xx_gpio_detach), /* GPIO protocol */ DEVMETHOD(gpio_get_bus, ar71xx_gpio_get_bus), DEVMETHOD(gpio_pin_max, ar71xx_gpio_pin_max), DEVMETHOD(gpio_pin_getname, 
ar71xx_gpio_pin_getname), DEVMETHOD(gpio_pin_getflags, ar71xx_gpio_pin_getflags), DEVMETHOD(gpio_pin_getcaps, ar71xx_gpio_pin_getcaps), DEVMETHOD(gpio_pin_setflags, ar71xx_gpio_pin_setflags), DEVMETHOD(gpio_pin_get, ar71xx_gpio_pin_get), DEVMETHOD(gpio_pin_set, ar71xx_gpio_pin_set), DEVMETHOD(gpio_pin_toggle, ar71xx_gpio_pin_toggle), {0, 0}, }; static driver_t ar71xx_gpio_driver = { "gpio", ar71xx_gpio_methods, sizeof(struct ar71xx_gpio_softc), }; static devclass_t ar71xx_gpio_devclass; DRIVER_MODULE(ar71xx_gpio, apb, ar71xx_gpio_driver, ar71xx_gpio_devclass, 0, 0); Index: user/alc/PQ_LAUNDRY/sys/mips/broadcom/uart_cpu_chipc.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/mips/broadcom/uart_cpu_chipc.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/mips/broadcom/uart_cpu_chipc.c (revision 303642) @@ -1,123 +1,173 @@ /*- * Copyright (c) 2016 Michael Zhilin * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_uart.h" #include #include #include #include #include #include #include #include #include #include #include #include "bcm_socinfo.h" +#ifdef CFE +#include +#include +#include +#endif + bus_space_tag_t uart_bus_space_io; bus_space_tag_t uart_bus_space_mem; static struct uart_class *chipc_uart_class = &uart_ns8250_class; #define CHIPC_UART_BAUDRATE 115200 int uart_cpu_eqres(struct uart_bas *b1, struct uart_bas *b2) { return ((b1->bsh == b2->bsh && b1->bst == b2->bst) ? 
1 : 0); } static int -uart_cpu_init(struct uart_devinfo *di, int uart, int baudrate) +uart_cpu_init(struct uart_devinfo *di, u_int uart, int baudrate) { struct bcm_socinfo *socinfo; if (uart >= CHIPC_UART_MAX) return (EINVAL); socinfo = bcm_get_socinfo(); di->ops = uart_getops(chipc_uart_class); di->bas.chan = 0; di->bas.bst = uart_bus_space_mem; di->bas.bsh = (bus_space_handle_t) BCM_SOCREG(CHIPC_UART(uart)); di->bas.regshft = 0; di->bas.rclk = socinfo->uartrate; /* in Hz */ di->baudrate = baudrate; di->databits = 8; di->stopbits = 1; di->parity = UART_PARITY_NONE; return (0); } +#ifdef CFE +static int +uart_getenv_cfe(int devtype, struct uart_devinfo *di) +{ + char device[sizeof("uartXX")]; + int baud, fd, len; + int ret; + u_int uart; + + /* CFE only vends console configuration */ + if (devtype != UART_DEV_CONSOLE) + return (ENODEV); + + /* Fetch console device */ + ret = cfe_getenv("BOOT_CONSOLE", device, sizeof(device)); + if (ret != CFE_OK) + return (ENXIO); + + /* Parse serial console unit. Fails on non-uart devices. */ + if (sscanf(device, "uart%u", &uart) != 1) + return (ENXIO); + + /* Fetch device handle */ + fd = cfe_getstdhandle(CFE_STDHANDLE_CONSOLE); + if (fd < 0) + return (ENXIO); + + /* Fetch serial configuration */ + ret = cfe_ioctl(fd, IOCTL_SERIAL_GETSPEED, (unsigned char *)&baud, + sizeof(baud), &len, 0); + if (ret != CFE_OK) + baud = CHIPC_UART_BAUDRATE; + + /* Initialize device info */ + return (uart_cpu_init(di, uart, baud)); +} +#endif /* CFE */ + int uart_cpu_getdev(int devtype, struct uart_devinfo *di) { int ivar; uart_bus_space_io = NULL; uart_bus_space_mem = mips_bus_space_generic; - /* Check the environment. */ +#ifdef CFE + /* Check the CFE environment */ + if (uart_getenv_cfe(devtype, di) == 0) + return (0); +#endif /* CFE */ + + /* Check the kernel environment. */ if (uart_getenv(devtype, di, chipc_uart_class) == 0) return (0); /* Scan the device hints for the first matching device */ - for (int i = 0; i < CHIPC_UART_MAX; i++) { + for (u_int i = 0; i < CHIPC_UART_MAX; i++) { if (resource_int_value("uart", i, "flags", &ivar)) continue; /* Check usability */ if (devtype == UART_DEV_CONSOLE && !UART_FLAGS_CONSOLE(ivar)) continue; if (devtype == UART_DEV_DBGPORT && !UART_FLAGS_DBGPORT(ivar)) continue; if (resource_int_value("uart", i, "disabled", &ivar) == 0 && ivar == 0) continue; /* Found */ if (resource_int_value("uart", i, "baud", &ivar) != 0) ivar = CHIPC_UART_BAUDRATE; return (uart_cpu_init(di, i, ivar)); } /* Default to uart0/115200 */ return (uart_cpu_init(di, 0, CHIPC_UART_BAUDRATE)); } Index: user/alc/PQ_LAUNDRY/sys/mips/conf/SENTRY5.hints =================================================================== --- user/alc/PQ_LAUNDRY/sys/mips/conf/SENTRY5.hints (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/mips/conf/SENTRY5.hints (revision 303642) @@ -1,7 +1,4 @@ # $FreeBSD$ hint.bhnd.0.at="nexus0" hint.bhnd.0.maddr="0x18000000" hint.bhnd.0.msize="0x00100000" - -# console on uart1 -hint.uart.1.flags="0x10" Index: user/alc/PQ_LAUNDRY/sys/mips/conf/TL-WDR4300.hints =================================================================== --- user/alc/PQ_LAUNDRY/sys/mips/conf/TL-WDR4300.hints (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/mips/conf/TL-WDR4300.hints (revision 303642) @@ -1,226 +1,231 @@ # $FreeBSD$ # MAC/ART ? - they're 00:02:03:04:05:06 :( # ath0 chain0 EXTERNAL_LNA0: 18 # ath0 chain1 EXTERNAL_LNA1: 19 # These are configured as GPIO output, init low, then # set the GPIO 'type' AR934X_GPIO_OUT_EXT_LNA0/AR934X_GPIO_OUT_EXT_LNA1. 
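# (See the hint.gpio.0.func.18 and hint.gpio.0.func.19 entries further
# down, which select gpiofunc 46/47 and gpiomode=1 - output, default
# low - for exactly these two pins.)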
# XXX There's no arge1 on this! # XXX RFKILL? # mdiobus0 on arge0 hint.argemdio.0.at="nexus0" hint.argemdio.0.maddr=0x19000000 hint.argemdio.0.msize=0x1000 hint.argemdio.0.order=0 # DB120 GMAC configuration # + AR934X_ETH_CFG_RGMII_GMAC0 (1 << 0) hint.ar934x_gmac.0.gmac_cfg=0x1 # Board mac address is at 0x1f01fc00. # ath0: offset 0 # ath1: offset -1 # arge0: offset -2 # arge1: not hooked up; doesn't matter hint.ar71xx.0.eeprom_mac_addr=0x1f01fc00 hint.ar71xx.0.eeprom_mac_isascii=0 hint.ar71xx_mac_map.0.devid=ath hint.ar71xx_mac_map.0.unitid=0 hint.ar71xx_mac_map.0.offset=0 hint.ar71xx_mac_map.0.is_local=0 hint.ar71xx_mac_map.1.devid=ath hint.ar71xx_mac_map.1.unitid=1 hint.ar71xx_mac_map.1.offset=-1 hint.ar71xx_mac_map.1.is_local=0 hint.ar71xx_mac_map.2.devid=arge hint.ar71xx_mac_map.2.unitid=0 hint.ar71xx_mac_map.2.offset=-2 hint.ar71xx_mac_map.2.is_local=0 # GMAC0 here - connected to an AR8327 hint.arswitch.0.at="mdio0" hint.arswitch.0.is_7240=0 hint.arswitch.0.is_9340=0 # not the internal switch! hint.arswitch.0.numphys=5 hint.arswitch.0.phy4cpu=0 hint.arswitch.0.is_rgmii=0 hint.arswitch.0.is_gmii=0 # Other AR8327 configuration parameters # AR8327_PAD_MAC_RGMII hint.arswitch.0.pad.0.mode=6 hint.arswitch.0.pad.0.txclk_delay_en=1 hint.arswitch.0.pad.0.rxclk_delay_en=1 # AR8327_CLK_DELAY_SEL1 hint.arswitch.0.pad.0.txclk_delay_sel=1 # AR8327_CLK_DELAY_SEL2 hint.arswitch.0.pad.0.rxclk_delay_sel=2 # XXX there's no LED management just yet! hint.arswitch.0.led.ctrl0=0xc737c737 hint.arswitch.0.led.ctrl1=0x00000000 hint.arswitch.0.led.ctrl2=0x00000000 hint.arswitch.0.led.ctrl3=0x0030c300 hint.arswitch.0.led.open_drain=0 # force_link=1 is required for the rest of the parameters # to be configured. hint.arswitch.0.port.0.force_link=1 hint.arswitch.0.port.0.speed=1000 hint.arswitch.0.port.0.duplex=1 hint.arswitch.0.port.0.txpause=1 hint.arswitch.0.port.0.rxpause=1 # XXX OpenWRT DB120 BSP doesn't have media/duplex set? hint.arge.0.phymask=0x0 hint.arge.0.media=1000 hint.arge.0.fduplex=1 hint.arge.0.miimode=3 # RGMII hint.arge.0.pll_1000=0x06000000 # mdiobus1 on arge1 hint.argemdio.1.at="nexus0" hint.argemdio.1.maddr=0x1a000000 hint.argemdio.1.msize=0x1000 hint.argemdio.1.order=0 # Embedded switch on the AR9344 # mdio1 is actually created as the AR8327 internal bus; so # this pops up as mdio2. # # XXX TODO: there's no need for AR9344 internal switch; it isn't exposed hint.arswitch.1.at="mdio2" hint.arswitch.1.is_7240=0 hint.arswitch.1.is_9340=1 hint.arswitch.1.numphys=5 hint.arswitch.1.phy4cpu=0 # phy 4 is not a "CPU port" PHY here hint.arswitch.1.is_rgmii=0 hint.arswitch.1.is_gmii=1 # arge1 <-> switch PHY is GMII # arge1 - lock up to 1000/full hint.arge.1.phymask=0x0 # Nothing attached here (XXX?) hint.arge.1.media=1000 hint.arge.1.fduplex=1 hint.arge.1.miimode=1 # GMII # MAC for arge1 is the second 6 bytes of the ART # hint.arge.1.eeprommac=0x1f7f0006 # ath0: Where the ART is - last 64k in the flash hint.ath.0.eepromaddr=0x1fff0000 hint.ath.0.eepromsize=16384 # ath1: it's different; it's a PCIe attached device, so # we instead need to teach the PCIe bridge code about it # (ie, the 'early pci fixup' stuff that programs the PCIe # host registers on the NIC) and then we teach ath where # to find it. 
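# (The hint.pcib.0.bus.0.0.0.ath_fixup_addr/ath_fixup_size entries below
# point the PCIe bridge at the calibration data in flash, and
# hint.ath.1.eeprom_firmware then tells ath(4) to pull its EEPROM image
# from that same bus location.)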
# ath1 hint - pcie slot 0 hint.pcib.0.bus.0.0.0.ath_fixup_addr=0x1fff4000 hint.pcib.0.bus.0.0.0.ath_fixup_size=16384 # ath0 - eeprom comes from here hint.ath.1.eeprom_firmware="pcib.0.bus.0.0.0.eeprom_firmware" # flash layout: # # bootargs=console=ttyS0,115200 root=31:02 rootfstype=jffs2 init=/sbin/init mtdparts=ath-nor0:256k(u-boot),64k(u-boot-env),6336k(rootfs),1408k(uImage),64k(mib0),64k(ART) # 128KiB uboot hint.map.0.at="flash/spi0" hint.map.0.start=0x00000000 hint.map.0.end=0x00020000 # 128k u-boot hint.map.0.name="u-boot" hint.map.0.readonly=1 # kernel hint.map.2.at="flash/spi0" hint.map.2.start=0x00020000 hint.map.2.end="search:0x00020000:0x10000:.!/bin/sh" hint.map.2.name="kernel" hint.map.2.readonly=1 # 1344KiB uImage hint.map.3.at="flash/spi0" hint.map.3.start="search:0x00020000:0x10000:.!/bin/sh" hint.map.3.end=0x007d0000 hint.map.3.name="rootfs" hint.map.3.readonly=1 # 64KiB cfg hint.map.4.at="flash/spi0" hint.map.4.start=0x007d0000 hint.map.4.end=0x007e0000 hint.map.4.name="cfg" hint.map.4.readonly=0 # 64KiB mib0 hint.map.5.at="flash/spi0" hint.map.5.start=0x007e0000 hint.map.5.end=0x007f0000 # 64k mib0 hint.map.5.name="mib0" hint.map.5.readonly=1 # 64KiB ART hint.map.6.at="flash/spi0" hint.map.6.start=0x007f0000 hint.map.6.end=0x00800000 # 64k ART hint.map.6.name="ART" hint.map.6.readonly=1 # GPIO configuration # GPIO21 and GPIO22 - USB1 and USB2 power # ath0 chain0 EXTERNAL_LNA0: 18, output # ath0 chain1 EXTERNAL_LNA1: 19, output # These are the GPIO LEDs and buttons which can be software controlled. hint.gpio.0.pinmask=0x0063f800 # Enable GPIO21, GPIO22 output and high - for USB power hint.gpio.0.pinon=0x00600000 hint.gpio.0.func.18.gpiofunc=46 hint.gpio.0.func.18.gpiomode=1 # output, default low hint.gpio.0.func.19.gpiofunc=47 hint.gpio.0.func.19.gpiomode=1 # output, default low # LED QSS - 15 # LED SYSTEM - 14 # LED USB1 - 11 # LED USB2 - 12 # LED WLAN2G - 13 # SWITCH WPS - 16 # SWITCH RFKILL - 17 hint.gpioled.0.at="gpiobus0" hint.gpioled.0.name="USB1" hint.gpioled.0.pins=0x0800 +hint.gpioled.0.invert=1 hint.gpioled.1.at="gpiobus0" hint.gpioled.1.name="USB2" hint.gpioled.1.pins=0x1000 +hint.gpioled.1.invert=1 hint.gpioled.2.at="gpiobus0" hint.gpioled.2.name="WLAN2G" hint.gpioled.2.pins=0x2000 +hint.gpioled.2.invert=1 hint.gpioled.3.at="gpiobus0" hint.gpioled.3.name="SYSTEM" hint.gpioled.3.pins=0x4000 +hint.gpioled.3.invert=1 hint.gpioled.4.at="gpiobus0" hint.gpioled.4.name="QSS" hint.gpioled.4.pins=0x8000 +hint.gpioled.4.invert=1 # XXX TODO: WPS/RFKILL switch Index: user/alc/PQ_LAUNDRY/sys/modules/netgraph/checksum/Makefile =================================================================== --- user/alc/PQ_LAUNDRY/sys/modules/netgraph/checksum/Makefile (nonexistent) +++ user/alc/PQ_LAUNDRY/sys/modules/netgraph/checksum/Makefile (revision 303642) @@ -0,0 +1,20 @@ +# $FreeBSD$ + +.include + +KMOD= ng_checksum +SRCS= ng_checksum.c opt_inet.h opt_inet6.h + +#.if !defined(KERNBUILDDIR) +# +#.if ${MK_INET_SUPPORT} != "no" +#opt_inet.h: +# echo "#define INET 1" > ${.TARGET} +#.endif +#.if ${MK_INET6_SUPPORT} != "no" +#opt_inet6.h: +# echo "#define INET6 1" > ${.TARGET} +#.endif +#.endif + +.include Property changes on: user/alc/PQ_LAUNDRY/sys/modules/netgraph/checksum/Makefile ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: 
user/alc/PQ_LAUNDRY/sys/netgraph/ng_checksum.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/netgraph/ng_checksum.c (nonexistent) +++ user/alc/PQ_LAUNDRY/sys/netgraph/ng_checksum.c (revision 303642) @@ -0,0 +1,729 @@ +/*- + * Copyright (c) 2015 Dmitry Vagin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +/* private data */ +struct ng_checksum_priv { + hook_p in; + hook_p out; + uint8_t dlt; /* DLT_XXX from bpf.h */ + struct ng_checksum_config *conf; + struct ng_checksum_stats stats; +}; + +typedef struct ng_checksum_priv *priv_p; + +/* Netgraph methods */ +static ng_constructor_t ng_checksum_constructor; +static ng_rcvmsg_t ng_checksum_rcvmsg; +static ng_shutdown_t ng_checksum_shutdown; +static ng_newhook_t ng_checksum_newhook; +static ng_rcvdata_t ng_checksum_rcvdata; +static ng_disconnect_t ng_checksum_disconnect; + +#define ERROUT(x) { error = (x); goto done; } + +static const struct ng_parse_struct_field ng_checksum_config_type_fields[] + = NG_CHECKSUM_CONFIG_TYPE; +static const struct ng_parse_type ng_checksum_config_type = { + &ng_parse_struct_type, + &ng_checksum_config_type_fields +}; + +static const struct ng_parse_struct_field ng_checksum_stats_fields[] + = NG_CHECKSUM_STATS_TYPE; +static const struct ng_parse_type ng_checksum_stats_type = { + &ng_parse_struct_type, + &ng_checksum_stats_fields +}; + +static const struct ng_cmdlist ng_checksum_cmdlist[] = { + { + NGM_CHECKSUM_COOKIE, + NGM_CHECKSUM_GETDLT, + "getdlt", + NULL, + &ng_parse_uint8_type + }, + { + NGM_CHECKSUM_COOKIE, + NGM_CHECKSUM_SETDLT, + "setdlt", + &ng_parse_uint8_type, + NULL + }, + { + NGM_CHECKSUM_COOKIE, + NGM_CHECKSUM_GETCONFIG, + "getconfig", + NULL, + &ng_checksum_config_type + }, + { + NGM_CHECKSUM_COOKIE, + NGM_CHECKSUM_SETCONFIG, + "setconfig", + &ng_checksum_config_type, + NULL + }, + { + NGM_CHECKSUM_COOKIE, + NGM_CHECKSUM_GET_STATS, + "getstats", + NULL, + &ng_checksum_stats_type + }, + { + 
NGM_CHECKSUM_COOKIE, + NGM_CHECKSUM_CLR_STATS, + "clrstats", + NULL, + NULL + }, + { + NGM_CHECKSUM_COOKIE, + NGM_CHECKSUM_GETCLR_STATS, + "getclrstats", + NULL, + &ng_checksum_stats_type + }, + { 0 } +}; + +static struct ng_type typestruct = { + .version = NG_ABI_VERSION, + .name = NG_CHECKSUM_NODE_TYPE, + .constructor = ng_checksum_constructor, + .rcvmsg = ng_checksum_rcvmsg, + .shutdown = ng_checksum_shutdown, + .newhook = ng_checksum_newhook, + .rcvdata = ng_checksum_rcvdata, + .disconnect = ng_checksum_disconnect, + .cmdlist = ng_checksum_cmdlist, +}; + +NETGRAPH_INIT(checksum, &typestruct); + +static int +ng_checksum_constructor(node_p node) +{ + priv_p priv; + + priv = malloc(sizeof(*priv), M_NETGRAPH, M_WAITOK|M_ZERO); + priv->dlt = DLT_RAW; + + NG_NODE_SET_PRIVATE(node, priv); + + return (0); +} + +static int +ng_checksum_newhook(node_p node, hook_p hook, const char *name) +{ + const priv_p priv = NG_NODE_PRIVATE(node); + + if (strncmp(name, NG_CHECKSUM_HOOK_IN, strlen(NG_CHECKSUM_HOOK_IN)) == 0) { + priv->in = hook; + } else if (strncmp(name, NG_CHECKSUM_HOOK_OUT, strlen(NG_CHECKSUM_HOOK_OUT)) == 0) { + priv->out = hook; + } else + return (EINVAL); + + return (0); +} + +static int +ng_checksum_rcvmsg(node_p node, item_p item, hook_p lasthook) +{ + const priv_p priv = NG_NODE_PRIVATE(node); + struct ng_checksum_config *conf, *newconf; + struct ng_mesg *msg; + struct ng_mesg *resp = NULL; + int error = 0; + + NGI_GET_MSG(item, msg); + + if (msg->header.typecookie != NGM_CHECKSUM_COOKIE) + ERROUT(EINVAL); + + switch (msg->header.cmd) + { + case NGM_CHECKSUM_GETDLT: + NG_MKRESPONSE(resp, msg, sizeof(uint8_t), M_WAITOK); + + if (resp == NULL) + ERROUT(ENOMEM); + + *((uint8_t *) resp->data) = priv->dlt; + + break; + + case NGM_CHECKSUM_SETDLT: + if (msg->header.arglen != sizeof(uint8_t)) + ERROUT(EINVAL); + + switch (*(uint8_t *) msg->data) + { + case DLT_EN10MB: + case DLT_RAW: + priv->dlt = *(uint8_t *) msg->data; + break; + + default: + ERROUT(EINVAL); + } + + break; + + case NGM_CHECKSUM_GETCONFIG: + if (priv->conf == NULL) + ERROUT(0); + + NG_MKRESPONSE(resp, msg, sizeof(struct ng_checksum_config), M_WAITOK); + + if (resp == NULL) + ERROUT(ENOMEM); + + bcopy(priv->conf, resp->data, sizeof(struct ng_checksum_config)); + + break; + + case NGM_CHECKSUM_SETCONFIG: + conf = (struct ng_checksum_config *) msg->data; + + if (msg->header.arglen != sizeof(struct ng_checksum_config)) + ERROUT(EINVAL); + + conf->csum_flags &= NG_CHECKSUM_CSUM_IPV4|NG_CHECKSUM_CSUM_IPV6; + conf->csum_offload &= NG_CHECKSUM_CSUM_IPV4|NG_CHECKSUM_CSUM_IPV6; + + newconf = malloc(sizeof(struct ng_checksum_config), M_NETGRAPH, M_WAITOK|M_ZERO); + + bcopy(conf, newconf, sizeof(struct ng_checksum_config)); + + if (priv->conf) + free(priv->conf, M_NETGRAPH); + + priv->conf = newconf; + + break; + + case NGM_CHECKSUM_GET_STATS: + case NGM_CHECKSUM_CLR_STATS: + case NGM_CHECKSUM_GETCLR_STATS: + if (msg->header.cmd != NGM_CHECKSUM_CLR_STATS) { + NG_MKRESPONSE(resp, msg, sizeof(struct ng_checksum_stats), M_WAITOK); + + if (resp == NULL) + ERROUT(ENOMEM); + + bcopy(&(priv->stats), resp->data, sizeof(struct ng_checksum_stats)); + } + + if (msg->header.cmd != NGM_CHECKSUM_GET_STATS) + bzero(&(priv->stats), sizeof(struct ng_checksum_stats)); + + break; + + default: + ERROUT(EINVAL); + } + +done: + NG_RESPOND_MSG(error, node, item, resp); + NG_FREE_MSG(msg); + + return (error); +} + +#define PULLUP_CHECK(mbuf, length) do { \ + pullup_len += length; \ + if (((mbuf)->m_pkthdr.len < pullup_len) || \ + (pullup_len > MHLEN)) { \ + 
return (EINVAL); \ + } \ + if ((mbuf)->m_len < pullup_len && \ + (((mbuf) = m_pullup((mbuf), pullup_len)) == NULL)) { \ + return (ENOBUFS); \ + } \ +} while (0) + +#ifdef INET +static int +checksum_ipv4(priv_p priv, struct mbuf *m, int l3_offset) +{ + struct ip *ip4; + int pullup_len; + int hlen, plen; + int processed = 0; + + pullup_len = l3_offset; + + PULLUP_CHECK(m, sizeof(struct ip)); + ip4 = (struct ip *) mtodo(m, l3_offset); + + if (ip4->ip_v != IPVERSION) + return (EOPNOTSUPP); + + hlen = ip4->ip_hl << 2; + plen = ntohs(ip4->ip_len); + + if (hlen < sizeof(struct ip) || m->m_pkthdr.len < l3_offset + plen) + return (EINVAL); + + if (m->m_pkthdr.csum_flags & CSUM_IP) { + ip4->ip_sum = 0; + + if ((priv->conf->csum_offload & CSUM_IP) == 0) { + if (hlen == sizeof(struct ip)) + ip4->ip_sum = in_cksum_hdr(ip4); + else + ip4->ip_sum = in_cksum_skip(m, l3_offset + hlen, l3_offset); + + m->m_pkthdr.csum_flags &= ~CSUM_IP; + } + + processed = 1; + } + + pullup_len = l3_offset + hlen; + + /* We can not calculate a checksum fragmented packets */ + if (ip4->ip_off & htons(IP_MF|IP_OFFMASK)) { + m->m_pkthdr.csum_flags &= ~(CSUM_TCP|CSUM_UDP); + return (0); + } + + switch (ip4->ip_p) + { + case IPPROTO_TCP: + if (m->m_pkthdr.csum_flags & CSUM_TCP) { + struct tcphdr *th; + + PULLUP_CHECK(m, sizeof(struct tcphdr)); + th = (struct tcphdr *) mtodo(m, l3_offset + hlen); + + th->th_sum = in_pseudo(ip4->ip_src.s_addr, + ip4->ip_dst.s_addr, htons(ip4->ip_p + plen - hlen)); + + if ((priv->conf->csum_offload & CSUM_TCP) == 0) { + th->th_sum = in_cksum_skip(m, l3_offset + plen, l3_offset + hlen); + m->m_pkthdr.csum_flags &= ~CSUM_TCP; + } + + processed = 1; + } + + m->m_pkthdr.csum_flags &= ~CSUM_UDP; + break; + + case IPPROTO_UDP: + if (m->m_pkthdr.csum_flags & CSUM_UDP) { + struct udphdr *uh; + + PULLUP_CHECK(m, sizeof(struct udphdr)); + uh = (struct udphdr *) mtodo(m, l3_offset + hlen); + + uh->uh_sum = in_pseudo(ip4->ip_src.s_addr, + ip4->ip_dst.s_addr, htons(ip4->ip_p + plen - hlen)); + + if ((priv->conf->csum_offload & CSUM_UDP) == 0) { + uh->uh_sum = in_cksum_skip(m, + l3_offset + plen, l3_offset + hlen); + + if (uh->uh_sum == 0) + uh->uh_sum = 0xffff; + + m->m_pkthdr.csum_flags &= ~CSUM_UDP; + } + + processed = 1; + } + + m->m_pkthdr.csum_flags &= ~CSUM_TCP; + break; + + default: + m->m_pkthdr.csum_flags &= ~(CSUM_TCP|CSUM_UDP); + break; + } + + m->m_pkthdr.csum_flags &= ~NG_CHECKSUM_CSUM_IPV6; + + if (processed) + priv->stats.processed++; + + return (0); +} +#endif /* INET */ + +#ifdef INET6 +static int +checksum_ipv6(priv_p priv, struct mbuf *m, int l3_offset) +{ + struct ip6_hdr *ip6; + struct ip6_ext *ip6e = NULL; + int pullup_len; + int hlen, plen; + int nxt; + int processed = 0; + + pullup_len = l3_offset; + + PULLUP_CHECK(m, sizeof(struct ip6_hdr)); + ip6 = (struct ip6_hdr *) mtodo(m, l3_offset); + + if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) + return (EOPNOTSUPP); + + hlen = sizeof(struct ip6_hdr); + plen = ntohs(ip6->ip6_plen) + hlen; + + if (m->m_pkthdr.len < l3_offset + plen) + return (EINVAL); + + nxt = ip6->ip6_nxt; + + for (;;) { + switch (nxt) + { + case IPPROTO_DSTOPTS: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + PULLUP_CHECK(m, sizeof(struct ip6_ext)); + ip6e = (struct ip6_ext *) mtodo(m, l3_offset + hlen); + nxt = ip6e->ip6e_nxt; + hlen += (ip6e->ip6e_len + 1) << 3; + pullup_len = l3_offset + hlen; + break; + + case IPPROTO_AH: + PULLUP_CHECK(m, sizeof(struct ip6_ext)); + ip6e = (struct ip6_ext *) mtodo(m, l3_offset + hlen); + nxt = ip6e->ip6e_nxt; + hlen += 
(ip6e->ip6e_len + 2) << 2; + pullup_len = l3_offset + hlen; + break; + + case IPPROTO_FRAGMENT: + /* We can not calculate a checksum fragmented packets */ + m->m_pkthdr.csum_flags &= ~(CSUM_TCP_IPV6|CSUM_UDP_IPV6); + return (0); + + default: + goto loopend; + } + + if (nxt == 0) + return (EINVAL); + } + +loopend: + + switch (nxt) + { + case IPPROTO_TCP: + if (m->m_pkthdr.csum_flags & CSUM_TCP_IPV6) { + struct tcphdr *th; + + PULLUP_CHECK(m, sizeof(struct tcphdr)); + th = (struct tcphdr *) mtodo(m, l3_offset + hlen); + + th->th_sum = in6_cksum_pseudo(ip6, plen - hlen, nxt, 0); + + if ((priv->conf->csum_offload & CSUM_TCP_IPV6) == 0) { + th->th_sum = in_cksum_skip(m, l3_offset + plen, l3_offset + hlen); + m->m_pkthdr.csum_flags &= ~CSUM_TCP_IPV6; + } + + processed = 1; + } + + m->m_pkthdr.csum_flags &= ~CSUM_UDP_IPV6; + break; + + case IPPROTO_UDP: + if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6) { + struct udphdr *uh; + + PULLUP_CHECK(m, sizeof(struct udphdr)); + uh = (struct udphdr *) mtodo(m, l3_offset + hlen); + + uh->uh_sum = in6_cksum_pseudo(ip6, plen - hlen, nxt, 0); + + if ((priv->conf->csum_offload & CSUM_UDP_IPV6) == 0) { + uh->uh_sum = in_cksum_skip(m, + l3_offset + plen, l3_offset + hlen); + + if (uh->uh_sum == 0) + uh->uh_sum = 0xffff; + + m->m_pkthdr.csum_flags &= ~CSUM_UDP_IPV6; + } + + processed = 1; + } + + m->m_pkthdr.csum_flags &= ~CSUM_TCP_IPV6; + break; + + default: + m->m_pkthdr.csum_flags &= ~(CSUM_TCP_IPV6|CSUM_UDP_IPV6); + break; + } + + m->m_pkthdr.csum_flags &= ~NG_CHECKSUM_CSUM_IPV4; + + if (processed) + priv->stats.processed++; + + return (0); +} +#endif /* INET6 */ + +#undef PULLUP_CHECK + +static int +ng_checksum_rcvdata(hook_p hook, item_p item) +{ + const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); + struct mbuf *m; + hook_p out; + int error = 0; + + priv->stats.received++; + + NGI_GET_M(item, m); + +#define PULLUP_CHECK(mbuf, length) do { \ + pullup_len += length; \ + if (((mbuf)->m_pkthdr.len < pullup_len) || \ + (pullup_len > MHLEN)) { \ + error = EINVAL; \ + goto bypass; \ + } \ + if ((mbuf)->m_len < pullup_len && \ + (((mbuf) = m_pullup((mbuf), pullup_len)) == NULL)) { \ + error = ENOBUFS; \ + goto drop; \ + } \ +} while (0) + + if (!(priv->conf && hook == priv->in && m && (m->m_flags & M_PKTHDR))) + goto bypass; + + m->m_pkthdr.csum_flags |= priv->conf->csum_flags; + + if (m->m_pkthdr.csum_flags & (NG_CHECKSUM_CSUM_IPV4|NG_CHECKSUM_CSUM_IPV6)) + { + struct ether_header *eh; + struct ng_checksum_vlan_header *vh; + int pullup_len = 0; + uint16_t etype; + + m = m_unshare(m, M_NOWAIT); + + if (m == NULL) + ERROUT(ENOMEM); + + switch (priv->dlt) + { + case DLT_EN10MB: + PULLUP_CHECK(m, sizeof(struct ether_header)); + eh = mtod(m, struct ether_header *); + etype = ntohs(eh->ether_type); + + for (;;) { /* QinQ support */ + switch (etype) + { + case 0x8100: + case 0x88A8: + case 0x9100: + PULLUP_CHECK(m, sizeof(struct ng_checksum_vlan_header)); + vh = (struct ng_checksum_vlan_header *) mtodo(m, + pullup_len - sizeof(struct ng_checksum_vlan_header)); + etype = ntohs(vh->etype); + break; + + default: + goto loopend; + } + } +loopend: +#ifdef INET + if (etype == ETHERTYPE_IP && + (m->m_pkthdr.csum_flags & NG_CHECKSUM_CSUM_IPV4)) { + error = checksum_ipv4(priv, m, pullup_len); + if (error == ENOBUFS) + goto drop; + } else +#endif +#ifdef INET6 + if (etype == ETHERTYPE_IPV6 && + (m->m_pkthdr.csum_flags & NG_CHECKSUM_CSUM_IPV6)) { + error = checksum_ipv6(priv, m, pullup_len); + if (error == ENOBUFS) + goto drop; + } else +#endif + { + m->m_pkthdr.csum_flags &= + 
~(NG_CHECKSUM_CSUM_IPV4|NG_CHECKSUM_CSUM_IPV6); + } + + break; + + case DLT_RAW: +#ifdef INET + if (m->m_pkthdr.csum_flags & NG_CHECKSUM_CSUM_IPV4) + { + error = checksum_ipv4(priv, m, pullup_len); + + if (error == 0) + goto bypass; + else if (error == ENOBUFS) + goto drop; + } +#endif +#ifdef INET6 + if (m->m_pkthdr.csum_flags & NG_CHECKSUM_CSUM_IPV6) + { + error = checksum_ipv6(priv, m, pullup_len); + + if (error == 0) + goto bypass; + else if (error == ENOBUFS) + goto drop; + } +#endif + if (error) + m->m_pkthdr.csum_flags &= + ~(NG_CHECKSUM_CSUM_IPV4|NG_CHECKSUM_CSUM_IPV6); + + break; + + default: + ERROUT(EINVAL); + } + } + +#undef PULLUP_CHECK + +bypass: + out = NULL; + + if (hook == priv->in) { + /* return frames on 'in' hook if 'out' not connected */ + out = priv->out ? priv->out : priv->in; + } else if (hook == priv->out && priv->in) { + /* pass frames on 'out' hook if 'in' connected */ + out = priv->in; + } + + if (out == NULL) + ERROUT(0); + + NG_FWD_NEW_DATA(error, item, out, m); + + return (error); + +done: +drop: + NG_FREE_ITEM(item); + NG_FREE_M(m); + + priv->stats.dropped++; + + return (error); +} + +static int +ng_checksum_shutdown(node_p node) +{ + const priv_p priv = NG_NODE_PRIVATE(node); + + NG_NODE_SET_PRIVATE(node, NULL); + NG_NODE_UNREF(node); + + if (priv->conf) + free(priv->conf, M_NETGRAPH); + + free(priv, M_NETGRAPH); + + return (0); +} + +static int +ng_checksum_disconnect(hook_p hook) +{ + priv_p priv; + + priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); + + if (hook == priv->in) + priv->in = NULL; + + if (hook == priv->out) + priv->out = NULL; + + if (NG_NODE_NUMHOOKS(NG_HOOK_NODE(hook)) == 0 && + NG_NODE_IS_VALID(NG_HOOK_NODE(hook))) /* already shutting down? */ + ng_rmnode_self(NG_HOOK_NODE(hook)); + + return (0); +} Property changes on: user/alc/PQ_LAUNDRY/sys/netgraph/ng_checksum.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: user/alc/PQ_LAUNDRY/sys/netgraph/ng_checksum.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/netgraph/ng_checksum.h (nonexistent) +++ user/alc/PQ_LAUNDRY/sys/netgraph/ng_checksum.h (revision 303642) @@ -0,0 +1,88 @@ +/*- + * Copyright (c) 2015 Dmitry Vagin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NETGRAPH_NG_CHECKSUM_H_ +#define _NETGRAPH_NG_CHECKSUM_H_ + +/* Node type name. */ +#define NG_CHECKSUM_NODE_TYPE "checksum" + +/* Node type cookie. */ +#define NGM_CHECKSUM_COOKIE 439419912 + +/* Hook names */ +#define NG_CHECKSUM_HOOK_IN "in" +#define NG_CHECKSUM_HOOK_OUT "out" + +/* Checksum flags */ +#define NG_CHECKSUM_CSUM_IPV4 (CSUM_IP|CSUM_TCP|CSUM_UDP) +#define NG_CHECKSUM_CSUM_IPV6 (CSUM_TCP_IPV6|CSUM_UDP_IPV6) + +/* Netgraph commands understood by this node type */ +enum { + NGM_CHECKSUM_GETDLT = 1, + NGM_CHECKSUM_SETDLT, + NGM_CHECKSUM_GETCONFIG, + NGM_CHECKSUM_SETCONFIG, + NGM_CHECKSUM_GETCLR_STATS, + NGM_CHECKSUM_GET_STATS, + NGM_CHECKSUM_CLR_STATS, +}; + +/* Parsing declarations */ + +#define NG_CHECKSUM_CONFIG_TYPE { \ + { "csum_flags", &ng_parse_uint64_type }, \ + { "csum_offload", &ng_parse_uint64_type }, \ + { NULL } \ +} + +#define NG_CHECKSUM_STATS_TYPE { \ + { "Received", &ng_parse_uint64_type }, \ + { "Processed", &ng_parse_uint64_type }, \ + { "Dropped", &ng_parse_uint64_type }, \ + { NULL } \ +} + +struct ng_checksum_config { + uint64_t csum_flags; + uint64_t csum_offload; +}; + +struct ng_checksum_stats { + uint64_t received; + uint64_t processed; + uint64_t dropped; +}; + +struct ng_checksum_vlan_header { + u_int16_t tag; + u_int16_t etype; +}; + +#endif /* _NETGRAPH_NG_CHECKSUM_H_ */ Property changes on: user/alc/PQ_LAUNDRY/sys/netgraph/ng_checksum.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: user/alc/PQ_LAUNDRY/sys/netgraph/ng_patch.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/netgraph/ng_patch.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/netgraph/ng_patch.c (revision 303642) @@ -1,576 +1,579 @@ /*- - * Copyright (C) 2010 by Maxim Ignatenko + * Copyright (c) 2010 Maxim Ignatenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include static ng_constructor_t ng_patch_constructor; static ng_rcvmsg_t ng_patch_rcvmsg; static ng_shutdown_t ng_patch_shutdown; static ng_newhook_t ng_patch_newhook; static ng_rcvdata_t ng_patch_rcvdata; static ng_disconnect_t ng_patch_disconnect; static int ng_patch_config_getlen(const struct ng_parse_type *type, const u_char *start, const u_char *buf) { - const struct ng_patch_config *p; + const struct ng_patch_config *conf; - p = (const struct ng_patch_config *)(buf - + conf = (const struct ng_patch_config *)(buf - offsetof(struct ng_patch_config, ops)); - return (p->count); + + return (conf->count); } static const struct ng_parse_struct_field ng_patch_op_type_fields[] = NG_PATCH_OP_TYPE_INFO; static const struct ng_parse_type ng_patch_op_type = { &ng_parse_struct_type, &ng_patch_op_type_fields }; -static const struct ng_parse_array_info ng_patch_confarr_info = { +static const struct ng_parse_array_info ng_patch_ops_array_info = { &ng_patch_op_type, &ng_patch_config_getlen }; -static const struct ng_parse_type ng_patch_confarr_type = { +static const struct ng_parse_type ng_patch_ops_array_type = { &ng_parse_array_type, - &ng_patch_confarr_info + &ng_patch_ops_array_info }; static const struct ng_parse_struct_field ng_patch_config_type_fields[] = NG_PATCH_CONFIG_TYPE_INFO; static const struct ng_parse_type ng_patch_config_type = { &ng_parse_struct_type, &ng_patch_config_type_fields }; static const struct ng_parse_struct_field ng_patch_stats_fields[] = NG_PATCH_STATS_TYPE_INFO; static const struct ng_parse_type ng_patch_stats_type = { &ng_parse_struct_type, &ng_patch_stats_fields }; static const struct ng_cmdlist ng_patch_cmdlist[] = { { NGM_PATCH_COOKIE, NGM_PATCH_GETCONFIG, "getconfig", NULL, &ng_patch_config_type }, { NGM_PATCH_COOKIE, NGM_PATCH_SETCONFIG, "setconfig", &ng_patch_config_type, NULL }, { NGM_PATCH_COOKIE, NGM_PATCH_GET_STATS, "getstats", NULL, &ng_patch_stats_type }, { NGM_PATCH_COOKIE, NGM_PATCH_CLR_STATS, "clrstats", NULL, NULL }, { NGM_PATCH_COOKIE, NGM_PATCH_GETCLR_STATS, "getclrstats", NULL, &ng_patch_stats_type }, { 0 } }; static struct ng_type typestruct = { .version = NG_ABI_VERSION, .name = NG_PATCH_NODE_TYPE, .constructor = ng_patch_constructor, .rcvmsg = ng_patch_rcvmsg, .shutdown = ng_patch_shutdown, .newhook = ng_patch_newhook, .rcvdata = ng_patch_rcvdata, .disconnect = ng_patch_disconnect, .cmdlist = ng_patch_cmdlist, }; + NETGRAPH_INIT(patch, &typestruct); union patch_val { uint8_t v1; uint16_t v2; uint32_t v4; uint64_t v8; }; +/* private data */ struct ng_patch_priv { hook_p in; hook_p out; struct ng_patch_config *config; union patch_val *val; struct ng_patch_stats stats; }; typedef struct ng_patch_priv *priv_p; #define NG_PATCH_CONF_SIZE(count) (sizeof(struct ng_patch_config) + \ (count) * sizeof(struct ng_patch_op)) static void do_patch(priv_p conf, struct mbuf *m); static int ng_patch_constructor(node_p node) { priv_p 
privdata; privdata = malloc(sizeof(*privdata), M_NETGRAPH, M_WAITOK | M_ZERO); NG_NODE_SET_PRIVATE(node, privdata); privdata->in = NULL; privdata->out = NULL; privdata->config = NULL; return (0); } static int ng_patch_newhook(node_p node, hook_p hook, const char *name) { const priv_p privp = NG_NODE_PRIVATE(node); if (strncmp(name, NG_PATCH_HOOK_IN, strlen(NG_PATCH_HOOK_IN)) == 0) { privp->in = hook; } else if (strncmp(name, NG_PATCH_HOOK_OUT, strlen(NG_PATCH_HOOK_OUT)) == 0) { privp->out = hook; } else return (EINVAL); return(0); } static int ng_patch_rcvmsg(node_p node, item_p item, hook_p lasthook) { const priv_p privp = NG_NODE_PRIVATE(node); struct ng_patch_config *conf, *newconf; union patch_val *newval; struct ng_mesg *msg; struct ng_mesg *resp; int i, clear, error; clear = error = 0; resp = NULL; NGI_GET_MSG(item, msg); switch (msg->header.typecookie) { case NGM_PATCH_COOKIE: switch (msg->header.cmd) { case NGM_PATCH_GETCONFIG: if (privp->config == NULL) break; NG_MKRESPONSE(resp, msg, NG_PATCH_CONF_SIZE(privp->config->count), M_WAITOK); bcopy(privp->config, resp->data, NG_PATCH_CONF_SIZE(privp->config->count)); break; case NGM_PATCH_SETCONFIG: { if (msg->header.arglen < sizeof(struct ng_patch_config)) { error = EINVAL; break; } conf = (struct ng_patch_config *)msg->data; if (msg->header.arglen < NG_PATCH_CONF_SIZE(conf->count)) { error = EINVAL; break; } for(i = 0; i < conf->count; i++) { switch(conf->ops[i].length) { case 1: case 2: case 4: case 8: break; default: error = EINVAL; break; } if (error != 0) break; } conf->csum_flags &= CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP; if (error == 0) { newconf = malloc( NG_PATCH_CONF_SIZE(conf->count), M_NETGRAPH, M_WAITOK); newval = malloc(conf->count * sizeof(union patch_val), M_NETGRAPH, M_WAITOK); for(i = 0; i < conf->count; i++) { switch (conf->ops[i].length) { case 1: newval[i].v1 = conf->ops[i].value; break; case 2: newval[i].v2 = conf->ops[i].value; break; case 4: newval[i].v4 = conf->ops[i].value; break; case 8: newval[i].v8 = conf->ops[i].value; break; } } bcopy(conf, newconf, NG_PATCH_CONF_SIZE(conf->count)); if (privp->val != NULL) free(privp->val, M_NETGRAPH); privp->val = newval; if (privp->config != NULL) free(privp->config, M_NETGRAPH); privp->config = newconf; } break; } case NGM_PATCH_GETCLR_STATS: clear = 1; /* FALLTHROUGH */ case NGM_PATCH_GET_STATS: NG_MKRESPONSE(resp, msg, sizeof(struct ng_patch_stats), M_WAITOK); bcopy(&(privp->stats), resp->data, sizeof(struct ng_patch_stats)); if (clear == 0) break; /* else FALLTHROUGH */ case NGM_PATCH_CLR_STATS: bzero(&(privp->stats), sizeof(struct ng_patch_stats)); break; default: error = EINVAL; break; } break; default: error = EINVAL; break; } NG_RESPOND_MSG(error, node, item, resp); NG_FREE_MSG(msg); return(error); } static void do_patch(priv_p privp, struct mbuf *m) { struct ng_patch_config *conf; uint64_t buf; int i, patched; conf = privp->config; patched = 0; for(i = 0; i < conf->count; i++) { if (conf->ops[i].offset + conf->ops[i].length > m->m_pkthdr.len) continue; /* for "=" operation we don't need to copy data from mbuf */ if (conf->ops[i].mode != NG_PATCH_MODE_SET) { m_copydata(m, conf->ops[i].offset, conf->ops[i].length, (caddr_t)&buf); } switch (conf->ops[i].length) { case 1: switch (conf->ops[i].mode) { case NG_PATCH_MODE_SET: *((uint8_t *)&buf) = privp->val[i].v1; break; case NG_PATCH_MODE_ADD: *((uint8_t *)&buf) += privp->val[i].v1; break; case NG_PATCH_MODE_SUB: *((uint8_t *)&buf) -= privp->val[i].v1; break; case NG_PATCH_MODE_MUL: *((uint8_t *)&buf) *= 
privp->val[i].v1; break; case NG_PATCH_MODE_DIV: *((uint8_t *)&buf) /= privp->val[i].v1; break; case NG_PATCH_MODE_NEG: *((int8_t *)&buf) = - *((int8_t *)&buf); break; case NG_PATCH_MODE_AND: *((uint8_t *)&buf) &= privp->val[i].v1; break; case NG_PATCH_MODE_OR: *((uint8_t *)&buf) |= privp->val[i].v1; break; case NG_PATCH_MODE_XOR: *((uint8_t *)&buf) ^= privp->val[i].v1; break; case NG_PATCH_MODE_SHL: *((uint8_t *)&buf) <<= privp->val[i].v1; break; case NG_PATCH_MODE_SHR: *((uint8_t *)&buf) >>= privp->val[i].v1; break; } break; case 2: *((int16_t *)&buf) = ntohs(*((int16_t *)&buf)); switch (conf->ops[i].mode) { case NG_PATCH_MODE_SET: *((uint16_t *)&buf) = privp->val[i].v2; break; case NG_PATCH_MODE_ADD: *((uint16_t *)&buf) += privp->val[i].v2; break; case NG_PATCH_MODE_SUB: *((uint16_t *)&buf) -= privp->val[i].v2; break; case NG_PATCH_MODE_MUL: *((uint16_t *)&buf) *= privp->val[i].v2; break; case NG_PATCH_MODE_DIV: *((uint16_t *)&buf) /= privp->val[i].v2; break; case NG_PATCH_MODE_NEG: *((int16_t *)&buf) = - *((int16_t *)&buf); break; case NG_PATCH_MODE_AND: *((uint16_t *)&buf) &= privp->val[i].v2; break; case NG_PATCH_MODE_OR: *((uint16_t *)&buf) |= privp->val[i].v2; break; case NG_PATCH_MODE_XOR: *((uint16_t *)&buf) ^= privp->val[i].v2; break; case NG_PATCH_MODE_SHL: *((uint16_t *)&buf) <<= privp->val[i].v2; break; case NG_PATCH_MODE_SHR: *((uint16_t *)&buf) >>= privp->val[i].v2; break; } *((int16_t *)&buf) = htons(*((int16_t *)&buf)); break; case 4: *((int32_t *)&buf) = ntohl(*((int32_t *)&buf)); switch (conf->ops[i].mode) { case NG_PATCH_MODE_SET: *((uint32_t *)&buf) = privp->val[i].v4; break; case NG_PATCH_MODE_ADD: *((uint32_t *)&buf) += privp->val[i].v4; break; case NG_PATCH_MODE_SUB: *((uint32_t *)&buf) -= privp->val[i].v4; break; case NG_PATCH_MODE_MUL: *((uint32_t *)&buf) *= privp->val[i].v4; break; case NG_PATCH_MODE_DIV: *((uint32_t *)&buf) /= privp->val[i].v4; break; case NG_PATCH_MODE_NEG: *((int32_t *)&buf) = - *((int32_t *)&buf); break; case NG_PATCH_MODE_AND: *((uint32_t *)&buf) &= privp->val[i].v4; break; case NG_PATCH_MODE_OR: *((uint32_t *)&buf) |= privp->val[i].v4; break; case NG_PATCH_MODE_XOR: *((uint32_t *)&buf) ^= privp->val[i].v4; break; case NG_PATCH_MODE_SHL: *((uint32_t *)&buf) <<= privp->val[i].v4; break; case NG_PATCH_MODE_SHR: *((uint32_t *)&buf) >>= privp->val[i].v4; break; } *((int32_t *)&buf) = htonl(*((int32_t *)&buf)); break; case 8: *((int64_t *)&buf) = be64toh(*((int64_t *)&buf)); switch (conf->ops[i].mode) { case NG_PATCH_MODE_SET: *((uint64_t *)&buf) = privp->val[i].v8; break; case NG_PATCH_MODE_ADD: *((uint64_t *)&buf) += privp->val[i].v8; break; case NG_PATCH_MODE_SUB: *((uint64_t *)&buf) -= privp->val[i].v8; break; case NG_PATCH_MODE_MUL: *((uint64_t *)&buf) *= privp->val[i].v8; break; case NG_PATCH_MODE_DIV: *((uint64_t *)&buf) /= privp->val[i].v8; break; case NG_PATCH_MODE_NEG: *((int64_t *)&buf) = - *((int64_t *)&buf); break; case NG_PATCH_MODE_AND: *((uint64_t *)&buf) &= privp->val[i].v8; break; case NG_PATCH_MODE_OR: *((uint64_t *)&buf) |= privp->val[i].v8; break; case NG_PATCH_MODE_XOR: *((uint64_t *)&buf) ^= privp->val[i].v8; break; case NG_PATCH_MODE_SHL: *((uint64_t *)&buf) <<= privp->val[i].v8; break; case NG_PATCH_MODE_SHR: *((uint64_t *)&buf) >>= privp->val[i].v8; break; } *((int64_t *)&buf) = htobe64(*((int64_t *)&buf)); break; } m_copyback(m, conf->ops[i].offset, conf->ops[i].length, (caddr_t)&buf); patched = 1; } if (patched > 0) privp->stats.patched++; } static int ng_patch_rcvdata(hook_p hook, item_p item) { const priv_p priv = 
NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); struct mbuf *m; hook_p target; int error; priv->stats.received++; NGI_GET_M(item, m); if (priv->config != NULL && hook == priv->in && (m->m_flags & M_PKTHDR) != 0) { m = m_unshare(m,M_NOWAIT); if (m == NULL) { priv->stats.dropped++; NG_FREE_ITEM(item); return (ENOMEM); } do_patch(priv, m); m->m_pkthdr.csum_flags |= priv->config->csum_flags; } target = NULL; if (hook == priv->in) { /* return frames on 'in' hook if 'out' not connected */ if (priv->out != NULL) target = priv->out; else target = priv->in; } if (hook == priv->out && priv->in != NULL) target = priv->in; if (target == NULL) { priv->stats.dropped++; NG_FREE_ITEM(item); NG_FREE_M(m); return (0); } NG_FWD_NEW_DATA(error, item, target, m); return (error); } static int ng_patch_shutdown(node_p node) { const priv_p privdata = NG_NODE_PRIVATE(node); if (privdata->val != NULL) free(privdata->val, M_NETGRAPH); if (privdata->config != NULL) free(privdata->config, M_NETGRAPH); NG_NODE_SET_PRIVATE(node, NULL); NG_NODE_UNREF(node); free(privdata, M_NETGRAPH); return (0); } static int ng_patch_disconnect(hook_p hook) { priv_p priv; priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); if (hook == priv->in) { priv->in = NULL; } if (hook == priv->out) { priv->out = NULL; } if (NG_NODE_NUMHOOKS(NG_HOOK_NODE(hook)) == 0 && NG_NODE_IS_VALID(NG_HOOK_NODE(hook))) /* already shutting down? */ ng_rmnode_self(NG_HOOK_NODE(hook)); return (0); } Index: user/alc/PQ_LAUNDRY/sys/netgraph/ng_patch.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/netgraph/ng_patch.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/netgraph/ng_patch.h (revision 303642) @@ -1,107 +1,107 @@ /*- * Copyright (C) 2010 by Maxim Ignatenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NETGRAPH_NG_PATCH_H_ #define _NETGRAPH_NG_PATCH_H_ /* Node type name. */ #define NG_PATCH_NODE_TYPE "patch" /* Node type cookie. 
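A minimal user-space sketch of configuring the node implemented above, assuming libnetgraph; the control-socket descriptor, node path, patch offset and value are invented for illustration and error handling is abbreviated:

#include <sys/types.h>
#include <stdint.h>
#include <stdlib.h>
#include <netgraph.h>
#include <netgraph/ng_patch.h>

/* Build and send a one-operation NGM_PATCH_SETCONFIG message. */
static int
example_setconfig(int cs, const char *path)
{
	struct ng_patch_config *cfg;
	size_t len;

	len = sizeof(*cfg) + 1 * sizeof(struct ng_patch_op);
	cfg = calloc(1, len);
	if (cfg == NULL)
		return (-1);
	cfg->count = 1;
	cfg->csum_flags = 0;			/* no checksum recomputation */
	cfg->ops[0].offset = 8;			/* patch one byte at offset 8 */
	cfg->ops[0].length = 1;			/* must be 1, 2, 4 or 8 */
	cfg->ops[0].mode = NG_PATCH_MODE_SET;
	cfg->ops[0].value = 0x40;
	if (NgSendMsg(cs, path, NGM_PATCH_COOKIE, NGM_PATCH_SETCONFIG,
	    cfg, len) < 0) {
		free(cfg);
		return (-1);
	}
	free(cfg);
	return (0);
}

The same message can also be written in ASCII through ngctl(8); the NG_PATCH_*_TYPE_INFO descriptions in the header exist to support that conversion.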
*/ #define NGM_PATCH_COOKIE 1262445509 /* Hook names */ #define NG_PATCH_HOOK_IN "in" #define NG_PATCH_HOOK_OUT "out" /* Netgraph commands understood by this node type */ enum { NGM_PATCH_SETCONFIG = 1, NGM_PATCH_GETCONFIG, NGM_PATCH_GET_STATS, NGM_PATCH_CLR_STATS, NGM_PATCH_GETCLR_STATS }; /* Patching modes */ enum { NG_PATCH_MODE_SET = 1, NG_PATCH_MODE_ADD = 2, NG_PATCH_MODE_SUB = 3, NG_PATCH_MODE_MUL = 4, NG_PATCH_MODE_DIV = 5, NG_PATCH_MODE_NEG = 6, NG_PATCH_MODE_AND = 7, - NG_PATCH_MODE_OR = 8, + NG_PATCH_MODE_OR = 8, NG_PATCH_MODE_XOR = 9, NG_PATCH_MODE_SHL = 10, NG_PATCH_MODE_SHR = 11 }; struct ng_patch_op { uint64_t value; uint32_t offset; - uint16_t length; /* 1,2,4 or 8 (bytes) */ + uint16_t length; /* 1, 2, 4 or 8 (bytes) */ uint16_t mode; }; -#define NG_PATCH_OP_TYPE_INFO { \ - { "value", &ng_parse_uint64_type }, \ - { "offset", &ng_parse_uint32_type }, \ - { "length", &ng_parse_uint16_type }, \ - { "mode", &ng_parse_uint16_type }, \ - { NULL } \ +#define NG_PATCH_OP_TYPE_INFO { \ + { "value", &ng_parse_uint64_type }, \ + { "offset", &ng_parse_uint32_type }, \ + { "length", &ng_parse_uint16_type }, \ + { "mode", &ng_parse_uint16_type }, \ + { NULL } \ } struct ng_patch_config { uint32_t count; uint32_t csum_flags; struct ng_patch_op ops[]; }; -#define NG_PATCH_CONFIG_TYPE_INFO { \ - { "count", &ng_parse_uint32_type }, \ - { "csum_flags", &ng_parse_uint32_type }, \ - { "ops", &ng_patch_confarr_type }, \ - { NULL } \ +#define NG_PATCH_CONFIG_TYPE_INFO { \ + { "count", &ng_parse_uint32_type }, \ + { "csum_flags", &ng_parse_uint32_type }, \ + { "ops", &ng_patch_ops_array_type }, \ + { NULL } \ } struct ng_patch_stats { uint64_t received; uint64_t patched; uint64_t dropped; }; -#define NG_PATCH_STATS_TYPE_INFO { \ - { "received", &ng_parse_uint64_type }, \ - { "patched", &ng_parse_uint64_type }, \ - { "dropped", &ng_parse_uint64_type }, \ - { NULL } \ +#define NG_PATCH_STATS_TYPE_INFO { \ + { "received", &ng_parse_uint64_type }, \ + { "patched", &ng_parse_uint64_type }, \ + { "dropped", &ng_parse_uint64_type }, \ + { NULL } \ } #endif /* _NETGRAPH_NG_PATCH_H_ */ Index: user/alc/PQ_LAUNDRY/sys/netinet/tcp_subr.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/netinet/tcp_subr.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/netinet/tcp_subr.c (revision 303642) @@ -1,3018 +1,3093 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_tcpdebug.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 +#include #include #include #include #include #include #include #endif #ifdef TCP_RFC7413 #include #endif #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #ifdef TCPPCAP #include #endif #ifdef TCPDEBUG #include #endif #ifdef INET6 #include #endif #ifdef TCP_OFFLOAD #include #endif #ifdef IPSEC #include #include #ifdef INET6 #include #endif #include #include #endif /*IPSEC*/ #include #include #include VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS; #ifdef INET6 VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS; #endif struct rwlock tcp_function_lock; static int sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS) { int error, new; new = V_tcp_mssdflt; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr) { if (new < TCP_MINMSS) error = EINVAL; else V_tcp_mssdflt = new; } return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_mssdflt), 0, &sysctl_net_inet_tcp_mss_check, "I", "Default TCP Maximum Segment Size"); #ifdef INET6 static int sysctl_net_inet_tcp_mss_v6_check(SYSCTL_HANDLER_ARGS) { int error, new; new = V_tcp_v6mssdflt; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr) { if (new < TCP_MINMSS) error = EINVAL; else V_tcp_v6mssdflt = new; } return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_v6mssdflt), 0, &sysctl_net_inet_tcp_mss_v6_check, "I", "Default TCP Maximum Segment Size for IPv6"); #endif /* INET6 */ /* * Minimum MSS we accept and use. This prevents DoS attacks where * we are forced to a ridiculous low MSS like 20 and send hundreds * of packets instead of one. The effect scales with the available * bandwidth and quickly saturates the CPU and network interface * with packet generation and sending. Set to zero to disable MINMSS * checking. This setting prevents us from sending too small packets. 
*/ VNET_DEFINE(int, tcp_minmss) = TCP_MINMSS; SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_minmss), 0, "Minimum TCP Maximum Segment Size"); VNET_DEFINE(int, tcp_do_rfc1323) = 1; SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_rfc1323), 0, "Enable rfc1323 (high performance TCP) extensions"); static int tcp_log_debug = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW, &tcp_log_debug, 0, "Log errors caused by incoming TCP segments"); static int tcp_tcbhashsize; SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable"); static int do_tcpdrain = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, "Enable tcp_drain routine for extra help when low on mbufs"); SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcbinfo.ipi_count), 0, "Number of active PCBs"); static VNET_DEFINE(int, icmp_may_rst) = 1; #define V_icmp_may_rst VNET(icmp_may_rst) SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp_may_rst), 0, "Certain ICMP unreachable messages may abort connections in SYN_SENT"); static VNET_DEFINE(int, tcp_isn_reseed_interval) = 0; #define V_tcp_isn_reseed_interval VNET(tcp_isn_reseed_interval) SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_isn_reseed_interval), 0, "Seconds between reseeding of ISN secret"); static int tcp_soreceive_stream; SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN, &tcp_soreceive_stream, 0, "Using soreceive_stream for TCP sockets"); #ifdef TCP_SIGNATURE static int tcp_sig_checksigs = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, signature_verify_input, CTLFLAG_RW, &tcp_sig_checksigs, 0, "Verify RFC2385 digests on inbound traffic"); #endif VNET_DEFINE(uma_zone_t, sack_hole_zone); #define V_sack_hole_zone VNET(sack_hole_zone) VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]); static struct inpcb *tcp_notify(struct inpcb *, int); static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int); static void tcp_mtudisc(struct inpcb *, int); static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr); static struct tcp_function_block tcp_def_funcblk = { "default", tcp_output, tcp_do_segment, tcp_default_ctloutput, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0 }; int t_functions_inited = 0; struct tcp_funchead t_functions; static struct tcp_function_block *tcp_func_set_ptr = &tcp_def_funcblk; static void init_tcp_functions(void) { if (t_functions_inited == 0) { TAILQ_INIT(&t_functions); rw_init_flags(&tcp_function_lock, "tcp_func_lock" , 0); t_functions_inited = 1; } } static struct tcp_function_block * find_tcp_functions_locked(struct tcp_function_set *fs) { struct tcp_function *f; struct tcp_function_block *blk=NULL; TAILQ_FOREACH(f, &t_functions, tf_next) { if (strcmp(f->tf_fb->tfb_tcp_block_name, fs->function_set_name) == 0) { blk = f->tf_fb; break; } } return(blk); } static struct tcp_function_block * find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s) { struct tcp_function_block *rblk=NULL; struct tcp_function *f; TAILQ_FOREACH(f, &t_functions, tf_next) { if (f->tf_fb == blk) { rblk = blk; if (s) { *s = f; } break; } } return (rblk); } struct tcp_function_block * find_and_ref_tcp_functions(struct tcp_function_set *fs) { struct tcp_function_block *blk; 
rw_rlock(&tcp_function_lock); blk = find_tcp_functions_locked(fs); if (blk) refcount_acquire(&blk->tfb_refcnt); rw_runlock(&tcp_function_lock); return(blk); } struct tcp_function_block * find_and_ref_tcp_fb(struct tcp_function_block *blk) { struct tcp_function_block *rblk; rw_rlock(&tcp_function_lock); rblk = find_tcp_fb_locked(blk, NULL); if (rblk) refcount_acquire(&rblk->tfb_refcnt); rw_runlock(&tcp_function_lock); return(rblk); } static int sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS) { int error=ENOENT; struct tcp_function_set fs; struct tcp_function_block *blk; memset(&fs, 0, sizeof(fs)); rw_rlock(&tcp_function_lock); blk = find_tcp_fb_locked(tcp_func_set_ptr, NULL); if (blk) { /* Found him */ strcpy(fs.function_set_name, blk->tfb_tcp_block_name); fs.pcbcnt = blk->tfb_refcnt; } rw_runlock(&tcp_function_lock); error = sysctl_handle_string(oidp, fs.function_set_name, sizeof(fs.function_set_name), req); /* Check for error or no change */ if (error != 0 || req->newptr == NULL) return(error); rw_wlock(&tcp_function_lock); blk = find_tcp_functions_locked(&fs); if ((blk == NULL) || (blk->tfb_flags & TCP_FUNC_BEING_REMOVED)) { error = ENOENT; goto done; } tcp_func_set_ptr = blk; done: rw_wunlock(&tcp_function_lock); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_default, CTLTYPE_STRING | CTLFLAG_RW, NULL, 0, sysctl_net_inet_default_tcp_functions, "A", "Set/get the default TCP functions"); static int sysctl_net_inet_list_available(SYSCTL_HANDLER_ARGS) { int error, cnt, linesz; struct tcp_function *f; char *buffer, *cp; size_t bufsz, outsz; cnt = 0; rw_rlock(&tcp_function_lock); TAILQ_FOREACH(f, &t_functions, tf_next) { cnt++; } rw_runlock(&tcp_function_lock); bufsz = (cnt+2) * (TCP_FUNCTION_NAME_LEN_MAX + 12) + 1; buffer = malloc(bufsz, M_TEMP, M_WAITOK); error = 0; cp = buffer; linesz = snprintf(cp, bufsz, "\n%-32s%c %s\n", "Stack", 'D', "PCB count"); cp += linesz; bufsz -= linesz; outsz = linesz; rw_rlock(&tcp_function_lock); TAILQ_FOREACH(f, &t_functions, tf_next) { linesz = snprintf(cp, bufsz, "%-32s%c %u\n", f->tf_fb->tfb_tcp_block_name, (f->tf_fb == tcp_func_set_ptr) ? '*' : ' ', f->tf_fb->tfb_refcnt); if (linesz >= bufsz) { error = EOVERFLOW; break; } cp += linesz; bufsz -= linesz; outsz += linesz; } rw_runlock(&tcp_function_lock); if (error == 0) error = sysctl_handle_string(oidp, buffer, outsz + 1, req); free(buffer, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available, CTLTYPE_STRING|CTLFLAG_RD, NULL, 0, sysctl_net_inet_list_available, "A", "list available TCP Function sets"); /* * Target size of TCP PCB hash tables. Must be a power of two. * * Note that this can be overridden by the kernel environment * variable net.inet.tcp.tcbhashsize */ #ifndef TCBHASHSIZE #define TCBHASHSIZE 0 #endif /* * XXX * Callouts should be moved into struct tcp directly. They are currently * separate because the tcpcb structure is exported to userland for sysctl * parsing purposes, which do not know about callouts. */ struct tcpcb_mem { struct tcpcb tcb; struct tcp_timer tt; struct cc_var ccv; struct osd osd; }; static VNET_DEFINE(uma_zone_t, tcpcb_zone); #define V_tcpcb_zone VNET(tcpcb_zone) MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers"); MALLOC_DEFINE(M_TCPFUNCTIONS, "tcpfunc", "TCP function set memory"); static struct mtx isn_mtx; #define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF) #define ISN_LOCK() mtx_lock(&isn_mtx) #define ISN_UNLOCK() mtx_unlock(&isn_mtx) /* * TCP initialization. 
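The functions_default and functions_available sysctls above operate on stacks that have registered a struct tcp_function_block. As a hedged sketch of that registration path (the "example" stack and its module glue are invented for illustration; for brevity it reuses the default handlers, where a real alternate stack would supply its own):

/* Illustrative only: re-exports the default handlers under a new name. */
static struct tcp_function_block example_funcblk = {
	.tfb_tcp_block_name = "example",
	.tfb_tcp_output = tcp_output,
	.tfb_tcp_do_segment = tcp_do_segment,
	.tfb_tcp_ctloutput = tcp_default_ctloutput,
};

static int
example_stack_modevent(module_t mod, int type, void *data)
{
	switch (type) {
	case MOD_LOAD:
		/* Fails with EALREADY if the name is already registered. */
		return (register_tcp_functions(&example_funcblk, M_WAITOK));
	case MOD_UNLOAD:
		/* Fails with EBUSY while connections still reference it. */
		return (deregister_tcp_functions(&example_funcblk));
	default:
		return (EOPNOTSUPP);
	}
}

Once registered, the new name appears in net.inet.tcp.functions_available and can be selected through net.inet.tcp.functions_default.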
*/ static void tcp_zone_change(void *tag) { uma_zone_set_max(V_tcbinfo.ipi_zone, maxsockets); uma_zone_set_max(V_tcpcb_zone, maxsockets); tcp_tw_zone_change(); } static int tcp_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp = mem; INP_LOCK_INIT(inp, "inp", "tcpinp"); return (0); } /* * Take a value and get the next power of 2 that doesn't overflow. * Used to size the tcp_inpcb hash buckets. */ static int maketcp_hashsize(int size) { int hashsize; /* * auto tune. * get the next power of 2 higher than maxsockets. */ hashsize = 1 << fls(size); /* catch overflow, and just go one power of 2 smaller */ if (hashsize < size) { hashsize = 1 << (fls(size) - 1); } return (hashsize); } int register_tcp_functions(struct tcp_function_block *blk, int wait) { struct tcp_function_block *lblk; struct tcp_function *n; struct tcp_function_set fs; if (t_functions_inited == 0) { init_tcp_functions(); } if ((blk->tfb_tcp_output == NULL) || (blk->tfb_tcp_do_segment == NULL) || (blk->tfb_tcp_ctloutput == NULL) || (strlen(blk->tfb_tcp_block_name) == 0)) { /* * These functions are required and you * need a name. */ return (EINVAL); } if (blk->tfb_tcp_timer_stop_all || blk->tfb_tcp_timer_activate || blk->tfb_tcp_timer_active || blk->tfb_tcp_timer_stop) { /* * If you define one timer function you * must have them all. */ if ((blk->tfb_tcp_timer_stop_all == NULL) || (blk->tfb_tcp_timer_activate == NULL) || (blk->tfb_tcp_timer_active == NULL) || (blk->tfb_tcp_timer_stop == NULL)) { return (EINVAL); } } n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait); if (n == NULL) { return (ENOMEM); } n->tf_fb = blk; strcpy(fs.function_set_name, blk->tfb_tcp_block_name); rw_wlock(&tcp_function_lock); lblk = find_tcp_functions_locked(&fs); if (lblk) { /* Duplicate name space not allowed */ rw_wunlock(&tcp_function_lock); free(n, M_TCPFUNCTIONS); return (EALREADY); } refcount_init(&blk->tfb_refcnt, 0); blk->tfb_flags = 0; TAILQ_INSERT_TAIL(&t_functions, n, tf_next); rw_wunlock(&tcp_function_lock); return(0); } int deregister_tcp_functions(struct tcp_function_block *blk) { struct tcp_function_block *lblk; struct tcp_function *f; int error=ENOENT; if (strcmp(blk->tfb_tcp_block_name, "default") == 0) { /* You can't un-register the default */ return (EPERM); } rw_wlock(&tcp_function_lock); if (blk == tcp_func_set_ptr) { /* You can't free the current default */ rw_wunlock(&tcp_function_lock); return (EBUSY); } if (blk->tfb_refcnt) { /* Still tcb attached, mark it. */ blk->tfb_flags |= TCP_FUNC_BEING_REMOVED; rw_wunlock(&tcp_function_lock); return (EBUSY); } lblk = find_tcp_fb_locked(blk, &f); if (lblk) { /* Found */ TAILQ_REMOVE(&t_functions, f, tf_next); f->tf_fb = NULL; free(f, M_TCPFUNCTIONS); error = 0; } rw_wunlock(&tcp_function_lock); return (error); } void tcp_init(void) { const char *tcbhash_tuneable; int hashsize; tcbhash_tuneable = "net.inet.tcp.tcbhashsize"; if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN, &V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register helper hook\n", __func__); if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, &V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register helper hook\n", __func__); hashsize = TCBHASHSIZE; TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize); if (hashsize == 0) { /* * Auto tune the hash size based on maxsockets. 
* A perfect hash would have a 1:1 mapping * (hashsize = maxsockets) however it's been * suggested that O(2) average is better. */ hashsize = maketcp_hashsize(maxsockets / 4); /* * Our historical default is 512, * do not autotune lower than this. */ if (hashsize < 512) hashsize = 512; if (bootverbose && IS_DEFAULT_VNET(curvnet)) printf("%s: %s auto tuned to %d\n", __func__, tcbhash_tuneable, hashsize); } /* * We require a hashsize to be a power of two. * Previously if it was not a power of two we would just reset it * back to 512, which could be a nasty surprise if you did not notice * the error message. * Instead what we do is clip it to the closest power of two lower * than the specified hash value. */ if (!powerof2(hashsize)) { int oldhashsize = hashsize; hashsize = maketcp_hashsize(hashsize); /* prevent absurdly low value */ if (hashsize < 16) hashsize = 16; printf("%s: WARNING: TCB hash size not a power of 2, " "clipped from %d to %d.\n", __func__, oldhashsize, hashsize); } in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize, "tcp_inpcb", tcp_inpcb_init, NULL, 0, IPI_HASHFIELDS_4TUPLE); /* * These have to be type stable for the benefit of the timers. */ V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_zone_set_max(V_tcpcb_zone, maxsockets); uma_zone_set_warning(V_tcpcb_zone, "kern.ipc.maxsockets limit reached"); tcp_tw_init(); syncache_init(); tcp_hc_init(); TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack); V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* Skip initialization of globals for non-default instances. */ if (!IS_DEFAULT_VNET(curvnet)) return; tcp_reass_global_init(); /* XXX virtualize those bellow? */ tcp_delacktime = TCPTV_DELACK; tcp_keepinit = TCPTV_KEEP_INIT; tcp_keepidle = TCPTV_KEEP_IDLE; tcp_keepintvl = TCPTV_KEEPINTVL; tcp_maxpersistidle = TCPTV_KEEP_IDLE; tcp_msl = TCPTV_MSL; tcp_rexmit_min = TCPTV_MIN; if (tcp_rexmit_min < 1) tcp_rexmit_min = 1; tcp_persmin = TCPTV_PERSMIN; tcp_persmax = TCPTV_PERSMAX; tcp_rexmit_slop = TCPTV_CPU_VAR; tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT; tcp_tcbhashsize = hashsize; /* Setup the tcp function block list */ init_tcp_functions(); register_tcp_functions(&tcp_def_funcblk, M_WAITOK); if (tcp_soreceive_stream) { #ifdef INET tcp_usrreqs.pru_soreceive = soreceive_stream; #endif #ifdef INET6 tcp6_usrreqs.pru_soreceive = soreceive_stream; #endif /* INET6 */ } #ifdef INET6 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) #else /* INET6 */ #define TCP_MINPROTOHDR (sizeof(struct tcpiphdr)) #endif /* INET6 */ if (max_protohdr < TCP_MINPROTOHDR) max_protohdr = TCP_MINPROTOHDR; if (max_linkhdr + TCP_MINPROTOHDR > MHLEN) panic("tcp_init"); #undef TCP_MINPROTOHDR ISN_LOCK_INIT(); EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL, SHUTDOWN_PRI_DEFAULT); EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL, EVENTHANDLER_PRI_ANY); #ifdef TCPPCAP tcp_pcap_init(); #endif #ifdef TCP_RFC7413 tcp_fastopen_init(); #endif } #ifdef VIMAGE static void tcp_destroy(void *unused __unused) { int error, n; /* * All our processes are gone, all our sockets should be cleaned * up, which means, we should be past the tcp_discardcb() calls. * Sleep to let all tcpcb timers really disappear and cleanup. 
*/ for (;;) { INP_LIST_RLOCK(&V_tcbinfo); n = V_tcbinfo.ipi_count; INP_LIST_RUNLOCK(&V_tcbinfo); if (n == 0) break; pause("tcpdes", hz / 10); } tcp_hc_destroy(); syncache_destroy(); tcp_tw_destroy(); in_pcbinfo_destroy(&V_tcbinfo); /* tcp_discardcb() clears the sack_holes up. */ uma_zdestroy(V_sack_hole_zone); uma_zdestroy(V_tcpcb_zone); #ifdef TCP_RFC7413 /* * Cannot free the zone until all tcpcbs are released as we attach * the allocations to them. */ tcp_fastopen_destroy(); #endif error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_IN]); if (error != 0) { printf("%s: WARNING: unable to deregister helper hook " "type=%d, id=%d: error %d returned\n", __func__, HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN, error); } error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_OUT]); if (error != 0) { printf("%s: WARNING: unable to deregister helper hook " "type=%d, id=%d: error %d returned\n", __func__, HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, error); } } VNET_SYSUNINIT(tcp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, tcp_destroy, NULL); #endif void tcp_fini(void *xtp) { } /* * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb. * tcp_template used to store this data in mbufs, but we now recopy it out * of the tcpcb each time to conserve mbufs. */ void tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr) { struct tcphdr *th = (struct tcphdr *)tcp_ptr; INP_WLOCK_ASSERT(inp); #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { struct ip6_hdr *ip6; ip6 = (struct ip6_hdr *)ip_ptr; ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (inp->inp_flow & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_plen = htons(sizeof(struct tcphdr)); ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = inp->in6p_faddr; } #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET { struct ip *ip; ip = (struct ip *)ip_ptr; ip->ip_v = IPVERSION; ip->ip_hl = 5; ip->ip_tos = inp->inp_ip_tos; ip->ip_len = 0; ip->ip_id = 0; ip->ip_off = 0; ip->ip_ttl = inp->inp_ip_ttl; ip->ip_sum = 0; ip->ip_p = IPPROTO_TCP; ip->ip_src = inp->inp_laddr; ip->ip_dst = inp->inp_faddr; } #endif /* INET */ th->th_sport = inp->inp_lport; th->th_dport = inp->inp_fport; th->th_seq = 0; th->th_ack = 0; th->th_x2 = 0; th->th_off = 5; th->th_flags = 0; th->th_win = 0; th->th_urp = 0; th->th_sum = 0; /* in_pseudo() is called later for ipv4 */ } /* * Create template to be used to send tcp packets on a connection. * Allocates an mbuf and fills in a skeletal tcp/ip header. The only * use for this function is in keepalives, which use tcp_respond. */ struct tcptemp * tcpip_maketemplate(struct inpcb *inp) { struct tcptemp *t; t = malloc(sizeof(*t), M_TEMP, M_NOWAIT); if (t == NULL) return (NULL); tcpip_fillheaders(inp, (void *)&t->tt_ipgen, (void *)&t->tt_t); return (t); } /* * Send a single message to the TCP at address specified by * the given TCP/IP header. If m == NULL, then we make a copy * of the tcpiphdr at th and send directly to the addressed host. * This is used to force keep alive messages out using the TCP * template for a connection. If flags are given then we send * a message back to the TCP which originated the segment th, * and discard the mbuf containing it and any other attached mbufs. * * In any case the ack and sequence number of the transmitted * segment are as specified by the parameters. * * NOTE: If m != NULL, then th must point to *inside* the mbuf. 
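A usage sketch of the template/respond pair described above, roughly what the keepalive path mentioned in the comment does (locking and error handling omitted):

	struct tcptemp *t;

	t = tcpip_maketemplate(inp);
	if (t != NULL) {
		/* ACK-only probe: a stale sequence number makes the peer
		 * answer with its current state. */
		tcp_respond(tp, t->tt_ipgen, &t->tt_t, (struct mbuf *)NULL,
		    tp->rcv_nxt, tp->snd_una - 1, 0);
		free(t, M_TEMP);
	}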
*/ void tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, tcp_seq ack, tcp_seq seq, int flags) { struct tcpopt to; struct inpcb *inp; struct ip *ip; struct mbuf *optm; struct tcphdr *nth; u_char *optp; #ifdef INET6 struct ip6_hdr *ip6; int isipv6; #endif /* INET6 */ int optlen, tlen, win; bool incl_opts; KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL")); #ifdef INET6 isipv6 = ((struct ip *)ipgen)->ip_v == (IPV6_VERSION >> 4); ip6 = ipgen; #endif /* INET6 */ ip = ipgen; if (tp != NULL) { inp = tp->t_inpcb; KASSERT(inp != NULL, ("tcp control block w/o inpcb")); INP_WLOCK_ASSERT(inp); } else inp = NULL; incl_opts = false; win = 0; if (tp != NULL) { if (!(flags & TH_RST)) { win = sbspace(&inp->inp_socket->so_rcv); if (win > (long)TCP_MAXWIN << tp->rcv_scale) win = (long)TCP_MAXWIN << tp->rcv_scale; } if ((tp->t_flags & TF_NOOPT) == 0) incl_opts = true; } if (m == NULL) { m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; m->m_data += max_linkhdr; #ifdef INET6 if (isipv6) { bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(struct ip6_hdr)); ip6 = mtod(m, struct ip6_hdr *); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); ip = mtod(m, struct ip *); nth = (struct tcphdr *)(ip + 1); } bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); flags = TH_ACK; } else if (!M_WRITABLE(m)) { struct mbuf *n; /* Can't reuse 'm', allocate a new mbuf. */ n = m_gethdr(M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return; } if (!m_dup_pkthdr(n, m, M_NOWAIT)) { m_freem(m); m_freem(n); return; } n->m_data += max_linkhdr; /* m_len is set later */ #define xchg(a,b,type) { type t; t=a; a=b; b=t; } #ifdef INET6 if (isipv6) { bcopy((caddr_t)ip6, mtod(n, caddr_t), sizeof(struct ip6_hdr)); ip6 = mtod(n, struct ip6_hdr *); xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { bcopy((caddr_t)ip, mtod(n, caddr_t), sizeof(struct ip)); ip = mtod(n, struct ip *); xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t); nth = (struct tcphdr *)(ip + 1); } bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); xchg(nth->th_dport, nth->th_sport, uint16_t); th = nth; m_freem(m); m = n; } else { /* * reuse the mbuf. * XXX MRT We inherit the FIB, which is lucky. */ m_freem(m->m_next); m->m_next = NULL; m->m_data = (caddr_t)ipgen; /* m_len is set later */ #ifdef INET6 if (isipv6) { xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t); nth = (struct tcphdr *)(ip + 1); } if (th != nth) { /* * this is usually a case when an extension header * exists between the IPv6 header and the * TCP header. */ nth->th_sport = th->th_sport; nth->th_dport = th->th_dport; } xchg(nth->th_dport, nth->th_sport, uint16_t); #undef xchg } tlen = 0; #ifdef INET6 if (isipv6) tlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr); #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET tlen = sizeof (struct tcpiphdr); #endif #ifdef INVARIANTS m->m_len = 0; KASSERT(M_TRAILINGSPACE(m) >= tlen, ("Not enough trailing space for message (m=%p, need=%d, have=%ld)", m, tlen, (long)M_TRAILINGSPACE(m))); #endif m->m_len = tlen; to.to_flags = 0; if (incl_opts) { /* Make sure we have room. 
*/ if (M_TRAILINGSPACE(m) < TCP_MAXOLEN) { m->m_next = m_get(M_NOWAIT, MT_DATA); if (m->m_next) { optp = mtod(m->m_next, u_char *); optm = m->m_next; } else incl_opts = false; } else { optp = (u_char *) (nth + 1); optm = m; } } if (incl_opts) { /* Timestamps. */ if (tp->t_flags & TF_RCVD_TSTMP) { to.to_tsval = tcp_ts_getticks() + tp->ts_offset; to.to_tsecr = tp->ts_recent; to.to_flags |= TOF_TS; } #ifdef TCP_SIGNATURE /* TCP-MD5 (RFC2385). */ if (tp->t_flags & TF_SIGNATURE) to.to_flags |= TOF_SIGNATURE; #endif /* Add the options. */ tlen += optlen = tcp_addoptions(&to, optp); /* Update m_len in the correct mbuf. */ optm->m_len += optlen; } else optlen = 0; #ifdef INET6 if (isipv6) { ip6->ip6_flow = 0; ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_plen = htons(tlen - sizeof(*ip6)); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { ip->ip_len = htons(tlen); ip->ip_ttl = V_ip_defttl; if (V_path_mtu_discovery) ip->ip_off |= htons(IP_DF); } #endif m->m_pkthdr.len = tlen; m->m_pkthdr.rcvif = NULL; #ifdef MAC if (inp != NULL) { /* * Packet is associated with a socket, so allow the * label of the response to reflect the socket label. */ INP_WLOCK_ASSERT(inp); mac_inpcb_create_mbuf(inp, m); } else { /* * Packet is not associated with a socket, so possibly * update the label in place. */ mac_netinet_tcp_reply(m); } #endif nth->th_seq = htonl(seq); nth->th_ack = htonl(ack); nth->th_x2 = 0; nth->th_off = (sizeof (struct tcphdr) + optlen) >> 2; nth->th_flags = flags; if (tp != NULL) nth->th_win = htons((u_short) (win >> tp->rcv_scale)); else nth->th_win = htons((u_short)win); nth->th_urp = 0; #ifdef TCP_SIGNATURE if (to.to_flags & TOF_SIGNATURE) { tcp_signature_compute(m, 0, 0, optlen, to.to_signature, IPSEC_DIR_OUTBOUND); } #endif m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (isipv6) { m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; nth->th_sum = in6_cksum_pseudo(ip6, tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0); ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb : NULL, NULL); } #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET { m->m_pkthdr.csum_flags = CSUM_TCP; nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); } #endif /* INET */ #ifdef TCPDEBUG if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0); #endif TCP_PROBE3(debug__output, tp, th, mtod(m, const char *)); if (flags & TH_RST) TCP_PROBE5(accept__refused, NULL, NULL, mtod(m, const char *), tp, nth); TCP_PROBE5(send, NULL, tp, mtod(m, const char *), tp, nth); #ifdef INET6 if (isipv6) (void) ip6_output(m, NULL, NULL, 0, NULL, NULL, inp); #endif /* INET6 */ #if defined(INET) && defined(INET6) else #endif #ifdef INET (void) ip_output(m, NULL, NULL, 0, NULL, inp); #endif } /* * Create a new TCP control block, making an * empty reassembly queue and hooking it to the argument * protocol control block. The `inp' parameter must have * come from the zone allocator set up in tcp_init(). */ struct tcpcb * tcp_newtcpcb(struct inpcb *inp) { struct tcpcb_mem *tm; struct tcpcb *tp; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ tm = uma_zalloc(V_tcpcb_zone, M_NOWAIT | M_ZERO); if (tm == NULL) return (NULL); tp = &tm->tcb; /* Initialise cc_var struct for this tcpcb. 
*/ tp->ccv = &tm->ccv; tp->ccv->type = IPPROTO_TCP; tp->ccv->ccvc.tcp = tp; rw_rlock(&tcp_function_lock); tp->t_fb = tcp_func_set_ptr; refcount_acquire(&tp->t_fb->tfb_refcnt); rw_runlock(&tcp_function_lock); if (tp->t_fb->tfb_tcp_fb_init) { (*tp->t_fb->tfb_tcp_fb_init)(tp); } /* * Use the current system default CC algorithm. */ CC_LIST_RLOCK(); KASSERT(!STAILQ_EMPTY(&cc_list), ("cc_list is empty!")); CC_ALGO(tp) = CC_DEFAULT(); CC_LIST_RUNLOCK(); if (CC_ALGO(tp)->cb_init != NULL) if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) { if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp); refcount_release(&tp->t_fb->tfb_refcnt); uma_zfree(V_tcpcb_zone, tm); return (NULL); } tp->osd = &tm->osd; if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) { if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp); refcount_release(&tp->t_fb->tfb_refcnt); uma_zfree(V_tcpcb_zone, tm); return (NULL); } #ifdef VIMAGE tp->t_vnet = inp->inp_vnet; #endif tp->t_timers = &tm->tt; /* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */ tp->t_maxseg = #ifdef INET6 isipv6 ? V_tcp_v6mssdflt : #endif /* INET6 */ V_tcp_mssdflt; /* Set up our timeouts. */ callout_init(&tp->t_timers->tt_rexmt, 1); callout_init(&tp->t_timers->tt_persist, 1); callout_init(&tp->t_timers->tt_keep, 1); callout_init(&tp->t_timers->tt_2msl, 1); callout_init(&tp->t_timers->tt_delack, 1); if (V_tcp_do_rfc1323) tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); if (V_tcp_do_sack) tp->t_flags |= TF_SACK_PERMIT; TAILQ_INIT(&tp->snd_holes); /* * The tcpcb will hold a reference on its inpcb until tcp_discardcb() * is called. */ in_pcbref(inp); /* Reference for tcpcb */ tp->t_inpcb = inp; /* * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives * reasonable initial retransmit time. */ tp->t_srtt = TCPTV_SRTTBASE; tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4; tp->t_rttmin = tcp_rexmit_min; tp->t_rxtcur = TCPTV_RTOBASE; tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->t_rcvtime = ticks; /* * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = V_ip_defttl; inp->inp_ppcb = tp; #ifdef TCPPCAP /* * Init the TCP PCAP queues. */ tcp_pcap_tcpcb_init(tp); #endif return (tp); /* XXX */ } /* * Switch the congestion control algorithm back to NewReno for any active * control blocks using an algorithm which is about to go away. * This ensures the CC framework can allow the unload to proceed without leaving * any dangling pointers which would trigger a panic. * Returning non-zero would inform the CC framework that something went wrong * and it would be unsafe to allow the unload to proceed. However, there is no * way for this to occur with this implementation so we always return zero. */ int tcp_ccalgounload(struct cc_algo *unload_algo) { struct cc_algo *tmpalgo; struct inpcb *inp; struct tcpcb *tp; VNET_ITERATOR_DECL(vnet_iter); /* * Check all active control blocks across all network stacks and change * any that are using "unload_algo" back to NewReno. If "unload_algo" * requires cleanup code to be run, call it. 
*/ VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); INP_INFO_WLOCK(&V_tcbinfo); /* * New connections already part way through being initialised * with the CC algo we're removing will not race with this code * because the INP_INFO_WLOCK is held during initialisation. We * therefore don't enter the loop below until the connection * list has stabilised. */ LIST_FOREACH(inp, &V_tcb, inp_list) { INP_WLOCK(inp); /* Important to skip tcptw structs. */ if (!(inp->inp_flags & INP_TIMEWAIT) && (tp = intotcpcb(inp)) != NULL) { /* * By holding INP_WLOCK here, we are assured * that the connection is not currently * executing inside the CC module's functions * i.e. it is safe to make the switch back to * NewReno. */ if (CC_ALGO(tp) == unload_algo) { tmpalgo = CC_ALGO(tp); /* NewReno does not require any init. */ CC_ALGO(tp) = &newreno_cc_algo; if (tmpalgo->cb_destroy != NULL) tmpalgo->cb_destroy(tp->ccv); } } INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK(); return (0); } /* * Drop a TCP connection, reporting * the specified error. If connection is synchronized, * then send a RST to peer. */ struct tcpcb * tcp_drop(struct tcpcb *tp, int errno) { struct socket *so = tp->t_inpcb->inp_socket; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (TCPS_HAVERCVDSYN(tp->t_state)) { tcp_state_change(tp, TCPS_CLOSED); (void) tp->t_fb->tfb_tcp_output(tp); TCPSTAT_INC(tcps_drops); } else TCPSTAT_INC(tcps_conndrops); if (errno == ETIMEDOUT && tp->t_softerror) errno = tp->t_softerror; so->so_error = errno; return (tcp_close(tp)); } void tcp_discardcb(struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ int released; INP_WLOCK_ASSERT(inp); /* * Make sure that all of our timers are stopped before we delete the * PCB. * * If stopping a timer fails, we schedule a discard function in same * callout, and the last discard function called will take care of * deleting the tcpcb. */ tp->t_timers->tt_draincnt = 0; tcp_timer_stop(tp, TT_REXMT); tcp_timer_stop(tp, TT_PERSIST); tcp_timer_stop(tp, TT_KEEP); tcp_timer_stop(tp, TT_2MSL); tcp_timer_stop(tp, TT_DELACK); if (tp->t_fb->tfb_tcp_timer_stop_all) { /* * Call the stop-all function of the methods, * this function should call the tcp_timer_stop() * method with each of the function specific timeouts. * That stop will be called via the tfb_tcp_timer_stop() * which should use the async drain function of the * callout system (see tcp_var.h). */ tp->t_fb->tfb_tcp_timer_stop_all(tp); } /* * If we got enough samples through the srtt filter, * save the rtt and rttvar in the routing entry. * 'Enough' is arbitrarily defined as 4 rtt samples. * 4 samples is enough for the srtt filter to converge * to within enough % of the correct value; fewer samples * and we could save a bogus rtt. The danger is not high * as tcp quickly recovers from everything. * XXX: Works very well but needs some more statistics! */ if (tp->t_rttupdated >= 4) { struct hc_metrics_lite metrics; u_long ssthresh; bzero(&metrics, sizeof(metrics)); /* * Update the ssthresh always when the conditions below * are satisfied. This gives us better new start value * for the congestion avoidance for new connections. * ssthresh is only set if packet loss occurred on a session. * * XXXRW: 'so' may be NULL here, and/or socket buffer may be * being torn down. Ideally this code would not use 'so'. 
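For a concrete sense of the conversion performed below: with ssthresh = 32768 bytes and t_maxseg = 1460, the rounded division gives (32768 + 730) / 1460 = 22 segments, which is scaled back to 22 * (1460 + 40) = 33000 bytes of packet data (IPv4 header sizes assumed) before being written to the host cache via tcp_hc_update().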
*/ ssthresh = tp->snd_ssthresh; if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) { /* * convert the limit from user data bytes to * packets then to packet data bytes. */ ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg; if (ssthresh < 2) ssthresh = 2; ssthresh *= (u_long)(tp->t_maxseg + #ifdef INET6 (isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : #endif sizeof (struct tcpiphdr) #ifdef INET6 ) #endif ); } else ssthresh = 0; metrics.rmx_ssthresh = ssthresh; metrics.rmx_rtt = tp->t_srtt; metrics.rmx_rttvar = tp->t_rttvar; metrics.rmx_cwnd = tp->snd_cwnd; metrics.rmx_sendpipe = 0; metrics.rmx_recvpipe = 0; tcp_hc_update(&inp->inp_inc, &metrics); } /* free the reassembly queue, if any */ tcp_reass_flush(tp); #ifdef TCP_OFFLOAD /* Disconnect offload device, if any. */ if (tp->t_flags & TF_TOE) tcp_offload_detach(tp); #endif tcp_free_sackholes(tp); #ifdef TCPPCAP /* Free the TCP PCAP queues. */ tcp_pcap_drain(&(tp->t_inpkts)); tcp_pcap_drain(&(tp->t_outpkts)); #endif /* Allow the CC algorithm to clean up after itself. */ if (CC_ALGO(tp)->cb_destroy != NULL) CC_ALGO(tp)->cb_destroy(tp->ccv); khelp_destroy_osd(tp->osd); CC_ALGO(tp) = NULL; inp->inp_ppcb = NULL; if (tp->t_timers->tt_draincnt == 0) { /* We own the last reference on tcpcb, let's free it. */ if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp); refcount_release(&tp->t_fb->tfb_refcnt); tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp); released = in_pcbrele_wlocked(inp); KASSERT(!released, ("%s: inp %p should not have been released " "here", __func__, inp)); } } void tcp_timer_discard(void *ptp) { struct inpcb *inp; struct tcpcb *tp; tp = (struct tcpcb *)ptp; CURVNET_SET(tp->t_vnet); INP_INFO_RLOCK(&V_tcbinfo); inp = tp->t_inpcb; KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); INP_WLOCK(inp); KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0, ("%s: tcpcb has to be stopped here", __func__)); tp->t_timers->tt_draincnt--; if (tp->t_timers->tt_draincnt == 0) { /* We own the last reference on this tcpcb, let's free it. */ if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp); refcount_release(&tp->t_fb->tfb_refcnt); tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp); if (in_pcbrele_wlocked(inp)) { INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } } INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } /* * Attempt to close a TCP control block, marking it as dropped, and freeing * the socket if we hold the only reference. */ struct tcpcb * tcp_close(struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; struct socket *so; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); #ifdef TCP_OFFLOAD if (tp->t_state == TCPS_LISTEN) tcp_offload_listen_stop(tp); #endif #ifdef TCP_RFC7413 /* * This releases the TFO pending counter resource for TFO listen * sockets as well as passively-created TFO sockets that transition * from SYN_RECEIVED to CLOSED. 
*/ if (tp->t_tfo_pending) { tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; } #endif in_pcbdrop(inp); TCPSTAT_INC(tcps_closed); TCPSTATES_DEC(tp->t_state); KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL")); so = inp->inp_socket; soisdisconnected(so); if (inp->inp_flags & INP_SOCKREF) { KASSERT(so->so_state & SS_PROTOREF, ("tcp_close: !SS_PROTOREF")); inp->inp_flags &= ~INP_SOCKREF; INP_WUNLOCK(inp); ACCEPT_LOCK(); SOCK_LOCK(so); so->so_state &= ~SS_PROTOREF; sofree(so); return (NULL); } return (tp); } void tcp_drain(void) { VNET_ITERATOR_DECL(vnet_iter); if (!do_tcpdrain) return; VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); struct inpcb *inpb; struct tcpcb *tcpb; /* * Walk the tcpbs, if existing, and flush the reassembly queue, * if there is one... * XXX: The "Net/3" implementation doesn't imply that the TCP * reassembly queue should be flushed, but in a situation * where we're really low on mbufs, this is potentially * useful. */ INP_INFO_WLOCK(&V_tcbinfo); LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) { if (inpb->inp_flags & INP_TIMEWAIT) continue; INP_WLOCK(inpb); if ((tcpb = intotcpcb(inpb)) != NULL) { tcp_reass_flush(tcpb); tcp_clean_sackreport(tcpb); #ifdef TCPPCAP if (tcp_pcap_aggressive_free) { /* Free the TCP PCAP queues. */ tcp_pcap_drain(&(tcpb->t_inpkts)); tcp_pcap_drain(&(tcpb->t_outpkts)); } #endif } INP_WUNLOCK(inpb); } INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } /* * Notify a tcp user of an asynchronous error; * store error as soft error, but wake up user * (for now, won't do anything until can select for soft error). * * Do not wake up user since there currently is no mechanism for * reporting soft errors (yet - a kqueue filter may be added). */ static struct inpcb * tcp_notify(struct inpcb *inp, int error) { struct tcpcb *tp; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || (inp->inp_flags & INP_DROPPED)) return (inp); tp = intotcpcb(inp); KASSERT(tp != NULL, ("tcp_notify: tp == NULL")); /* * Ignore some errors if we are hooked up. * If connection hasn't completed, has retransmitted several times, * and receives a second error, give up now. This is better * than waiting a long time to establish a connection that * can never complete. */ if (tp->t_state == TCPS_ESTABLISHED && (error == EHOSTUNREACH || error == ENETUNREACH || error == EHOSTDOWN)) { if (inp->inp_route.ro_rt) { RTFREE(inp->inp_route.ro_rt); inp->inp_route.ro_rt = (struct rtentry *)NULL; } return (inp); } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && tp->t_softerror) { tp = tcp_drop(tp, error); if (tp != NULL) return (inp); else return (NULL); } else { tp->t_softerror = error; return (inp); } #if 0 wakeup( &so->so_timeo); sorwakeup(so); sowwakeup(so); #endif } static int tcp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, m, n, pcb_count; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == NULL) { n = V_tcbinfo.ipi_count + counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]); n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb); return (0); } if (req->newptr != NULL) return (EPERM); /* * OK, now we're committed to doing something. 
*/ INP_LIST_RLOCK(&V_tcbinfo); gencnt = V_tcbinfo.ipi_gencnt; n = V_tcbinfo.ipi_count; INP_LIST_RUNLOCK(&V_tcbinfo); m = counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]); error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) + (n + m) * sizeof(struct xtcpcb)); if (error != 0) return (error); xig.xig_len = sizeof xig; xig.xig_count = n + m; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); error = syncache_pcblist(req, m, &pcb_count); if (error) return (error); inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); INP_INFO_WLOCK(&V_tcbinfo); for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0; inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt) { /* * XXX: This use of cr_cansee(), introduced with * TCP state changes, is not quite right, but for * now, better than nothing. */ if (inp->inp_flags & INP_TIMEWAIT) { if (intotw(inp) != NULL) error = cr_cansee(req->td->td_ucred, intotw(inp)->tw_cred); else error = EINVAL; /* Skip this inp. */ } else error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) { in_pcbref(inp); inp_list[i++] = inp; } } INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(&V_tcbinfo); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xtcpcb xt; void *inp_ppcb; bzero(&xt, sizeof(xt)); xt.xt_len = sizeof xt; /* XXX should avoid extra copy */ bcopy(inp, &xt.xt_inp, sizeof *inp); inp_ppcb = inp->inp_ppcb; if (inp_ppcb == NULL) bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); else if (inp->inp_flags & INP_TIMEWAIT) { bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); xt.xt_tp.t_state = TCPS_TIME_WAIT; } else { bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); if (xt.xt_tp.t_timers) tcp_timer_to_xtimer(&xt.xt_tp, xt.xt_tp.t_timers, &xt.xt_timer); } if (inp->inp_socket != NULL) sotoxsocket(inp->inp_socket, &xt.xt_socket); else { bzero(&xt.xt_socket, sizeof xt.xt_socket); xt.xt_socket.xso_protocol = IPPROTO_TCP; } xt.xt_inp.inp_gencnt = inp->inp_gencnt; INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xt, sizeof xt); } else INP_RUNLOCK(inp); } INP_INFO_RLOCK(&V_tcbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (!in_pcbrele_rlocked(inp)) INP_RUNLOCK(inp); } INP_INFO_RUNLOCK(&V_tcbinfo); if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. 
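Consumers of the pcblist sysctl implemented here follow the usual probe-then-fetch pattern; a hedged user-space sketch (parsing of the individual xtcpcb records is omitted):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdlib.h>

/* Fetch the raw net.inet.tcp.pcblist blob produced by the handler above. */
static void *
fetch_tcp_pcblist(size_t *lenp)
{
	void *buf;
	size_t len;

	/* First call: oldptr == NULL, so the kernel returns a size estimate. */
	if (sysctlbyname("net.inet.tcp.pcblist", NULL, &len, NULL, 0) < 0)
		return (NULL);
	if ((buf = malloc(len)) == NULL)
		return (NULL);
	/* Second call: real copy-out; may still fail if the list grew. */
	if (sysctlbyname("net.inet.tcp.pcblist", buf, &len, NULL, 0) < 0) {
		free(buf);
		return (NULL);
	}
	*lenp = len;
	return (buf);
}

Comparing the xig_gen values at the head and tail of the returned blob tells the caller whether the list changed while it was being copied, which is what the comment above is pointing out.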
*/ INP_LIST_RLOCK(&V_tcbinfo); xig.xig_gen = V_tcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_tcbinfo.ipi_count + pcb_count; INP_LIST_RUNLOCK(&V_tcbinfo); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); #ifdef INET static int tcp_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in addrs[2]; struct inpcb *inp; int error; error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); inp = in_pcblookup(&V_tcbinfo, addrs[1].sin_addr, addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, tcp_getcred, "S,xucred", "Get the xucred of a TCP connection"); #endif /* INET */ #ifdef INET6 static int tcp6_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in6 addrs[2]; struct inpcb *inp; int error; #ifdef INET int mapped = 0; #endif error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 || (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) { return (error); } if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) { #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr)) mapped = 1; else #endif return (EINVAL); } #ifdef INET if (mapped == 1) inp = in_pcblookup(&V_tcbinfo, *(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12], addrs[1].sin6_port, *(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12], addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL); else #endif inp = in6_pcblookup(&V_tcbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, tcp6_getcred, "S,xucred", "Get the xucred of a TCP6 connection"); #endif /* INET6 */ #ifdef INET void tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct ip *ip = vip; struct tcphdr *th; struct in_addr faddr; struct inpcb *inp; struct tcpcb *tp; struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; struct icmp *icp; struct in_conninfo inc; tcp_seq icmp_tcp_seq; int mtu; faddr = ((struct sockaddr_in *)sa)->sin_addr; if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc_notify; else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && ip) notify = tcp_drop_syn_sent; /* * Hostdead is ugly because it goes linearly through all PCBs. 
* XXX: We never get this from ICMP, otherwise it makes an * excellent DoS attack on machines with many connections. */ else if (cmd == PRC_HOSTDEAD) ip = NULL; else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) return; if (ip == NULL) { in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify); return; } icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip)); th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); INP_INFO_RLOCK(&V_tcbinfo); inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL); if (inp != NULL && PRC_IS_REDIRECT(cmd)) { /* signal EHOSTDOWN, as it flushes the cached route */ inp = (*notify)(inp, EHOSTDOWN); if (inp != NULL) INP_WUNLOCK(inp); } else if (inp != NULL) { if (!(inp->inp_flags & INP_TIMEWAIT) && !(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket == NULL)) { icmp_tcp_seq = ntohl(th->th_seq); tp = intotcpcb(inp); if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) && SEQ_LT(icmp_tcp_seq, tp->snd_max)) { if (cmd == PRC_MSGSIZE) { /* * MTU discovery: * If we got a needfrag set the MTU * in the route to the suggested new * value (if given) and then notify. */ mtu = ntohs(icp->icmp_nextmtu); /* * If no alternative MTU was * proposed, try the next smaller * one. */ if (!mtu) mtu = ip_next_mtu( ntohs(ip->ip_len), 1); if (mtu < V_tcp_minmss + sizeof(struct tcpiphdr)) mtu = V_tcp_minmss + sizeof(struct tcpiphdr); /* * Only process the offered MTU if it * is smaller than the current one. */ if (mtu < tp->t_maxseg + sizeof(struct tcpiphdr)) { bzero(&inc, sizeof(inc)); inc.inc_faddr = faddr; inc.inc_fibnum = inp->inp_inc.inc_fibnum; tcp_hc_updatemtu(&inc, mtu); tcp_mtudisc(inp, mtu); } } else inp = (*notify)(inp, inetctlerrmap[cmd]); } } if (inp != NULL) INP_WUNLOCK(inp); } else { bzero(&inc, sizeof(inc)); inc.inc_fport = th->th_dport; inc.inc_lport = th->th_sport; inc.inc_faddr = faddr; inc.inc_laddr = ip->ip_src; syncache_unreach(&inc, th); } INP_INFO_RUNLOCK(&V_tcbinfo); } #endif /* INET */ #ifdef INET6 void tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) { - struct tcphdr th; + struct in6_addr *dst; + struct tcphdr *th; struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; struct ip6_hdr *ip6; struct mbuf *m; + struct inpcb *inp; + struct tcpcb *tp; + struct icmp6_hdr *icmp6; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; - int off; - struct tcp_portonly { - u_int16_t th_sport; - u_int16_t th_dport; - } *thp; + struct in_conninfo inc; + tcp_seq icmp_tcp_seq; + unsigned int mtu; + unsigned int off; + if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; - if (cmd == PRC_MSGSIZE) - notify = tcp_mtudisc_notify; - else if (!PRC_IS_REDIRECT(cmd) && - ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0)) - return; - /* if the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; + icmp6 = ip6cp->ip6c_icmp6; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; sa6_src = ip6cp->ip6c_src; + dst = ip6cp->ip6c_finaldst; } else { m = NULL; ip6 = NULL; off = 0; /* fool gcc */ sa6_src = &sa6_any; + dst = NULL; } - if (ip6 != NULL) { - struct in_conninfo inc; - /* - * XXX: We assume that when IPV6 is non NULL, - * M and OFF are valid. 
- */ + if (cmd == PRC_MSGSIZE) + notify = tcp_mtudisc_notify; + else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || + cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && + ip6 != NULL) + notify = tcp_drop_syn_sent; - /* check if we can safely examine src and dst ports */ - if (m->m_pkthdr.len < off + sizeof(*thp)) - return; + /* + * Hostdead is ugly because it goes linearly through all PCBs. + * XXX: We never get this from ICMP, otherwise it makes an + * excellent DoS attack on machines with many connections. + */ + else if (cmd == PRC_HOSTDEAD) + ip6 = NULL; + else if ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0) + return; - bzero(&th, sizeof(th)); - m_copydata(m, off, sizeof(*thp), (caddr_t)&th); + if (ip6 == NULL) { + in6_pcbnotify(&V_tcbinfo, sa, 0, + (const struct sockaddr *)sa6_src, + 0, cmd, NULL, notify); + return; + } - in6_pcbnotify(&V_tcbinfo, sa, th.th_dport, - (struct sockaddr *)ip6cp->ip6c_src, - th.th_sport, cmd, NULL, notify); + /* Check if we can safely get the ports from the tcp hdr */ + if (m == NULL || + (m->m_pkthdr.len < + (int32_t) (off + offsetof(struct tcphdr, th_seq)))) { + return; + } + th = (struct tcphdr *) mtodo(ip6cp->ip6c_m, ip6cp->ip6c_off); + INP_INFO_RLOCK(&V_tcbinfo); + inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, th->th_dport, + &ip6->ip6_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL); + if (inp != NULL && PRC_IS_REDIRECT(cmd)) { + /* signal EHOSTDOWN, as it flushes the cached route */ + inp = (*notify)(inp, EHOSTDOWN); + if (inp != NULL) + INP_WUNLOCK(inp); + } else if (inp != NULL) { + if (!(inp->inp_flags & INP_TIMEWAIT) && + !(inp->inp_flags & INP_DROPPED) && + !(inp->inp_socket == NULL)) { + icmp_tcp_seq = ntohl(th->th_seq); + tp = intotcpcb(inp); + if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) && + SEQ_LT(icmp_tcp_seq, tp->snd_max)) { + if (cmd == PRC_MSGSIZE) { + /* + * MTU discovery: + * If we got a needfrag set the MTU + * in the route to the suggested new + * value (if given) and then notify. + */ + mtu = ntohl(icmp6->icmp6_mtu); + /* + * If no alternative MTU was + * proposed, or the proposed + * MTU was too small, set to + * the min. + */ + if (mtu < IPV6_MMTU) + mtu = IPV6_MMTU - 8; + + + bzero(&inc, sizeof(inc)); + inc.inc_fibnum = M_GETFIB(m); + inc.inc_flags |= INC_ISIPV6; + inc.inc6_faddr = *dst; + if (in6_setscope(&inc.inc6_faddr, + m->m_pkthdr.rcvif, NULL)) + goto unlock_inp; + + /* + * Only process the offered MTU if it + * is smaller than the current one. + */ + if (mtu < tp->t_maxseg + + (sizeof (*th) + sizeof (*ip6))) { + tcp_hc_updatemtu(&inc, mtu); + tcp_mtudisc(inp, mtu); + ICMP6STAT_INC(icp6s_pmtuchg); + } + } else + inp = (*notify)(inp, + inet6ctlerrmap[cmd]); + } + } +unlock_inp: + if (inp != NULL) + INP_WUNLOCK(inp); + } else { bzero(&inc, sizeof(inc)); - inc.inc_fport = th.th_dport; - inc.inc_lport = th.th_sport; - inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr; - inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr; + inc.inc_fibnum = M_GETFIB(m); inc.inc_flags |= INC_ISIPV6; - INP_INFO_RLOCK(&V_tcbinfo); - syncache_unreach(&inc, &th); - INP_INFO_RUNLOCK(&V_tcbinfo); - } else - in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src, - 0, cmd, NULL, notify); + inc.inc_fport = th->th_dport; + inc.inc_lport = th->th_sport; + inc.inc6_faddr = *dst; + inc.inc6_laddr = ip6->ip6_src; + syncache_unreach(&inc, th); + } + INP_INFO_RUNLOCK(&V_tcbinfo); } #endif /* INET6 */ /* * Following is where TCP initial sequence number generation occurs. 
* * There are two places where we must use initial sequence numbers: * 1. In SYN-ACK packets. * 2. In SYN packets. * * All ISNs for SYN-ACK packets are generated by the syncache. See * tcp_syncache.c for details. * * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling * depends on this property. In addition, these ISNs should be * unguessable so as to prevent connection hijacking. To satisfy * the requirements of this situation, the algorithm outlined in * RFC 1948 is used, with only small modifications. * * Implementation details: * * Time is based off the system timer, and is corrected so that it * increases by one megabyte per second. This allows for proper * recycling on high speed LANs while still leaving over an hour * before rollover. * * As reading the *exact* system time is too expensive to be done * whenever setting up a TCP connection, we increment the time * offset in two ways. First, a small random positive increment * is added to isn_offset for each connection that is set up. * Second, the function tcp_isn_tick fires once per clock tick * and increments isn_offset as necessary so that sequence numbers * are incremented at approximately ISN_BYTES_PER_SECOND. The * random positive increments serve only to ensure that the same * exact sequence number is never sent out twice (as could otherwise * happen when a port is recycled in less than the system tick * interval.) * * net.inet.tcp.isn_reseed_interval controls the number of seconds * between seeding of isn_secret. This is normally set to zero, * as reseeding should not be necessary. * * Locking of the global variables isn_secret, isn_last_reseed, isn_offset, * isn_offset_old, and isn_ctx is performed using the TCP pcbinfo lock. In * general, this means holding an exclusive (write) lock. */ #define ISN_BYTES_PER_SECOND 1048576 #define ISN_STATIC_INCREMENT 4096 #define ISN_RANDOM_INCREMENT (4096 - 1) static VNET_DEFINE(u_char, isn_secret[32]); static VNET_DEFINE(int, isn_last); static VNET_DEFINE(int, isn_last_reseed); static VNET_DEFINE(u_int32_t, isn_offset); static VNET_DEFINE(u_int32_t, isn_offset_old); #define V_isn_secret VNET(isn_secret) #define V_isn_last VNET(isn_last) #define V_isn_last_reseed VNET(isn_last_reseed) #define V_isn_offset VNET(isn_offset) #define V_isn_offset_old VNET(isn_offset_old) tcp_seq tcp_new_isn(struct tcpcb *tp) { MD5_CTX isn_ctx; u_int32_t md5_buffer[4]; tcp_seq new_isn; u_int32_t projected_offset; INP_WLOCK_ASSERT(tp->t_inpcb); ISN_LOCK(); /* Seed if this is the first use, reseed if requested. */ if ((V_isn_last_reseed == 0) || ((V_tcp_isn_reseed_interval > 0) && (((u_int)V_isn_last_reseed + (u_int)V_tcp_isn_reseed_interval*hz) < (u_int)ticks))) { read_random(&V_isn_secret, sizeof(V_isn_secret)); V_isn_last_reseed = ticks; } /* Compute the md5 hash and return the ISN. 
*/ MD5Init(&isn_ctx); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_fport, sizeof(u_short)); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_lport, sizeof(u_short)); #ifdef INET6 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) { MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_faddr, sizeof(struct in6_addr)); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_laddr, sizeof(struct in6_addr)); } else #endif { MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_faddr, sizeof(struct in_addr)); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_laddr, sizeof(struct in_addr)); } MD5Update(&isn_ctx, (u_char *) &V_isn_secret, sizeof(V_isn_secret)); MD5Final((u_char *) &md5_buffer, &isn_ctx); new_isn = (tcp_seq) md5_buffer[0]; V_isn_offset += ISN_STATIC_INCREMENT + (arc4random() & ISN_RANDOM_INCREMENT); if (ticks != V_isn_last) { projected_offset = V_isn_offset_old + ISN_BYTES_PER_SECOND / hz * (ticks - V_isn_last); if (SEQ_GT(projected_offset, V_isn_offset)) V_isn_offset = projected_offset; V_isn_offset_old = V_isn_offset; V_isn_last = ticks; } new_isn += V_isn_offset; ISN_UNLOCK(); return (new_isn); } /* * When a specific ICMP unreachable message is received and the * connection state is SYN-SENT, drop the connection. This behavior * is controlled by the icmp_may_rst sysctl. */ struct inpcb * tcp_drop_syn_sent(struct inpcb *inp, int errno) { struct tcpcb *tp; INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || (inp->inp_flags & INP_DROPPED)) return (inp); tp = intotcpcb(inp); if (tp->t_state != TCPS_SYN_SENT) return (inp); tp = tcp_drop(tp, errno); if (tp != NULL) return (inp); else return (NULL); } /* * When `need fragmentation' ICMP is received, update our idea of the MSS * based on the new value. Also nudge TCP to send something, since we * know the packet we just sent was dropped. * This duplicates some code in the tcp_mss() function in tcp_input.c. */ static struct inpcb * tcp_mtudisc_notify(struct inpcb *inp, int error) { tcp_mtudisc(inp, -1); return (inp); } static void tcp_mtudisc(struct inpcb *inp, int mtuoffer) { struct tcpcb *tp; struct socket *so; INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || (inp->inp_flags & INP_DROPPED)) return; tp = intotcpcb(inp); KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL")); tcp_mss_update(tp, -1, mtuoffer, NULL, NULL); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_snd); /* If the mss is larger than the socket buffer, decrease the mss. */ if (so->so_snd.sb_hiwat < tp->t_maxseg) tp->t_maxseg = so->so_snd.sb_hiwat; SOCKBUF_UNLOCK(&so->so_snd); TCPSTAT_INC(tcps_mturesent); tp->t_rtttime = 0; tp->snd_nxt = tp->snd_una; tcp_free_sackholes(tp); tp->snd_recover = tp->snd_max; if (tp->t_flags & TF_SACK_PERMIT) EXIT_FASTRECOVERY(tp->t_flags); tp->t_fb->tfb_tcp_output(tp); } #ifdef INET /* * Look-up the routing entry to the peer of this inpcb. If no route * is found and it cannot be allocated, then return 0. This routine * is called by TCP routines that access the rmx structure and by * tcp_mss_update to get the peer/interface MTU. */ u_long tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap) { struct nhop4_extended nh4; struct ifnet *ifp; u_long maxmtu = 0; KASSERT(inc != NULL, ("tcp_maxmtu with NULL in_conninfo pointer")); if (inc->inc_faddr.s_addr != INADDR_ANY) { if (fib4_lookup_nh_ext(inc->inc_fibnum, inc->inc_faddr, NHR_REF, 0, &nh4) != 0) return (0); ifp = nh4.nh_ifp; maxmtu = nh4.nh_mtu; /* Report additional interface capabilities. 
*/ if (cap != NULL) { if (ifp->if_capenable & IFCAP_TSO4 && ifp->if_hwassist & CSUM_TSO) { cap->ifcap |= CSUM_TSO; cap->tsomax = ifp->if_hw_tsomax; cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } fib4_free_nh_ext(inc->inc_fibnum, &nh4); } return (maxmtu); } #endif /* INET */ #ifdef INET6 u_long tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap) { struct nhop6_extended nh6; struct in6_addr dst6; uint32_t scopeid; struct ifnet *ifp; u_long maxmtu = 0; KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer")); if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) { in6_splitscope(&inc->inc6_faddr, &dst6, &scopeid); if (fib6_lookup_nh_ext(inc->inc_fibnum, &dst6, scopeid, 0, 0, &nh6) != 0) return (0); ifp = nh6.nh_ifp; maxmtu = nh6.nh_mtu; /* Report additional interface capabilities. */ if (cap != NULL) { if (ifp->if_capenable & IFCAP_TSO6 && ifp->if_hwassist & CSUM_TSO) { cap->ifcap |= CSUM_TSO; cap->tsomax = ifp->if_hw_tsomax; cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } fib6_free_nh_ext(inc->inc_fibnum, &nh6); } return (maxmtu); } #endif /* INET6 */ /* * Calculate effective SMSS per RFC5681 definition for a given TCP * connection at its current state, taking into account SACK and etc. */ u_int tcp_maxseg(const struct tcpcb *tp) { u_int optlen; if (tp->t_flags & TF_NOOPT) return (tp->t_maxseg); /* * Here we have a simplified code from tcp_addoptions(), * without a proper loop, and having most of paddings hardcoded. * We might make mistakes with padding here in some edge cases, * but this is harmless, since result of tcp_maxseg() is used * only in cwnd and ssthresh estimations. */ #define PAD(len) ((((len) / 4) + !!((len) % 4)) * 4) if (TCPS_HAVEESTABLISHED(tp->t_state)) { if (tp->t_flags & TF_RCVD_TSTMP) optlen = TCPOLEN_TSTAMP_APPA; else optlen = 0; #ifdef TCP_SIGNATURE if (tp->t_flags & TF_SIGNATURE) optlen += PAD(TCPOLEN_SIGNATURE); #endif if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) { optlen += TCPOLEN_SACKHDR; optlen += tp->rcv_numsacks * TCPOLEN_SACK; optlen = PAD(optlen); } } else { if (tp->t_flags & TF_REQ_TSTMP) optlen = TCPOLEN_TSTAMP_APPA; else optlen = PAD(TCPOLEN_MAXSEG); if (tp->t_flags & TF_REQ_SCALE) optlen += PAD(TCPOLEN_WINDOW); #ifdef TCP_SIGNATURE if (tp->t_flags & TF_SIGNATURE) optlen += PAD(TCPOLEN_SIGNATURE); #endif if (tp->t_flags & TF_SACK_PERMIT) optlen += PAD(TCPOLEN_SACK_PERMITTED); } #undef PAD optlen = min(optlen, TCP_MAXOLEN); return (tp->t_maxseg - optlen); } #ifdef IPSEC /* compute ESP/AH header size for TCP, including outer IP header. 
*/ size_t ipsec_hdrsiz_tcp(struct tcpcb *tp) { struct inpcb *inp; struct mbuf *m; size_t hdrsiz; struct ip *ip; #ifdef INET6 struct ip6_hdr *ip6; #endif struct tcphdr *th; if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL) || (!key_havesp(IPSEC_DIR_OUTBOUND))) return (0); m = m_gethdr(M_NOWAIT, MT_DATA); if (!m) return (0); #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)(ip6 + 1); m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); tcpip_fillheaders(inp, ip6, th); hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); } else #endif /* INET6 */ { ip = mtod(m, struct ip *); th = (struct tcphdr *)(ip + 1); m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr); tcpip_fillheaders(inp, ip, th); hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); } m_free(m); return (hdrsiz); } #endif /* IPSEC */ #ifdef TCP_SIGNATURE /* * Callback function invoked by m_apply() to digest TCP segment data * contained within an mbuf chain. */ static int tcp_signature_apply(void *fstate, void *data, u_int len) { MD5Update(fstate, (u_char *)data, len); return (0); } /* * XXX The key is retrieved from the system's PF_KEY SADB, by keying a * search with the destination IP address, and a 'magic SPI' to be * determined by the application. This is hardcoded elsewhere to 1179 */ struct secasvar * tcp_get_sav(struct mbuf *m, u_int direction) { union sockaddr_union dst; struct secasvar *sav; struct ip *ip; #ifdef INET6 struct ip6_hdr *ip6; char ip6buf[INET6_ADDRSTRLEN]; #endif /* Extract the destination from the IP header in the mbuf. */ bzero(&dst, sizeof(union sockaddr_union)); ip = mtod(m, struct ip *); #ifdef INET6 ip6 = NULL; /* Make the compiler happy. */ #endif switch (ip->ip_v) { #ifdef INET case IPVERSION: dst.sa.sa_len = sizeof(struct sockaddr_in); dst.sa.sa_family = AF_INET; dst.sin.sin_addr = (direction == IPSEC_DIR_INBOUND) ? ip->ip_src : ip->ip_dst; break; #endif #ifdef INET6 case (IPV6_VERSION >> 4): ip6 = mtod(m, struct ip6_hdr *); dst.sa.sa_len = sizeof(struct sockaddr_in6); dst.sa.sa_family = AF_INET6; dst.sin6.sin6_addr = (direction == IPSEC_DIR_INBOUND) ? ip6->ip6_src : ip6->ip6_dst; break; #endif default: return (NULL); /* NOTREACHED */ break; } /* Look up an SADB entry which matches the address of the peer. */ sav = KEY_ALLOCSA(&dst, IPPROTO_TCP, htonl(TCP_SIG_SPI)); if (sav == NULL) { ipseclog((LOG_ERR, "%s: SADB lookup failed for %s\n", __func__, (ip->ip_v == IPVERSION) ? inet_ntoa(dst.sin.sin_addr) : #ifdef INET6 (ip->ip_v == (IPV6_VERSION >> 4)) ? ip6_sprintf(ip6buf, &dst.sin6.sin6_addr) : #endif "(unsupported)")); } return (sav); } /* * Compute TCP-MD5 hash of a TCP segment. (RFC2385) * * Parameters: * m pointer to head of mbuf chain * len length of TCP segment data, excluding options * optlen length of TCP segment options * buf pointer to storage for computed MD5 digest * sav pointer to security assosiation * * We do this over ip, tcphdr, segment data, and the key in the SADB. * When called from tcp_input(), we can be sure that th_sum has been * zeroed out and verified already. * * Releases reference to SADB key before return. * * Return 0 if successful, otherwise return -1. 
* */ int tcp_signature_do_compute(struct mbuf *m, int len, int optlen, u_char *buf, struct secasvar *sav) { #ifdef INET struct ippseudo ippseudo; #endif MD5_CTX ctx; int doff; struct ip *ip; #ifdef INET struct ipovly *ipovly; #endif struct tcphdr *th; #ifdef INET6 struct ip6_hdr *ip6; struct in6_addr in6; uint32_t plen; uint16_t nhdr; #endif u_short savecsum; KASSERT(m != NULL, ("NULL mbuf chain")); KASSERT(buf != NULL, ("NULL signature pointer")); /* Extract the destination from the IP header in the mbuf. */ ip = mtod(m, struct ip *); #ifdef INET6 ip6 = NULL; /* Make the compiler happy. */ #endif MD5Init(&ctx); /* * Step 1: Update MD5 hash with IP(v6) pseudo-header. * * XXX The ippseudo header MUST be digested in network byte order, * or else we'll fail the regression test. Assume all fields we've * been doing arithmetic on have been in host byte order. * XXX One cannot depend on ipovly->ih_len here. When called from * tcp_output(), the underlying ip_len member has not yet been set. */ switch (ip->ip_v) { #ifdef INET case IPVERSION: ipovly = (struct ipovly *)ip; ippseudo.ippseudo_src = ipovly->ih_src; ippseudo.ippseudo_dst = ipovly->ih_dst; ippseudo.ippseudo_pad = 0; ippseudo.ippseudo_p = IPPROTO_TCP; ippseudo.ippseudo_len = htons(len + sizeof(struct tcphdr) + optlen); MD5Update(&ctx, (char *)&ippseudo, sizeof(struct ippseudo)); th = (struct tcphdr *)((u_char *)ip + sizeof(struct ip)); doff = sizeof(struct ip) + sizeof(struct tcphdr) + optlen; break; #endif #ifdef INET6 /* * RFC 2385, 2.0 Proposal * For IPv6, the pseudo-header is as described in RFC 2460, namely the * 128-bit source IPv6 address, 128-bit destination IPv6 address, zero- * extended next header value (to form 32 bits), and 32-bit segment * length. * Note: Upper-Layer Packet Length comes before Next Header. */ case (IPV6_VERSION >> 4): in6 = ip6->ip6_src; in6_clearscope(&in6); MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr)); in6 = ip6->ip6_dst; in6_clearscope(&in6); MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr)); plen = htonl(len + sizeof(struct tcphdr) + optlen); MD5Update(&ctx, (char *)&plen, sizeof(uint32_t)); nhdr = 0; MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); nhdr = IPPROTO_TCP; MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); th = (struct tcphdr *)((u_char *)ip6 + sizeof(struct ip6_hdr)); doff = sizeof(struct ip6_hdr) + sizeof(struct tcphdr) + optlen; break; #endif default: KEY_FREESAV(&sav); return (-1); /* NOTREACHED */ break; } /* * Step 2: Update MD5 hash with TCP header, excluding options. * The TCP checksum must be set to zero. */ savecsum = th->th_sum; th->th_sum = 0; MD5Update(&ctx, (char *)th, sizeof(struct tcphdr)); th->th_sum = savecsum; /* * Step 3: Update MD5 hash with TCP segment data. * Use m_apply() to avoid an early m_pullup(). */ if (len > 0) m_apply(m, doff, len, tcp_signature_apply, &ctx); /* * Step 4: Update MD5 hash with shared secret. */ MD5Update(&ctx, sav->key_auth->key_data, _KEYLEN(sav->key_auth)); MD5Final(buf, &ctx); key_sa_recordxfer(sav, m); KEY_FREESAV(&sav); return (0); } /* * Compute TCP-MD5 hash of a TCP segment. (RFC2385) * * Return 0 if successful, otherwise return -1. */ int tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen, u_char *buf, u_int direction) { struct secasvar *sav; if ((sav = tcp_get_sav(m, direction)) == NULL) return (-1); return (tcp_signature_do_compute(m, len, optlen, buf, sav)); } /* * Verify the TCP-MD5 hash of a TCP segment. 
(RFC2385) * * Parameters: * m pointer to head of mbuf chain * len length of TCP segment data, excluding options * optlen length of TCP segment options * buf pointer to storage for computed MD5 digest * direction direction of flow (IPSEC_DIR_INBOUND or OUTBOUND) * * Return 1 if successful, otherwise return 0. */ int tcp_signature_verify(struct mbuf *m, int off0, int tlen, int optlen, struct tcpopt *to, struct tcphdr *th, u_int tcpbflag) { char tmpdigest[TCP_SIGLEN]; if (tcp_sig_checksigs == 0) return (1); if ((tcpbflag & TF_SIGNATURE) == 0) { if ((to->to_flags & TOF_SIGNATURE) != 0) { /* * If this socket is not expecting signature but * the segment contains signature just fail. */ TCPSTAT_INC(tcps_sig_err_sigopt); TCPSTAT_INC(tcps_sig_rcvbadsig); return (0); } /* Signature is not expected, and not present in segment. */ return (1); } /* * If this socket is expecting signature but the segment does not * contain any just fail. */ if ((to->to_flags & TOF_SIGNATURE) == 0) { TCPSTAT_INC(tcps_sig_err_nosigopt); TCPSTAT_INC(tcps_sig_rcvbadsig); return (0); } if (tcp_signature_compute(m, off0, tlen, optlen, &tmpdigest[0], IPSEC_DIR_INBOUND) == -1) { TCPSTAT_INC(tcps_sig_err_buildsig); TCPSTAT_INC(tcps_sig_rcvbadsig); return (0); } if (bcmp(to->to_signature, &tmpdigest[0], TCP_SIGLEN) != 0) { TCPSTAT_INC(tcps_sig_rcvbadsig); return (0); } TCPSTAT_INC(tcps_sig_rcvgoodsig); return (1); } #endif /* TCP_SIGNATURE */ static int sysctl_drop(SYSCTL_HANDLER_ARGS) { /* addrs[0] is a foreign socket, addrs[1] is a local one. */ struct sockaddr_storage addrs[2]; struct inpcb *inp; struct tcpcb *tp; struct tcptw *tw; struct sockaddr_in *fin, *lin; #ifdef INET6 struct sockaddr_in6 *fin6, *lin6; #endif int error; inp = NULL; fin = lin = NULL; #ifdef INET6 fin6 = lin6 = NULL; #endif error = 0; if (req->oldptr != NULL || req->oldlen != 0) return (EINVAL); if (req->newptr == NULL) return (EPERM); if (req->newlen < sizeof(addrs)) return (ENOMEM); error = SYSCTL_IN(req, &addrs, sizeof(addrs)); if (error) return (error); switch (addrs[0].ss_family) { #ifdef INET6 case AF_INET6: fin6 = (struct sockaddr_in6 *)&addrs[0]; lin6 = (struct sockaddr_in6 *)&addrs[1]; if (fin6->sin6_len != sizeof(struct sockaddr_in6) || lin6->sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) { if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr)) return (EINVAL); in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[0]); in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[1]); fin = (struct sockaddr_in *)&addrs[0]; lin = (struct sockaddr_in *)&addrs[1]; break; } error = sa6_embedscope(fin6, V_ip6_use_defzone); if (error) return (error); error = sa6_embedscope(lin6, V_ip6_use_defzone); if (error) return (error); break; #endif #ifdef INET case AF_INET: fin = (struct sockaddr_in *)&addrs[0]; lin = (struct sockaddr_in *)&addrs[1]; if (fin->sin_len != sizeof(struct sockaddr_in) || lin->sin_len != sizeof(struct sockaddr_in)) return (EINVAL); break; #endif default: return (EINVAL); } INP_INFO_RLOCK(&V_tcbinfo); switch (addrs[0].ss_family) { #ifdef INET6 case AF_INET6: inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr, fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port, INPLOOKUP_WLOCKPCB, NULL); break; #endif #ifdef INET case AF_INET: inp = in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port, lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL); break; #endif } if (inp != NULL) { if (inp->inp_flags & INP_TIMEWAIT) { /* * XXXRW: There currently exists a state where an * inpcb is present, but its timewait state has 
been * discarded. For now, don't allow dropping of this * type of inpcb. */ tw = intotw(inp); if (tw != NULL) tcp_twclose(tw, 0); else INP_WUNLOCK(inp); } else if (!(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket->so_options & SO_ACCEPTCONN)) { tp = intotcpcb(inp); tp = tcp_drop(tp, ECONNABORTED); if (tp != NULL) INP_WUNLOCK(inp); } else INP_WUNLOCK(inp); } else error = ESRCH; INP_INFO_RUNLOCK(&V_tcbinfo); return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP, NULL, 0, sysctl_drop, "", "Drop TCP connection"); /* * Generate a standardized TCP log line for use throughout the * tcp subsystem. Memory allocation is done with M_NOWAIT to * allow use in the interrupt context. * * NB: The caller MUST free(s, M_TCPLOG) the returned string. * NB: The function may return NULL if memory allocation failed. * * Due to header inclusion and ordering limitations the struct ip * and ip6_hdr pointers have to be passed as void pointers. */ char * tcp_log_vain(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr) { /* Is logging enabled? */ if (tcp_log_in_vain == 0) return (NULL); return (tcp_log_addr(inc, th, ip4hdr, ip6hdr)); } char * tcp_log_addrs(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr) { /* Is logging enabled? */ if (tcp_log_debug == 0) return (NULL); return (tcp_log_addr(inc, th, ip4hdr, ip6hdr)); } static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr) { char *s, *sp; size_t size; struct ip *ip; #ifdef INET6 const struct ip6_hdr *ip6; ip6 = (const struct ip6_hdr *)ip6hdr; #endif /* INET6 */ ip = (struct ip *)ip4hdr; /* * The log line looks like this: * "TCP: [1.2.3.4]:50332 to [1.2.3.4]:80 tcpflags 0x2" */ size = sizeof("TCP: []:12345 to []:12345 tcpflags 0x2<>") + sizeof(PRINT_TH_FLAGS) + 1 + #ifdef INET6 2 * INET6_ADDRSTRLEN; #else 2 * INET_ADDRSTRLEN; #endif /* INET6 */ s = malloc(size, M_TCPLOG, M_ZERO|M_NOWAIT); if (s == NULL) return (NULL); strcat(s, "TCP: ["); sp = s + strlen(s); if (inc && ((inc->inc_flags & INC_ISIPV6) == 0)) { inet_ntoa_r(inc->inc_faddr, sp); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(inc->inc_fport)); sp = s + strlen(s); inet_ntoa_r(inc->inc_laddr, sp); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(inc->inc_lport)); #ifdef INET6 } else if (inc) { ip6_sprintf(sp, &inc->inc6_faddr); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(inc->inc_fport)); sp = s + strlen(s); ip6_sprintf(sp, &inc->inc6_laddr); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(inc->inc_lport)); } else if (ip6 && th) { ip6_sprintf(sp, &ip6->ip6_src); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(th->th_sport)); sp = s + strlen(s); ip6_sprintf(sp, &ip6->ip6_dst); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(th->th_dport)); #endif /* INET6 */ #ifdef INET } else if (ip && th) { inet_ntoa_r(ip->ip_src, sp); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(th->th_sport)); sp = s + strlen(s); inet_ntoa_r(ip->ip_dst, sp); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(th->th_dport)); #endif /* INET */ } else { free(s, M_TCPLOG); return (NULL); } sp = s + strlen(s); if (th) sprintf(sp, " tcpflags 0x%b", th->th_flags, PRINT_TH_FLAGS); if (*(s + size - 1) != '\0') panic("%s: string too long", __func__); return (s); } /* * A subroutine which makes it easy to track TCP state changes with DTrace. * This function shouldn't be called for t_state initializations that don't * correspond to actual TCP state transitions. 
*/ void tcp_state_change(struct tcpcb *tp, int newstate) { #if defined(KDTRACE_HOOKS) int pstate = tp->t_state; #endif TCPSTATES_DEC(tp->t_state); TCPSTATES_INC(newstate); tp->t_state = newstate; TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, pstate); } Index: user/alc/PQ_LAUNDRY/sys/netinet6/icmp6.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/netinet6/icmp6.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/netinet6/icmp6.c (revision 303642) @@ -1,2872 +1,2870 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #define MBUF_PRIVATE /* XXXRW: Optimisation tries to avoid M_EXT mbufs */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern struct domain inet6domain; VNET_PCPUSTAT_DEFINE(struct icmp6stat, icmp6stat); VNET_PCPUSTAT_SYSINIT(icmp6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(icmp6stat); #endif /* VIMAGE */ VNET_DECLARE(struct inpcbinfo, ripcbinfo); VNET_DECLARE(struct inpcbhead, ripcb); VNET_DECLARE(int, icmp6errppslim); static VNET_DEFINE(int, icmp6errpps_count) = 0; static VNET_DEFINE(struct timeval, icmp6errppslim_last); VNET_DECLARE(int, icmp6_nodeinfo); #define V_ripcbinfo VNET(ripcbinfo) #define V_ripcb VNET(ripcb) #define V_icmp6errppslim VNET(icmp6errppslim) #define V_icmp6errpps_count VNET(icmp6errpps_count) #define V_icmp6errppslim_last VNET(icmp6errppslim_last) #define V_icmp6_nodeinfo VNET(icmp6_nodeinfo) static void icmp6_errcount(int, int); static int icmp6_rip6_input(struct mbuf **, int); static int icmp6_ratelimit(const struct in6_addr *, const int, const int); static const char *icmp6_redirect_diag(struct in6_addr *, struct in6_addr *, struct in6_addr *); static struct mbuf *ni6_input(struct mbuf *, int); static struct mbuf *ni6_nametodns(const char *, int, int); static int ni6_dnsmatch(const char *, int, const char *, int); static int ni6_addrs(struct icmp6_nodeinfo *, struct mbuf *, struct ifnet **, struct in6_addr *); static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, struct ifnet *, int); static int icmp6_notify_error(struct mbuf **, int, int, int); /* * Kernel module interface for updating icmp6stat. The argument is an index * into icmp6stat treated as an array of u_quad_t. While this encodes the * general layout of icmp6stat into the caller, it doesn't encode its * location, so that future changes to add, for example, per-CPU stats * support won't cause binary compatibility problems for kernel modules. 
*/ void kmod_icmp6stat_inc(int statnum) { counter_u64_add(VNET(icmp6stat)[statnum], 1); } static void icmp6_errcount(int type, int code) { switch (type) { case ICMP6_DST_UNREACH: switch (code) { case ICMP6_DST_UNREACH_NOROUTE: ICMP6STAT_INC(icp6s_odst_unreach_noroute); return; case ICMP6_DST_UNREACH_ADMIN: ICMP6STAT_INC(icp6s_odst_unreach_admin); return; case ICMP6_DST_UNREACH_BEYONDSCOPE: ICMP6STAT_INC(icp6s_odst_unreach_beyondscope); return; case ICMP6_DST_UNREACH_ADDR: ICMP6STAT_INC(icp6s_odst_unreach_addr); return; case ICMP6_DST_UNREACH_NOPORT: ICMP6STAT_INC(icp6s_odst_unreach_noport); return; } break; case ICMP6_PACKET_TOO_BIG: ICMP6STAT_INC(icp6s_opacket_too_big); return; case ICMP6_TIME_EXCEEDED: switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: ICMP6STAT_INC(icp6s_otime_exceed_transit); return; case ICMP6_TIME_EXCEED_REASSEMBLY: ICMP6STAT_INC(icp6s_otime_exceed_reassembly); return; } break; case ICMP6_PARAM_PROB: switch (code) { case ICMP6_PARAMPROB_HEADER: ICMP6STAT_INC(icp6s_oparamprob_header); return; case ICMP6_PARAMPROB_NEXTHEADER: ICMP6STAT_INC(icp6s_oparamprob_nextheader); return; case ICMP6_PARAMPROB_OPTION: ICMP6STAT_INC(icp6s_oparamprob_option); return; } break; case ND_REDIRECT: ICMP6STAT_INC(icp6s_oredirect); return; } ICMP6STAT_INC(icp6s_ounknown); } /* * A wrapper function for icmp6_error() necessary when the erroneous packet * may not contain enough scope zone information. */ void icmp6_error2(struct mbuf *m, int type, int code, int param, struct ifnet *ifp) { struct ip6_hdr *ip6; if (ifp == NULL) return; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif ip6 = mtod(m, struct ip6_hdr *); if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0) return; if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0) return; icmp6_error(m, type, code, param); } /* * Generate an error packet of type error in response to bad IP6 packet. */ void icmp6_error(struct mbuf *m, int type, int code, int param) { struct ip6_hdr *oip6, *nip6; struct icmp6_hdr *icmp6; u_int preplen; int off; int nxt; ICMP6STAT_INC(icp6s_error); /* count per-type-code statistics */ icmp6_errcount(type, code); #ifdef M_DECRYPTED /*not openbsd*/ if (m->m_flags & M_DECRYPTED) { ICMP6STAT_INC(icp6s_canterror); goto freeit; } #endif #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif oip6 = mtod(m, struct ip6_hdr *); /* * If the destination address of the erroneous packet is a multicast * address, or the packet was sent using link-layer multicast, * we should basically suppress sending an error (RFC 2463, Section * 2.4). * We have two exceptions (the item e.2 in that section): * - the Packet Too Big message can be sent for path MTU discovery. * - the Parameter Problem Message that can be allowed an icmp6 error * in the option type field. This check has been done in * ip6_unknown_opt(), so we can just check the type and code. */ if ((m->m_flags & (M_BCAST|M_MCAST) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) && (type != ICMP6_PACKET_TOO_BIG && (type != ICMP6_PARAM_PROB || code != ICMP6_PARAMPROB_OPTION))) goto freeit; /* * RFC 2463, 2.4 (e.5): source address check. * XXX: the case of anycast source? 
*/ if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_src)) goto freeit; /* * If we are about to send ICMPv6 against ICMPv6 error/redirect, * don't do it. */ nxt = -1; off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); if (off >= 0 && nxt == IPPROTO_ICMPV6) { struct icmp6_hdr *icp; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), ); icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off, sizeof(*icp)); if (icp == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif if (icp->icmp6_type < ICMP6_ECHO_REQUEST || icp->icmp6_type == ND_REDIRECT) { /* * ICMPv6 error * Special case: for redirect (which is * informational) we must not send icmp6 error. */ ICMP6STAT_INC(icp6s_canterror); goto freeit; } else { /* ICMPv6 informational - send the error */ } } else { /* non-ICMPv6 - send the error */ } oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */ /* Finally, do rate limitation check. */ if (icmp6_ratelimit(&oip6->ip6_src, type, code)) { ICMP6STAT_INC(icp6s_toofreq); goto freeit; } /* * OK, ICMP6 can be generated. */ if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN) m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len); preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); M_PREPEND(m, preplen, M_NOWAIT); /* FIB is also copied over. */ if (m == NULL) { nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__)); return; } nip6 = mtod(m, struct ip6_hdr *); nip6->ip6_src = oip6->ip6_src; nip6->ip6_dst = oip6->ip6_dst; in6_clearscope(&oip6->ip6_src); in6_clearscope(&oip6->ip6_dst); icmp6 = (struct icmp6_hdr *)(nip6 + 1); icmp6->icmp6_type = type; icmp6->icmp6_code = code; icmp6->icmp6_pptr = htonl((u_int32_t)param); /* * icmp6_reflect() is designed to be in the input path. * icmp6_error() can be called from both input and output path, * and if we are in output path rcvif could contain bogus value. * clear m->m_pkthdr.rcvif for safety, we should have enough scope * information in ip header (nip6). */ m->m_pkthdr.rcvif = NULL; ICMP6STAT_INC(icp6s_outhist[type]); icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */ return; freeit: /* * If we can't tell whether or not we can generate ICMP6, free it. */ m_freem(m); } /* * Process a received ICMP6 message. */ int icmp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp, *n; struct ifnet *ifp; struct ip6_hdr *ip6, *nip6; struct icmp6_hdr *icmp6, *nicmp6; int off = *offp; int icmp6len = m->m_pkthdr.len - *offp; int code, sum, noff; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; int ip6len, error; ifp = m->m_pkthdr.rcvif; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE); /* m might change if M_LOOP. So, call mtod after this */ #endif /* * Locate icmp6 structure in mbuf, and check * that not corrupted and of at least minimum length */ ip6 = mtod(m, struct ip6_hdr *); ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); if (icmp6len < sizeof(struct icmp6_hdr)) { ICMP6STAT_INC(icp6s_tooshort); goto freeit; } /* * Check multicast group membership. * Note: SSM filters are not applied for ICMPv6 traffic. 
*/ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct in6_multi *inm; inm = in6m_lookup(ifp, &ip6->ip6_dst); if (inm == NULL) { IP6STAT_INC(ip6s_notmember); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); goto freeit; } } /* * calculate the checksum */ #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return IPPROTO_DONE; } #endif code = icmp6->icmp6_code; if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) { nd6log((LOG_ERR, "ICMP6 checksum error(%d|%x) %s\n", icmp6->icmp6_type, sum, ip6_sprintf(ip6bufs, &ip6->ip6_src))); ICMP6STAT_INC(icp6s_checksum); goto freeit; } ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]); icmp6_ifstat_inc(ifp, ifs6_in_msg); if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK) icmp6_ifstat_inc(ifp, ifs6_in_error); switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: icmp6_ifstat_inc(ifp, ifs6_in_dstunreach); switch (code) { case ICMP6_DST_UNREACH_NOROUTE: + case ICMP6_DST_UNREACH_ADDR: /* PRC_HOSTDEAD is a DOS */ code = PRC_UNREACH_NET; break; case ICMP6_DST_UNREACH_ADMIN: icmp6_ifstat_inc(ifp, ifs6_in_adminprohib); code = PRC_UNREACH_PROTOCOL; /* is this a good code? */ - break; - case ICMP6_DST_UNREACH_ADDR: - code = PRC_HOSTDEAD; break; case ICMP6_DST_UNREACH_BEYONDSCOPE: /* I mean "source address was incorrect." */ code = PRC_PARAMPROB; break; case ICMP6_DST_UNREACH_NOPORT: code = PRC_UNREACH_PORT; break; default: goto badcode; } goto deliver; break; case ICMP6_PACKET_TOO_BIG: icmp6_ifstat_inc(ifp, ifs6_in_pkttoobig); /* validation is made in icmp6_mtudisc_update */ code = PRC_MSGSIZE; /* * Updating the path MTU will be done after examining * intermediate extension headers. */ goto deliver; break; case ICMP6_TIME_EXCEEDED: icmp6_ifstat_inc(ifp, ifs6_in_timeexceed); switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: code = PRC_TIMXCEED_INTRANS; break; case ICMP6_TIME_EXCEED_REASSEMBLY: code = PRC_TIMXCEED_REASS; break; default: goto badcode; } goto deliver; break; case ICMP6_PARAM_PROB: icmp6_ifstat_inc(ifp, ifs6_in_paramprob); switch (code) { case ICMP6_PARAMPROB_NEXTHEADER: code = PRC_UNREACH_PROTOCOL; break; case ICMP6_PARAMPROB_HEADER: case ICMP6_PARAMPROB_OPTION: code = PRC_PARAMPROB; break; default: goto badcode; } goto deliver; break; case ICMP6_ECHO_REQUEST: icmp6_ifstat_inc(ifp, ifs6_in_echo); if (code != 0) goto badcode; if ((n = m_copy(m, 0, M_COPYALL)) == NULL) { /* Give up remote */ break; } if (!M_WRITABLE(n) || n->m_len < off + sizeof(struct icmp6_hdr)) { struct mbuf *n0 = n; int n0len; CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) <= MHLEN); n = m_gethdr(M_NOWAIT, n0->m_type); if (n == NULL) { /* Give up remote */ m_freem(n0); break; } m_move_pkthdr(n, n0); /* FIB copied. */ n0len = n0->m_pkthdr.len; /* save for use below */ /* * Copy IPv6 and ICMPv6 only. */ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); noff = sizeof(struct ip6_hdr); /* new mbuf contains only ipv6+icmpv6 headers */ n->m_len = noff + sizeof(struct icmp6_hdr); /* * Adjust mbuf. ip6_plen will be adjusted in * ip6_output(). 
*/ m_adj(n0, off + sizeof(struct icmp6_hdr)); /* recalculate complete packet size */ n->m_pkthdr.len = n0len + (noff - off); n->m_next = n0; } else { nip6 = mtod(n, struct ip6_hdr *); IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off, sizeof(*nicmp6)); noff = off; } nicmp6->icmp6_type = ICMP6_ECHO_REPLY; nicmp6->icmp6_code = 0; if (n) { ICMP6STAT_INC(icp6s_reflect); ICMP6STAT_INC(icp6s_outhist[ICMP6_ECHO_REPLY]); icmp6_reflect(n, noff); } break; case ICMP6_ECHO_REPLY: icmp6_ifstat_inc(ifp, ifs6_in_echoreply); if (code != 0) goto badcode; break; case MLD_LISTENER_QUERY: case MLD_LISTENER_REPORT: case MLD_LISTENER_DONE: case MLDV2_LISTENER_REPORT: /* * Drop MLD traffic which is not link-local, has a hop limit * of greater than 1 hop, or which does not have the * IPv6 HBH Router Alert option. * As IPv6 HBH options are stripped in ip6_input() we must * check an mbuf header flag. * XXX Should we also sanity check that these messages * were directed to a link-local multicast prefix? */ if ((ip6->ip6_hlim != 1) || (m->m_flags & M_RTALERT_MLD) == 0) goto freeit; if (mld_input(m, off, icmp6len) != 0) return (IPPROTO_DONE); /* m stays. */ break; case ICMP6_WRUREQUEST: /* ICMP6_FQDN_QUERY */ { enum { WRU, FQDN } mode; if (!V_icmp6_nodeinfo) break; if (icmp6len == sizeof(struct icmp6_hdr) + 4) mode = WRU; else if (icmp6len >= sizeof(struct icmp6_nodeinfo)) mode = FQDN; else goto badlen; if (mode == FQDN) { #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo), IPPROTO_DONE); #endif n = m_copy(m, 0, M_COPYALL); if (n) n = ni6_input(n, off); /* XXX meaningless if n == NULL */ noff = sizeof(struct ip6_hdr); } else { struct prison *pr; u_char *p; int maxhlen, hlen; /* * XXX: this combination of flags is pointless, * but should we keep this for compatibility? */ if ((V_icmp6_nodeinfo & (ICMP6_NODEINFO_FQDNOK | ICMP6_NODEINFO_TMPADDROK)) != (ICMP6_NODEINFO_FQDNOK | ICMP6_NODEINFO_TMPADDROK)) break; if (code != 0) goto badcode; CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) + 4 <= MHLEN); n = m_gethdr(M_NOWAIT, m->m_type); if (n == NULL) { /* Give up remote */ break; } if (!m_dup_pkthdr(n, m, M_NOWAIT)) { /* * Previous code did a blind M_COPY_PKTHDR * and said "just for rcvif". If true, then * we could tolerate the dup failing (due to * the deep copy of the tag chain). For now * be conservative and just fail. */ m_free(n); n = NULL; } maxhlen = M_TRAILINGSPACE(n) - (sizeof(*nip6) + sizeof(*nicmp6) + 4); pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); hlen = strlen(pr->pr_hostname); if (maxhlen > hlen) maxhlen = hlen; /* * Copy IPv6 and ICMPv6 only. */ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); p = (u_char *)(nicmp6 + 1); bzero(p, 4); /* meaningless TTL */ bcopy(pr->pr_hostname, p + 4, maxhlen); mtx_unlock(&pr->pr_mtx); noff = sizeof(struct ip6_hdr); n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + 4 + maxhlen; nicmp6->icmp6_type = ICMP6_WRUREPLY; nicmp6->icmp6_code = 0; } if (n) { ICMP6STAT_INC(icp6s_reflect); ICMP6STAT_INC(icp6s_outhist[ICMP6_WRUREPLY]); icmp6_reflect(n, noff); } break; } case ICMP6_WRUREPLY: if (code != 0) goto badcode; break; case ND_ROUTER_SOLICIT: icmp6_ifstat_inc(ifp, ifs6_in_routersolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* give up local */ /* Send incoming SeND packet to user space. 
*/ if (send_sendso_input_hook != NULL) { IP6_EXTHDR_CHECK(m, off, icmp6len, IPPROTO_DONE); error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); /* -1 == no app on SEND socket */ if (error == 0) return (IPPROTO_DONE); nd6_rs_input(m, off, icmp6len); } else nd6_rs_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { IP6_EXTHDR_CHECK(n, off, icmp6len, IPPROTO_DONE); error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; /* -1 == no app on SEND socket */ nd6_rs_input(n, off, icmp6len); } else nd6_rs_input(n, off, icmp6len); /* m stays. */ break; case ND_ROUTER_ADVERT: icmp6_ifstat_inc(ifp, ifs6_in_routeradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* Send incoming SeND-protected/ND packet to user space. */ if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); nd6_ra_input(m, off, icmp6len); } else nd6_ra_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; nd6_ra_input(n, off, icmp6len); } else nd6_ra_input(n, off, icmp6len); /* m stays. */ break; case ND_NEIGHBOR_SOLICIT: icmp6_ifstat_inc(ifp, ifs6_in_neighborsolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); nd6_ns_input(m, off, icmp6len); } else nd6_ns_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; nd6_ns_input(n, off, icmp6len); } else nd6_ns_input(n, off, icmp6len); /* m stays. */ break; case ND_NEIGHBOR_ADVERT: icmp6_ifstat_inc(ifp, ifs6_in_neighboradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* Send incoming SeND-protected/ND packet to user space. */ if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); nd6_na_input(m, off, icmp6len); } else nd6_na_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; nd6_na_input(n, off, icmp6len); } else nd6_na_input(n, off, icmp6len); /* m stays. */ break; case ND_REDIRECT: icmp6_ifstat_inc(ifp, ifs6_in_redirect); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_redirect)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); icmp6_redirect_input(m, off); } else icmp6_redirect_input(m, off); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; icmp6_redirect_input(n, off); } else icmp6_redirect_input(n, off); /* m stays. 
*/ break; case ICMP6_ROUTER_RENUMBERING: if (code != ICMP6_ROUTER_RENUMBERING_COMMAND && code != ICMP6_ROUTER_RENUMBERING_RESULT) goto badcode; if (icmp6len < sizeof(struct icmp6_router_renum)) goto badlen; break; default: nd6log((LOG_DEBUG, "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n", icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ifp ? ifp->if_index : 0)); if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) { /* ICMPv6 error: MUST deliver it by spec... */ code = PRC_NCMDS; /* deliver */ } else { /* ICMPv6 informational: MUST not deliver */ break; } deliver: if (icmp6_notify_error(&m, off, icmp6len, code) != 0) { /* In this case, m should've been freed. */ return (IPPROTO_DONE); } break; badcode: ICMP6STAT_INC(icp6s_badcode); break; badlen: ICMP6STAT_INC(icp6s_badlen); break; } /* deliver the packet to appropriate sockets */ icmp6_rip6_input(&m, *offp); return IPPROTO_DONE; freeit: m_freem(m); return IPPROTO_DONE; } static int icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code) { struct mbuf *m = *mp; struct icmp6_hdr *icmp6; struct ip6_hdr *eip6; u_int32_t notifymtu; struct sockaddr_in6 icmp6src, icmp6dst; if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) { ICMP6STAT_INC(icp6s_tooshort); goto freeit; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1); icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif eip6 = (struct ip6_hdr *)(icmp6 + 1); /* Detect the upper level protocol */ { void (*ctlfunc)(int, struct sockaddr *, void *); u_int8_t nxt = eip6->ip6_nxt; int eoff = off + sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr); struct ip6ctlparam ip6cp; struct in6_addr *finaldst = NULL; int icmp6type = icmp6->icmp6_type; struct ip6_frag *fh; struct ip6_rthdr *rth; struct ip6_rthdr0 *rth0; int rthlen; while (1) { /* XXX: should avoid infinite loop explicitly? */ struct ip6_ext *eh; switch (nxt) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: case IPPROTO_AH: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_ext), -1); eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(eh, struct ip6_ext *, m, eoff, sizeof(*eh)); if (eh == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif if (nxt == IPPROTO_AH) eoff += (eh->ip6e_len + 2) << 2; else eoff += (eh->ip6e_len + 1) << 3; nxt = eh->ip6e_nxt; break; case IPPROTO_ROUTING: /* * When the erroneous packet contains a * routing header, we should examine the * header to determine the final destination. * Otherwise, we can't properly update * information that depends on the final * destination (e.g. path MTU). */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1); rth = (struct ip6_rthdr *) (mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m, eoff, sizeof(*rth)); if (rth == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif rthlen = (rth->ip6r_len + 1) << 3; /* * XXX: currently there is no * officially defined type other * than type-0. * Note that if the segment left field * is 0, all intermediate hops must * have been passed. 
*/ if (rth->ip6r_segleft && rth->ip6r_type == IPV6_RTHDR_TYPE_0) { int hops; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1); rth0 = (struct ip6_rthdr0 *) (mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth0, struct ip6_rthdr0 *, m, eoff, rthlen); if (rth0 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* just ignore a bogus header */ if ((rth0->ip6r0_len % 2) == 0 && (hops = rth0->ip6r0_len/2)) finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1); } eoff += rthlen; nxt = rth->ip6r_nxt; break; case IPPROTO_FRAGMENT: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_frag), -1); fh = (struct ip6_frag *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(fh, struct ip6_frag *, m, eoff, sizeof(*fh)); if (fh == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* * Data after a fragment header is meaningless * unless it is the first fragment, but * we'll go to the notify label for path MTU * discovery. */ if (fh->ip6f_offlg & IP6F_OFF_MASK) goto notify; eoff += sizeof(struct ip6_frag); nxt = fh->ip6f_nxt; break; default: /* * This case includes ESP and the No Next * Header. In such cases going to the notify * label does not have any meaning * (i.e. ctlfunc will be NULL), but we go * anyway since we might have to update * path MTU information. */ goto notify; } } notify: #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* * retrieve parameters from the inner IPv6 header, and convert * them into sockaddr structures. * XXX: there is no guarantee that the source or destination * addresses of the inner packet are in the same scope as * the addresses of the icmp packet. But there is no other * way to determine the zone. */ eip6 = (struct ip6_hdr *)(icmp6 + 1); bzero(&icmp6dst, sizeof(icmp6dst)); icmp6dst.sin6_len = sizeof(struct sockaddr_in6); icmp6dst.sin6_family = AF_INET6; if (finaldst == NULL) icmp6dst.sin6_addr = eip6->ip6_dst; else icmp6dst.sin6_addr = *finaldst; if (in6_setscope(&icmp6dst.sin6_addr, m->m_pkthdr.rcvif, NULL)) goto freeit; bzero(&icmp6src, sizeof(icmp6src)); icmp6src.sin6_len = sizeof(struct sockaddr_in6); icmp6src.sin6_family = AF_INET6; icmp6src.sin6_addr = eip6->ip6_src; if (in6_setscope(&icmp6src.sin6_addr, m->m_pkthdr.rcvif, NULL)) goto freeit; icmp6src.sin6_flowinfo = (eip6->ip6_flow & IPV6_FLOWLABEL_MASK); if (finaldst == NULL) finaldst = &eip6->ip6_dst; ip6cp.ip6c_m = m; ip6cp.ip6c_icmp6 = icmp6; ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1); ip6cp.ip6c_off = eoff; ip6cp.ip6c_finaldst = finaldst; ip6cp.ip6c_src = &icmp6src; ip6cp.ip6c_nxt = nxt; if (icmp6type == ICMP6_PACKET_TOO_BIG) { notifymtu = ntohl(icmp6->icmp6_mtu); ip6cp.ip6c_cmdarg = (void *)¬ifymtu; icmp6_mtudisc_update(&ip6cp, 1); /*XXX*/ } ctlfunc = (void (*)(int, struct sockaddr *, void *)) (inet6sw[ip6_protox[nxt]].pr_ctlinput); if (ctlfunc) { (void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst, &ip6cp); } } *mp = m; return (0); freeit: m_freem(m); return (-1); } void icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated) { struct in6_addr *dst = ip6cp->ip6c_finaldst; struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6; struct mbuf *m = ip6cp->ip6c_m; /* will be necessary for scope issue */ u_int mtu = ntohl(icmp6->icmp6_mtu); struct in_conninfo inc; #if 0 /* * RFC2460 section 5, last paragraph. 
* even though minimum link MTU for IPv6 is IPV6_MMTU, * we may see ICMPv6 too big with mtu < IPV6_MMTU * due to packet translator in the middle. * see ip6_output() and ip6_getpmtu() "alwaysfrag" case for * special handling. */ if (mtu < IPV6_MMTU) return; #endif /* * we reject ICMPv6 too big with abnormally small value. * XXX what is the good definition of "abnormally small"? */ if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8) return; if (!validated) return; /* * In case the suggested mtu is less than IPV6_MMTU, we * only need to remember that it was for above mentioned * "alwaysfrag" case. * Try to be as close to the spec as possible. */ if (mtu < IPV6_MMTU) mtu = IPV6_MMTU - 8; bzero(&inc, sizeof(inc)); inc.inc_fibnum = M_GETFIB(m); inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL)) return; if (mtu < tcp_maxmtu6(&inc, NULL)) { tcp_hc_updatemtu(&inc, mtu); ICMP6STAT_INC(icp6s_pmtuchg); } } /* * Process a Node Information Query packet, based on * draft-ietf-ipngwg-icmp-name-lookups-07. * * Spec incompatibilities: * - IPv6 Subject address handling * - IPv4 Subject address handling support missing * - Proxy reply (answer even if it's not for me) * - joins NI group address at in6_ifattach() time only, does not cope * with hostname changes by sethostname(3) */ static struct mbuf * ni6_input(struct mbuf *m, int off) { struct icmp6_nodeinfo *ni6, *nni6; struct mbuf *n = NULL; struct prison *pr; u_int16_t qtype; int subjlen; int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); struct ni_reply_fqdn *fqdn; int addrs; /* for NI_QTYPE_NODEADDR */ struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */ struct in6_addr in6_subj; /* subject address */ struct ip6_hdr *ip6; int oldfqdn = 0; /* if 1, return pascal string (03 draft) */ char *subj = NULL; struct in6_ifaddr *ia6 = NULL; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6)); if (ni6 == NULL) { /* m is already reclaimed */ return (NULL); } #endif /* * Validate IPv6 source address. * The default configuration MUST be to refuse answering queries from * global-scope addresses according to RFC4602. * Notes: * - it's not very clear what "refuse" means; this implementation * simply drops it. * - it's not very easy to identify global-scope (unicast) addresses * since there are many prefixes for them. It should be safer * and in practice sufficient to check "all" but loopback and * link-local (note that site-local unicast was deprecated and * ULA is defined as global scope-wise) */ if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_GLOBALOK) == 0 && !IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && !IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) goto bad; /* * Validate IPv6 destination address. * * The Responder must discard the Query without further processing * unless it is one of the Responder's unicast or anycast addresses, or * a link-local scope multicast address which the Responder has joined. * [RFC4602, Section 5.] 
*/ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) goto bad; /* else it's a link-local multicast, fine */ } else { /* unicast or anycast */ ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia6 == NULL) goto bad; /* XXX impossible */ if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) && !(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) { ifa_free(&ia6->ia_ifa); nd6log((LOG_DEBUG, "ni6_input: ignore node info to " "a temporary address in %s:%d", __FILE__, __LINE__)); goto bad; } ifa_free(&ia6->ia_ifa); } /* validate query Subject field. */ qtype = ntohs(ni6->ni_qtype); subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo); switch (qtype) { case NI_QTYPE_NOOP: case NI_QTYPE_SUPTYPES: /* 07 draft */ if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0) break; /* FALLTHROUGH */ case NI_QTYPE_FQDN: case NI_QTYPE_NODEADDR: case NI_QTYPE_IPV4ADDR: switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: #if ICMP6_NI_SUBJ_IPV6 != 0 case 0: #endif /* * backward compatibility - try to accept 03 draft * format, where no Subject is present. */ if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 && subjlen == 0) { oldfqdn++; break; } #if ICMP6_NI_SUBJ_IPV6 != 0 if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6) goto bad; #endif if (subjlen != sizeof(struct in6_addr)) goto bad; /* * Validate Subject address. * * Not sure what exactly "address belongs to the node" * means in the spec, is it just unicast, or what? * * At this moment we consider Subject address as * "belong to the node" if the Subject address equals * to the IPv6 destination address; validation for * IPv6 destination address should have done enough * check for us. * * We do not do proxy at this moment. */ /* m_pulldown instead of copy? */ m_copydata(m, off + sizeof(struct icmp6_nodeinfo), subjlen, (caddr_t)&in6_subj); if (in6_setscope(&in6_subj, m->m_pkthdr.rcvif, NULL)) goto bad; subj = (char *)&in6_subj; if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj)) break; /* * XXX if we are to allow other cases, we should really * be careful about scope here. * basically, we should disallow queries toward IPv6 * destination X with subject Y, * if scope(X) > scope(Y). * if we allow scope(X) > scope(Y), it will result in * information leakage across scope boundary. */ goto bad; case ICMP6_NI_SUBJ_FQDN: /* * Validate Subject name with gethostname(3). * * The behavior may need some debate, since: * - we are not sure if the node has FQDN as * hostname (returned by gethostname(3)). * - the code does wildcard match for truncated names. * however, we are not sure if we want to perform * wildcard match, if gethostname(3) side has * truncated hostname. */ pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); n = ni6_nametodns(pr->pr_hostname, strlen(pr->pr_hostname), 0); mtx_unlock(&pr->pr_mtx); if (!n || n->m_next || n->m_len == 0) goto bad; IP6_EXTHDR_GET(subj, char *, m, off + sizeof(struct icmp6_nodeinfo), subjlen); if (subj == NULL) goto bad; if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *), n->m_len)) { goto bad; } m_freem(n); n = NULL; break; case ICMP6_NI_SUBJ_IPV4: /* XXX: to be implemented? */ default: goto bad; } break; } /* refuse based on configuration. XXX ICMP6_NI_REFUSED? 
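 * (For instance, an FQDN query is answered only when the
 * ICMP6_NODEINFO_FQDNOK bit is set in V_icmp6_nodeinfo, and node
 * address queries only when ICMP6_NODEINFO_NODEADDROK is set; a
 * refused query is silently dropped rather than answered with an
 * ICMP6_NI_REFUSED code.)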
*/ switch (qtype) { case NI_QTYPE_FQDN: if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_FQDNOK) == 0) goto bad; break; case NI_QTYPE_NODEADDR: case NI_QTYPE_IPV4ADDR: if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_NODEADDROK) == 0) goto bad; break; } /* guess reply length */ switch (qtype) { case NI_QTYPE_NOOP: break; /* no reply data */ case NI_QTYPE_SUPTYPES: replylen += sizeof(u_int32_t); break; case NI_QTYPE_FQDN: /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); break; case NI_QTYPE_NODEADDR: addrs = ni6_addrs(ni6, m, &ifp, (struct in6_addr *)subj); if ((replylen += addrs * (sizeof(struct in6_addr) + sizeof(u_int32_t))) > MCLBYTES) replylen = MCLBYTES; /* XXX: will truncate pkt later */ break; case NI_QTYPE_IPV4ADDR: /* unsupported - should respond with unknown Qtype? */ break; default: /* * XXX: We must return a reply with the ICMP6 code * `unknown Qtype' in this case. However we regard the case * as an FQDN query for backward compatibility. * Older versions set a random value to this field, * so it rarely varies in the defined qtypes. * But the mechanism is not reliable... * maybe we should obsolete older versions. */ qtype = NI_QTYPE_FQDN; /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); oldfqdn++; break; } /* Allocate an mbuf to reply. */ if (replylen > MCLBYTES) { /* * XXX: should we try to allocate more? But MCLBYTES * is probably much larger than IPV6_MMTU... */ goto bad; } if (replylen > MHLEN) n = m_getcl(M_NOWAIT, m->m_type, M_PKTHDR); else n = m_gethdr(M_NOWAIT, m->m_type); if (n == NULL) { m_freem(m); return (NULL); } m_move_pkthdr(n, m); /* just for recvif and FIB */ n->m_pkthdr.len = n->m_len = replylen; /* copy mbuf header and IPv6 + Node Information base headers */ bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr)); nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1); bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo)); /* qtype dependent procedure */ switch (qtype) { case NI_QTYPE_NOOP: nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = 0; break; case NI_QTYPE_SUPTYPES: { u_int32_t v; nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = htons(0x0000); /* raw bitmap */ /* supports NOOP, SUPTYPES, FQDN, and NODEADDR */ v = (u_int32_t)htonl(0x0000000f); bcopy(&v, nni6 + 1, sizeof(u_int32_t)); break; } case NI_QTYPE_FQDN: nni6->ni_code = ICMP6_NI_SUCCESS; fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) + sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo)); nni6->ni_flags = 0; /* XXX: meaningless TTL */ fqdn->ni_fqdn_ttl = 0; /* ditto. */ /* * XXX do we really have FQDN in hostname? */ pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); n->m_next = ni6_nametodns(pr->pr_hostname, strlen(pr->pr_hostname), oldfqdn); mtx_unlock(&pr->pr_mtx); if (n->m_next == NULL) goto bad; /* XXX we assume that n->m_next is not a chain */ if (n->m_next->m_next != NULL) goto bad; n->m_pkthdr.len += n->m_next->m_len; break; case NI_QTYPE_NODEADDR: { int lenlim, copied; nni6->ni_code = ICMP6_NI_SUCCESS; n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); lenlim = M_TRAILINGSPACE(n); copied = ni6_store_addrs(ni6, nni6, ifp, lenlim); /* XXX: reset mbuf length */ n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo) + copied; break; } default: break; /* XXX impossible! 
*/ } nni6->ni_type = ICMP6_NI_REPLY; m_freem(m); return (n); bad: m_freem(m); if (n) m_freem(n); return (NULL); } /* * make a mbuf with DNS-encoded string. no compression support. * * XXX names with less than 2 dots (like "foo" or "foo.section") will be * treated as truncated name (two \0 at the end). this is a wild guess. * * old - return pascal string if non-zero */ static struct mbuf * ni6_nametodns(const char *name, int namelen, int old) { struct mbuf *m; char *cp, *ep; const char *p, *q; int i, len, nterm; if (old) len = namelen + 1; else len = MCLBYTES; /* Because MAXHOSTNAMELEN is usually 256, we use cluster mbuf. */ if (len > MLEN) m = m_getcl(M_NOWAIT, MT_DATA, 0); else m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) goto fail; if (old) { m->m_len = len; *mtod(m, char *) = namelen; bcopy(name, mtod(m, char *) + 1, namelen); return m; } else { m->m_len = 0; cp = mtod(m, char *); ep = mtod(m, char *) + M_TRAILINGSPACE(m); /* if not certain about my name, return empty buffer */ if (namelen == 0) return m; /* * guess if it looks like shortened hostname, or FQDN. * shortened hostname needs two trailing "\0". */ i = 0; for (p = name; p < name + namelen; p++) { if (*p && *p == '.') i++; } if (i < 2) nterm = 2; else nterm = 1; p = name; while (cp < ep && p < name + namelen) { i = 0; for (q = p; q < name + namelen && *q && *q != '.'; q++) i++; /* result does not fit into mbuf */ if (cp + i + 1 >= ep) goto fail; /* * DNS label length restriction, RFC1035 page 8. * "i == 0" case is included here to avoid returning * 0-length label on "foo..bar". */ if (i <= 0 || i >= 64) goto fail; *cp++ = i; bcopy(p, cp, i); cp += i; p = q; if (p < name + namelen && *p == '.') p++; } /* termination */ if (cp + nterm >= ep) goto fail; while (nterm-- > 0) *cp++ = '\0'; m->m_len = cp - mtod(m, char *); return m; } panic("should not reach here"); /* NOTREACHED */ fail: if (m) m_freem(m); return NULL; } /* * check if two DNS-encoded string matches. takes care of truncated * form (with \0\0 at the end). no compression support. * XXX upper/lowercase match (see RFC2065) */ static int ni6_dnsmatch(const char *a, int alen, const char *b, int blen) { const char *a0, *b0; int l; /* simplest case - need validation? */ if (alen == blen && bcmp(a, b, alen) == 0) return 1; a0 = a; b0 = b; /* termination is mandatory */ if (alen < 2 || blen < 2) return 0; if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0') return 0; alen--; blen--; while (a - a0 < alen && b - b0 < blen) { if (a - a0 + 1 > alen || b - b0 + 1 > blen) return 0; if ((signed char)a[0] < 0 || (signed char)b[0] < 0) return 0; /* we don't support compression yet */ if (a[0] >= 64 || b[0] >= 64) return 0; /* truncated case */ if (a[0] == 0 && a - a0 == alen - 1) return 1; if (b[0] == 0 && b - b0 == blen - 1) return 1; if (a[0] == 0 || b[0] == 0) return 0; if (a[0] != b[0]) return 0; l = a[0]; if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen) return 0; if (bcmp(a + 1, b + 1, l) != 0) return 0; a += 1 + l; b += 1 + l; } if (a - a0 == alen && b - b0 == blen) return 1; else return 0; } /* * calculate the number of addresses to be returned in the node info reply. */ static int ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp, struct in6_addr *subj) { struct ifnet *ifp; struct in6_ifaddr *ifa6; struct ifaddr *ifa; int addrs = 0, addrsofif, iffound = 0; int niflags = ni6->ni_flags; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) { switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: if (subj == NULL) /* must be impossible... 
*/ return (0); break; default: /* * XXX: we only support IPv6 subject address for * this Qtype. */ return (0); } } IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { addrsofif = 0; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 && IN6_ARE_ADDR_EQUAL(subj, &ifa6->ia_addr.sin6_addr)) iffound = 1; /* * IPv4-mapped addresses can only be returned by a * Node Information proxy, since they represent * addresses of IPv4-only nodes, which perforce do * not implement this protocol. * [icmp-name-lookups-07, Section 5.4] * So we don't support NI_NODEADDR_FLAG_COMPAT in * this function at this moment. */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec. */ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; /* we need only unicast addresses */ if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { continue; } addrsofif++; /* count the address */ } IF_ADDR_RUNLOCK(ifp); if (iffound) { *ifpp = ifp; IFNET_RUNLOCK_NOSLEEP(); return (addrsofif); } addrs += addrsofif; } IFNET_RUNLOCK_NOSLEEP(); return (addrs); } static int ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, struct ifnet *ifp0, int resid) { struct ifnet *ifp; struct in6_ifaddr *ifa6; struct ifaddr *ifa; struct ifnet *ifp_dep = NULL; int copied = 0, allow_deprecated = 0; u_char *cp = (u_char *)(nni6 + 1); int niflags = ni6->ni_flags; u_int32_t ltime; if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL)) return (0); /* needless to copy */ IFNET_RLOCK_NOSLEEP(); ifp = ifp0 ? ifp0 : TAILQ_FIRST(&V_ifnet); again: for (; ifp; ifp = TAILQ_NEXT(ifp, if_link)) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 && allow_deprecated == 0) { /* * prefererred address should be put before * deprecated addresses. */ /* record the interface for later search */ if (ifp_dep == NULL) ifp_dep = ifp; continue; } else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 && allow_deprecated != 0) continue; /* we now collect deprecated addrs */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec. 
*/ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { continue; } /* now we can copy the address */ if (resid < sizeof(struct in6_addr) + sizeof(u_int32_t)) { IF_ADDR_RUNLOCK(ifp); /* * We give up much more copy. * Set the truncate flag and return. */ nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE; IFNET_RUNLOCK_NOSLEEP(); return (copied); } /* * Set the TTL of the address. * The TTL value should be one of the following * according to the specification: * * 1. The remaining lifetime of a DHCP lease on the * address, or * 2. The remaining Valid Lifetime of a prefix from * which the address was derived through Stateless * Autoconfiguration. * * Note that we currently do not support stateful * address configuration by DHCPv6, so the former * case can't happen. */ if (ifa6->ia6_lifetime.ia6t_expire == 0) ltime = ND6_INFINITE_LIFETIME; else { if (ifa6->ia6_lifetime.ia6t_expire > time_uptime) ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_uptime); else ltime = 0; } bcopy(<ime, cp, sizeof(u_int32_t)); cp += sizeof(u_int32_t); /* copy the address itself */ bcopy(&ifa6->ia_addr.sin6_addr, cp, sizeof(struct in6_addr)); in6_clearscope((struct in6_addr *)cp); /* XXX */ cp += sizeof(struct in6_addr); resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t)); copied += (sizeof(struct in6_addr) + sizeof(u_int32_t)); } IF_ADDR_RUNLOCK(ifp); if (ifp0) /* we need search only on the specified IF */ break; } if (allow_deprecated == 0 && ifp_dep != NULL) { ifp = ifp_dep; allow_deprecated = 1; goto again; } IFNET_RUNLOCK_NOSLEEP(); return (copied); } /* * XXX almost dup'ed code with rip6_input. */ static int icmp6_rip6_input(struct mbuf **mp, int off) { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct inpcb *in6p; struct inpcb *last = NULL; struct sockaddr_in6 fromsa; struct icmp6_hdr *icmp6; struct mbuf *opts = NULL; #ifndef PULLDOWN_TEST /* this is assumed to be safe. */ icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { /* m is already reclaimed */ return (IPPROTO_DONE); } #endif /* * XXX: the address may have embedded scope zone ID, which should be * hidden from applications. */ bzero(&fromsa, sizeof(fromsa)); fromsa.sin6_family = AF_INET6; fromsa.sin6_len = sizeof(struct sockaddr_in6); fromsa.sin6_addr = ip6->ip6_src; if (sa6_recoverscope(&fromsa)) { m_freem(m); return (IPPROTO_DONE); } INP_INFO_RLOCK(&V_ripcbinfo); LIST_FOREACH(in6p, &V_ripcb, inp_list) { if ((in6p->inp_vflag & INP_IPV6) == 0) continue; if (in6p->inp_ip_p != IPPROTO_ICMPV6) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; INP_RLOCK(in6p); if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type, in6p->in6p_icmp6filt)) { INP_RUNLOCK(in6p); continue; } if (last != NULL) { struct mbuf *n = NULL; /* * Recent network drivers tend to allocate a single * mbuf cluster, rather than to make a couple of * mbufs without clusters. Also, since the IPv6 code * path tries to avoid m_pullup(), it is highly * probable that we still have an mbuf cluster here * even though the necessary length can be stored in an * mbuf's internal buffer. 
* Meanwhile, the default size of the receive socket * buffer for raw sockets is not so large. This means * the possibility of packet loss is relatively higher * than before. To avoid this scenario, we copy the * received data to a separate mbuf that does not use * a cluster, if possible. * XXX: it is better to copy the data after stripping * intermediate headers. */ if ((m->m_flags & M_EXT) && m->m_next == NULL && m->m_len <= MHLEN) { n = m_get(M_NOWAIT, m->m_type); if (n != NULL) { if (m_dup_pkthdr(n, m, M_NOWAIT)) { bcopy(m->m_data, n->m_data, m->m_len); n->m_len = m->m_len; } else { m_free(n); n = NULL; } } } if (n != NULL || (n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { if (last->inp_flags & INP_CONTROLOPTS) ip6_savecontrol(last, n, &opts); /* strip intermediate headers */ m_adj(n, off); SOCKBUF_LOCK(&last->inp_socket->so_rcv); if (sbappendaddr_locked( &last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, n, opts) == 0) { /* should notify about lost packet */ m_freem(n); if (opts) { m_freem(opts); } SOCKBUF_UNLOCK( &last->inp_socket->so_rcv); } else sorwakeup_locked(last->inp_socket); opts = NULL; } INP_RUNLOCK(last); } last = in6p; } INP_INFO_RUNLOCK(&V_ripcbinfo); if (last != NULL) { if (last->inp_flags & INP_CONTROLOPTS) ip6_savecontrol(last, m, &opts); /* strip intermediate headers */ m_adj(m, off); /* avoid using mbuf clusters if possible (see above) */ if ((m->m_flags & M_EXT) && m->m_next == NULL && m->m_len <= MHLEN) { struct mbuf *n; n = m_get(M_NOWAIT, m->m_type); if (n != NULL) { if (m_dup_pkthdr(n, m, M_NOWAIT)) { bcopy(m->m_data, n->m_data, m->m_len); n->m_len = m->m_len; m_freem(m); m = n; } else { m_freem(n); n = NULL; } } } SOCKBUF_LOCK(&last->inp_socket->so_rcv); if (sbappendaddr_locked(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { m_freem(m); if (opts) m_freem(opts); SOCKBUF_UNLOCK(&last->inp_socket->so_rcv); } else sorwakeup_locked(last->inp_socket); INP_RUNLOCK(last); } else { m_freem(m); IP6STAT_DEC(ip6s_delivered); } return IPPROTO_DONE; } /* * Reflect the ip6 packet back to the source. * OFF points to the icmp6 header, counted from the top of the mbuf. */ void icmp6_reflect(struct mbuf *m, size_t off) { struct in6_addr src6, *srcp; struct ip6_hdr *ip6; struct icmp6_hdr *icmp6; struct in6_ifaddr *ia = NULL; struct ifnet *outif = NULL; int plen; int type, code, hlim; /* too short to reflect */ if (off < sizeof(struct ip6_hdr)) { nd6log((LOG_DEBUG, "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n", (u_long)off, (u_long)sizeof(struct ip6_hdr), __FILE__, __LINE__)); goto bad; } /* * If there are extra headers between IPv6 and ICMPv6, strip * off that header first. */ #ifdef DIAGNOSTIC if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN) panic("assumption failed in icmp6_reflect"); #endif if (off > sizeof(struct ip6_hdr)) { size_t l; struct ip6_hdr nip6; l = off - sizeof(struct ip6_hdr); m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6); m_adj(m, l); l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) return; } bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6)); } else /* off == sizeof(struct ip6_hdr) */ { size_t l; l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) return; } } plen = m->m_pkthdr.len - sizeof(struct ip6_hdr); ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_nxt = IPPROTO_ICMPV6; icmp6 = (struct icmp6_hdr *)(ip6 + 1); type = icmp6->icmp6_type; /* keep type for statistics */ code = icmp6->icmp6_code; /* ditto. 
*/ hlim = 0; srcp = NULL; /* * If the incoming packet was addressed directly to us (i.e. unicast), * use dst as the src for the reply. * The IN6_IFF_NOTREADY case should be VERY rare, but is possible * (for example) when we encounter an error while forwarding procedure * destined to a duplicated address of ours. */ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia != NULL && !(ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) { src6 = ia->ia_addr.sin6_addr; srcp = &src6; if (m->m_pkthdr.rcvif != NULL) { /* XXX: This may not be the outgoing interface */ hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim; } else hlim = V_ip6_defhlim; } if (ia != NULL) ifa_free(&ia->ia_ifa); } if (srcp == NULL) { int error; struct in6_addr dst6; uint32_t scopeid; /* * This case matches to multicasts, our anycast, or unicasts * that we do not own. Select a source address based on the * source address of the erroneous packet. */ in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid); error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, scopeid, NULL, &src6, &hlim); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "icmp6_reflect: source can't be determined: " "dst=%s, error=%d\n", ip6_sprintf(ip6buf, &ip6->ip6_dst), error)); goto bad; } srcp = &src6; } /* * ip6_input() drops a packet if its src is multicast. * So, the src is never multicast. */ ip6->ip6_dst = ip6->ip6_src; ip6->ip6_src = *srcp; ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = hlim; icmp6->icmp6_cksum = 0; icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), plen); /* * XXX option handling */ m->m_flags &= ~(M_BCAST|M_MCAST); ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) icmp6_ifoutstat_inc(outif, type, code); return; bad: m_freem(m); return; } void icmp6_fasttimo(void) { mld_fasttimo(); } void icmp6_slowtimo(void) { mld_slowtimo(); } static const char * icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6, struct in6_addr *tgt6) { static char buf[1024]; char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; char ip6buft[INET6_ADDRSTRLEN]; snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)", ip6_sprintf(ip6bufs, src6), ip6_sprintf(ip6bufd, dst6), ip6_sprintf(ip6buft, tgt6)); return buf; } void icmp6_redirect_input(struct mbuf *m, int off) { struct ifnet *ifp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_redirect *nd_rd; int icmp6len = ntohs(ip6->ip6_plen); char *lladdr = NULL; int lladdrlen = 0; int is_router; int is_onlink; struct in6_addr src6 = ip6->ip6_src; struct in6_addr redtgt6; struct in6_addr reddst6; union nd_opts ndopts; char ip6buf[INET6_ADDRSTRLEN]; M_ASSERTPKTHDR(m); KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: no rcvif", __func__)); ifp = m->m_pkthdr.rcvif; /* XXX if we are router, we don't update route by icmp6 redirect */ if (V_ip6_forwarding) goto freeit; if (!V_icmp6_rediraccept) goto freeit; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len); if (nd_rd == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif redtgt6 = nd_rd->nd_rd_target; reddst6 = nd_rd->nd_rd_dst; if (in6_setscope(&redtgt6, m->m_pkthdr.rcvif, NULL) || in6_setscope(&reddst6, m->m_pkthdr.rcvif, NULL)) { goto freeit; } /* validation */ if (!IN6_IS_ADDR_LINKLOCAL(&src6)) { nd6log((LOG_ERR, "ICMP6 redirect sent from %s rejected; " "must 
be from linklocal\n", ip6_sprintf(ip6buf, &src6))); goto bad; } if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "ICMP6 redirect sent from %s rejected; " "hlim=%d (must be 255)\n", ip6_sprintf(ip6buf, &src6), ip6->ip6_hlim)); goto bad; } { /* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */ struct nhop6_basic nh6; struct in6_addr kdst; uint32_t scopeid; in6_splitscope(&reddst6, &kdst, &scopeid); if (fib6_lookup_nh_basic(RT_DEFAULT_FIB, &kdst, scopeid, 0, 0,&nh6)==0){ if ((nh6.nh_flags & NHF_GATEWAY) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; no route " "with inet6 gateway found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } if (IN6_ARE_ADDR_EQUAL(&src6, &nh6.nh_addr) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "not equal to gw-for-src=%s (must be same): " "%s\n", ip6_sprintf(ip6buf, &nh6.nh_addr), icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } } else { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "no route found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } } if (IN6_IS_ADDR_MULTICAST(&reddst6)) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "redirect dst must be unicast: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } is_router = is_onlink = 0; if (IN6_IS_ADDR_LINKLOCAL(&redtgt6)) is_router = 1; /* router case */ if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0) is_onlink = 1; /* on-link destination case */ if (!is_router && !is_onlink) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "neither router case nor onlink case: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } icmp6len -= sizeof(*nd_rd); nd6_option_init(nd_rd + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "%s: invalid ND option, rejected: %s\n", __func__, icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_tgt_lladdr) { lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1); lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "%s: lladdrlen mismatch for %s " "(if %d, icmp6 packet %d): %s\n", __func__, ip6_sprintf(ip6buf, &redtgt6), ifp->if_addrlen, lladdrlen - 2, icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } /* Validation passed. */ /* RFC 2461 8.3 */ nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT, is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER); /* * Install a gateway route in the better-router case or an interface * route in the on-link-destination case. 
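 * For example, when the redirect target equals the redirect
 * destination (the on-link case), an RTF_HOST route through the
 * receiving interface's own address is installed; when the target is
 * a link-local router address, an RTF_HOST|RTF_GATEWAY route via that
 * router is installed instead.  The route is added to every FIB via
 * the rt_numfibs loop below.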
*/ { struct sockaddr_in6 sdst; struct sockaddr_in6 sgw; struct sockaddr_in6 ssrc; struct sockaddr *gw; int rt_flags; u_int fibnum; bzero(&sdst, sizeof(sdst)); bzero(&ssrc, sizeof(ssrc)); sdst.sin6_family = ssrc.sin6_family = AF_INET6; sdst.sin6_len = ssrc.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr)); rt_flags = RTF_HOST; if (is_router) { bzero(&sgw, sizeof(sgw)); sgw.sin6_family = AF_INET6; sgw.sin6_len = sizeof(struct sockaddr_in6); bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr)); gw = (struct sockaddr *)&sgw; rt_flags |= RTF_GATEWAY; } else gw = ifp->if_addr->ifa_addr; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) in6_rtredirect((struct sockaddr *)&sdst, gw, (struct sockaddr *)NULL, rt_flags, (struct sockaddr *)&ssrc, fibnum); } /* finally update cached route in each socket via pfctlinput */ { struct sockaddr_in6 sdst; bzero(&sdst, sizeof(sdst)); sdst.sin6_family = AF_INET6; sdst.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst); } freeit: m_freem(m); return; bad: ICMP6STAT_INC(icp6s_badredirect); m_freem(m); } void icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) { struct ifnet *ifp; /* my outgoing interface */ struct in6_addr *ifp_ll6; struct in6_addr *router_ll6; struct ip6_hdr *sip6; /* m0 as struct ip6_hdr */ struct mbuf *m = NULL; /* newly allocated one */ struct m_tag *mtag; struct ip6_hdr *ip6; /* m as struct ip6_hdr */ struct nd_redirect *nd_rd; struct llentry *ln = NULL; size_t maxlen; u_char *p; struct ifnet *outif = NULL; struct sockaddr_in6 src_sa; icmp6_errcount(ND_REDIRECT, 0); /* if we are not router, we don't send icmp6 redirect */ if (!V_ip6_forwarding) goto fail; /* sanity check */ if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp)) goto fail; /* * Address check: * the source address must identify a neighbor, and * the destination address must not be a multicast address * [RFC 2461, sec 8.2] */ sip6 = mtod(m0, struct ip6_hdr *); bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = sip6->ip6_src; if (nd6_is_addr_neighbor(&src_sa, ifp) == 0) goto fail; if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst)) goto fail; /* what should we do here? */ /* rate limit */ if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0)) goto fail; /* * Since we are going to append up to 1280 bytes (= IPV6_MMTU), * we almost always ask for an mbuf cluster for simplicity. * (MHLEN < IPV6_MMTU is almost always true) */ #if IPV6_MMTU >= MCLBYTES # error assumption failed about IPV6_MMTU and MCLBYTES #endif m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) goto fail; M_SETFIB(m, rt->rt_fibnum); maxlen = M_TRAILINGSPACE(m); maxlen = min(IPV6_MMTU, maxlen); /* just for safety */ if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) { goto fail; } { /* get ip6 linklocal address for ifp(my outgoing interface). */ struct in6_ifaddr *ia; if ((ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY| IN6_IFF_ANYCAST)) == NULL) goto fail; ifp_ll6 = &ia->ia_addr.sin6_addr; /* XXXRW: reference released prematurely. */ ifa_free(&ia->ia_ifa); } /* get ip6 linklocal address for the router. 
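 * (Only a link-local address may be advertised as the redirect target
 * in the better-router case; if the gateway's address is not
 * link-local, router_ll6 is cleared below and the better-router
 * redirect is abandoned.)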
*/ if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)rt->rt_gateway; router_ll6 = &sin6->sin6_addr; if (!IN6_IS_ADDR_LINKLOCAL(router_ll6)) router_ll6 = (struct in6_addr *)NULL; } else router_ll6 = (struct in6_addr *)NULL; /* ip6 */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; /* ip6->ip6_plen will be set later */ ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; /* ip6->ip6_src must be linklocal addr for my outgoing if. */ bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr)); bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr)); /* ND Redirect */ nd_rd = (struct nd_redirect *)(ip6 + 1); nd_rd->nd_rd_type = ND_REDIRECT; nd_rd->nd_rd_code = 0; nd_rd->nd_rd_reserved = 0; if (rt->rt_flags & RTF_GATEWAY) { /* * nd_rd->nd_rd_target must be a link-local address in * better router cases. */ if (!router_ll6) goto fail; bcopy(router_ll6, &nd_rd->nd_rd_target, sizeof(nd_rd->nd_rd_target)); bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } else { /* make sure redtgt == reddst */ bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target, sizeof(nd_rd->nd_rd_target)); bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } p = (u_char *)(nd_rd + 1); if (!router_ll6) goto nolladdropt; { /* target lladdr option */ int len; struct nd_opt_hdr *nd_opt; char *lladdr; IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(router_ll6, 0, ifp); IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) goto nolladdropt; len = sizeof(*nd_opt) + ifp->if_addrlen; len = (len + 7) & ~7; /* round by 8 */ /* safety check */ if (len + (p - (u_char *)ip6) > maxlen) goto nolladdropt; if (ln->la_flags & LLE_VALID) { nd_opt = (struct nd_opt_hdr *)p; nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = len >> 3; lladdr = (char *)(nd_opt + 1); bcopy(ln->ll_addr, lladdr, ifp->if_addrlen); p += len; } } nolladdropt: if (ln != NULL) LLE_RUNLOCK(ln); m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; /* just to be safe */ #ifdef M_DECRYPTED /*not openbsd*/ if (m0->m_flags & M_DECRYPTED) goto noredhdropt; #endif if (p - (u_char *)ip6 > maxlen) goto noredhdropt; { /* redirected header option */ int len; struct nd_opt_rd_hdr *nd_opt_rh; /* * compute the maximum size for icmp6 redirect header option. * XXX room for auth header? */ len = maxlen - (p - (u_char *)ip6); len &= ~7; /* This is just for simplicity. */ if (m0->m_pkthdr.len != m0->m_len) { if (m0->m_next) { m_freem(m0->m_next); m0->m_next = NULL; } m0->m_pkthdr.len = m0->m_len; } /* * Redirected header option spec (RFC2461 4.6.3) talks nothing * about padding/truncate rule for the original IP packet. * From the discussion on IPv6imp in Feb 1999, * the consensus was: * - "attach as much as possible" is the goal * - pad if not aligned (original size can be guessed by * original ip6 header) * Following code adds the padding if it is simple enough, * and truncates if not. 
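 * A rough example: a 41-byte original packet is padded to 48 bytes
 * when at least 7 bytes of trailing space are available in m0, and
 * truncated to 40 bytes otherwise, so the redirected header option
 * remains a multiple of 8 bytes.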
*/ if (m0->m_next || m0->m_pkthdr.len != m0->m_len) panic("assumption failed in %s:%d", __FILE__, __LINE__); if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) { /* not enough room, truncate */ m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh); } else { /* enough room, pad or truncate */ size_t extra; extra = m0->m_pkthdr.len % 8; if (extra) { /* pad if easy enough, truncate if not */ if (8 - extra <= M_TRAILINGSPACE(m0)) { /* pad */ m0->m_len += (8 - extra); m0->m_pkthdr.len += (8 - extra); } else { /* truncate */ m0->m_pkthdr.len -= extra; m0->m_len -= extra; } } len = m0->m_pkthdr.len + sizeof(*nd_opt_rh); m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh); } nd_opt_rh = (struct nd_opt_rd_hdr *)p; bzero(nd_opt_rh, sizeof(*nd_opt_rh)); nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER; nd_opt_rh->nd_opt_rh_len = len >> 3; p += sizeof(*nd_opt_rh); m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; /* connect m0 to m */ m_tag_delete_chain(m0, NULL); m0->m_flags &= ~M_PKTHDR; m->m_next = m0; m->m_pkthdr.len = m->m_len + m0->m_len; m0 = NULL; } noredhdropt:; if (m0) { m_freem(m0); m0 = NULL; } /* XXX: clear embedded link IDs in the inner header */ in6_clearscope(&sip6->ip6_src); in6_clearscope(&sip6->ip6_dst); in6_clearscope(&nd_rd->nd_rd_target); in6_clearscope(&nd_rd->nd_rd_dst); ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); nd_rd->nd_rd_cksum = 0; nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen)); if (send_sendso_input_hook != NULL) { mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto fail; *(unsigned short *)(mtag + 1) = nd_rd->nd_rd_type; m_tag_prepend(m, mtag); } /* send the packet to outside... */ ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) { icmp6_ifstat_inc(outif, ifs6_out_msg); icmp6_ifstat_inc(outif, ifs6_out_redirect); } ICMP6STAT_INC(icp6s_outhist[ND_REDIRECT]); return; fail: if (m) m_freem(m); if (m0) m_freem(m0); } /* * ICMPv6 socket option processing. */ int icmp6_ctloutput(struct socket *so, struct sockopt *sopt) { int error = 0; int optlen; struct inpcb *inp = sotoinpcb(so); int level, op, optname; if (sopt) { level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; } else level = op = optname = optlen = 0; if (level != IPPROTO_ICMPV6) { return EINVAL; } switch (op) { case PRCO_SETOPT: switch (optname) { case ICMP6_FILTER: { struct icmp6_filter ic6f; if (optlen != sizeof(ic6f)) { error = EMSGSIZE; break; } error = sooptcopyin(sopt, &ic6f, optlen, optlen); if (error == 0) { INP_WLOCK(inp); *inp->in6p_icmp6filt = ic6f; INP_WUNLOCK(inp); } break; } default: error = ENOPROTOOPT; break; } break; case PRCO_GETOPT: switch (optname) { case ICMP6_FILTER: { struct icmp6_filter ic6f; INP_RLOCK(inp); ic6f = *inp->in6p_icmp6filt; INP_RUNLOCK(inp); error = sooptcopyout(sopt, &ic6f, sizeof(ic6f)); break; } default: error = ENOPROTOOPT; break; } break; } return (error); } /* * Perform rate limit check. * Returns 0 if it is okay to send the icmp6 packet. * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate * limitation. * * XXX per-destination/type check necessary? 
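 * For example, ppsratecheck() admits at most V_icmp6errppslim error
 * packets per second; once that rate is exceeded this function
 * returns non-zero and the caller refrains from sending the error.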
* * dst - not used at this moment * type - not used at this moment * code - not used at this moment */ static int icmp6_ratelimit(const struct in6_addr *dst, const int type, const int code) { int ret; ret = 0; /* okay to send */ /* PPS limit */ if (!ppsratecheck(&V_icmp6errppslim_last, &V_icmp6errpps_count, V_icmp6errppslim)) { /* The packet is subject to rate limit */ ret++; } return ret; } Index: user/alc/PQ_LAUNDRY/sys/netinet6/ip6_output.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/netinet6/ip6_output.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/netinet6/ip6_output.c (revision 303642) @@ -1,3072 +1,3077 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_sctp.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #include #include #endif /* IPSEC */ #ifdef SCTP #include #include #endif #include #include #ifdef FLOWTABLE #include #endif extern int in6_mcast_loop; struct ip6_exthdrs { struct mbuf *ip6e_ip6; struct mbuf *ip6e_hbh; struct mbuf *ip6e_dest1; struct mbuf *ip6e_rthdr; struct mbuf *ip6e_dest2; }; static MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options"); static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, struct ucred *, int); static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *, struct sockopt *); static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *); static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, struct ucred *, int, int, int); static int ip6_copyexthdr(struct mbuf **, caddr_t, int); static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, struct ip6_frag **); static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); static int ip6_getpmtu(struct route_in6 *, int, - struct ifnet *, const struct in6_addr *, u_long *, int *, u_int); + struct ifnet *, const struct in6_addr *, u_long *, int *, u_int, + u_int); static int ip6_calcmtu(struct ifnet *, const struct in6_addr *, u_long, - u_long *, int *); + u_long *, int *, u_int); static int ip6_getpmtu_ctl(u_int, const struct in6_addr *, u_long *); static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); /* * Make an extension header from option data. hp is the source, and * mp is the destination. */ #define MAKE_EXTHDR(hp, mp) \ do { \ if (hp) { \ struct ip6_ext *eh = (struct ip6_ext *)(hp); \ error = ip6_copyexthdr((mp), (caddr_t)(hp), \ ((eh)->ip6e_len + 1) << 3); \ if (error) \ goto freehdrs; \ } \ } while (/*CONSTCOND*/ 0) /* * Form a chain of extension headers. * m is the extension header mbuf * mp is the previous mbuf in the chain * p is the next header * i is the type of option. 
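 * For instance, chaining the routing header with i == IPPROTO_ROUTING
 * copies the current *p into the new header's next-header byte, stores
 * IPPROTO_ROUTING through p, points p at the new header, and splices
 * the mbuf into the chain immediately after mp.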
*/ #define MAKE_CHAIN(m, mp, p, i)\ do {\ if (m) {\ if (!hdrsplit) \ panic("assumption failed: hdr not split"); \ *mtod((m), u_char *) = *(p);\ *(p) = (i);\ p = mtod((m), u_char *);\ (m)->m_next = (mp)->m_next;\ (mp)->m_next = (m);\ (mp) = (m);\ }\ } while (/*CONSTCOND*/ 0) void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) { u_short csum; csum = in_cksum_skip(m, offset + plen, offset); if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0) csum = 0xffff; offset += m->m_pkthdr.csum_data; /* checksum offset */ if (offset + sizeof(u_short) > m->m_len) { printf("%s: delayed m_pullup, m->len: %d plen %u off %u " "csum_flags=%b\n", __func__, m->m_len, plen, offset, (int)m->m_pkthdr.csum_flags, CSUM_BITS); /* * XXX this should not happen, but if it does, the correct * behavior may be to insert the checksum in the appropriate * next mbuf in the chain. */ return; } *(u_short *)(m->m_data + offset) = csum; } int ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto, int mtu, uint32_t id) { struct mbuf *m, **mnext, *m_frgpart; struct ip6_hdr *ip6, *mhip6; struct ip6_frag *ip6f; int off; int error; int tlen = m0->m_pkthdr.len; m = m0; ip6 = mtod(m, struct ip6_hdr *); mnext = &m->m_nextpkt; for (off = hlen; off < tlen; off += mtu) { m = m_gethdr(M_NOWAIT, MT_DATA); if (!m) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m->m_flags = m0->m_flags & M_COPYFLAGS; *mnext = m; mnext = &m->m_nextpkt; m->m_data += max_linkhdr; mhip6 = mtod(m, struct ip6_hdr *); *mhip6 = *ip6; m->m_len = sizeof(*mhip6); error = ip6_insertfraghdr(m0, m, hlen, &ip6f); if (error) { IP6STAT_INC(ip6s_odropped); return (error); } ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); if (off + mtu >= tlen) mtu = tlen - off; else ip6f->ip6f_offlg |= IP6F_MORE_FRAG; mhip6->ip6_plen = htons((u_short)(mtu + hlen + sizeof(*ip6f) - sizeof(struct ip6_hdr))); if ((m_frgpart = m_copy(m0, off, mtu)) == NULL) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m_cat(m, m_frgpart); m->m_pkthdr.len = mtu + hlen + sizeof(*ip6f); m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum; m->m_pkthdr.rcvif = NULL; ip6f->ip6f_reserved = 0; ip6f->ip6f_ident = id; ip6f->ip6f_nxt = nextproto; IP6STAT_INC(ip6s_ofragments); in6_ifstat_inc(ifp, ifs6_out_fragcreat); } return (0); } /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). * This function may modify ver and hlim only. * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * If route_in6 ro is present and has ro_rt initialized, route lookup would be * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, * then result of route lookup is stored in ro->ro_rt. * * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, * which is rt_mtu. * * ifpp - XXX: just for statistics */ /* * XXX TODO: no flowid is assigned for outbound flows? 
*/ int ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, struct inpcb *inp) { struct ip6_hdr *ip6; struct ifnet *ifp, *origifp; struct mbuf *m = m0; struct mbuf *mprev = NULL; int hlen, tlen, len; struct route_in6 ip6route; struct rtentry *rt = NULL; struct sockaddr_in6 *dst, src_sa, dst_sa; struct in6_addr odst; int error = 0; struct in6_ifaddr *ia = NULL; u_long mtu; int alwaysfrag, dontfrag; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; struct in6_addr src0, dst0; u_int32_t zone; struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; int sw_csum, tso; int needfiblookup; uint32_t fibnum; struct m_tag *fwd_tag = NULL; uint32_t id; if (inp != NULL) { M_SETFIB(m, inp->inp_inc.inc_fibnum); if ((flags & IP_NODEFAULTFLOWID) == 0) { /* unconditionally set flowid */ m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } } bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); /* Destination options header(1st part) */ if (opt->ip6po_rthdr) { /* * Destination options header(1st part) * This only makes sense with a routing header. * See Section 9.2 of RFC 3542. * Disabling this part just for MIP6 convenience is * a bad idea. We need to think carefully about a * way to make the advanced API coexist with MIP6 * options, which might automatically be inserted in * the kernel. */ MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); } /* Routing header */ MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); /* Destination options header(2nd part) */ MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); } #ifdef IPSEC /* * IPSec checking which handles several cases. * FAST IPSEC: We re-injected the packet. * XXX: need scope argument. */ switch(ip6_ipsec_output(&m, inp, &error)) { case 1: /* Bad packet */ goto freehdrs; case -1: /* IPSec done */ goto done; case 0: /* No IPSec */ default: break; } #endif /* IPSEC */ /* * Calculate the total length of the extension header chain. * Keep the length of the unfragmentable part for fragmentation. */ optlen = 0; if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len; if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len; if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len; unfragpartlen = optlen + sizeof(struct ip6_hdr); /* NOTE: we don't add AH/ESP length here (done in ip6_ipsec_output) */ if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len; /* * If there is at least one extension header, * separate IP6 header from the payload. */ if (optlen && !hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } ip6 = mtod(m, struct ip6_hdr *); /* adjust mbuf packet header length */ m->m_pkthdr.len += optlen; plen = m->m_pkthdr.len - sizeof(*ip6); /* If this is a jumbo payload, insert a jumbo payload option. */ if (plen > IPV6_MAXPACKET) { if (!hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) goto freehdrs; ip6->ip6_plen = 0; } else ip6->ip6_plen = htons(plen); /* * Concatenate headers and fill in next header fields. * Here we have, on "m" * IPv6 payload * and we insert headers accordingly. 
Finally, we should be getting: * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] * * during the header composing process, "m" points to IPv6 header. * "mprev" points to an extension header prior to esp. */ u_char *nexthdrp = &ip6->ip6_nxt; mprev = m; /* * we treat dest2 specially. this makes IPsec processing * much easier. the goal here is to make mprev point the * mbuf prior to dest2. * * result: IPv6 dest2 payload * m and mprev will point to IPv6 header. */ if (exthdrs.ip6e_dest2) { if (!hdrsplit) panic("assumption failed: hdr not split"); exthdrs.ip6e_dest2->m_next = m->m_next; m->m_next = exthdrs.ip6e_dest2; *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_DSTOPTS; } /* * result: IPv6 hbh dest1 rthdr dest2 payload * m will point to IPv6 header. mprev will point to the * extension header prior to dest2 (rthdr in the above case). */ MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS); MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); /* * If there is a routing header, discard the packet. */ if (exthdrs.ip6e_rthdr) { error = EINVAL; goto bad; } /* Source address validation */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && (flags & IPV6_UNSPECSRC) == 0) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } IP6STAT_INC(ip6s_localout); /* * Route packet. */ if (ro == NULL) { ro = &ip6route; bzero((caddr_t)ro, sizeof(*ro)); } else ro->ro_flags |= RT_LLE_CACHE; ro_pmtu = ro; if (opt && opt->ip6po_rthdr) ro = &opt->ip6po_route; dst = (struct sockaddr_in6 *)&ro->ro_dst; #ifdef FLOWTABLE if (ro->ro_rt == NULL) (void )flowtable_lookup(AF_INET6, m, (struct route *)ro); #endif fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); again: /* * if specified, try to fill in the traffic class field. * do not override if a non-zero value is already set. * we check the diffserv field and the ecn field separately. */ if (opt && opt->ip6po_tclass >= 0) { int mask = 0; if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) mask |= 0xfc; if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) mask |= 0x03; if (mask != 0) ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20); } /* fill in or override the hop limit field, if necessary. */ if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (im6o != NULL) ip6->ip6_hlim = im6o->im6o_multicast_hlim; else ip6->ip6_hlim = V_ip6_defmcasthlim; } /* * Validate route against routing table additions; * a better/more specific route might have been added. * Make sure address family is set in route. */ if (inp) { ro->ro_dst.sin6_family = AF_INET6; RT_VALIDATE((struct route *)ro, &inp->inp_rt_cookie, fibnum); } if (ro->ro_rt && fwd_tag == NULL && (ro->ro_rt->rt_flags & RTF_UP) && ro->ro_dst.sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) { rt = ro->ro_rt; ifp = ro->ro_rt->rt_ifp; } else { if (fwd_tag == NULL) { bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; } error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp, &rt, fibnum); if (error != 0) { if (ifp != NULL) in6_ifstat_inc(ifp, ifs6_out_discard); goto bad; } } if (rt == NULL) { /* * If in6_selectroute() does not return a route entry, * dst may not have been updated. 
*/ *dst = dst_sa; /* XXX */ } /* * then rt (for unicast) and ifp must be non-NULL valid values. */ if ((flags & IPV6_FORWARDING) == 0) { /* XXX: the FORWARDING flag can be set for mrouting. */ in6_ifstat_inc(ifp, ifs6_out_request); } if (rt != NULL) { ia = (struct in6_ifaddr *)(rt->rt_ifa); counter_u64_add(rt->rt_pksent, 1); } /* * The outgoing interface must be in the zone of source and * destination addresses. */ origifp = ifp; src0 = ip6->ip6_src; if (in6_setscope(&src0, origifp, &zone)) goto badscope; bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = ip6->ip6_src; if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id) goto badscope; dst0 = ip6->ip6_dst; if (in6_setscope(&dst0, origifp, &zone)) goto badscope; /* re-initialize to be sure */ bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) { goto badscope; } /* We should use ia_ifp to support the case of * sending packets to an address of our own. */ if (ia != NULL && ia->ia_ifp) ifp = ia->ia_ifp; /* scope check is done. */ goto routefound; badscope: IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(origifp, ifs6_out_discard); if (error == 0) error = EHOSTUNREACH; /* XXX */ goto bad; routefound: if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (opt && opt->ip6po_nextroute.ro_rt) { /* * The nexthop is explicitly specified by the * application. We assume the next hop is an IPv6 * address. */ dst = (struct sockaddr_in6 *)opt->ip6po_nexthop; } else if ((rt->rt_flags & RTF_GATEWAY)) dst = (struct sockaddr_in6 *)rt->rt_gateway; } if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ } else { m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; in6_ifstat_inc(ifp, ifs6_out_mcast); /* * Confirm that the outgoing interface supports multicast. */ if (!(ifp->if_flags & IFF_MULTICAST)) { IP6STAT_INC(ip6s_noroute); in6_ifstat_inc(ifp, ifs6_out_discard); error = ENETUNREACH; goto bad; } if ((im6o == NULL && in6_mcast_loop) || (im6o && im6o->im6o_multicast_loop)) { /* * Loop back multicast datagram if not expressly * forbidden to do so, even if we have not joined * the address; protocols will filter it later, * thus deferring a hash lookup and lock acquisition * at the expense of an m_copym(). */ ip6_mloopback(ifp, m); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. The multicast forwarding function * recursively calls this function, using the * IPV6_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip6_mloopback(), * above, will be forwarded by the ip6_input() routine, * if necessary. */ if (V_ip6_mrouter && (flags & IPV6_FORWARDING) == 0) { /* * XXX: ip6_mforward expects that rcvif is NULL * when it is called from the originating path. * However, it may not always be the case. */ m->m_pkthdr.rcvif = NULL; if (ip6_mforward(ip6, ifp, m) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a hoplimit of zero may be looped back, * above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip6_mloopback() will * loop back a copy if this host actually belongs to the * destination group on the loopback interface. 
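 * For example, a datagram addressed to an interface-local group such
 * as ff01::1 may be looped back by ip6_mloopback() above, but is
 * freed here rather than handed to the output interface.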
*/ if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) { m_freem(m); goto done; } } /* * Fill the outgoing inteface to tell the upper layer * to increment per-interface statistics. */ if (ifpp) *ifpp = ifp; /* Determine path MTU. */ if ((error = ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &ip6->ip6_dst, - &mtu, &alwaysfrag, fibnum)) != 0) + &mtu, &alwaysfrag, fibnum, *nexthdrp)) != 0) goto bad; /* * The caller of this function may specify to use the minimum MTU * in some cases. * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU * setting. The logic is a bit complicated; by default, unicast * packets will follow path MTU while multicast packets will be sent at * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets * including unicast ones will be sent at the minimum MTU. Multicast * packets will always be sent at the minimum MTU unless * IP6PO_MINMTU_DISABLE is explicitly specified. * See RFC 3542 for more details. */ if (mtu > IPV6_MMTU) { if ((flags & IPV6_MINMTU)) mtu = IPV6_MMTU; else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) mtu = IPV6_MMTU; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && (opt == NULL || opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) { mtu = IPV6_MMTU; } } /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); /* * If the outgoing packet contains a hop-by-hop options header, * it must be examined and processed even by the source node. * (RFC 2460, section 4.) */ if (exthdrs.ip6e_hbh) { struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); u_int32_t dummy; /* XXX unused */ u_int32_t plen = 0; /* XXX: ip6_process will check the value */ #ifdef DIAGNOSTIC if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) panic("ip6e_hbh is not contiguous"); #endif /* * XXX: if we have to send an ICMPv6 error to the sender, * we need the M_LOOP flag since icmp6_error() expects * the IPv6 and the hop-by-hop options header are * contiguous unless the flag is set. */ m->m_flags |= M_LOOP; m->m_pkthdr.rcvif = ifp; if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), &dummy, &plen) < 0) { /* m was already freed at this point */ error = EINVAL;/* better error? */ goto done; } m->m_flags &= ~M_LOOP; /* XXX */ m->m_pkthdr.rcvif = NULL; } /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet6_pfil_hook)) goto passout; odst = ip6->ip6_dst; /* Run through list of hooks for output packets. */ error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp); if (error != 0 || m == NULL) goto done; /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); needfiblookup = 0; /* See if destination IP address was changed by packet filter. */ if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) { m->m_flags |= M_SKIP_FIREWALL; /* If destination is now ourself drop to ip6_input(). */ if (in6_localip(&ip6->ip6_dst)) { m->m_flags |= M_FASTFWD_OURS; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } else { RO_RTFREE(ro); needfiblookup = 1; /* Redo the routing table lookup. 
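 * (The packet filter may have rewritten ip6_dst, so the cached route
 * no longer matches; the "again" label re-runs route and interface
 * selection for the new destination.)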
*/ } } /* See if fib was changed by packet filter. */ if (fibnum != M_GETFIB(m)) { m->m_flags |= M_SKIP_FIREWALL; fibnum = M_GETFIB(m); RO_RTFREE(ro); needfiblookup = 1; } if (needfiblookup) goto again; /* See if local, if yes, send it to netisr. */ if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } /* Or forward to some other address? */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { dst = (struct sockaddr_in6 *)&ro->ro_dst; bcopy((fwd_tag+1), &dst_sa, sizeof(struct sockaddr_in6)); m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP6_NEXTHOP; m_tag_delete(m, fwd_tag); goto again; } passout: /* * Send the packet to the outgoing interface. * If necessary, do IPv6 fragmentation before sending. * * the logic here is rather complex: * 1: normal case (dontfrag == 0, alwaysfrag == 0) * 1-a: send as is if tlen <= path mtu * 1-b: fragment if tlen > path mtu * * 2: if user asks us not to fragment (dontfrag == 1) * 2-a: send as is if tlen <= interface mtu * 2-b: error if tlen > interface mtu * * 3: if we always need to attach fragment header (alwaysfrag == 1) * always fragment * * 4: if dontfrag == 1 && alwaysfrag == 1 * error, as we cannot handle this conflicting request */ sw_csum = m->m_pkthdr.csum_flags; if (!hdrsplit) { tso = ((sw_csum & ifp->if_hwassist & CSUM_TSO) != 0) ? 1 : 0; sw_csum &= ~ifp->if_hwassist; } else tso = 0; /* * If we added extension headers, we will not do TSO and calculate the * checksums ourselves for now. * XXX-BZ Need a framework to know when the NIC can handle it, even * with ext. hdrs. */ if (sw_csum & CSUM_DELAY_DATA_IPV6) { sw_csum &= ~CSUM_DELAY_DATA_IPV6; in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr)); } #ifdef SCTP if (sw_csum & CSUM_SCTP_IPV6) { sw_csum &= ~CSUM_SCTP_IPV6; sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); } #endif m->m_pkthdr.csum_flags &= ifp->if_hwassist; tlen = m->m_pkthdr.len; if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso) dontfrag = 1; else dontfrag = 0; if (dontfrag && alwaysfrag) { /* case 4 */ /* conflicting request - can't transmit */ error = EMSGSIZE; goto bad; } if (dontfrag && tlen > IN6_LINKMTU(ifp) && !tso) { /* case 2-b */ /* * Even if the DONTFRAG option is specified, we cannot send the * packet when the data length is larger than the MTU of the * outgoing interface. * Notify the error by sending IPV6_PATHMTU ancillary data if * application wanted to know the MTU value. Also return an * error code (this is not described in the API spec). */ if (inp != NULL) ip6_notify_pmtu(inp, &dst_sa, (u_int32_t)mtu); error = EMSGSIZE; goto bad; } /* * transmit packet without fragmentation */ if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */ struct in6_ifaddr *ia6; ip6 = mtod(m, struct ip6_hdr *); ia6 = in6_ifawithifp(ifp, &ip6->ip6_src); if (ia6) { /* Record statistics for this interface address. */ counter_u64_add(ia6->ia_ifa.ifa_opackets, 1); counter_u64_add(ia6->ia_ifa.ifa_obytes, m->m_pkthdr.len); ifa_free(&ia6->ia_ifa); } error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); goto done; } /* * try to fragment the packet. 
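* Each fragment carries a payload that is a multiple of 8 bytes; for example, with a 1500-byte path MTU and only the 40-byte IPv6 header in the unfragmentable part, len = (1500 - 40 - 8) & ~7 = 1448 bytes per fragment (the final fragment may be shorter).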
case 1-b and 3 */ if (mtu < IPV6_MMTU) { /* path MTU cannot be less than IPV6_MMTU */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else { u_char nextproto; /* * Too large for the destination or interface; * fragment if possible. * Must be able to put at least 8 bytes per fragment. */ hlen = unfragpartlen; if (mtu > IPV6_MAXPACKET) mtu = IPV6_MAXPACKET; len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; if (len < 8) { error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } /* * If the interface will not calculate checksums on * fragmented packets, then do it here. * XXX-BZ handle the hw offloading case. Need flags. */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { in6_delayed_cksum(m, plen, hlen); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { sctp_delayed_cksum(m, hlen); m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; } #endif /* * Change the next header field of the last header in the * unfragmentable part. */ if (exthdrs.ip6e_rthdr) { nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_dest1) { nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_hbh) { nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; } else { nextproto = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_FRAGMENT; } /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto * chain. */ m0 = m; id = htonl(ip6_randomid()); if ((error = ip6_fragment(ifp, m, hlen, nextproto, len, id))) goto sendorfree; in6_ifstat_inc(ifp, ifs6_out_fragok); } /* * Remove leading garbages. */ sendorfree: m = m0->m_nextpkt; m0->m_nextpkt = 0; m_freem(m0); for (m0 = m; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { /* Record statistics for this interface address. */ if (ia) { counter_u64_add(ia->ia_ifa.ifa_opackets, 1); counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); } else m_freem(m); } if (error == 0) IP6STAT_INC(ip6s_fragmented); done: /* * Release the route if using our private route, or if * (with flowtable) we don't have our own reference. */ if (ro == &ip6route || ro->ro_flags & RT_NORTREF) RO_RTFREE(ro); return (error); freehdrs: m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */ m_freem(exthdrs.ip6e_dest1); m_freem(exthdrs.ip6e_rthdr); m_freem(exthdrs.ip6e_dest2); /* FALLTHROUGH */ bad: if (m) m_freem(m); goto done; } static int ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen) { struct mbuf *m; if (hlen > MCLBYTES) return (ENOBUFS); /* XXX */ if (hlen > MLEN) m = m_getcl(M_NOWAIT, MT_DATA, 0); else m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_len = hlen; if (hdr) bcopy(hdr, mtod(m, caddr_t), hlen); *mp = m; return (0); } /* * Insert jumbo payload option. */ static int ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen) { struct mbuf *mopt; u_char *optbuf; u_int32_t v; #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ /* * If there is no hop-by-hop options header, allocate new one. 
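* (A single mbuf of JUMBOOPTLEN bytes is sufficient in that case.)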
* If there is one but it doesn't have enough space to store the * jumbo payload option, allocate a cluster to store the whole options. * Otherwise, use it to store the options. */ if (exthdrs->ip6e_hbh == NULL) { mopt = m_get(M_NOWAIT, MT_DATA); if (mopt == NULL) return (ENOBUFS); mopt->m_len = JUMBOOPTLEN; optbuf = mtod(mopt, u_char *); optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */ exthdrs->ip6e_hbh = mopt; } else { struct ip6_hbh *hbh; mopt = exthdrs->ip6e_hbh; if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) { /* * XXX assumption: * - exthdrs->ip6e_hbh is not referenced from places * other than exthdrs. * - exthdrs->ip6e_hbh is not an mbuf chain. */ int oldoptlen = mopt->m_len; struct mbuf *n; /* * XXX: give up if the whole (new) hbh header does * not fit even in an mbuf cluster. */ if (oldoptlen + JUMBOOPTLEN > MCLBYTES) return (ENOBUFS); /* * As a consequence, we must always prepare a cluster * at this point. */ n = m_getcl(M_NOWAIT, MT_DATA, 0); if (n == NULL) return (ENOBUFS); n->m_len = oldoptlen + JUMBOOPTLEN; bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), oldoptlen); optbuf = mtod(n, caddr_t) + oldoptlen; m_freem(mopt); mopt = exthdrs->ip6e_hbh = n; } else { optbuf = mtod(mopt, u_char *) + mopt->m_len; mopt->m_len += JUMBOOPTLEN; } optbuf[0] = IP6OPT_PADN; optbuf[1] = 1; /* * Adjust the header length according to the pad and * the jumbo payload option. */ hbh = mtod(mopt, struct ip6_hbh *); hbh->ip6h_len += (JUMBOOPTLEN >> 3); } /* fill in the option. */ optbuf[2] = IP6OPT_JUMBO; optbuf[3] = 4; v = (u_int32_t)htonl(plen + JUMBOOPTLEN); bcopy(&v, &optbuf[4], sizeof(u_int32_t)); /* finally, adjust the packet header length */ exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; return (0); #undef JUMBOOPTLEN } /* * Insert fragment header and copy unfragmentable header portions. */ static int ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, struct ip6_frag **frghdrp) { struct mbuf *n, *mlast; if (hlen > sizeof(struct ip6_hdr)) { n = m_copym(m0, sizeof(struct ip6_hdr), hlen - sizeof(struct ip6_hdr), M_NOWAIT); if (n == NULL) return (ENOBUFS); m->m_next = n; } else n = m; /* Search for the last mbuf of unfragmentable part. */ for (mlast = n; mlast->m_next; mlast = mlast->m_next) ; if (M_WRITABLE(mlast) && M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) { /* use the trailing space of the last mbuf for the fragment hdr */ *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len); mlast->m_len += sizeof(struct ip6_frag); m->m_pkthdr.len += sizeof(struct ip6_frag); } else { /* allocate a new mbuf for the fragment header */ struct mbuf *mfrg; mfrg = m_get(M_NOWAIT, MT_DATA); if (mfrg == NULL) return (ENOBUFS); mfrg->m_len = sizeof(struct ip6_frag); *frghdrp = mtod(mfrg, struct ip6_frag *); mlast->m_next = mfrg; } return (0); } /* * Calculates IPv6 path mtu for destination @dst. * Resulting MTU is stored in @mtup. * * Returns 0 on success. */ static int ip6_getpmtu_ctl(u_int fibnum, const struct in6_addr *dst, u_long *mtup) { struct nhop6_extended nh6; struct in6_addr kdst; uint32_t scopeid; struct ifnet *ifp; u_long mtu; int error; in6_splitscope(dst, &kdst, &scopeid); if (fib6_lookup_nh_ext(fibnum, &kdst, scopeid, NHR_REF, 0, &nh6) != 0) return (EHOSTUNREACH); ifp = nh6.nh_ifp; mtu = nh6.nh_mtu; - error = ip6_calcmtu(ifp, dst, mtu, mtup, NULL); + error = ip6_calcmtu(ifp, dst, mtu, mtup, NULL, 0); fib6_free_nh_ext(fibnum, &nh6); return (error); } /* * Calculates IPv6 path MTU for @dst based on transmit @ifp, * and cached data in @ro_pmtu. 
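* The @proto argument carries the upper-layer protocol; ip6_calcmtu() uses it to skip the TCP hostcache MTU lookup for TCP traffic, since TCP reacts to path-MTU changes on its own.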
* MTU from (successful) route lookup is saved (along with dst) * inside @ro_pmtu to avoid subsequent route lookups after packet * filter processing. * * Stores mtu and always-frag value into @mtup and @alwaysfragp. * Returns 0 on success. */ static int ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup, struct ifnet *ifp, const struct in6_addr *dst, u_long *mtup, - int *alwaysfragp, u_int fibnum) + int *alwaysfragp, u_int fibnum, u_int proto) { struct nhop6_basic nh6; struct in6_addr kdst; uint32_t scopeid; struct sockaddr_in6 *sa6_dst; u_long mtu; mtu = 0; if (do_lookup) { /* * Here ro_pmtu has final destination address, while * ro might represent immediate destination. * Use ro_pmtu destination since mtu might differ. */ sa6_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst; if (!IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst)) ro_pmtu->ro_mtu = 0; if (ro_pmtu->ro_mtu == 0) { bzero(sa6_dst, sizeof(*sa6_dst)); sa6_dst->sin6_family = AF_INET6; sa6_dst->sin6_len = sizeof(struct sockaddr_in6); sa6_dst->sin6_addr = *dst; in6_splitscope(dst, &kdst, &scopeid); if (fib6_lookup_nh_basic(fibnum, &kdst, scopeid, 0, 0, &nh6) == 0) ro_pmtu->ro_mtu = nh6.nh_mtu; } mtu = ro_pmtu->ro_mtu; } if (ro_pmtu->ro_rt) mtu = ro_pmtu->ro_rt->rt_mtu; - return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp)); + return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp, proto)); } /* * Calculate MTU based on transmit @ifp, route mtu @rt_mtu and * hostcache data for @dst. * Stores mtu and always-frag value into @mtup and @alwaysfragp. * * Returns 0 on success. */ static int ip6_calcmtu(struct ifnet *ifp, const struct in6_addr *dst, u_long rt_mtu, - u_long *mtup, int *alwaysfragp) + u_long *mtup, int *alwaysfragp, u_int proto) { u_long mtu = 0; int alwaysfrag = 0; int error = 0; if (rt_mtu > 0) { u_int32_t ifmtu; struct in_conninfo inc; bzero(&inc, sizeof(inc)); inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; ifmtu = IN6_LINKMTU(ifp); - mtu = tcp_hc_getmtu(&inc); + + /* TCP is known to react to pmtu changes so skip hc */ + if (proto != IPPROTO_TCP) + mtu = tcp_hc_getmtu(&inc); + if (mtu) mtu = min(mtu, rt_mtu); else mtu = rt_mtu; if (mtu == 0) mtu = ifmtu; else if (mtu < IPV6_MMTU) { /* * RFC2460 section 5, last paragraph: * if we record ICMPv6 too big message with * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU * or smaller, with framgent header attached. * (fragment header is needed regardless from the * packet size, for translators to identify packets) */ alwaysfrag = 1; mtu = IPV6_MMTU; } } else if (ifp) { mtu = IN6_LINKMTU(ifp); } else error = EHOSTUNREACH; /* XXX */ *mtup = mtu; if (alwaysfragp) *alwaysfragp = alwaysfrag; return (error); } /* * IP6 socket option processing. 
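* Reached via setsockopt(2)/getsockopt(2) on an IPv6 socket; a hypothetical caller would look like: int on = 1; setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));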
*/ int ip6_ctloutput(struct socket *so, struct sockopt *sopt) { int optdatalen, uproto; void *optdata; struct inpcb *in6p = sotoinpcb(so); int error, optval; int level, op, optname; int optlen; struct thread *td; #ifdef RSS uint32_t rss_bucket; int retval; #endif level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; td = sopt->sopt_td; error = 0; optval = 0; uproto = (int)so->so_proto->pr_protocol; if (level != IPPROTO_IPV6) { error = EINVAL; if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_dir == SOPT_SET) { switch (sopt->sopt_name) { case SO_REUSEADDR: INP_WLOCK(in6p); if ((so->so_options & SO_REUSEADDR) != 0) in6p->inp_flags2 |= INP_REUSEADDR; else in6p->inp_flags2 &= ~INP_REUSEADDR; INP_WUNLOCK(in6p); error = 0; break; case SO_REUSEPORT: INP_WLOCK(in6p); if ((so->so_options & SO_REUSEPORT) != 0) in6p->inp_flags2 |= INP_REUSEPORT; else in6p->inp_flags2 &= ~INP_REUSEPORT; INP_WUNLOCK(in6p); error = 0; break; case SO_SETFIB: INP_WLOCK(in6p); in6p->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(in6p); error = 0; break; default: break; } } } else { /* level == IPPROTO_IPV6 */ switch (op) { case SOPT_SET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif { struct mbuf *m; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; error = ip6_pcbopts(&in6p->in6p_outputopts, m, so, sopt); m_freem(m); /* XXX */ break; } /* * Use of some Hop-by-Hop options or some * Destination options, might require special * privilege. That is, normal applications * (without special privilege) might be forbidden * from setting certain options in outgoing packets, * and might never see certain options in received * packets. [RFC 2292 Section 6] * KAME specific note: * KAME prevents non-privileged users from sending or * receiving ANY hbh/dst options in order to avoid * overhead of parsing options in the kernel. */ case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) break; } /* FALLTHROUGH */ case IPV6_UNICAST_HOPS: case IPV6_HOPLIMIT: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_RECVTCLASS: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RECVRSSBUCKETID: #endif case IPV6_V6ONLY: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_BINDMULTI: #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: #endif if (optname == IPV6_BINDANY && td != NULL) { error = priv_check(td, PRIV_NETINET_BINDANY); if (error) break; } if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_UNICAST_HOPS: if (optval < -1 || optval >= 256) error = EINVAL; else { /* -1 = kernel default */ in6p->in6p_hops = optval; if ((in6p->inp_vflag & INP_IPV4) != 0) in6p->inp_ip_ttl = optval; } break; #define OPTSET(bit) \ do { \ INP_WLOCK(in6p); \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTSET2292(bit) \ do { \ INP_WLOCK(in6p); \ in6p->inp_flags |= IN6P_RFC2292; \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 
1 : 0) #define OPTSET2(bit, val) do { \ INP_WLOCK(in6p); \ if (val) \ in6p->inp_flags2 |= bit; \ else \ in6p->inp_flags2 &= ~bit; \ INP_WUNLOCK(in6p); \ } while (0) #define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 1 : 0) case IPV6_RECVPKTINFO: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: { struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } optp = &in6p->in6p_outputopts; error = ip6_pcbopt(IPV6_HOPLIMIT, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_RECVHOPLIMIT: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPLIMIT); break; case IPV6_RECVHOPOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDRDSTOPTS); break; case IPV6_RECVRTHDR: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: /* * We ignore this option for TCP * sockets. * (RFC3542 leaves this case * unspecified.) */ if (uproto != IPPROTO_TCP) OPTSET(IN6P_MTU); break; case IPV6_RECVFLOWID: OPTSET2(INP_RECVFLOWID, optval); break; #ifdef RSS case IPV6_RECVRSSBUCKETID: OPTSET2(INP_RECVRSSBUCKETID, optval); break; #endif case IPV6_V6ONLY: /* * make setsockopt(IPV6_V6ONLY) * available only prior to bind(2). * see ipng mailing list, Jun 22 2001. */ if (in6p->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { error = EINVAL; break; } OPTSET(IN6P_IPV6_V6ONLY); if (optval) in6p->inp_vflag &= ~INP_IPV4; else in6p->inp_vflag |= INP_IPV4; break; case IPV6_RECVTCLASS: /* cannot mix with RFC2292 XXX */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: OPTSET(IN6P_AUTOFLOWLABEL); break; case IPV6_BINDANY: OPTSET(INP_BINDANY); break; case IPV6_BINDMULTI: OPTSET2(INP_BINDMULTI, optval); break; #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: if ((optval >= 0) && (optval < rss_getnumbuckets())) { in6p->inp_rss_listen_bucket = optval; OPTSET2(INP_RSS_BUCKET_SET, 1); } else { error = EINVAL; } break; #endif } break; case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: if (optlen != sizeof(optval)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; { struct ip6_pktopts **optp; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: /* RFC 2292 */ if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_2292PKTINFO: OPTSET2292(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: OPTSET2292(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: /* * Check super-user privilege. * See comments for IPV6_RECVHOPOPTS. 
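* (PRIV_NETINET_SETHDROPTS is required here, just as in the RFC3542-style cases above.)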
*/ if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_HOPOPTS); break; case IPV6_2292DSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ break; case IPV6_2292RTHDR: OPTSET2292(IN6P_RTHDR); break; } break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: { /* new advanced API (RFC3542) */ u_char *optbuf; u_char optbuf_storage[MCLBYTES]; int optlen; struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } /* * We only ensure valsize is not too large * here. Further validation will be done * later. */ error = sooptcopyin(sopt, optbuf_storage, sizeof(optbuf_storage), 0); if (error) break; optlen = sopt->sopt_valsize; optbuf = optbuf_storage; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, optbuf, optlen, optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } #undef OPTSET case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: case IPV6_MSFILTER: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: error = ip6_setmoptions(in6p, sopt); break; case IPV6_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; INP_WLOCK(in6p); switch (optval) { case IPV6_PORTRANGE_DEFAULT: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags &= ~(INP_HIGHPORT); break; case IPV6_PORTRANGE_HIGH: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags |= INP_HIGHPORT; break; case IPV6_PORTRANGE_LOW: in6p->inp_flags &= ~(INP_HIGHPORT); in6p->inp_flags |= INP_LOWPORT; break; default: error = EINVAL; break; } INP_WUNLOCK(in6p); break; #ifdef IPSEC case IPV6_IPSEC_POLICY: { caddr_t req; struct mbuf *m; if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ break; req = mtod(m, caddr_t); error = ipsec_set_policy(in6p, optname, req, m->m_len, (sopt->sopt_td != NULL) ? sopt->sopt_td->td_ucred : NULL); m_freem(m); break; } #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif /* * RFC3542 (effectively) deprecated the * semantics of the 2292-style pktoptions. * Since it was not reliable in nature (i.e., * applications had to expect the lack of some * information after all), it would make sense * to simplify this part by always returning * empty data. 
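* Hence the get path below simply reports a zero-length result.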
*/ sopt->sopt_valsize = 0; break; case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: case IPV6_UNICAST_HOPS: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_V6ONLY: case IPV6_PORTRANGE: case IPV6_RECVTCLASS: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_FLOWID: case IPV6_FLOWTYPE: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RSSBUCKETID: case IPV6_RECVRSSBUCKETID: #endif case IPV6_BINDMULTI: switch (optname) { case IPV6_RECVHOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: optval = OPTBIT(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: optval = OPTBIT(IN6P_RTHDRDSTOPTS); break; case IPV6_UNICAST_HOPS: optval = in6p->in6p_hops; break; case IPV6_RECVPKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_RECVHOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_RECVRTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: optval = OPTBIT(IN6P_MTU); break; case IPV6_V6ONLY: optval = OPTBIT(IN6P_IPV6_V6ONLY); break; case IPV6_PORTRANGE: { int flags; flags = in6p->inp_flags; if (flags & INP_HIGHPORT) optval = IPV6_PORTRANGE_HIGH; else if (flags & INP_LOWPORT) optval = IPV6_PORTRANGE_LOW; else optval = 0; break; } case IPV6_RECVTCLASS: optval = OPTBIT(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: optval = OPTBIT(IN6P_AUTOFLOWLABEL); break; case IPV6_BINDANY: optval = OPTBIT(INP_BINDANY); break; case IPV6_FLOWID: optval = in6p->inp_flowid; break; case IPV6_FLOWTYPE: optval = in6p->inp_flowtype; break; case IPV6_RECVFLOWID: optval = OPTBIT2(INP_RECVFLOWID); break; #ifdef RSS case IPV6_RSSBUCKETID: retval = rss_hash2bucket(in6p->inp_flowid, in6p->inp_flowtype, &rss_bucket); if (retval == 0) optval = rss_bucket; else error = EINVAL; break; case IPV6_RECVRSSBUCKETID: optval = OPTBIT2(INP_RECVRSSBUCKETID); break; #endif case IPV6_BINDMULTI: optval = OPTBIT2(INP_BINDMULTI); break; } if (error) break; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PATHMTU: { u_long pmtu = 0; struct ip6_mtuinfo mtuinfo; if (!(so->so_state & SS_ISCONNECTED)) return (ENOTCONN); /* * XXX: we dot not consider the case of source * routing, or optional information to specify * the outgoing interface. 
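* The value reported to the application (e.g. via getsockopt(s, IPPROTO_IPV6, IPV6_PATHMTU, &mtuinfo, &len) on a connected socket) is clamped to IPV6_MAXPACKET below.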
*/ error = ip6_getpmtu_ctl(so->so_fibnum, &in6p->in6p_faddr, &pmtu); if (error) break; if (pmtu > IPV6_MAXPACKET) pmtu = IPV6_MAXPACKET; bzero(&mtuinfo, sizeof(mtuinfo)); mtuinfo.ip6m_mtu = (u_int32_t)pmtu; optdata = (void *)&mtuinfo; optdatalen = sizeof(mtuinfo); error = sooptcopyout(sopt, optdata, optdatalen); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292RTHDR: case IPV6_2292DSTOPTS: switch (optname) { case IPV6_2292PKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_2292RTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_2292DSTOPTS: optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); break; } error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: error = ip6_getpcbopt(in6p->in6p_outputopts, optname, sopt); break; case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_MSFILTER: error = ip6_getmoptions(in6p, sopt); break; #ifdef IPSEC case IPV6_IPSEC_POLICY: { caddr_t req = NULL; size_t len = 0; struct mbuf *m = NULL; struct mbuf **mp = &m; size_t ovalsize = sopt->sopt_valsize; caddr_t oval = (caddr_t)sopt->sopt_val; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; sopt->sopt_valsize = ovalsize; sopt->sopt_val = oval; if (m) { req = mtod(m, caddr_t); len = m->m_len; } error = ipsec_get_policy(in6p, req, len, mp); if (error == 0) error = soopt_mcopyout(sopt, m); /* XXX */ if (error == 0 && m) m_freem(m); break; } #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; } } return (error); } int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt) { int error = 0, optval, optlen; const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum); struct inpcb *in6p = sotoinpcb(so); int level, op, optname; level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; if (level != IPPROTO_IPV6) { return (EINVAL); } switch (optname) { case IPV6_CHECKSUM: /* * For ICMPv6 sockets, no modification allowed for checksum * offset, permit "no change" values to help existing apps. * * RFC3542 says: "An attempt to set IPV6_CHECKSUM * for an ICMPv6 socket will fail." * The current behavior does not meet RFC3542. */ switch (op) { case SOPT_SET: if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error) break; if ((optval % 2) != 0) { /* the API assumes even offset values */ error = EINVAL; } else if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (optval != icmp6off) error = EINVAL; } else in6p->in6p_cksum = optval; break; case SOPT_GET: if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) optval = icmp6off; else optval = in6p->in6p_cksum; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: error = EINVAL; break; } break; default: error = ENOPROTOOPT; break; } return (error); } /* * Set up IP6 options in pcb for insertion in output packets or * specifying behavior of outgoing packets. 
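* Called for the legacy IPV6_2292PKTOPTIONS socket option; passing an empty mbuf simply clears any previously installed options.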
*/ static int ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so, struct sockopt *sopt) { struct ip6_pktopts *opt = *pktopt; int error = 0; struct thread *td = sopt->sopt_td; /* turn off any old options. */ if (opt) { #ifdef DIAGNOSTIC if (opt->ip6po_pktinfo || opt->ip6po_nexthop || opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 || opt->ip6po_rhinfo.ip6po_rhi_rthdr) printf("ip6_pcbopts: all specified options are cleared.\n"); #endif ip6_clearpktopts(opt, -1); } else opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK); *pktopt = NULL; if (!m || m->m_len == 0) { /* * Only turning off any previous options, regardless of * whether the opt is just created or given. */ free(opt, M_IP6OPT); return (0); } /* set options specified by user. */ if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ? td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) { ip6_clearpktopts(opt, -1); /* XXX: discard all options */ free(opt, M_IP6OPT); return (error); } *pktopt = opt; return (0); } /* * initialize ip6_pktopts. beware that there are non-zero default values in * the struct. */ void ip6_initpktopts(struct ip6_pktopts *opt) { bzero(opt, sizeof(*opt)); opt->ip6po_hlim = -1; /* -1 means default hop limit */ opt->ip6po_tclass = -1; /* -1 means default traffic class */ opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY; opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM; } static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, struct ucred *cred, int uproto) { struct ip6_pktopts *opt; if (*pktopt == NULL) { *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT, M_WAITOK); ip6_initpktopts(*pktopt); } opt = *pktopt; return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto)); } static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) { void *optdata = NULL; int optdatalen = 0; struct ip6_ext *ip6e; int error = 0; struct in6_pktinfo null_pktinfo; int deftclass = 0, on; int defminmtu = IP6PO_MINMTU_MCASTONLY; int defpreftemp = IP6PO_TEMPADDR_SYSTEM; switch (optname) { case IPV6_PKTINFO: optdata = (void *)&null_pktinfo; if (pktopt && pktopt->ip6po_pktinfo) { bcopy(pktopt->ip6po_pktinfo, &null_pktinfo, sizeof(null_pktinfo)); in6_clearscope(&null_pktinfo.ipi6_addr); } else { /* XXX: we don't have to do this every time... 
*/ bzero(&null_pktinfo, sizeof(null_pktinfo)); } optdatalen = sizeof(struct in6_pktinfo); break; case IPV6_TCLASS: if (pktopt && pktopt->ip6po_tclass >= 0) optdata = (void *)&pktopt->ip6po_tclass; else optdata = (void *)&deftclass; optdatalen = sizeof(int); break; case IPV6_HOPOPTS: if (pktopt && pktopt->ip6po_hbh) { optdata = (void *)pktopt->ip6po_hbh; ip6e = (struct ip6_ext *)pktopt->ip6po_hbh; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDR: if (pktopt && pktopt->ip6po_rthdr) { optdata = (void *)pktopt->ip6po_rthdr; ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDRDSTOPTS: if (pktopt && pktopt->ip6po_dest1) { optdata = (void *)pktopt->ip6po_dest1; ip6e = (struct ip6_ext *)pktopt->ip6po_dest1; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_DSTOPTS: if (pktopt && pktopt->ip6po_dest2) { optdata = (void *)pktopt->ip6po_dest2; ip6e = (struct ip6_ext *)pktopt->ip6po_dest2; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_NEXTHOP: if (pktopt && pktopt->ip6po_nexthop) { optdata = (void *)pktopt->ip6po_nexthop; optdatalen = pktopt->ip6po_nexthop->sa_len; } break; case IPV6_USE_MIN_MTU: if (pktopt) optdata = (void *)&pktopt->ip6po_minmtu; else optdata = (void *)&defminmtu; optdatalen = sizeof(int); break; case IPV6_DONTFRAG: if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) on = 1; else on = 0; optdata = (void *)&on; optdatalen = sizeof(on); break; case IPV6_PREFER_TEMPADDR: if (pktopt) optdata = (void *)&pktopt->ip6po_prefer_tempaddr; else optdata = (void *)&defpreftemp; optdatalen = sizeof(int); break; default: /* should not happen */ #ifdef DIAGNOSTIC panic("ip6_getpcbopt: unexpected option\n"); #endif return (ENOPROTOOPT); } error = sooptcopyout(sopt, optdata, optdatalen); return (error); } void ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) { if (pktopt == NULL) return; if (optname == -1 || optname == IPV6_PKTINFO) { if (pktopt->ip6po_pktinfo) free(pktopt->ip6po_pktinfo, M_IP6OPT); pktopt->ip6po_pktinfo = NULL; } if (optname == -1 || optname == IPV6_HOPLIMIT) pktopt->ip6po_hlim = -1; if (optname == -1 || optname == IPV6_TCLASS) pktopt->ip6po_tclass = -1; if (optname == -1 || optname == IPV6_NEXTHOP) { if (pktopt->ip6po_nextroute.ro_rt) { RTFREE(pktopt->ip6po_nextroute.ro_rt); pktopt->ip6po_nextroute.ro_rt = NULL; } if (pktopt->ip6po_nexthop) free(pktopt->ip6po_nexthop, M_IP6OPT); pktopt->ip6po_nexthop = NULL; } if (optname == -1 || optname == IPV6_HOPOPTS) { if (pktopt->ip6po_hbh) free(pktopt->ip6po_hbh, M_IP6OPT); pktopt->ip6po_hbh = NULL; } if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) { if (pktopt->ip6po_dest1) free(pktopt->ip6po_dest1, M_IP6OPT); pktopt->ip6po_dest1 = NULL; } if (optname == -1 || optname == IPV6_RTHDR) { if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; if (pktopt->ip6po_route.ro_rt) { RTFREE(pktopt->ip6po_route.ro_rt); pktopt->ip6po_route.ro_rt = NULL; } } if (optname == -1 || optname == IPV6_DSTOPTS) { if (pktopt->ip6po_dest2) free(pktopt->ip6po_dest2, M_IP6OPT); pktopt->ip6po_dest2 = NULL; } } #define PKTOPT_EXTHDRCPY(type) \ do {\ if (src->type) {\ int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ dst->type = malloc(hlen, M_IP6OPT, canwait);\ if (dst->type == NULL && canwait == M_NOWAIT)\ goto bad;\ bcopy(src->type, dst->type, hlen);\ }\ } while (/*CONSTCOND*/ 0) static int copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int 
canwait) { if (dst == NULL || src == NULL) { printf("ip6_clearpktopts: invalid argument\n"); return (EINVAL); } dst->ip6po_hlim = src->ip6po_hlim; dst->ip6po_tclass = src->ip6po_tclass; dst->ip6po_flags = src->ip6po_flags; dst->ip6po_minmtu = src->ip6po_minmtu; dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr; if (src->ip6po_pktinfo) { dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo), M_IP6OPT, canwait); if (dst->ip6po_pktinfo == NULL) goto bad; *dst->ip6po_pktinfo = *src->ip6po_pktinfo; } if (src->ip6po_nexthop) { dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len, M_IP6OPT, canwait); if (dst->ip6po_nexthop == NULL) goto bad; bcopy(src->ip6po_nexthop, dst->ip6po_nexthop, src->ip6po_nexthop->sa_len); } PKTOPT_EXTHDRCPY(ip6po_hbh); PKTOPT_EXTHDRCPY(ip6po_dest1); PKTOPT_EXTHDRCPY(ip6po_dest2); PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */ return (0); bad: ip6_clearpktopts(dst, -1); return (ENOBUFS); } #undef PKTOPT_EXTHDRCPY struct ip6_pktopts * ip6_copypktopts(struct ip6_pktopts *src, int canwait) { int error; struct ip6_pktopts *dst; dst = malloc(sizeof(*dst), M_IP6OPT, canwait); if (dst == NULL) return (NULL); ip6_initpktopts(dst); if ((error = copypktopts(dst, src, canwait)) != 0) { free(dst, M_IP6OPT); return (NULL); } return (dst); } void ip6_freepcbopts(struct ip6_pktopts *pktopt) { if (pktopt == NULL) return; ip6_clearpktopts(pktopt, -1); free(pktopt, M_IP6OPT); } /* * Set IPv6 outgoing packet options based on advanced API. */ int ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto) { struct cmsghdr *cm = NULL; if (control == NULL || opt == NULL) return (EINVAL); ip6_initpktopts(opt); if (stickyopt) { int error; /* * If stickyopt is provided, make a local copy of the options * for this particular packet, then override them by ancillary * objects. * XXX: copypktopts() does not copy the cached route to a next * hop (if any). This is not very good in terms of efficiency, * but we can allow this since this option should be rarely * used. */ if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0) return (error); } /* * XXX: Currently, we assume all the optional information is stored * in a single mbuf. */ if (control->m_next) return (EINVAL); for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { int error; if (control->m_len < CMSG_LEN(0)) return (EINVAL); cm = mtod(control, struct cmsghdr *); if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) return (EINVAL); if (cm->cmsg_level != IPPROTO_IPV6) continue; error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm), cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto); if (error) return (error); } return (0); } /* * Set a particular packet option, as a sticky option or an ancillary data * item. "len" can be 0 only when it's a sticky option. * We have 4 cases of combination of "sticky" and "cmsg": * "sticky=0, cmsg=0": impossible * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data * "sticky=1, cmsg=0": RFC3542 socket option * "sticky=1, cmsg=1": RFC2292 socket option */ static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, struct ucred *cred, int sticky, int cmsg, int uproto) { int minmtupolicy, preftemp; int error; if (!sticky && !cmsg) { #ifdef DIAGNOSTIC printf("ip6_setpktopt: impossible case\n"); #endif return (EINVAL); } /* * IPV6_2292xxx is for backward compatibility to RFC2292, and should * not be specified in the context of RFC3542. 
Conversely, * RFC3542 types should not be specified in the context of RFC2292. */ if (!cmsg) { switch (optname) { case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292NEXTHOP: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: case IPV6_2292PKTOPTIONS: return (ENOPROTOOPT); } } if (sticky && cmsg) { switch (optname) { case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_NEXTHOP: case IPV6_HOPOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_USE_MIN_MTU: case IPV6_DONTFRAG: case IPV6_TCLASS: case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */ return (ENOPROTOOPT); } } switch (optname) { case IPV6_2292PKTINFO: case IPV6_PKTINFO: { struct ifnet *ifp = NULL; struct in6_pktinfo *pktinfo; if (len != sizeof(struct in6_pktinfo)) return (EINVAL); pktinfo = (struct in6_pktinfo *)buf; /* * An application can clear any sticky IPV6_PKTINFO option by * doing a "regular" setsockopt with ipi6_addr being * in6addr_any and ipi6_ifindex being zero. * [RFC 3542, Section 6] */ if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo && pktinfo->ipi6_ifindex == 0 && IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { ip6_clearpktopts(opt, optname); break; } if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO && sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { return (EINVAL); } if (IN6_IS_ADDR_MULTICAST(&pktinfo->ipi6_addr)) return (EINVAL); /* validate the interface index if specified. */ if (pktinfo->ipi6_ifindex > V_if_index) return (ENXIO); if (pktinfo->ipi6_ifindex) { ifp = ifnet_byindex(pktinfo->ipi6_ifindex); if (ifp == NULL) return (ENXIO); } if (ifp != NULL && (ifp->if_afdata[AF_INET6] == NULL || (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) != 0)) return (ENETDOWN); if (ifp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { struct in6_ifaddr *ia; in6_setscope(&pktinfo->ipi6_addr, ifp, NULL); ia = in6ifa_ifpwithaddr(ifp, &pktinfo->ipi6_addr); if (ia == NULL) return (EADDRNOTAVAIL); ifa_free(&ia->ia_ifa); } /* * We store the address anyway, and let in6_selectsrc() * validate the specified address. This is because ipi6_addr * may not have enough information about its scope zone, and * we may need additional information (such as outgoing * interface or the scope zone of a destination address) to * disambiguate the scope. * XXX: the delay of the validation may confuse the * application when it is used as a sticky option. */ if (opt->ip6po_pktinfo == NULL) { opt->ip6po_pktinfo = malloc(sizeof(*pktinfo), M_IP6OPT, M_NOWAIT); if (opt->ip6po_pktinfo == NULL) return (ENOBUFS); } bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo)); break; } case IPV6_2292HOPLIMIT: case IPV6_HOPLIMIT: { int *hlimp; /* * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT * to simplify the ordering among hoplimit options. 
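* Such a sticky setting is therefore rejected with ENOPROTOOPT below.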
*/ if (optname == IPV6_HOPLIMIT && sticky) return (ENOPROTOOPT); if (len != sizeof(int)) return (EINVAL); hlimp = (int *)buf; if (*hlimp < -1 || *hlimp > 255) return (EINVAL); opt->ip6po_hlim = *hlimp; break; } case IPV6_TCLASS: { int tclass; if (len != sizeof(int)) return (EINVAL); tclass = *(int *)buf; if (tclass < -1 || tclass > 255) return (EINVAL); opt->ip6po_tclass = tclass; break; } case IPV6_2292NEXTHOP: case IPV6_NEXTHOP: if (cred != NULL) { error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { /* just remove the option */ ip6_clearpktopts(opt, IPV6_NEXTHOP); break; } /* check if cmsg_len is large enough for sa_len */ if (len < sizeof(struct sockaddr) || len < *buf) return (EINVAL); switch (((struct sockaddr *)buf)->sa_family) { case AF_INET6: { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf; int error; if (sa6->sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) || IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) { return (EINVAL); } if ((error = sa6_embedscope(sa6, V_ip6_use_defzone)) != 0) { return (error); } break; } case AF_LINK: /* should eventually be supported */ default: return (EAFNOSUPPORT); } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, IPV6_NEXTHOP); opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT); if (opt->ip6po_nexthop == NULL) return (ENOBUFS); bcopy(buf, opt->ip6po_nexthop, *buf); break; case IPV6_2292HOPOPTS: case IPV6_HOPOPTS: { struct ip6_hbh *hbh; int hbhlen; /* * XXX: We don't allow a non-privileged user to set ANY HbH * options, since per-option restriction has too much * overhead. */ if (cred != NULL) { error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { ip6_clearpktopts(opt, IPV6_HOPOPTS); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_hbh)) return (EINVAL); hbh = (struct ip6_hbh *)buf; hbhlen = (hbh->ip6h_len + 1) << 3; if (len != hbhlen) return (EINVAL); /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, IPV6_HOPOPTS); opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_hbh == NULL) return (ENOBUFS); bcopy(hbh, opt->ip6po_hbh, hbhlen); break; } case IPV6_2292DSTOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: { struct ip6_dest *dest, **newdest = NULL; int destlen; if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */ error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { ip6_clearpktopts(opt, optname); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_dest)) return (EINVAL); dest = (struct ip6_dest *)buf; destlen = (dest->ip6d_len + 1) << 3; if (len != destlen) return (EINVAL); /* * Determine the position that the destination options header * should be inserted; before or after the routing header. */ switch (optname) { case IPV6_2292DSTOPTS: /* * The old advacned API is ambiguous on this point. * Our approach is to determine the position based * according to the existence of a routing header. * Note, however, that this depends on the order of the * extension headers in the ancillary data; the 1st * part of the destination options header must appear * before the routing header in the ancillary data, * too. * RFC3542 solved the ambiguity by introducing * separate ancillary data or option types. 
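* For the 2292 case the code below therefore uses ip6po_dest1 when no routing header has been set yet, and ip6po_dest2 otherwise.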
*/ if (opt->ip6po_rthdr == NULL) newdest = &opt->ip6po_dest1; else newdest = &opt->ip6po_dest2; break; case IPV6_RTHDRDSTOPTS: newdest = &opt->ip6po_dest1; break; case IPV6_DSTOPTS: newdest = &opt->ip6po_dest2; break; } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, optname); *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT); if (*newdest == NULL) return (ENOBUFS); bcopy(dest, *newdest, destlen); break; } case IPV6_2292RTHDR: case IPV6_RTHDR: { struct ip6_rthdr *rth; int rthlen; if (len == 0) { ip6_clearpktopts(opt, IPV6_RTHDR); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_rthdr)) return (EINVAL); rth = (struct ip6_rthdr *)buf; rthlen = (rth->ip6r_len + 1) << 3; if (len != rthlen) return (EINVAL); switch (rth->ip6r_type) { case IPV6_RTHDR_TYPE_0: if (rth->ip6r_len == 0) /* must contain one addr */ return (EINVAL); if (rth->ip6r_len % 2) /* length must be even */ return (EINVAL); if (rth->ip6r_len / 2 != rth->ip6r_segleft) return (EINVAL); break; default: return (EINVAL); /* not supported */ } /* turn off the previous option */ ip6_clearpktopts(opt, IPV6_RTHDR); opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_rthdr == NULL) return (ENOBUFS); bcopy(rth, opt->ip6po_rthdr, rthlen); break; } case IPV6_USE_MIN_MTU: if (len != sizeof(int)) return (EINVAL); minmtupolicy = *(int *)buf; if (minmtupolicy != IP6PO_MINMTU_MCASTONLY && minmtupolicy != IP6PO_MINMTU_DISABLE && minmtupolicy != IP6PO_MINMTU_ALL) { return (EINVAL); } opt->ip6po_minmtu = minmtupolicy; break; case IPV6_DONTFRAG: if (len != sizeof(int)) return (EINVAL); if (uproto == IPPROTO_TCP || *(int *)buf == 0) { /* * we ignore this option for TCP sockets. * (RFC3542 leaves this case unspecified.) */ opt->ip6po_flags &= ~IP6PO_DONTFRAG; } else opt->ip6po_flags |= IP6PO_DONTFRAG; break; case IPV6_PREFER_TEMPADDR: if (len != sizeof(int)) return (EINVAL); preftemp = *(int *)buf; if (preftemp != IP6PO_TEMPADDR_SYSTEM && preftemp != IP6PO_TEMPADDR_NOTPREFER && preftemp != IP6PO_TEMPADDR_PREFER) { return (EINVAL); } opt->ip6po_prefer_tempaddr = preftemp; break; default: return (ENOPROTOOPT); } /* end of switch */ return (0); } /* * Routine called from ip6_output() to loop back a copy of an IP6 multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be &loif -- easier than replicating that code here. */ void ip6_mloopback(struct ifnet *ifp, struct mbuf *m) { struct mbuf *copym; struct ip6_hdr *ip6; copym = m_copy(m, 0, M_COPYALL); if (copym == NULL) return; /* * Make sure to deep-copy IPv6 header portion in case the data * is in an mbuf cluster, so that we can safely override the IPv6 * header portion later. */ if (!M_WRITABLE(copym) || copym->m_len < sizeof(struct ip6_hdr)) { copym = m_pullup(copym, sizeof(struct ip6_hdr)); if (copym == NULL) return; } ip6 = mtod(copym, struct ip6_hdr *); /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; copym->m_pkthdr.csum_data = 0xffff; } if_simloop(ifp, copym, AF_INET6, 0); } /* * Chop IPv6 header off from the payload. 
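* If the leading mbuf holds more than the IPv6 header, a separate header-only mbuf is prepended; exthdrs->ip6e_ip6 then points at that mbuf with the payload chained behind it.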
*/ static int ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs) { struct mbuf *mh; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); if (m->m_len > sizeof(*ip6)) { mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) { m_freem(m); return ENOBUFS; } m_move_pkthdr(mh, m); M_ALIGN(mh, sizeof(*ip6)); m->m_len -= sizeof(*ip6); m->m_data += sizeof(*ip6); mh->m_next = m; m = mh; m->m_len = sizeof(*ip6); bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); } exthdrs->ip6e_ip6 = m; return 0; } /* * Compute IPv6 extension header length. */ int ip6_optlen(struct inpcb *in6p) { int len; if (!in6p->in6p_outputopts) return 0; len = 0; #define elen(x) \ (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) len += elen(in6p->in6p_outputopts->ip6po_hbh); if (in6p->in6p_outputopts->ip6po_rthdr) /* dest1 is valid with rthdr only */ len += elen(in6p->in6p_outputopts->ip6po_dest1); len += elen(in6p->in6p_outputopts->ip6po_rthdr); len += elen(in6p->in6p_outputopts->ip6po_dest2); return len; #undef elen } Index: user/alc/PQ_LAUNDRY/sys/ofed/drivers/infiniband/core/cma.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/ofed/drivers/infiniband/core/cma.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/ofed/drivers/infiniband/core/cma.c (revision 303642) @@ -1,3865 +1,3888 @@ /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. * Copyright (c) 2016 Chelsio Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #define LINUXKPI_PARAM_PREFIX ibcore_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("Generic RDMA CM Agent"); MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) #define CMA_IBOE_PACKET_LIFETIME 18 static int cma_response_timeout = CMA_CM_RESPONSE_TIMEOUT; module_param_named(cma_response_timeout, cma_response_timeout, int, 0644); MODULE_PARM_DESC(cma_response_timeout, "CMA_CM_RESPONSE_TIMEOUT (default=20)"); static int def_prec2sl = 3; module_param_named(def_prec2sl, def_prec2sl, int, 0644); MODULE_PARM_DESC(def_prec2sl, "Default value for SL priority with RoCE. Valid values 0 - 7"); static int unify_tcp_port_space = 1; module_param(unify_tcp_port_space, int, 0644); MODULE_PARM_DESC(unify_tcp_port_space, "Unify the host TCP and RDMA port " "space allocation (default=1)"); static int debug_level = 0; #define cma_pr(level, priv, format, arg...) \ printk(level "CMA: %p: %s: " format, ((struct rdma_id_priv *) priv) , __func__, ## arg) #define cma_dbg(priv, format, arg...) \ do { if (debug_level) cma_pr(KERN_DEBUG, priv, format, ## arg); } while (0) #define cma_warn(priv, format, arg...) \ cma_pr(KERN_WARNING, priv, format, ## arg) #define CMA_GID_FMT "%2.2x%2.2x:%2.2x%2.2x" #define CMA_GID_RAW_ARG(gid) ((u8 *)(gid))[12],\ ((u8 *)(gid))[13],\ ((u8 *)(gid))[14],\ ((u8 *)(gid))[15] #define CMA_GID_ARG(gid) CMA_GID_RAW_ARG((gid).raw) #define cma_debug_path(priv, pfx, p) \ cma_dbg(priv, pfx "sgid=" CMA_GID_FMT ",dgid=" \ CMA_GID_FMT "\n", CMA_GID_ARG(p.sgid), \ CMA_GID_ARG(p.dgid)) #define cma_debug_gid(priv, g) \ cma_dbg(priv, "gid=" CMA_GID_FMT "\n", CMA_GID_ARG(g) module_param_named(debug_level, debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "debug level default=0"); static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device); static struct ib_client cma_client = { .name = "cma", .add = cma_add_one, .remove = cma_remove_one }; static struct ib_sa_client sa_client; static struct rdma_addr_client addr_client; static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); static struct workqueue_struct *cma_wq; static struct workqueue_struct *cma_free_wq; static DEFINE_IDR(sdp_ps); static DEFINE_IDR(tcp_ps); static DEFINE_IDR(udp_ps); static DEFINE_IDR(ipoib_ps); static DEFINE_IDR(ib_ps); struct cma_device { struct list_head list; struct ib_device *device; struct completion comp; atomic_t refcount; struct list_head id_list; }; struct rdma_bind_list { struct idr *ps; struct hlist_head owners; unsigned short port; }; enum { CMA_OPTION_AFONLY, }; /* * Device removal can occur at anytime, so we need extra handling to * serialize notifying the user of device removal with other callbacks. * We do this by disabling removal notification while a callback is in process, * and reporting it after the callback completes. 
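* (The handler_mutex in struct rdma_id_private below provides that serialization; see cma_disable_callback().)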
*/ struct rdma_id_private { struct rdma_cm_id id; struct rdma_bind_list *bind_list; struct socket *sock; struct hlist_node node; struct list_head list; /* listen_any_list or cma_device.list */ struct list_head listen_list; /* per device listens */ struct cma_device *cma_dev; struct list_head mc_list; int internal_id; enum rdma_cm_state state; spinlock_t lock; spinlock_t cm_lock; struct mutex qp_mutex; struct completion comp; atomic_t refcount; struct mutex handler_mutex; struct work_struct work; /* garbage coll */ int backlog; int timeout_ms; struct ib_sa_query *query; int query_id; union { struct ib_cm_id *ib; struct iw_cm_id *iw; } cm_id; u32 seq_num; u32 qkey; u32 qp_num; pid_t owner; u32 options; u8 srq; u8 tos; u8 reuseaddr; u8 afonly; int qp_timeout; /* cache for mc record params */ struct ib_sa_mcmember_rec rec; int is_valid_rec; }; struct cma_multicast { struct rdma_id_private *id_priv; union { struct ib_sa_multicast *ib; } multicast; struct list_head list; void *context; struct sockaddr_storage addr; struct kref mcref; }; struct cma_work { struct work_struct work; struct rdma_id_private *id; enum rdma_cm_state old_state; enum rdma_cm_state new_state; struct rdma_cm_event event; }; struct cma_ndev_work { struct work_struct work; struct rdma_id_private *id; struct rdma_cm_event event; }; struct iboe_mcast_work { struct work_struct work; struct rdma_id_private *id; struct cma_multicast *mc; }; union cma_ip_addr { struct in6_addr ip6; struct { __be32 pad[3]; __be32 addr; } ip4; }; struct cma_hdr { u8 cma_version; u8 ip_version; /* IP version: 7:4 */ __be16 port; union cma_ip_addr src_addr; union cma_ip_addr dst_addr; }; struct sdp_hh { u8 bsdh[16]; u8 sdp_version; /* Major version: 7:4 */ u8 ip_version; /* IP version: 7:4 */ u8 sdp_specific1[10]; __be16 port; __be16 sdp_specific2; union cma_ip_addr src_addr; union cma_ip_addr dst_addr; }; struct sdp_hah { u8 bsdh[16]; u8 sdp_version; }; #define CMA_VERSION 0x00 #define SDP_MAJ_VERSION 0x2 static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) { unsigned long flags; int ret; spin_lock_irqsave(&id_priv->lock, flags); ret = (id_priv->state == comp); spin_unlock_irqrestore(&id_priv->lock, flags); return ret; } static int cma_comp_exch(struct rdma_id_private *id_priv, enum rdma_cm_state comp, enum rdma_cm_state exch) { unsigned long flags; int ret; spin_lock_irqsave(&id_priv->lock, flags); if ((ret = (id_priv->state == comp))) id_priv->state = exch; spin_unlock_irqrestore(&id_priv->lock, flags); return ret; } static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, enum rdma_cm_state exch) { unsigned long flags; enum rdma_cm_state old; spin_lock_irqsave(&id_priv->lock, flags); old = id_priv->state; id_priv->state = exch; spin_unlock_irqrestore(&id_priv->lock, flags); return old; } static inline u8 cma_get_ip_ver(struct cma_hdr *hdr) { return hdr->ip_version >> 4; } static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) { hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); } static inline u8 sdp_get_majv(u8 sdp_version) { return sdp_version >> 4; } static inline u8 sdp_get_ip_ver(struct sdp_hh *hh) { return hh->ip_version >> 4; } static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver) { hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF); } static void cma_attach_to_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { atomic_inc(&cma_dev->refcount); id_priv->cma_dev = cma_dev; id_priv->id.device = cma_dev->device; id_priv->id.route.addr.dev_addr.transport = 
rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); } static inline void cma_deref_dev(struct cma_device *cma_dev) { if (atomic_dec_and_test(&cma_dev->refcount)) complete(&cma_dev->comp); } static inline void release_mc(struct kref *kref) { struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref); kfree(mc->multicast.ib); kfree(mc); } static void cma_release_dev(struct rdma_id_private *id_priv) { mutex_lock(&lock); list_del(&id_priv->list); cma_deref_dev(id_priv->cma_dev); id_priv->cma_dev = NULL; mutex_unlock(&lock); } static int cma_set_qkey(struct rdma_id_private *id_priv) { struct ib_sa_mcmember_rec rec; int ret = 0; if (id_priv->qkey) return 0; switch (id_priv->id.ps) { case RDMA_PS_UDP: id_priv->qkey = RDMA_UDP_QKEY; break; case RDMA_PS_IPOIB: ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid); ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, &rec.mgid, &rec); if (!ret) id_priv->qkey = be32_to_cpu(rec.qkey); break; default: break; } return ret; } static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num) { int i; int err; struct ib_port_attr props; union ib_gid tmp; err = ib_query_port(device, port_num, &props); if (err) return 1; for (i = 0; i < props.gid_tbl_len; ++i) { err = ib_query_gid(device, port_num, i, &tmp); if (err) return 1; if (!memcmp(&tmp, gid, sizeof tmp)) return 0; } return -EAGAIN; } int rdma_find_cmid_laddr(struct sockaddr_in *local_addr, unsigned short dev_type, void **cm_id) { int ret; u8 port; int found_dev = 0, found_cmid = 0; struct rdma_id_private *id_priv; struct rdma_id_private *dev_id_priv; struct cma_device *cma_dev; struct rdma_dev_addr dev_addr; union ib_gid gid; enum rdma_link_layer dev_ll = dev_type == ARPHRD_INFINIBAND ? IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; memset(&dev_addr, 0, sizeof(dev_addr)); ret = rdma_translate_ip((struct sockaddr *)local_addr, &dev_addr, NULL); if (ret) goto err; /* find rdma device based on MAC address/gid */ mutex_lock(&lock); memcpy(&gid, dev_addr.src_dev_addr + rdma_addr_gid_offset(&dev_addr), sizeof(gid)); list_for_each_entry(cma_dev, &dev_list, list) for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) if ((rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) && (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IWARP)) { ret = find_gid_port(cma_dev->device, &gid, port); if (!ret) { found_dev = 1; goto out; } else if (ret == 1) { mutex_unlock(&lock); goto err; } } out: mutex_unlock(&lock); if (!found_dev) goto err; /* Traverse through the list of listening cm_id's to find the * desired cm_id based on rdma device & port number. */ list_for_each_entry(id_priv, &listen_any_list, list) list_for_each_entry(dev_id_priv, &id_priv->listen_list, listen_list) if (dev_id_priv->cma_dev == cma_dev) if (dev_id_priv->cm_id.iw->local_addr.sin_port == local_addr->sin_port) { *cm_id = (void *)dev_id_priv->cm_id.iw; found_cmid = 1; } return found_cmid ? 0 : -ENODEV; err: return -ENODEV; } EXPORT_SYMBOL(rdma_find_cmid_laddr); -static int cma_acquire_dev(struct rdma_id_private *id_priv) +static int cma_acquire_dev(struct rdma_id_private *id_priv, + struct rdma_id_private *listen_id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct cma_device *cma_dev; union ib_gid gid, iboe_gid; int ret = -ENODEV; - u8 port; + u8 port, found_port; enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ? 
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; if (dev_ll != IB_LINK_LAYER_INFINIBAND && id_priv->id.ps == RDMA_PS_IPOIB) return -EINVAL; mutex_lock(&lock); rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &iboe_gid); memcpy(&gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof gid); + if (listen_id_priv && + rdma_port_get_link_layer(listen_id_priv->id.device, + listen_id_priv->id.port_num) == dev_ll) { + cma_dev = listen_id_priv->cma_dev; + port = listen_id_priv->id.port_num; + if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && + rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) + ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, + &found_port, NULL); + else + ret = ib_find_cached_gid(cma_dev->device, &gid, + &found_port, NULL); + + if (!ret && (port == found_port)) { + id_priv->id.port_num = found_port; + goto out; + } + } list_for_each_entry(cma_dev, &dev_list, list) { for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { + if (listen_id_priv && + listen_id_priv->cma_dev == cma_dev && + listen_id_priv->id.port_num == port) + continue; if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) { if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) - ret = find_gid_port(cma_dev->device, &iboe_gid, port); + ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL); else - ret = find_gid_port(cma_dev->device, &gid, port); + ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL); - if (!ret) { + if (!ret && (port == found_port)) { id_priv->id.port_num = port; goto out; } else if (ret == 1) - break; - } + break; + } } } out: if (!ret) cma_attach_to_dev(id_priv, cma_dev); mutex_unlock(&lock); return ret; } static void cma_deref_id(struct rdma_id_private *id_priv) { if (atomic_dec_and_test(&id_priv->refcount)) complete(&id_priv->comp); } static int cma_disable_callback(struct rdma_id_private *id_priv, enum rdma_cm_state state) { mutex_lock(&id_priv->handler_mutex); if (id_priv->state != state) { mutex_unlock(&id_priv->handler_mutex); return -EINVAL; } return 0; } struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps, enum ib_qp_type qp_type) { struct rdma_id_private *id_priv; id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL); if (!id_priv) return ERR_PTR(-ENOMEM); id_priv->owner = curthread->td_proc->p_pid; id_priv->state = RDMA_CM_IDLE; id_priv->id.context = context; id_priv->id.event_handler = event_handler; id_priv->id.ps = ps; id_priv->id.qp_type = qp_type; spin_lock_init(&id_priv->lock); spin_lock_init(&id_priv->cm_lock); mutex_init(&id_priv->qp_mutex); init_completion(&id_priv->comp); atomic_set(&id_priv->refcount, 1); mutex_init(&id_priv->handler_mutex); INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); return &id_priv->id; } EXPORT_SYMBOL(rdma_create_id); static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; qp_attr.qp_state = IB_QPS_INIT; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) return ret; ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); if (ret) return ret; qp_attr.qp_state = IB_QPS_RTR; ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); if (ret) return ret; qp_attr.qp_state = IB_QPS_RTS; qp_attr.sq_psn = 0; ret = 
ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); return ret; } static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; qp_attr.qp_state = IB_QPS_INIT; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) return ret; return ib_modify_qp(qp, &qp_attr, qp_attr_mask); } int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { struct rdma_id_private *id_priv; struct ib_qp *qp; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (id->device != pd->device) return -EINVAL; qp = ib_create_qp(pd, qp_init_attr); if (IS_ERR(qp)) return PTR_ERR(qp); if (id->qp_type == IB_QPT_UD) ret = cma_init_ud_qp(id_priv, qp); else ret = cma_init_conn_qp(id_priv, qp); if (ret) goto err; id->qp = qp; id_priv->qp_num = qp->qp_num; id_priv->srq = (qp->srq != NULL); return 0; err: ib_destroy_qp(qp); return ret; } EXPORT_SYMBOL(rdma_create_qp); void rdma_destroy_qp(struct rdma_cm_id *id) { struct rdma_id_private *id_priv; id_priv = container_of(id, struct rdma_id_private, id); mutex_lock(&id_priv->qp_mutex); ib_destroy_qp(id_priv->id.qp); id_priv->id.qp = NULL; mutex_unlock(&id_priv->qp_mutex); } EXPORT_SYMBOL(rdma_destroy_qp); static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; union ib_gid sgid; mutex_lock(&id_priv->qp_mutex); if (!id_priv->id.qp) { ret = 0; goto out; } /* Need to update QP attributes from default values. */ qp_attr.qp_state = IB_QPS_INIT; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) goto out; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); if (ret) goto out; qp_attr.qp_state = IB_QPS_RTR; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) goto out; ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, qp_attr.ah_attr.grh.sgid_index, &sgid); if (ret) goto out; if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB && rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) == IB_LINK_LAYER_ETHERNET) { u32 scope_id = rdma_get_ipv6_scope_id(id_priv->id.device, id_priv->id.port_num); ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL, scope_id); if (ret) goto out; } if (conn_param) qp_attr.max_dest_rd_atomic = conn_param->responder_resources; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); out: mutex_unlock(&id_priv->qp_mutex); return ret; } static int cma_modify_qp_rts(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; mutex_lock(&id_priv->qp_mutex); if (!id_priv->id.qp) { ret = 0; goto out; } qp_attr.qp_state = IB_QPS_RTS; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) goto out; if (conn_param) qp_attr.max_rd_atomic = conn_param->initiator_depth; if (id_priv->qp_timeout && id_priv->id.qp->qp_type == IB_QPT_RC) { qp_attr.timeout = id_priv->qp_timeout; qp_attr_mask |= IB_QP_TIMEOUT; } ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); out: mutex_unlock(&id_priv->qp_mutex); return ret; } static int cma_modify_qp_err(struct rdma_id_private *id_priv) { struct ib_qp_attr qp_attr; int ret; mutex_lock(&id_priv->qp_mutex); if (!id_priv->id.qp) { ret = 0; goto out; } qp_attr.qp_state = IB_QPS_ERR; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE); out: mutex_unlock(&id_priv->qp_mutex); return ret; } static int 
cma_ib_init_qp_attr(struct rdma_id_private *id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int ret; u16 pkey; if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) == IB_LINK_LAYER_INFINIBAND) pkey = ib_addr_get_pkey(dev_addr); else pkey = 0xffff; ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, pkey, &qp_attr->pkey_index); if (ret) return ret; qp_attr->port_num = id_priv->id.port_num; *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; if (id_priv->id.qp_type == IB_QPT_UD) { ret = cma_set_qkey(id_priv); if (ret) return ret; qp_attr->qkey = id_priv->qkey; *qp_attr_mask |= IB_QP_QKEY; } else { qp_attr->qp_access_flags = 0; *qp_attr_mask |= IB_QP_ACCESS_FLAGS; } return 0; } int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct rdma_id_private *id_priv; int ret = 0; id_priv = container_of(id, struct rdma_id_private, id); switch (rdma_node_get_transport(id_priv->id.device->node_type)) { case RDMA_TRANSPORT_IB: if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); else ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, qp_attr_mask); if (qp_attr->qp_state == IB_QPS_RTR) qp_attr->rq_psn = id_priv->seq_num; break; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_SCIF: if (!id_priv->cm_id.iw) { qp_attr->qp_access_flags = 0; *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; } else ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, qp_attr_mask); break; default: ret = -ENOSYS; break; } return ret; } EXPORT_SYMBOL(rdma_init_qp_attr); static inline int cma_zero_addr(struct sockaddr *addr) { struct in6_addr *ip6; if (addr->sa_family == AF_INET) return ipv4_is_zeronet( ((struct sockaddr_in *)addr)->sin_addr.s_addr); else { ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr; return (ip6->s6_addr32[0] | ip6->s6_addr32[1] | ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0; } } static inline int cma_loopback_addr(struct sockaddr *addr) { if (addr->sa_family == AF_INET) return ipv4_is_loopback( ((struct sockaddr_in *) addr)->sin_addr.s_addr); else return ipv6_addr_loopback( &((struct sockaddr_in6 *) addr)->sin6_addr); } static inline int cma_any_addr(struct sockaddr *addr) { return cma_zero_addr(addr) || cma_loopback_addr(addr); } int rdma_cma_any_addr(struct sockaddr *addr) { return cma_any_addr(addr); } EXPORT_SYMBOL(rdma_cma_any_addr); static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) { if (src->sa_family != dst->sa_family) return -1; switch (src->sa_family) { case AF_INET: return ((struct sockaddr_in *) src)->sin_addr.s_addr != ((struct sockaddr_in *) dst)->sin_addr.s_addr; default: return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, &((struct sockaddr_in6 *) dst)->sin6_addr); } } static inline __be16 cma_port(struct sockaddr *addr) { if (addr->sa_family == AF_INET) return ((struct sockaddr_in *) addr)->sin_port; else return ((struct sockaddr_in6 *) addr)->sin6_port; } static inline int cma_any_port(struct sockaddr *addr) { return !cma_port(addr); } static int cma_get_net_info(void *hdr, enum rdma_port_space ps, u8 *ip_ver, __be16 *port, union cma_ip_addr **src, union cma_ip_addr **dst) { switch (ps) { case RDMA_PS_SDP: if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) != SDP_MAJ_VERSION) return -EINVAL; *ip_ver = sdp_get_ip_ver(hdr); *port = ((struct sdp_hh *) hdr)->port; *src = &((struct sdp_hh *) hdr)->src_addr; *dst = &((struct sdp_hh *) 
hdr)->dst_addr; break; default: if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION) return -EINVAL; *ip_ver = cma_get_ip_ver(hdr); *port = ((struct cma_hdr *) hdr)->port; *src = &((struct cma_hdr *) hdr)->src_addr; *dst = &((struct cma_hdr *) hdr)->dst_addr; break; } if (*ip_ver != 4 && *ip_ver != 6) return -EINVAL; return 0; } static void cma_save_net_info(struct rdma_addr *addr, struct rdma_addr *listen_addr, u8 ip_ver, __be16 port, union cma_ip_addr *src, union cma_ip_addr *dst) { struct sockaddr_in *listen4, *ip4; struct sockaddr_in6 *listen6, *ip6; switch (ip_ver) { case 4: listen4 = (struct sockaddr_in *) &listen_addr->src_addr; ip4 = (struct sockaddr_in *) &addr->src_addr; ip4->sin_family = listen4->sin_family; ip4->sin_addr.s_addr = dst->ip4.addr; ip4->sin_port = listen4->sin_port; ip4->sin_len = sizeof(struct sockaddr_in); ip4 = (struct sockaddr_in *) &addr->dst_addr; ip4->sin_family = listen4->sin_family; ip4->sin_addr.s_addr = src->ip4.addr; ip4->sin_port = port; ip4->sin_len = sizeof(struct sockaddr_in); break; case 6: listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr; ip6 = (struct sockaddr_in6 *) &addr->src_addr; ip6->sin6_family = listen6->sin6_family; ip6->sin6_addr = dst->ip6; ip6->sin6_port = listen6->sin6_port; ip6->sin6_len = sizeof(struct sockaddr_in6); ip6->sin6_scope_id = listen6->sin6_scope_id; ip6 = (struct sockaddr_in6 *) &addr->dst_addr; ip6->sin6_family = listen6->sin6_family; ip6->sin6_addr = src->ip6; ip6->sin6_port = port; ip6->sin6_len = sizeof(struct sockaddr_in6); ip6->sin6_scope_id = listen6->sin6_scope_id; break; default: break; } } static inline int cma_user_data_offset(enum rdma_port_space ps) { switch (ps) { case RDMA_PS_SDP: return 0; default: return sizeof(struct cma_hdr); } } static void cma_cancel_route(struct rdma_id_private *id_priv) { switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) { case IB_LINK_LAYER_INFINIBAND: if (id_priv->query) ib_sa_cancel_query(id_priv->query_id, id_priv->query); break; default: break; } } static void cma_cancel_listens(struct rdma_id_private *id_priv) { struct rdma_id_private *dev_id_priv; /* * Remove from listen_any_list to prevent added devices from spawning * additional listen requests. 
*/ mutex_lock(&lock); list_del(&id_priv->list); while (!list_empty(&id_priv->listen_list)) { dev_id_priv = list_entry(id_priv->listen_list.next, struct rdma_id_private, listen_list); /* sync with device removal to avoid duplicate destruction */ list_del_init(&dev_id_priv->list); list_del(&dev_id_priv->listen_list); mutex_unlock(&lock); rdma_destroy_id(&dev_id_priv->id); mutex_lock(&lock); } mutex_unlock(&lock); } static void cma_cancel_operation(struct rdma_id_private *id_priv, enum rdma_cm_state state) { switch (state) { case RDMA_CM_ADDR_QUERY: rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); break; case RDMA_CM_ROUTE_QUERY: cma_cancel_route(id_priv); break; case RDMA_CM_LISTEN: if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr) && !id_priv->cma_dev) cma_cancel_listens(id_priv); break; default: break; } } static void cma_release_port(struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list; mutex_lock(&lock); bind_list = id_priv->bind_list; if (!bind_list) { mutex_unlock(&lock); return; } hlist_del(&id_priv->node); id_priv->bind_list = NULL; if (hlist_empty(&bind_list->owners)) { idr_remove(bind_list->ps, bind_list->port); kfree(bind_list); } mutex_unlock(&lock); if (id_priv->sock) sock_release(id_priv->sock); } static void cma_leave_mc_groups(struct rdma_id_private *id_priv) { struct cma_multicast *mc; while (!list_empty(&id_priv->mc_list)) { mc = container_of(id_priv->mc_list.next, struct cma_multicast, list); list_del(&mc->list); switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) { case IB_LINK_LAYER_INFINIBAND: ib_sa_free_multicast(mc->multicast.ib); kfree(mc); break; case IB_LINK_LAYER_ETHERNET: kref_put(&mc->mcref, release_mc); break; default: break; } } } static void __rdma_free(struct work_struct *work) { struct rdma_id_private *id_priv; id_priv = container_of(work, struct rdma_id_private, work); wait_for_completion(&id_priv->comp); if (id_priv->internal_id) cma_deref_id(id_priv->id.context); kfree(id_priv->id.route.path_rec); kfree(id_priv); } void rdma_destroy_id(struct rdma_cm_id *id) { struct rdma_id_private *id_priv; enum rdma_cm_state state; unsigned long flags; struct ib_cm_id *ib; id_priv = container_of(id, struct rdma_id_private, id); state = cma_exch(id_priv, RDMA_CM_DESTROYING); cma_cancel_operation(id_priv, state); /* * Wait for any active callback to finish. New callbacks will find * the id_priv state set to destroying and abort. 
*/ mutex_lock(&id_priv->handler_mutex); mutex_unlock(&id_priv->handler_mutex); if (id_priv->cma_dev) { switch (rdma_node_get_transport(id_priv->id.device->node_type)) { case RDMA_TRANSPORT_IB: spin_lock_irqsave(&id_priv->cm_lock, flags); if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) { ib = id_priv->cm_id.ib; id_priv->cm_id.ib = NULL; spin_unlock_irqrestore(&id_priv->cm_lock, flags); ib_destroy_cm_id(ib); } else spin_unlock_irqrestore(&id_priv->cm_lock, flags); break; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_SCIF: if (id_priv->cm_id.iw) iw_destroy_cm_id(id_priv->cm_id.iw); break; default: break; } cma_leave_mc_groups(id_priv); cma_release_dev(id_priv); } cma_release_port(id_priv); cma_deref_id(id_priv); INIT_WORK(&id_priv->work, __rdma_free); queue_work(cma_free_wq, &id_priv->work); } EXPORT_SYMBOL(rdma_destroy_id); static int cma_rep_recv(struct rdma_id_private *id_priv) { int ret; ret = cma_modify_qp_rtr(id_priv, NULL); if (ret) goto reject; ret = cma_modify_qp_rts(id_priv, NULL); if (ret) goto reject; cma_dbg(id_priv, "sending RTU\n"); ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); if (ret) goto reject; return 0; reject: cma_modify_qp_err(id_priv); cma_dbg(id_priv, "sending REJ\n"); ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); return ret; } static int cma_verify_rep(struct rdma_id_private *id_priv, void *data) { if (id_priv->id.ps == RDMA_PS_SDP && sdp_get_majv(((struct sdp_hah *) data)->sdp_version) != SDP_MAJ_VERSION) return -EINVAL; return 0; } static void cma_set_rep_event_data(struct rdma_cm_event *event, struct ib_cm_rep_event_param *rep_data, void *private_data) { event->param.conn.private_data = private_data; event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; event->param.conn.responder_resources = rep_data->responder_resources; event->param.conn.initiator_depth = rep_data->initiator_depth; event->param.conn.flow_control = rep_data->flow_control; event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; event->param.conn.srq = rep_data->srq; event->param.conn.qp_num = rep_data->remote_qpn; } static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv = cm_id->context; struct rdma_cm_event event; int ret = 0; if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && cma_disable_callback(id_priv, RDMA_CM_CONNECT)) || (ib_event->event == IB_CM_TIMEWAIT_EXIT && cma_disable_callback(id_priv, RDMA_CM_DISCONNECT))) return 0; memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_REQ_ERROR: case IB_CM_REP_ERROR: event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = -ETIMEDOUT; break; case IB_CM_REP_RECEIVED: event.status = cma_verify_rep(id_priv, ib_event->private_data); if (event.status) event.event = RDMA_CM_EVENT_CONNECT_ERROR; else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) { event.status = cma_rep_recv(id_priv); event.event = event.status ? 
RDMA_CM_EVENT_CONNECT_ERROR : RDMA_CM_EVENT_ESTABLISHED; } else event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, ib_event->private_data); break; case IB_CM_RTU_RECEIVED: case IB_CM_USER_ESTABLISHED: event.event = RDMA_CM_EVENT_ESTABLISHED; break; case IB_CM_DREQ_ERROR: event.status = -ETIMEDOUT; /* fall through */ case IB_CM_DREQ_RECEIVED: case IB_CM_DREP_RECEIVED: if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_DISCONNECT)) goto out; event.event = RDMA_CM_EVENT_DISCONNECTED; break; case IB_CM_TIMEWAIT_EXIT: event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; break; case IB_CM_MRA_RECEIVED: /* ignore event */ goto out; case IB_CM_REJ_RECEIVED: cma_modify_qp_err(id_priv); event.status = ib_event->param.rej_rcvd.reason; event.event = RDMA_CM_EVENT_REJECTED; event.param.conn.private_data = ib_event->private_data; event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; break; default: printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n", ib_event->event); goto out; } ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } out: mutex_unlock(&id_priv->handler_mutex); return ret; } static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv; struct rdma_cm_id *id; struct rdma_route *rt; union cma_ip_addr *src, *dst; __be16 port; u8 ip_ver; int ret; if (cma_get_net_info(ib_event->private_data, listen_id->ps, &ip_ver, &port, &src, &dst)) return NULL; id = rdma_create_id(listen_id->event_handler, listen_id->context, listen_id->ps, ib_event->param.req_rcvd.qp_type); if (IS_ERR(id)) return NULL; cma_save_net_info(&id->route.addr, &listen_id->route.addr, ip_ver, port, src, dst); rt = &id->route; rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 
2 : 1; rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL); if (!rt->path_rec) goto err; rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; if (rt->num_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) { rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); } else { ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr, &rt->addr.dev_addr, NULL); if (ret) goto err; } rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = RDMA_CM_CONNECT; return id_priv; err: rdma_destroy_id(id); return NULL; } static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv; struct rdma_cm_id *id; union cma_ip_addr *src, *dst; __be16 port; u8 ip_ver; int ret; id = rdma_create_id(listen_id->event_handler, listen_id->context, listen_id->ps, IB_QPT_UD); if (IS_ERR(id)) return NULL; if (cma_get_net_info(ib_event->private_data, listen_id->ps, &ip_ver, &port, &src, &dst)) goto err; cma_save_net_info(&id->route.addr, &listen_id->route.addr, ip_ver, port, src, dst); if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) { ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, &id->route.addr.dev_addr, NULL); if (ret) goto err; } id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = RDMA_CM_CONNECT; return id_priv; err: rdma_destroy_id(id); return NULL; } static void cma_set_req_event_data(struct rdma_cm_event *event, struct ib_cm_req_event_param *req_data, void *private_data, int offset) { event->param.conn.private_data = private_data + offset; event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset; event->param.conn.responder_resources = req_data->responder_resources; event->param.conn.initiator_depth = req_data->initiator_depth; event->param.conn.flow_control = req_data->flow_control; event->param.conn.retry_count = req_data->retry_count; event->param.conn.rnr_retry_count = req_data->rnr_retry_count; event->param.conn.srq = req_data->srq; event->param.conn.qp_num = req_data->remote_qpn; } static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) { return (((ib_event->event == IB_CM_REQ_RECEIVED) && (ib_event->param.req_rcvd.qp_type == id->qp_type)) || ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && (id->qp_type == IB_QPT_UD)) || (!id->qp_type)); } static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *listen_id, *conn_id; struct rdma_cm_event event; int offset, ret; u8 smac[ETH_ALEN]; u8 alt_smac[ETH_ALEN]; u8 *psmac = smac; u8 *palt_smac = alt_smac; int is_iboe = ((rdma_node_get_transport(cm_id->device->node_type) == RDMA_TRANSPORT_IB) && (rdma_port_get_link_layer(cm_id->device, ib_event->param.req_rcvd.port) == IB_LINK_LAYER_ETHERNET)); int is_sidr = 0; listen_id = cm_id->context; if (!cma_check_req_qp_type(&listen_id->id, ib_event)) return -EINVAL; if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) return -ECONNABORTED; memset(&event, 0, sizeof event); offset = cma_user_data_offset(listen_id->id.ps); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { is_sidr = 1; conn_id = cma_new_udp_id(&listen_id->id, ib_event); 
event.param.ud.private_data = ib_event->private_data + offset; event.param.ud.private_data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; } else { conn_id = cma_new_conn_id(&listen_id->id, ib_event); cma_set_req_event_data(&event, &ib_event->param.req_rcvd, ib_event->private_data, offset); } if (!conn_id) { ret = -ENOMEM; goto err1; } mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); - ret = cma_acquire_dev(conn_id); + ret = cma_acquire_dev(conn_id, listen_id); if (ret) goto err2; conn_id->cm_id.ib = cm_id; cm_id->context = conn_id; cm_id->cm_handler = cma_ib_handler; /* * Protect against the user destroying conn_id from another thread * until we're done accessing it. */ atomic_inc(&conn_id->refcount); ret = conn_id->id.event_handler(&conn_id->id, &event); if (ret) goto err3; if (is_iboe && !is_sidr) { u32 scope_id = rdma_get_ipv6_scope_id(cm_id->device, ib_event->param.req_rcvd.port); if (ib_event->param.req_rcvd.primary_path != NULL) rdma_addr_find_smac_by_sgid( &ib_event->param.req_rcvd.primary_path->sgid, psmac, NULL, scope_id); else psmac = NULL; if (ib_event->param.req_rcvd.alternate_path != NULL) rdma_addr_find_smac_by_sgid( &ib_event->param.req_rcvd.alternate_path->sgid, palt_smac, NULL, scope_id); else palt_smac = NULL; } /* * Acquire mutex to prevent user executing rdma_destroy_id() * while we're accessing the cm_id. */ mutex_lock(&lock); if (is_iboe && !is_sidr) ib_update_cm_av(cm_id, psmac, palt_smac); if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) { cma_dbg(container_of(&conn_id->id, struct rdma_id_private, id), "sending MRA\n"); ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); } mutex_unlock(&lock); mutex_unlock(&conn_id->handler_mutex); mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); return 0; err3: cma_deref_id(conn_id); /* Destroy the CM ID by returning a non-zero value. 
*/ conn_id->cm_id.ib = NULL; err2: cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); err1: mutex_unlock(&listen_id->handler_mutex); if (conn_id) rdma_destroy_id(&conn_id->id); return ret; } static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr) { return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr))); } static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, struct ib_cm_compare_data *compare) { struct cma_hdr *cma_data, *cma_mask; struct sdp_hh *sdp_data, *sdp_mask; __be32 ip4_addr; struct in6_addr ip6_addr; memset(compare, 0, sizeof *compare); cma_data = (void *) compare->data; cma_mask = (void *) compare->mask; sdp_data = (void *) compare->data; sdp_mask = (void *) compare->mask; switch (addr->sa_family) { case AF_INET: ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr; if (ps == RDMA_PS_SDP) { sdp_set_ip_ver(sdp_data, 4); sdp_set_ip_ver(sdp_mask, 0xF); if (!cma_any_addr(addr)) { sdp_data->dst_addr.ip4.addr = ip4_addr; sdp_mask->dst_addr.ip4.addr = htonl(~0); } } else { cma_set_ip_ver(cma_data, 4); cma_set_ip_ver(cma_mask, 0xF); if (!cma_any_addr(addr)) { cma_data->dst_addr.ip4.addr = ip4_addr; cma_mask->dst_addr.ip4.addr = htonl(~0); } } break; case AF_INET6: ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr; if (ps == RDMA_PS_SDP) { sdp_set_ip_ver(sdp_data, 6); sdp_set_ip_ver(sdp_mask, 0xF); if (!cma_any_addr(addr)) { sdp_data->dst_addr.ip6 = ip6_addr; memset(&sdp_mask->dst_addr.ip6, 0xFF, sizeof(sdp_mask->dst_addr.ip6)); } } else { cma_set_ip_ver(cma_data, 6); cma_set_ip_ver(cma_mask, 0xF); if (!cma_any_addr(addr)) { cma_data->dst_addr.ip6 = ip6_addr; memset(&cma_mask->dst_addr.ip6, 0xFF, sizeof(cma_mask->dst_addr.ip6)); } } break; default: break; } } static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) { struct rdma_id_private *id_priv = iw_id->context; struct rdma_cm_event event; struct sockaddr_in *sin; int ret = 0; if (cma_disable_callback(id_priv, RDMA_CM_CONNECT)) return 0; memset(&event, 0, sizeof event); switch (iw_event->event) { case IW_CM_EVENT_CLOSE: event.event = RDMA_CM_EVENT_DISCONNECTED; break; case IW_CM_EVENT_CONNECT_REPLY: sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; *sin = iw_event->local_addr; sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr; *sin = iw_event->remote_addr; switch ((int)iw_event->status) { case 0: event.event = RDMA_CM_EVENT_ESTABLISHED; event.param.conn.initiator_depth = iw_event->ird; event.param.conn.responder_resources = iw_event->ord; break; case -ECONNRESET: case -ECONNREFUSED: event.event = RDMA_CM_EVENT_REJECTED; break; case -ETIMEDOUT: event.event = RDMA_CM_EVENT_UNREACHABLE; break; default: event.event = RDMA_CM_EVENT_CONNECT_ERROR; break; } break; case IW_CM_EVENT_ESTABLISHED: event.event = RDMA_CM_EVENT_ESTABLISHED; event.param.conn.initiator_depth = iw_event->ird; event.param.conn.responder_resources = iw_event->ord; break; default: BUG_ON(1); } event.status = iw_event->status; event.param.conn.private_data = iw_event->private_data; event.param.conn.private_data_len = iw_event->private_data_len; ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. 
*/ id_priv->cm_id.iw = NULL; cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } mutex_unlock(&id_priv->handler_mutex); return ret; } static int iw_conn_req_handler(struct iw_cm_id *cm_id, struct iw_cm_event *iw_event) { struct rdma_cm_id *new_cm_id; struct rdma_id_private *listen_id, *conn_id; struct sockaddr_in *sin; struct net_device *dev = NULL; struct rdma_cm_event event; int ret; struct ib_device_attr attr; listen_id = cm_id->context; if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) return -ECONNABORTED; /* Create a new RDMA id for the new IW CM ID */ new_cm_id = rdma_create_id(listen_id->id.event_handler, listen_id->id.context, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(new_cm_id)) { ret = -ENOMEM; goto out; } conn_id = container_of(new_cm_id, struct rdma_id_private, id); mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); conn_id->state = RDMA_CM_CONNECT; dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr); if (!dev) { ret = -EADDRNOTAVAIL; mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL); if (ret) { mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } - ret = cma_acquire_dev(conn_id); + ret = cma_acquire_dev(conn_id, listen_id); if (ret) { mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } conn_id->cm_id.iw = cm_id; cm_id->context = conn_id; cm_id->cm_handler = cma_iw_handler; sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr; *sin = iw_event->local_addr; sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr; *sin = iw_event->remote_addr; ret = ib_query_device(conn_id->id.device, &attr); if (ret) { mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; event.param.conn.private_data = iw_event->private_data; event.param.conn.private_data_len = iw_event->private_data_len; event.param.conn.initiator_depth = iw_event->ird; event.param.conn.responder_resources = iw_event->ord; /* * Protect against the user destroying conn_id from another thread * until we're done accessing it. 
*/ atomic_inc(&conn_id->refcount); ret = conn_id->id.event_handler(&conn_id->id, &event); if (ret) { /* User wants to destroy the CM ID */ conn_id->cm_id.iw = NULL; cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); cma_deref_id(conn_id); rdma_destroy_id(&conn_id->id); goto out; } mutex_unlock(&conn_id->handler_mutex); cma_deref_id(conn_id); out: if (dev) dev_put(dev); mutex_unlock(&listen_id->handler_mutex); return ret; } static int cma_ib_listen(struct rdma_id_private *id_priv) { struct ib_cm_compare_data compare_data; struct sockaddr *addr; struct ib_cm_id *id; __be64 svc_id; int ret; id = ib_create_cm_id(id_priv->id.device, cma_req_handler, id_priv); if (IS_ERR(id)) return PTR_ERR(id); id_priv->cm_id.ib = id; addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; svc_id = cma_get_service_id(id_priv->id.ps, addr); if (cma_any_addr(addr) && !id_priv->afonly) ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL); else { cma_set_compare_data(id_priv->id.ps, addr, &compare_data); ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data); } if (ret) { ib_destroy_cm_id(id_priv->cm_id.ib); id_priv->cm_id.ib = NULL; } return ret; } static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) { int ret; struct sockaddr_in *sin; struct iw_cm_id *id; id = iw_create_cm_id(id_priv->id.device, id_priv->sock, iw_conn_req_handler, id_priv); if (IS_ERR(id)) return PTR_ERR(id); id_priv->cm_id.iw = id; sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; id_priv->cm_id.iw->local_addr = *sin; ret = iw_cm_listen(id_priv->cm_id.iw, backlog); if (ret) { iw_destroy_cm_id(id_priv->cm_id.iw); id_priv->cm_id.iw = NULL; } return ret; } static int cma_listen_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { struct rdma_id_private *id_priv = id->context; id->context = id_priv->id.context; id->event_handler = id_priv->id.event_handler; return id_priv->id.event_handler(id, event); } static void cma_listen_on_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { struct rdma_id_private *dev_id_priv; struct rdma_cm_id *id; int ret; id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps, id_priv->id.qp_type); if (IS_ERR(id)) return; dev_id_priv = container_of(id, struct rdma_id_private, id); dev_id_priv->state = RDMA_CM_ADDR_BOUND; dev_id_priv->sock = id_priv->sock; memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr, ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr)); cma_attach_to_dev(dev_id_priv, cma_dev); list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); atomic_inc(&id_priv->refcount); dev_id_priv->internal_id = 1; dev_id_priv->afonly = id_priv->afonly; ret = rdma_listen(id, id_priv->backlog); if (ret) cma_warn(id_priv, "cma_listen_on_dev, error %d, listening on device %s\n", ret, cma_dev->device->name); } static void cma_listen_on_all(struct rdma_id_private *id_priv) { struct cma_device *cma_dev; mutex_lock(&lock); list_add_tail(&id_priv->list, &listen_any_list); list_for_each_entry(cma_dev, &dev_list, list) cma_listen_on_dev(id_priv, cma_dev); mutex_unlock(&lock); } void rdma_set_service_type(struct rdma_cm_id *id, int tos) { struct rdma_id_private *id_priv; id_priv = container_of(id, struct rdma_id_private, id); id_priv->tos = (u8) tos; } EXPORT_SYMBOL(rdma_set_service_type); void rdma_set_timeout(struct rdma_cm_id *id, int timeout) { struct rdma_id_private *id_priv; id_priv = container_of(id, struct rdma_id_private, id); id_priv->qp_timeout = (u8) timeout; } 
EXPORT_SYMBOL(rdma_set_timeout); static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, void *context) { struct cma_work *work = context; struct rdma_route *route; route = &work->id->id.route; if (!status) { route->num_paths = 1; *route->path_rec = *path_rec; } else { work->old_state = RDMA_CM_ROUTE_QUERY; work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; work->event.status = status; } queue_work(cma_wq, &work->work); } static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, struct cma_work *work) { struct rdma_addr *addr = &id_priv->id.route.addr; struct ib_sa_path_rec path_rec; ib_sa_comp_mask comp_mask; struct sockaddr_in6 *sin6; memset(&path_rec, 0, sizeof path_rec); rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid); rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid); path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr)); path_rec.numb_path = 1; path_rec.reversible = 1; path_rec.service_id = cma_get_service_id(id_priv->id.ps, (struct sockaddr *) &addr->dst_addr); comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; if (addr->src_addr.ss_family == AF_INET) { path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); comp_mask |= IB_SA_PATH_REC_QOS_CLASS; } else { sin6 = (struct sockaddr_in6 *) &addr->src_addr; path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; } id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, id_priv->id.port_num, &path_rec, comp_mask, timeout_ms, GFP_KERNEL, cma_query_handler, work, &id_priv->query); return (id_priv->query_id < 0) ? id_priv->query_id : 0; } static void cma_work_handler(struct work_struct *_work) { struct cma_work *work = container_of(_work, struct cma_work, work); struct rdma_id_private *id_priv = work->id; int destroy = 0; mutex_lock(&id_priv->handler_mutex); if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) goto out; if (id_priv->id.event_handler(&id_priv->id, &work->event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); destroy = 1; } out: mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); if (destroy) rdma_destroy_id(&id_priv->id); kfree(work); } static void cma_ndev_work_handler(struct work_struct *_work) { struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work); struct rdma_id_private *id_priv = work->id; int destroy = 0; mutex_lock(&id_priv->handler_mutex); if (id_priv->state == RDMA_CM_DESTROYING || id_priv->state == RDMA_CM_DEVICE_REMOVAL) goto out; if (id_priv->id.event_handler(&id_priv->id, &work->event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); destroy = 1; } out: mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); if (destroy) rdma_destroy_id(&id_priv->id); kfree(work); } static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) { struct rdma_route *route = &id_priv->id.route; struct cma_work *work; int ret; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ROUTE_QUERY; work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); if (!route->path_rec) { ret = -ENOMEM; goto err1; } ret = cma_query_ib_route(id_priv, timeout_ms, work); if (ret) goto err2; return 0; err2: kfree(route->path_rec); 
route->path_rec = NULL; err1: kfree(work); return ret; } int rdma_set_ib_paths(struct rdma_cm_id *id, struct ib_sa_path_rec *path_rec, int num_paths) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_RESOLVED)) return -EINVAL; id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths, GFP_KERNEL); if (!id->route.path_rec) { ret = -ENOMEM; goto err; } id->route.num_paths = num_paths; return 0; err: cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); return ret; } EXPORT_SYMBOL(rdma_set_ib_paths); static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) { struct cma_work *work; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ROUTE_QUERY; work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; queue_work(cma_wq, &work->work); return 0; } static u8 tos_to_sl(u8 tos) { return def_prec2sl & 7; } static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) { struct rdma_route *route = &id_priv->id.route; struct rdma_addr *addr = &route->addr; struct cma_work *work; int ret; struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr; struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr; struct net_device *ndev = NULL; if (src_addr->sin_family != dst_addr->sin_family) return -EINVAL; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); if (!route->path_rec) { ret = -ENOMEM; goto err1; } route->num_paths = 1; if (addr->dev_addr.bound_dev_if) ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); if (!ndev) { ret = -ENODEV; goto err2; } route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev); memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); memcpy(route->path_rec->smac, IF_LLADDR(ndev), ndev->if_addrlen); rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &route->path_rec->sgid); rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, &route->path_rec->dgid); route->path_rec->hop_limit = 1; route->path_rec->reversible = 1; route->path_rec->pkey = cpu_to_be16(0xffff); route->path_rec->mtu_selector = IB_SA_EQ; route->path_rec->sl = tos_to_sl(id_priv->tos); route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu); route->path_rec->rate_selector = IB_SA_EQ; route->path_rec->rate = iboe_get_rate(ndev); dev_put(ndev); route->path_rec->packet_life_time_selector = IB_SA_EQ; route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; if (!route->path_rec->mtu) { ret = -EINVAL; goto err2; } work->old_state = RDMA_CM_ROUTE_QUERY; work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; work->event.status = 0; queue_work(cma_wq, &work->work); return 0; err2: kfree(route->path_rec); route->path_rec = NULL; err1: kfree(work); return ret; } int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) return -EINVAL; atomic_inc(&id_priv->refcount); switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: switch (rdma_port_get_link_layer(id->device, 
id->port_num)) { case IB_LINK_LAYER_INFINIBAND: ret = cma_resolve_ib_route(id_priv, timeout_ms); break; case IB_LINK_LAYER_ETHERNET: ret = cma_resolve_iboe_route(id_priv); break; default: ret = -ENOSYS; } break; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_SCIF: ret = cma_resolve_iw_route(id_priv, timeout_ms); break; default: ret = -ENOSYS; break; } if (ret) goto err; return 0; err: cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); cma_deref_id(id_priv); return ret; } EXPORT_SYMBOL(rdma_resolve_route); int rdma_enable_apm(struct rdma_cm_id *id, enum alt_path_type alt_type) { /* APM is not supported yet */ return -EINVAL; } EXPORT_SYMBOL(rdma_enable_apm); static int cma_bind_loopback(struct rdma_id_private *id_priv) { struct cma_device *cma_dev; struct ib_port_attr port_attr; union ib_gid gid; u16 pkey; int ret; u8 p; mutex_lock(&lock); if (list_empty(&dev_list)) { ret = -ENODEV; goto out; } list_for_each_entry(cma_dev, &dev_list, list) for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p) if (!ib_query_port(cma_dev->device, p, &port_attr) && port_attr.state == IB_PORT_ACTIVE) goto port_found; p = 1; cma_dev = list_entry(dev_list.next, struct cma_device, list); port_found: ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid); if (ret) goto out; ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); if (ret) goto out; id_priv->id.route.addr.dev_addr.dev_type = (rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ? ARPHRD_INFINIBAND : ARPHRD_ETHER; rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); id_priv->id.port_num = p; cma_attach_to_dev(id_priv, cma_dev); out: mutex_unlock(&lock); return ret; } static void addr_handler(int status, struct sockaddr *src_addr, struct rdma_dev_addr *dev_addr, void *context) { struct rdma_id_private *id_priv = context; struct rdma_cm_event event; memset(&event, 0, sizeof event); mutex_lock(&id_priv->handler_mutex); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_RESOLVED)) goto out; memcpy(&id_priv->id.route.addr.src_addr, src_addr, ip_addr_size(src_addr)); if (!status && !id_priv->cma_dev) - status = cma_acquire_dev(id_priv); + status = cma_acquire_dev(id_priv, NULL); if (status) { if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ADDR_BOUND)) goto out; event.event = RDMA_CM_EVENT_ADDR_ERROR; event.status = status; } else event.event = RDMA_CM_EVENT_ADDR_RESOLVED; if (id_priv->id.event_handler(&id_priv->id, &event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); rdma_destroy_id(&id_priv->id); return; } out: mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); } static int cma_resolve_loopback(struct rdma_id_private *id_priv) { struct cma_work *work; struct sockaddr *src, *dst; union ib_gid gid; int ret; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; if (!id_priv->cma_dev) { ret = cma_bind_loopback(id_priv); if (ret) goto err; } rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); src = (struct sockaddr *) &id_priv->id.route.addr.src_addr; if (cma_zero_addr(src)) { dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr; if ((src->sa_family = dst->sa_family) == AF_INET) { ((struct sockaddr_in *)src)->sin_addr = ((struct sockaddr_in *)dst)->sin_addr; } else { ((struct sockaddr_in6 *)src)->sin6_addr = ((struct sockaddr_in6 *)dst)->sin6_addr; } } work->id = id_priv; 
INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ADDR_QUERY; work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; queue_work(cma_wq, &work->work); return 0; err: kfree(work); return ret; } static int cma_resolve_scif(struct rdma_id_private *id_priv) { struct cma_work *work; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; /* we probably can leave it empty here */ work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ADDR_QUERY; work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; queue_work(cma_wq, &work->work); return 0; } static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, struct sockaddr *dst_addr) { if (!src_addr || !src_addr->sa_family) { src_addr = (struct sockaddr *) &id->route.addr.src_addr; src_addr->sa_family = dst_addr->sa_family; #ifdef INET6 if (dst_addr->sa_family == AF_INET6) { ((struct sockaddr_in6 *) src_addr)->sin6_scope_id = ((struct sockaddr_in6 *) dst_addr)->sin6_scope_id; } #endif } if (!cma_any_addr(src_addr)) return rdma_bind_addr(id, src_addr); else { #if defined(INET6) || defined(INET) union { #ifdef INET struct sockaddr_in in; #endif #ifdef INET6 struct sockaddr_in6 in6; #endif } addr; #endif switch(dst_addr->sa_family) { #ifdef INET case AF_INET: memset(&addr.in, 0, sizeof(addr.in)); addr.in.sin_family = dst_addr->sa_family; addr.in.sin_len = sizeof(addr.in); return rdma_bind_addr(id, (struct sockaddr *)&addr.in); #endif #ifdef INET6 case AF_INET6: memset(&addr.in6, 0, sizeof(addr.in6)); addr.in6.sin6_family = dst_addr->sa_family; addr.in6.sin6_len = sizeof(addr.in6); addr.in6.sin6_scope_id = ((struct sockaddr_in6 *)dst_addr)->sin6_scope_id; return rdma_bind_addr(id, (struct sockaddr *)&addr.in6); #endif default: return -EINVAL; } } } int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, struct sockaddr *dst_addr, int timeout_ms) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (id_priv->state == RDMA_CM_IDLE) { ret = cma_bind_addr(id, src_addr, dst_addr); if (ret) return ret; } if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) return -EINVAL; atomic_inc(&id_priv->refcount); memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr)); if (cma_any_addr(dst_addr)) ret = cma_resolve_loopback(id_priv); else if (id_priv->id.device && rdma_node_get_transport(id_priv->id.device->node_type) == RDMA_TRANSPORT_SCIF) ret = cma_resolve_scif(id_priv); else ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr, dst_addr, &id->route.addr.dev_addr, timeout_ms, addr_handler, id_priv); if (ret) goto err; return 0; err: cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); cma_deref_id(id_priv); return ret; } EXPORT_SYMBOL(rdma_resolve_addr); int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) { struct rdma_id_private *id_priv; unsigned long flags; int ret; id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irqsave(&id_priv->lock, flags); if (id_priv->state == RDMA_CM_IDLE) { id_priv->reuseaddr = reuse; ret = 0; } else { ret = -EINVAL; } spin_unlock_irqrestore(&id_priv->lock, flags); return ret; } EXPORT_SYMBOL(rdma_set_reuseaddr); int rdma_set_afonly(struct rdma_cm_id *id, int afonly) { struct rdma_id_private *id_priv; unsigned long flags; int ret; id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irqsave(&id_priv->lock, flags); if 
(id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { id_priv->options |= (1 << CMA_OPTION_AFONLY); id_priv->afonly = afonly; ret = 0; } else { ret = -EINVAL; } spin_unlock_irqrestore(&id_priv->lock, flags); return ret; } EXPORT_SYMBOL(rdma_set_afonly); static void cma_bind_port(struct rdma_bind_list *bind_list, struct rdma_id_private *id_priv) { struct sockaddr_in *sin; sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; sin->sin_port = htons(bind_list->port); id_priv->bind_list = bind_list; hlist_add_head(&id_priv->node, &bind_list->owners); } static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv, unsigned short snum) { struct rdma_bind_list *bind_list; int port, ret; bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); if (!bind_list) return -ENOMEM; do { ret = idr_get_new_above(ps, bind_list, snum, &port); } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL)); if (ret) goto err1; if (port != snum) { ret = -EADDRNOTAVAIL; goto err2; } bind_list->ps = ps; bind_list->port = (unsigned short) port; cma_bind_port(bind_list, id_priv); return 0; err2: idr_remove(ps, port); err1: kfree(bind_list); return ret; } static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv) { static unsigned int last_used_port; int low, high, remaining; unsigned int rover; inet_get_local_port_range(&low, &high); remaining = (high - low) + 1; rover = random() % remaining + low; retry: if (last_used_port != rover && !idr_find(ps, (unsigned short) rover)) { int ret = cma_alloc_port(ps, id_priv, rover); /* * Remember previously used port number in order to avoid * re-using same port immediately after it is closed. */ if (!ret) last_used_port = rover; if (ret != -EADDRNOTAVAIL) return ret; } if (--remaining) { rover++; if ((rover < low) || (rover > high)) rover = low; goto retry; } return -EADDRNOTAVAIL; } /* * Check that the requested port is available. This is called when trying to * bind to a specific port, or when trying to listen on a bound port. In * the latter case, the provided id_priv may already be on the bind_list, but * we still need to check that it's okay to start listening. 
*/ static int cma_check_port(struct rdma_bind_list *bind_list, struct rdma_id_private *id_priv, uint8_t reuseaddr) { struct rdma_id_private *cur_id; struct sockaddr *addr, *cur_addr; addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; hlist_for_each_entry(cur_id, &bind_list->owners, node) { if (id_priv == cur_id) continue; if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && cur_id->reuseaddr) continue; cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr; if (id_priv->afonly && cur_id->afonly && (addr->sa_family != cur_addr->sa_family)) continue; if (cma_any_addr(addr) || cma_any_addr(cur_addr)) return -EADDRNOTAVAIL; if (!cma_addr_cmp(addr, cur_addr)) return -EADDRINUSE; } return 0; } static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list; unsigned short snum; int ret; snum = ntohs(cma_port((struct sockaddr *) &id_priv->id.route.addr.src_addr)); bind_list = idr_find(ps, snum); if (!bind_list) { ret = cma_alloc_port(ps, id_priv, snum); } else { ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); if (!ret) cma_bind_port(bind_list, id_priv); } return ret; } static int cma_bind_listen(struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list = id_priv->bind_list; int ret = 0; mutex_lock(&lock); if (bind_list->owners.first->next) ret = cma_check_port(bind_list, id_priv, 0); mutex_unlock(&lock); return ret; } static int cma_get_tcp_port(struct rdma_id_private *id_priv) { int ret; int size; struct socket *sock; ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret) return ret; #ifdef __linux__ ret = sock->ops->bind(sock, (struct sockaddr *) &id_priv->id.route.addr.src_addr, ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr)); #else ret = -sobind(sock, (struct sockaddr *)&id_priv->id.route.addr.src_addr, curthread); #endif if (ret) { sock_release(sock); return ret; } size = ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr); ret = sock_getname(sock, (struct sockaddr *) &id_priv->id.route.addr.src_addr, &size, 0); if (ret) { sock_release(sock); return ret; } id_priv->sock = sock; return 0; } static int cma_get_port(struct rdma_id_private *id_priv) { struct idr *ps; int ret; switch (id_priv->id.ps) { case RDMA_PS_SDP: ps = &sdp_ps; break; case RDMA_PS_TCP: ps = &tcp_ps; if (unify_tcp_port_space) { ret = cma_get_tcp_port(id_priv); if (ret) goto out; } break; case RDMA_PS_UDP: ps = &udp_ps; break; case RDMA_PS_IPOIB: ps = &ipoib_ps; break; case RDMA_PS_IB: ps = &ib_ps; break; default: return -EPROTONOSUPPORT; } mutex_lock(&lock); if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr)) ret = cma_alloc_any_port(ps, id_priv); else ret = cma_use_port(ps, id_priv); mutex_unlock(&lock); out: return ret; } static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, struct sockaddr *addr) { #if defined(INET6) struct sockaddr_in6 *sin6; if (addr->sa_family != AF_INET6) return 0; sin6 = (struct sockaddr_in6 *) addr; if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) && !sin6->sin6_scope_id) return -EINVAL; dev_addr->bound_dev_if = sin6->sin6_scope_id; #endif return 0; } int rdma_listen(struct rdma_cm_id *id, int backlog) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (id_priv->state == RDMA_CM_IDLE) { ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET; ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr); if (ret) return ret; } if (!cma_comp_exch(id_priv, 
RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) return -EINVAL; if (id_priv->reuseaddr) { ret = cma_bind_listen(id_priv); if (ret) goto err; } id_priv->backlog = backlog; if (id->device) { switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: ret = cma_ib_listen(id_priv); if (ret) goto err; break; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_SCIF: ret = cma_iw_listen(id_priv, backlog); if (ret) goto err; break; default: ret = -ENOSYS; goto err; } } else cma_listen_on_all(id_priv); return 0; err: id_priv->backlog = 0; cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); return ret; } EXPORT_SYMBOL(rdma_listen); int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) { struct rdma_id_private *id_priv; int ret; #if defined(INET6) int ipv6only; size_t var_size = sizeof(int); #endif if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) return -EAFNOSUPPORT; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) return -EINVAL; ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); if (ret) goto err1; memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr)); if (!cma_any_addr(addr)) { ret = rdma_translate_ip(addr, &id->route.addr.dev_addr, NULL); if (ret) goto err1; - ret = cma_acquire_dev(id_priv); + ret = cma_acquire_dev(id_priv, NULL); if (ret) goto err1; } if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { if (addr->sa_family == AF_INET) id_priv->afonly = 1; #if defined(INET6) else if (addr->sa_family == AF_INET6) id_priv->afonly = kernel_sysctlbyname(&thread0, "net.inet6.ip6.v6only", &ipv6only, &var_size, NULL, 0, NULL, 0); #endif } ret = cma_get_port(id_priv); if (ret) goto err2; return 0; err2: if (id_priv->cma_dev) cma_release_dev(id_priv); err1: cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); return ret; } EXPORT_SYMBOL(rdma_bind_addr); static int cma_format_hdr(void *hdr, enum rdma_port_space ps, struct rdma_route *route) { struct cma_hdr *cma_hdr; struct sdp_hh *sdp_hdr; if (route->addr.src_addr.ss_family == AF_INET) { struct sockaddr_in *src4, *dst4; src4 = (struct sockaddr_in *) &route->addr.src_addr; dst4 = (struct sockaddr_in *) &route->addr.dst_addr; switch (ps) { case RDMA_PS_SDP: sdp_hdr = hdr; if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) return -EINVAL; sdp_set_ip_ver(sdp_hdr, 4); sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; sdp_hdr->port = src4->sin_port; break; default: cma_hdr = hdr; cma_hdr->cma_version = CMA_VERSION; cma_set_ip_ver(cma_hdr, 4); cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; cma_hdr->port = src4->sin_port; break; } } else { struct sockaddr_in6 *src6, *dst6; src6 = (struct sockaddr_in6 *) &route->addr.src_addr; dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr; switch (ps) { case RDMA_PS_SDP: sdp_hdr = hdr; if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) return -EINVAL; sdp_set_ip_ver(sdp_hdr, 6); sdp_hdr->src_addr.ip6 = src6->sin6_addr; sdp_hdr->dst_addr.ip6 = dst6->sin6_addr; sdp_hdr->port = src6->sin6_port; break; default: cma_hdr = hdr; cma_hdr->cma_version = CMA_VERSION; cma_set_ip_ver(cma_hdr, 6); cma_hdr->src_addr.ip6 = src6->sin6_addr; cma_hdr->dst_addr.ip6 = dst6->sin6_addr; cma_hdr->port = src6->sin6_port; break; } } return 0; } static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv = cm_id->context; struct 
rdma_cm_event event; struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; int ret = 0; if (cma_disable_callback(id_priv, RDMA_CM_CONNECT)) return 0; memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_SIDR_REQ_ERROR: event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = -ETIMEDOUT; break; case IB_CM_SIDR_REP_RECEIVED: event.param.ud.private_data = ib_event->private_data; event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; if (rep->status != IB_SIDR_SUCCESS) { event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = ib_event->param.sidr_rep_rcvd.status; break; } ret = cma_set_qkey(id_priv); if (ret) { event.event = RDMA_CM_EVENT_ADDR_ERROR; event.status = -EINVAL; break; } if (id_priv->qkey != rep->qkey) { event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = -EINVAL; break; } ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num, id_priv->id.route.path_rec, &event.param.ud.ah_attr); event.param.ud.qp_num = rep->qpn; event.param.ud.qkey = rep->qkey; event.event = RDMA_CM_EVENT_ESTABLISHED; event.status = 0; break; default: printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n", ib_event->event); goto out; } ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } out: mutex_unlock(&id_priv->handler_mutex); return ret; } static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_sidr_req_param req; struct rdma_route *route; struct ib_cm_id *id; int ret; req.private_data_len = sizeof(struct cma_hdr) + conn_param->private_data_len; if (req.private_data_len < conn_param->private_data_len) return -EINVAL; req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC); if (!req.private_data) return -ENOMEM; if (conn_param->private_data && conn_param->private_data_len) memcpy((void *) req.private_data + sizeof(struct cma_hdr), conn_param->private_data, conn_param->private_data_len); route = &id_priv->id.route; ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route); if (ret) goto out; id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, id_priv); if (IS_ERR(id)) { ret = PTR_ERR(id); goto out; } id_priv->cm_id.ib = id; req.path = route->path_rec; req.service_id = cma_get_service_id(id_priv->id.ps, (struct sockaddr *) &route->addr.dst_addr); req.timeout_ms = 1 << (cma_response_timeout - 8); req.max_cm_retries = CMA_MAX_CM_RETRIES; cma_dbg(id_priv, "sending SIDR\n"); ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); if (ret) { ib_destroy_cm_id(id_priv->cm_id.ib); id_priv->cm_id.ib = NULL; } out: kfree(req.private_data); return ret; } static int cma_connect_ib(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_req_param req; struct rdma_route *route; void *private_data; struct ib_cm_id *id; int offset, ret; memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv->id.ps); req.private_data_len = offset + conn_param->private_data_len; if (req.private_data_len < conn_param->private_data_len) return -EINVAL; private_data = kzalloc(req.private_data_len, GFP_ATOMIC); if (!private_data) return -ENOMEM; if (conn_param->private_data && conn_param->private_data_len) memcpy(private_data + offset, conn_param->private_data, conn_param->private_data_len); id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, 
id_priv); if (IS_ERR(id)) { ret = PTR_ERR(id); goto out; } id_priv->cm_id.ib = id; route = &id_priv->id.route; ret = cma_format_hdr(private_data, id_priv->id.ps, route); if (ret) goto out; req.private_data = private_data; req.primary_path = &route->path_rec[0]; if (route->num_paths == 2) req.alternate_path = &route->path_rec[1]; req.service_id = cma_get_service_id(id_priv->id.ps, (struct sockaddr *) &route->addr.dst_addr); req.qp_num = id_priv->qp_num; req.qp_type = id_priv->id.qp_type; req.starting_psn = id_priv->seq_num; req.responder_resources = conn_param->responder_resources; req.initiator_depth = conn_param->initiator_depth; req.flow_control = conn_param->flow_control; req.retry_count = min_t(u8, 7, conn_param->retry_count); req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); req.remote_cm_response_timeout = cma_response_timeout; req.local_cm_response_timeout = cma_response_timeout; req.max_cm_retries = CMA_MAX_CM_RETRIES; req.srq = id_priv->srq ? 1 : 0; cma_dbg(id_priv, "sending REQ\n"); ret = ib_send_cm_req(id_priv->cm_id.ib, &req); out: if (ret && !IS_ERR(id)) { ib_destroy_cm_id(id); id_priv->cm_id.ib = NULL; } kfree(private_data); return ret; } static int cma_connect_iw(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct iw_cm_id *cm_id; struct sockaddr_in* sin; int ret; struct iw_cm_conn_param iw_param; cm_id = iw_create_cm_id(id_priv->id.device, id_priv->sock, cma_iw_handler, id_priv); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); id_priv->cm_id.iw = cm_id; sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr; cm_id->local_addr = *sin; sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; cm_id->remote_addr = *sin; ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) goto out; if (conn_param) { iw_param.ord = conn_param->initiator_depth; iw_param.ird = conn_param->responder_resources; iw_param.private_data = conn_param->private_data; iw_param.private_data_len = conn_param->private_data_len; iw_param.qpn = id_priv->id.qp ? 
id_priv->qp_num : conn_param->qp_num; } else { memset(&iw_param, 0, sizeof iw_param); iw_param.qpn = id_priv->qp_num; } ret = iw_cm_connect(cm_id, &iw_param); out: if (ret) { iw_destroy_cm_id(cm_id); id_priv->cm_id.iw = NULL; } return ret; } int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) return -EINVAL; if (!id->qp) { id_priv->qp_num = conn_param->qp_num; id_priv->srq = conn_param->srq; } switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: if (id->qp_type == IB_QPT_UD) ret = cma_resolve_ib_udp(id_priv, conn_param); else ret = cma_connect_ib(id_priv, conn_param); break; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_SCIF: ret = cma_connect_iw(id_priv, conn_param); break; default: ret = -ENOSYS; break; } if (ret) goto err; return 0; err: cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); return ret; } EXPORT_SYMBOL(rdma_connect); static int cma_accept_ib(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_rep_param rep; int ret; ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) goto out; ret = cma_modify_qp_rts(id_priv, conn_param); if (ret) goto out; memset(&rep, 0, sizeof rep); rep.qp_num = id_priv->qp_num; rep.starting_psn = id_priv->seq_num; rep.private_data = conn_param->private_data; rep.private_data_len = conn_param->private_data_len; rep.responder_resources = conn_param->responder_resources; rep.initiator_depth = conn_param->initiator_depth; rep.failover_accepted = 0; rep.flow_control = conn_param->flow_control; rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); rep.srq = id_priv->srq ? 
1 : 0; cma_dbg(id_priv, "sending REP\n"); ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); out: return ret; } static int cma_accept_iw(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct iw_cm_conn_param iw_param; int ret; if (!conn_param) return -EINVAL; ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) return ret; iw_param.ord = conn_param->initiator_depth; iw_param.ird = conn_param->responder_resources; iw_param.private_data = conn_param->private_data; iw_param.private_data_len = conn_param->private_data_len; if (id_priv->id.qp) { iw_param.qpn = id_priv->qp_num; } else iw_param.qpn = conn_param->qp_num; return iw_cm_accept(id_priv->cm_id.iw, &iw_param); } static int cma_send_sidr_rep(struct rdma_id_private *id_priv, enum ib_cm_sidr_status status, const void *private_data, int private_data_len) { struct ib_cm_sidr_rep_param rep; int ret; memset(&rep, 0, sizeof rep); rep.status = status; if (status == IB_SIDR_SUCCESS) { ret = cma_set_qkey(id_priv); if (ret) return ret; rep.qp_num = id_priv->qp_num; rep.qkey = id_priv->qkey; } rep.private_data = private_data; rep.private_data_len = private_data_len; cma_dbg(id_priv, "sending SIDR\n"); return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); } int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); id_priv->owner = curthread->td_proc->p_pid; if (!cma_comp(id_priv, RDMA_CM_CONNECT)) return -EINVAL; if (!id->qp && conn_param) { id_priv->qp_num = conn_param->qp_num; id_priv->srq = conn_param->srq; } switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: if (id->qp_type == IB_QPT_UD) { if (conn_param) ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, conn_param->private_data, conn_param->private_data_len); else ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, NULL, 0); } else { if (conn_param) ret = cma_accept_ib(id_priv, conn_param); else ret = cma_rep_recv(id_priv); } break; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_SCIF: ret = cma_accept_iw(id_priv, conn_param); break; default: ret = -ENOSYS; break; } if (ret) goto reject; return 0; reject: cma_modify_qp_err(id_priv); rdma_reject(id, NULL, 0); return ret; } EXPORT_SYMBOL(rdma_accept); int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!id_priv->cm_id.ib) return -EINVAL; switch (id->device->node_type) { case RDMA_NODE_IB_CA: ret = ib_cm_notify(id_priv->cm_id.ib, event); break; default: ret = 0; break; } return ret; } EXPORT_SYMBOL(rdma_notify); int rdma_reject(struct rdma_cm_id *id, const void *private_data, u8 private_data_len) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!id_priv->cm_id.ib) return -EINVAL; switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: if (id->qp_type == IB_QPT_UD) ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, private_data, private_data_len); else { cma_dbg(id_priv, "sending REJ\n"); ret = ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, private_data, private_data_len); } break; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_SCIF: ret = iw_cm_reject(id_priv->cm_id.iw, private_data, private_data_len); break; default: ret = -ENOSYS; break; } return ret; } EXPORT_SYMBOL(rdma_reject); int rdma_disconnect(struct rdma_cm_id *id) { struct rdma_id_private *id_priv; int ret; 
id_priv = container_of(id, struct rdma_id_private, id); if (!id_priv->cm_id.ib) return -EINVAL; switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: ret = cma_modify_qp_err(id_priv); if (ret) goto out; /* Initiate or respond to a disconnect. */ cma_dbg(id_priv, "sending DREQ\n"); if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) { cma_dbg(id_priv, "sending DREP\n"); ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); } break; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_SCIF: ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); break; default: ret = -EINVAL; break; } out: return ret; } EXPORT_SYMBOL(rdma_disconnect); static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) { struct rdma_id_private *id_priv; struct cma_multicast *mc = multicast->context; struct rdma_cm_event event; struct rdma_dev_addr *dev_addr; int ret; struct net_device *ndev = NULL; u16 vlan; id_priv = mc->id_priv; dev_addr = &id_priv->id.route.addr.dev_addr; if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) && cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED)) return 0; mutex_lock(&id_priv->qp_mutex); if (!status && id_priv->id.qp) status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, be16_to_cpu(multicast->rec.mlid)); mutex_unlock(&id_priv->qp_mutex); memset(&event, 0, sizeof event); event.status = status; event.param.ud.private_data = mc->context; ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); if (!ndev) { status = -ENODEV; } else { vlan = rdma_vlan_dev_vlan_id(ndev); dev_put(ndev); } if (!status) { event.event = RDMA_CM_EVENT_MULTICAST_JOIN; ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num, &multicast->rec, &event.param.ud.ah_attr); event.param.ud.ah_attr.vlan_id = vlan; event.param.ud.qp_num = 0xFFFFFF; event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); } else { event.event = RDMA_CM_EVENT_MULTICAST_ERROR; /* mark that the cached record is no longer valid */ if (status != -ENETRESET && status != -EAGAIN) { spin_lock(&id_priv->lock); id_priv->is_valid_rec = 0; spin_unlock(&id_priv->lock); } } ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return 0; } mutex_unlock(&id_priv->handler_mutex); return 0; } static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr, union ib_gid *mgid) { unsigned char mc_map[MAX_ADDR_LEN]; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct sockaddr_in *sin = (struct sockaddr_in *) addr; #if defined(INET6) struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; #endif if (cma_any_addr(addr)) { memset(mgid, 0, sizeof *mgid); #if defined(INET6) } else if ((addr->sa_family == AF_INET6) && ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == 0xFF10A01B)) { /* IPv6 address is an SA assigned MGID. 
*/ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); } else if (addr->sa_family == AF_INET6) { ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) mc_map[7] = 0x01; /* Use RDMA CM signature */ *mgid = *(union ib_gid *) (mc_map + 4); #endif } else { ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) mc_map[7] = 0x01; /* Use RDMA CM signature */ *mgid = *(union ib_gid *) (mc_map + 4); } } static int cma_join_ib_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { struct ib_sa_mcmember_rec rec; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; ib_sa_comp_mask comp_mask; int ret = 0; ib_addr_get_mgid(dev_addr, &id_priv->rec.mgid); /* cache ipoib bc record */ spin_lock(&id_priv->lock); if (!id_priv->is_valid_rec) ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, &id_priv->rec.mgid, &id_priv->rec); if (ret) { id_priv->is_valid_rec = 0; spin_unlock(&id_priv->lock); return ret; } else { rec = id_priv->rec; id_priv->is_valid_rec = 1; } spin_unlock(&id_priv->lock); cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); if (id_priv->id.ps == RDMA_PS_UDP) rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); rdma_addr_get_sgid(dev_addr, &rec.port_gid); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = 1; comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | IB_SA_MCMEMBER_REC_FLOW_LABEL | IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; if (id_priv->id.ps == RDMA_PS_IPOIB) comp_mask |= IB_SA_MCMEMBER_REC_RATE | IB_SA_MCMEMBER_REC_RATE_SELECTOR | IB_SA_MCMEMBER_REC_MTU_SELECTOR | IB_SA_MCMEMBER_REC_MTU | IB_SA_MCMEMBER_REC_HOP_LIMIT; mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, id_priv->id.port_num, &rec, comp_mask, GFP_KERNEL, cma_ib_mc_handler, mc); return PTR_RET(mc->multicast.ib); } static void iboe_mcast_work_handler(struct work_struct *work) { struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work); struct cma_multicast *mc = mw->mc; struct ib_sa_multicast *m = mc->multicast.ib; mc->multicast.ib->context = mc; cma_ib_mc_handler(0, m); kref_put(&mc->mcref, release_mc); kfree(mw); } static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid) { struct sockaddr_in *sin = (struct sockaddr_in *)addr; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; if (cma_any_addr(addr)) { memset(mgid, 0, sizeof *mgid); } else if (addr->sa_family == AF_INET6) { memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); } else { mgid->raw[0] = 0xff; mgid->raw[1] = 0x0e; mgid->raw[2] = 0; mgid->raw[3] = 0; mgid->raw[4] = 0; mgid->raw[5] = 0; mgid->raw[6] = 0; mgid->raw[7] = 0; mgid->raw[8] = 0; mgid->raw[9] = 0; mgid->raw[10] = 0xff; mgid->raw[11] = 0xff; *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr; } } static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { struct iboe_mcast_work *work; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int err; struct sockaddr *addr = (struct sockaddr *)&mc->addr; struct net_device *ndev = NULL; if (cma_zero_addr((struct sockaddr *)&mc->addr)) return -EINVAL; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL); if (!mc->multicast.ib) { err = -ENOMEM; goto out1; } cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid); 
mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff); if (id_priv->id.ps == RDMA_PS_UDP) mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); if (dev_addr->bound_dev_if) ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); if (!ndev) { err = -ENODEV; goto out2; } mc->multicast.ib->rec.rate = iboe_get_rate(ndev); mc->multicast.ib->rec.hop_limit = 1; mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu); dev_put(ndev); if (!mc->multicast.ib->rec.mtu) { err = -EINVAL; goto out2; } rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &mc->multicast.ib->rec.port_gid); work->id = id_priv; work->mc = mc; INIT_WORK(&work->work, iboe_mcast_work_handler); kref_get(&mc->mcref); queue_work(cma_wq, &work->work); return 0; out2: kfree(mc->multicast.ib); out1: kfree(work); return err; } int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, void *context) { struct rdma_id_private *id_priv; struct cma_multicast *mc; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) && !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED)) return -EINVAL; mc = kmalloc(sizeof *mc, GFP_KERNEL); if (!mc) return -ENOMEM; memcpy(&mc->addr, addr, ip_addr_size(addr)); mc->context = context; mc->id_priv = id_priv; spin_lock(&id_priv->lock); list_add(&mc->list, &id_priv->mc_list); spin_unlock(&id_priv->lock); switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: switch (rdma_port_get_link_layer(id->device, id->port_num)) { case IB_LINK_LAYER_INFINIBAND: ret = cma_join_ib_multicast(id_priv, mc); break; case IB_LINK_LAYER_ETHERNET: kref_init(&mc->mcref); ret = cma_iboe_join_multicast(id_priv, mc); break; default: ret = -EINVAL; } break; default: ret = -ENOSYS; break; } if (ret) { spin_lock_irq(&id_priv->lock); list_del(&mc->list); spin_unlock_irq(&id_priv->lock); kfree(mc); } return ret; } EXPORT_SYMBOL(rdma_join_multicast); void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) { struct rdma_id_private *id_priv; struct cma_multicast *mc; id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irq(&id_priv->lock); list_for_each_entry(mc, &id_priv->mc_list, list) { if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) { list_del(&mc->list); spin_unlock_irq(&id_priv->lock); if (id->qp) ib_detach_mcast(id->qp, &mc->multicast.ib->rec.mgid, be16_to_cpu(mc->multicast.ib->rec.mlid)); if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) { switch (rdma_port_get_link_layer(id->device, id->port_num)) { case IB_LINK_LAYER_INFINIBAND: ib_sa_free_multicast(mc->multicast.ib); kfree(mc); break; case IB_LINK_LAYER_ETHERNET: kref_put(&mc->mcref, release_mc); break; default: break; } } return; } } spin_unlock_irq(&id_priv->lock); } EXPORT_SYMBOL(rdma_leave_multicast); static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr; struct cma_ndev_work *work; dev_addr = &id_priv->id.route.addr.dev_addr; if ((dev_addr->bound_dev_if == ndev->if_index) && memcmp(dev_addr->src_dev_addr, IF_LLADDR(ndev), ndev->if_addrlen)) { printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", ndev->if_xname, &id_priv->id); work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; INIT_WORK(&work->work, cma_ndev_work_handler); work->id = id_priv; work->event.event = RDMA_CM_EVENT_ADDR_CHANGE; atomic_inc(&id_priv->refcount); queue_work(cma_wq, &work->work); } return 0; } static int cma_netdev_callback(struct notifier_block *self, 
unsigned long event, void *ctx) { struct net_device *ndev = (struct net_device *)ctx; struct cma_device *cma_dev; struct rdma_id_private *id_priv; int ret = NOTIFY_DONE; /* BONDING related, commented out until the bonding is resolved */ #if 0 if (dev_net(ndev) != &init_net) return NOTIFY_DONE; if (event != NETDEV_BONDING_FAILOVER) return NOTIFY_DONE; if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING)) return NOTIFY_DONE; #endif if (event != NETDEV_DOWN && event != NETDEV_UNREGISTER) return NOTIFY_DONE; mutex_lock(&lock); list_for_each_entry(cma_dev, &dev_list, list) list_for_each_entry(id_priv, &cma_dev->id_list, list) { ret = cma_netdev_change(ndev, id_priv); if (ret) goto out; } out: mutex_unlock(&lock); return ret; } static struct notifier_block cma_nb = { .notifier_call = cma_netdev_callback }; static void cma_add_one(struct ib_device *device) { struct cma_device *cma_dev; struct rdma_id_private *id_priv; cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); if (!cma_dev) return; cma_dev->device = device; init_completion(&cma_dev->comp); atomic_set(&cma_dev->refcount, 1); INIT_LIST_HEAD(&cma_dev->id_list); ib_set_client_data(device, &cma_client, cma_dev); mutex_lock(&lock); list_add_tail(&cma_dev->list, &dev_list); list_for_each_entry(id_priv, &listen_any_list, list) cma_listen_on_dev(id_priv, cma_dev); mutex_unlock(&lock); } static int cma_remove_id_dev(struct rdma_id_private *id_priv) { struct rdma_cm_event event; enum rdma_cm_state state; int ret = 0; /* Record that we want to remove the device */ state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL); if (state == RDMA_CM_DESTROYING) return 0; cma_cancel_operation(id_priv, state); mutex_lock(&id_priv->handler_mutex); /* Check for destruction from another callback. */ if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) goto out; memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; ret = id_priv->id.event_handler(&id_priv->id, &event); out: mutex_unlock(&id_priv->handler_mutex); return ret; } static void cma_process_remove(struct cma_device *cma_dev) { struct rdma_id_private *id_priv; int ret; mutex_lock(&lock); while (!list_empty(&cma_dev->id_list)) { id_priv = list_entry(cma_dev->id_list.next, struct rdma_id_private, list); list_del(&id_priv->listen_list); list_del_init(&id_priv->list); atomic_inc(&id_priv->refcount); mutex_unlock(&lock); ret = id_priv->internal_id ? 
1 : cma_remove_id_dev(id_priv); cma_deref_id(id_priv); if (ret) rdma_destroy_id(&id_priv->id); mutex_lock(&lock); } mutex_unlock(&lock); cma_deref_dev(cma_dev); wait_for_completion(&cma_dev->comp); } static void cma_remove_one(struct ib_device *device) { struct cma_device *cma_dev; cma_dev = ib_get_client_data(device, &cma_client); if (!cma_dev) return; mutex_lock(&lock); list_del(&cma_dev->list); mutex_unlock(&lock); cma_process_remove(cma_dev); kfree(cma_dev); } static int __init cma_init(void) { int ret = -ENOMEM; cma_wq = create_singlethread_workqueue("rdma_cm"); if (!cma_wq) return -ENOMEM; cma_free_wq = create_singlethread_workqueue("rdma_cm_fr"); if (!cma_free_wq) goto err1; ib_sa_register_client(&sa_client); rdma_addr_register_client(&addr_client); register_netdevice_notifier(&cma_nb); ret = ib_register_client(&cma_client); if (ret) goto err; return 0; err: unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_free_wq); err1: destroy_workqueue(cma_wq); return ret; } static void __exit cma_cleanup(void) { ib_unregister_client(&cma_client); unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); flush_workqueue(cma_free_wq); destroy_workqueue(cma_free_wq); destroy_workqueue(cma_wq); idr_destroy(&sdp_ps); idr_destroy(&tcp_ps); idr_destroy(&udp_ps); idr_destroy(&ipoib_ps); idr_destroy(&ib_ps); } module_init(cma_init); module_exit(cma_cleanup); Index: user/alc/PQ_LAUNDRY/sys/vm/vm_pageout.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/vm/vm_pageout.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/vm/vm_pageout.c (revision 303642) @@ -1,2177 +1,2178 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2005 Yahoo! Technologies Norway AS * All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * The proverbial page-out daemon. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * System initialization */ /* the kernel process "vm_pageout"*/ static void vm_pageout(void); static void vm_pageout_init(void); static int vm_pageout_clean(vm_page_t m, int *numpagedout); static int vm_pageout_cluster(vm_page_t m); static void vm_pageout_scan(struct vm_domain *vmd, int pass); static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage, int starting_page_shortage); SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init, NULL); struct proc *pageproc; static struct kproc_desc page_kp = { "pagedaemon", vm_pageout, &pageproc }; SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, &page_kp); SDT_PROVIDER_DEFINE(vm); SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan); #if !defined(NO_SWAPPING) /* the kernel process "vm_daemon"*/ static void vm_daemon(void); static struct proc *vmproc; static struct kproc_desc vm_kp = { "vmdaemon", vm_daemon, &vmproc }; SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp); #endif /* Sleep intervals for pagedaemon threads, in subdivisions of one second. */ #define VM_LAUNDER_INTERVAL 10 #define VM_INACT_SCAN_INTERVAL 2 #define VM_LAUNDER_RATE (VM_LAUNDER_INTERVAL / VM_INACT_SCAN_INTERVAL) int vm_pageout_deficit; /* Estimated number of pages deficit */ u_int vm_pageout_wakeup_thresh; static int vm_pageout_oom_seq = 12; bool vm_pageout_wanted; /* Event on which pageout daemon sleeps */ bool vm_pages_needed; /* Are threads waiting for free pages? 
*/ #if !defined(NO_SWAPPING) static int vm_pageout_req_swapout; /* XXX */ static int vm_daemon_needed; static struct mtx vm_daemon_mtx; /* Allow for use by vm_pageout before vm_daemon is initialized. */ MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF); #endif static int vm_pageout_update_period; static int disable_swap_pageouts; static int lowmem_period = 10; static time_t lowmem_uptime; #if defined(NO_SWAPPING) static int vm_swap_enabled = 0; static int vm_swap_idle_enabled = 0; #else static int vm_swap_enabled = 1; static int vm_swap_idle_enabled = 0; #endif static int vm_panic_on_oom = 0; SYSCTL_INT(_vm, OID_AUTO, panic_on_oom, CTLFLAG_RWTUN, &vm_panic_on_oom, 0, "panic on out of memory instead of killing the largest process"); SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh, CTLFLAG_RW, &vm_pageout_wakeup_thresh, 0, "free page threshold for waking up the pageout daemon"); SYSCTL_INT(_vm, OID_AUTO, pageout_update_period, CTLFLAG_RW, &vm_pageout_update_period, 0, "Maximum active LRU update period"); SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RW, &lowmem_period, 0, "Low memory callback period"); #if defined(NO_SWAPPING) SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RD, &vm_swap_enabled, 0, "Enable entire process swapout"); SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RD, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); #else SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout"); SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); #endif SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts, CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages"); static int pageout_lock_miss; SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss, CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout"); SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq, CTLFLAG_RW, &vm_pageout_oom_seq, 0, "back-to-back calls to oom detector to start OOM"); static int act_scan_laundry_weight = 3; SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RW, &act_scan_laundry_weight, 0, "weight given to clean vs. dirty pages in active queue scans"); static u_int bkgrd_launder_ratio = 50; SYSCTL_UINT(_vm, OID_AUTO, bkgrd_launder_ratio, CTLFLAG_RW, &bkgrd_launder_ratio, 0, "ratio of clean to dirty inactive pages needed to trigger laundering"); static u_int bkgrd_launder_max = 2048; SYSCTL_UINT(_vm, OID_AUTO, bkgrd_launder_max, CTLFLAG_RW, &bkgrd_launder_max, 0, "maximum background laundering rate, in pages per second"); #define VM_PAGEOUT_PAGE_COUNT 16 int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT; int vm_page_max_wired; /* XXX max # of wired pages system-wide */ SYSCTL_INT(_vm, OID_AUTO, max_wired, CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count"); static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *); static int vm_pageout_launder(struct vm_domain *vmd, int launder); static void vm_pageout_laundry_worker(void *arg); #if !defined(NO_SWAPPING) static void vm_pageout_map_deactivate_pages(vm_map_t, long); static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long); static void vm_req_vmdaemon(int req); #endif static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *); /* * Initialize a dummy page for marking the caller's place in the specified * paging queue. In principle, this function only needs to set the flag * PG_MARKER. 
Nonetheless, it write busies and initializes the hold count * to one as safety precautions. */ static void vm_pageout_init_marker(vm_page_t marker, u_short queue) { bzero(marker, sizeof(*marker)); marker->flags = PG_MARKER; marker->busy_lock = VPB_SINGLE_EXCLUSIVER; marker->queue = queue; marker->hold_count = 1; } /* * vm_pageout_fallback_object_lock: * * Lock vm object currently associated with `m'. VM_OBJECT_TRYWLOCK is * known to have failed and page queue must be either PQ_ACTIVE or * PQ_INACTIVE. To avoid lock order violation, unlock the page queue * while locking the vm object. Use marker page to detect page queue * changes and maintain notion of next page on page queue. Return * TRUE if no changes were detected, FALSE otherwise. vm object is * locked on return. * * This function depends on both the lock portion of struct vm_object * and normal struct vm_page being type stable. */ static boolean_t vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next) { struct vm_page marker; struct vm_pagequeue *pq; boolean_t unchanged; u_short queue; vm_object_t object; queue = m->queue; vm_pageout_init_marker(&marker, queue); pq = vm_page_pagequeue(m); object = m->object; TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q); vm_pagequeue_unlock(pq); vm_page_unlock(m); VM_OBJECT_WLOCK(object); vm_page_lock(m); vm_pagequeue_lock(pq); /* * The page's object might have changed, and/or the page might * have moved from its original position in the queue. If the * page's object has changed, then the caller should abandon * processing the page because the wrong object lock was * acquired. Use the marker's plinks.q, not the page's, to * determine if the page has been moved. The state of the * page's plinks.q can be indeterminate; whereas, the marker's * plinks.q must be valid. */ *next = TAILQ_NEXT(&marker, plinks.q); unchanged = m->object == object && m == TAILQ_PREV(&marker, pglist, plinks.q); KASSERT(!unchanged || m->queue == queue, ("page %p queue %d %d", m, queue, m->queue)); TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q); return (unchanged); } /* * Lock the page while holding the page queue lock. Use marker page * to detect page queue changes and maintain notion of next page on * page queue. Return TRUE if no changes were detected, FALSE * otherwise. The page is locked on return. The page queue lock might * be dropped and reacquired. * * This function depends on normal struct vm_page being type stable. */ static boolean_t vm_pageout_page_lock(vm_page_t m, vm_page_t *next) { struct vm_page marker; struct vm_pagequeue *pq; boolean_t unchanged; u_short queue; vm_page_lock_assert(m, MA_NOTOWNED); if (vm_page_trylock(m)) return (TRUE); queue = m->queue; vm_pageout_init_marker(&marker, queue); pq = vm_page_pagequeue(m); TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q); vm_pagequeue_unlock(pq); vm_page_lock(m); vm_pagequeue_lock(pq); /* Page queue might have changed. */ *next = TAILQ_NEXT(&marker, plinks.q); unchanged = m == TAILQ_PREV(&marker, pglist, plinks.q); KASSERT(!unchanged || m->queue == queue, ("page %p queue %d %d", m, queue, m->queue)); TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q); return (unchanged); } /* * vm_pageout_clean: * * Clean the page and remove it from the laundry. * * We set the busy bit to cause potential page faults on this page to * block. Note the careful timing, however, the busy bit isn't set till * late and we cannot do anything that will mess with the page.
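Both helpers above lean on the same trick: a PG_MARKER dummy page is inserted after the page of interest so that the page queue lock can be dropped and retaken without losing the scan position, and TAILQ_PREV() on the marker tells the caller whether the page moved in the meantime. A self-contained sketch of that pattern on a plain TAILQ follows; struct item, process_with_marker() and the printf are illustrative inventions, not kernel types.

#include <sys/queue.h>
#include <stdbool.h>
#include <stdio.h>

struct item {
	TAILQ_ENTRY(item) link;
	int	value;
	bool	marker;		/* plays the role of PG_MARKER */
};

TAILQ_HEAD(itemq, item);

/*
 * Process "cur" while the queue lock must be dropped: park a marker right
 * after it, drop the lock, do the work, retake the lock, then resume the
 * scan from the marker.  Returns the next element to visit, much like the
 * *next out-parameter of vm_pageout_fallback_object_lock().
 */
static struct item *
process_with_marker(struct itemq *q, struct item *cur, struct item *marker)
{
	struct item *next;
	bool unchanged;

	TAILQ_INSERT_AFTER(q, cur, marker, link);
	/* ... queue lock dropped here; other threads may requeue "cur" ... */
	/* ... queue lock reacquired here ... */
	next = TAILQ_NEXT(marker, link);
	unchanged = (cur == TAILQ_PREV(marker, itemq, link));
	TAILQ_REMOVE(q, marker, link);
	if (!unchanged)
		printf("item %d moved while unlocked\n", cur->value);
	return (next);
}

int
main(void)
{
	struct itemq q = TAILQ_HEAD_INITIALIZER(q);
	struct item a = { .value = 1 }, b = { .value = 2 }, m = { .marker = true };
	struct item *it, *next;

	TAILQ_INSERT_TAIL(&q, &a, link);
	TAILQ_INSERT_TAIL(&q, &b, link);
	for (it = TAILQ_FIRST(&q); it != NULL; it = next)
		next = process_with_marker(&q, it, &m);
	return (0);
}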
*/ static int vm_pageout_cluster(vm_page_t m) { vm_object_t object; vm_page_t mc[2*vm_pageout_page_count], pb, ps; int pageout_count; int ib, is, page_base; vm_pindex_t pindex = m->pindex; vm_page_lock_assert(m, MA_OWNED); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); /* * It doesn't cost us anything to pageout OBJT_DEFAULT or OBJT_SWAP * with the new swapper, but we could have serious problems paging * out other object types if there is insufficient memory. * * Unfortunately, checking free memory here is far too late, so the * check has been moved up a procedural level. */ /* * Can't clean the page if it's busy or held. */ vm_page_assert_unbusied(m); KASSERT(m->hold_count == 0, ("vm_pageout_clean: page %p is held", m)); vm_page_dequeue(m); vm_page_unlock(m); mc[vm_pageout_page_count] = pb = ps = m; pageout_count = 1; page_base = vm_pageout_page_count; ib = 1; is = 1; /* * Scan object for clusterable pages. * * We can cluster ONLY if: ->> the page is NOT * clean, wired, busy, held, or mapped into a * buffer, and one of the following: * 1) The page is in the laundry. * -or- * 2) we force the issue. * * During heavy mmap/modification loads the pageout * daemon can really fragment the underlying file * due to flushing pages out of order and not trying * align the clusters (which leave sporatic out-of-order * holes). To solve this problem we do the reverse scan * first and attempt to align our cluster, then do a * forward scan if room remains. */ more: while (ib && pageout_count < vm_pageout_page_count) { vm_page_t p; if (ib > pindex) { ib = 0; break; } if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) { ib = 0; break; } vm_page_test_dirty(p); if (p->dirty == 0) { ib = 0; break; } vm_page_lock(p); if (!vm_page_in_laundry(p) || p->hold_count != 0) { /* may be undergoing I/O */ vm_page_unlock(p); ib = 0; break; } vm_page_dequeue(p); vm_page_unlock(p); mc[--page_base] = pb = p; ++pageout_count; ++ib; /* * alignment boundary, stop here and switch directions. Do * not clear ib. */ if ((pindex - (ib - 1)) % vm_pageout_page_count == 0) break; } while (pageout_count < vm_pageout_page_count && pindex + is < object->size) { vm_page_t p; if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p)) break; vm_page_test_dirty(p); if (p->dirty == 0) break; vm_page_lock(p); if (!vm_page_in_laundry(p) || p->hold_count != 0) { /* may be undergoing I/O */ vm_page_unlock(p); break; } vm_page_dequeue(p); vm_page_unlock(p); mc[page_base + pageout_count] = ps = p; ++pageout_count; ++is; } /* * If we exhausted our forward scan, continue with the reverse scan * when possible, even past a page boundary. This catches boundary * conditions. */ if (ib && pageout_count < vm_pageout_page_count) goto more; /* * we allow reads during pageouts... */ return (vm_pageout_flush(&mc[page_base], pageout_count, 0, 0, NULL, NULL)); } /* * vm_pageout_flush() - launder the given pages * * The given pages are laundered. Note that we setup for the start of * I/O ( i.e. busy the page ), mark it read-only, and bump the object * reference count all in here rather then in the parent. If we want * the parent to do more sophisticated things we may have to change * the ordering. * * Returned runlen is the count of pages between mreq and first * page after mreq with status VM_PAGER_AGAIN. * *eio is set to TRUE if pager returned VM_PAGER_ERROR or VM_PAGER_FAIL * for any page in runlen set. 
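The runlen/eio contract spelled out above is small enough to state on its own: runlen counts the pages from mreq up to, but not including, the first VM_PAGER_AGAIN result, and *eio records whether a hard pager error landed inside that run. The sketch below mirrors that bookkeeping with made-up status constants rather than the real VM_PAGER_* values.

#include <stdbool.h>
#include <stdio.h>

enum pager_status { PAGER_OK, PAGER_PEND, PAGER_AGAIN, PAGER_ERROR };

/*
 * Mirror the runlen/eio accounting in vm_pageout_flush(): runlen is the
 * number of pages starting at mreq before the first AGAIN status, and
 * *eio is set if an ERROR lands inside that run.
 */
static int
compute_runlen(const enum pager_status *status, int count, int mreq, bool *eio)
{
	int i, runlen = count - mreq;

	*eio = false;
	for (i = 0; i < count; i++) {
		if (status[i] == PAGER_AGAIN && i >= mreq && i - mreq < runlen)
			runlen = i - mreq;
		else if (status[i] == PAGER_ERROR && i >= mreq && i - mreq < runlen)
			*eio = true;
	}
	return (runlen);
}

int
main(void)
{
	enum pager_status st[] =
	    { PAGER_OK, PAGER_ERROR, PAGER_OK, PAGER_AGAIN, PAGER_OK };
	bool eio;
	int runlen = compute_runlen(st, 5, 1, &eio);

	printf("runlen=%d eio=%d\n", runlen, eio);	/* prints runlen=2 eio=1 */
	return (0);
}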
*/ int vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen, boolean_t *eio) { vm_object_t object = mc[0]->object; int pageout_status[count]; int numpagedout = 0; int i, runlen; VM_OBJECT_ASSERT_WLOCKED(object); /* * Initiate I/O. Bump the vm_page_t->busy counter and * mark the pages read-only. * * We do not have to fixup the clean/dirty bits here... we can * allow the pager to do it after the I/O completes. * * NOTE! mc[i]->dirty may be partial or fragmented due to an * edge case with file fragments. */ for (i = 0; i < count; i++) { KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, ("vm_pageout_flush: partially invalid page %p index %d/%d", mc[i], i, count)); vm_page_sbusy(mc[i]); pmap_remove_write(mc[i]); } vm_object_pip_add(object, count); vm_pager_put_pages(object, mc, count, flags, pageout_status); runlen = count - mreq; if (eio != NULL) *eio = FALSE; for (i = 0; i < count; i++) { vm_page_t mt = mc[i]; KASSERT(pageout_status[i] == VM_PAGER_PEND || !pmap_page_is_write_mapped(mt), ("vm_pageout_flush: page %p is not write protected", mt)); switch (pageout_status[i]) { case VM_PAGER_OK: case VM_PAGER_PEND: numpagedout++; break; case VM_PAGER_BAD: /* * Page outside of range of object. Right now we * essentially lose the changes by pretending it * worked. */ vm_page_undirty(mt); vm_page_lock(mt); vm_page_deactivate(mt); vm_page_unlock(mt); break; case VM_PAGER_ERROR: case VM_PAGER_FAIL: /* * If the page couldn't be paged out, then reactivate * it so that it doesn't clog the laundry and inactive * queues. (We will try paging it out again later). */ vm_page_lock(mt); vm_page_activate(mt); vm_page_unlock(mt); if (eio != NULL && i >= mreq && i - mreq < runlen) *eio = TRUE; break; case VM_PAGER_AGAIN: if (i >= mreq && i - mreq < runlen) runlen = i - mreq; break; } /* * If the operation is still going, leave the page busy to * block all other accesses. Also, leave the paging in * progress indicator set so that we don't attempt an object * collapse. */ if (pageout_status[i] != VM_PAGER_PEND) { vm_object_pip_wakeup(object); vm_page_sunbusy(mt); } } if (prunlen != NULL) *prunlen = runlen; return (numpagedout); } #if !defined(NO_SWAPPING) /* * vm_pageout_object_deactivate_pages * * Deactivate enough pages to satisfy the inactive target * requirements. * * The object and map must be locked. */ static void vm_pageout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object, long desired) { vm_object_t backing_object, object; vm_page_t p; int act_delta, remove_mode; VM_OBJECT_ASSERT_LOCKED(first_object); if ((first_object->flags & OBJ_FICTITIOUS) != 0) return; for (object = first_object;; object = backing_object) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; VM_OBJECT_ASSERT_LOCKED(object); if ((object->flags & OBJ_UNMANAGED) != 0 || object->paging_in_progress != 0) goto unlock_return; remove_mode = 0; if (object->shadow_count > 1) remove_mode = 1; /* * Scan the object's entire memory queue. 
*/ TAILQ_FOREACH(p, &object->memq, listq) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; if (vm_page_busied(p)) continue; PCPU_INC(cnt.v_pdpages); vm_page_lock(p); if (p->wire_count != 0 || p->hold_count != 0 || !pmap_page_exists_quick(pmap, p)) { vm_page_unlock(p); continue; } act_delta = pmap_ts_referenced(p); if ((p->aflags & PGA_REFERENCED) != 0) { if (act_delta == 0) act_delta = 1; vm_page_aflag_clear(p, PGA_REFERENCED); } if (!vm_page_active(p) && act_delta != 0) { vm_page_activate(p); p->act_count += act_delta; } else if (vm_page_active(p)) { if (act_delta == 0) { p->act_count -= min(p->act_count, ACT_DECLINE); if (!remove_mode && p->act_count == 0) { pmap_remove_all(p); vm_page_deactivate(p); } else vm_page_requeue(p); } else { vm_page_activate(p); if (p->act_count < ACT_MAX - ACT_ADVANCE) p->act_count += ACT_ADVANCE; vm_page_requeue(p); } } else if (vm_page_inactive(p)) pmap_remove_all(p); vm_page_unlock(p); } if ((backing_object = object->backing_object) == NULL) goto unlock_return; VM_OBJECT_RLOCK(backing_object); if (object != first_object) VM_OBJECT_RUNLOCK(object); } unlock_return: if (object != first_object) VM_OBJECT_RUNLOCK(object); } /* * deactivate some number of pages in a map, try to do it fairly, but * that is really hard to do. */ static void vm_pageout_map_deactivate_pages(map, desired) vm_map_t map; long desired; { vm_map_entry_t tmpe; vm_object_t obj, bigobj; int nothingwired; if (!vm_map_trylock(map)) return; bigobj = NULL; nothingwired = TRUE; /* * first, search out the biggest object, and try to free pages from * that. */ tmpe = map->header.next; while (tmpe != &map->header) { if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { obj = tmpe->object.vm_object; if (obj != NULL && VM_OBJECT_TRYRLOCK(obj)) { if (obj->shadow_count <= 1 && (bigobj == NULL || bigobj->resident_page_count < obj->resident_page_count)) { if (bigobj != NULL) VM_OBJECT_RUNLOCK(bigobj); bigobj = obj; } else VM_OBJECT_RUNLOCK(obj); } } if (tmpe->wired_count > 0) nothingwired = FALSE; tmpe = tmpe->next; } if (bigobj != NULL) { vm_pageout_object_deactivate_pages(map->pmap, bigobj, desired); VM_OBJECT_RUNLOCK(bigobj); } /* * Next, hunt around for other pages to deactivate. We actually * do this search sort of wrong -- .text first is not the best idea. */ tmpe = map->header.next; while (tmpe != &map->header) { if (pmap_resident_count(vm_map_pmap(map)) <= desired) break; if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { obj = tmpe->object.vm_object; if (obj != NULL) { VM_OBJECT_RLOCK(obj); vm_pageout_object_deactivate_pages(map->pmap, obj, desired); VM_OBJECT_RUNLOCK(obj); } } tmpe = tmpe->next; } /* * Remove all mappings if a process is swapped out, this will free page * table pages. */ if (desired == 0 && nothingwired) { pmap_remove(vm_map_pmap(map), vm_map_min(map), vm_map_max(map)); } vm_map_unlock(map); } #endif /* !defined(NO_SWAPPING) */ /* * Attempt to acquire all of the necessary locks to launder a page and * then call through the clustering layer to PUTPAGES. Wait a short * time for a vnode lock. * * Requires the page and object lock on entry, releases both before return. * Returns 0 on success and an errno otherwise. */ static int vm_pageout_clean(vm_page_t m, int *numpagedout) { struct vnode *vp; struct mount *mp; vm_object_t object; vm_pindex_t pindex; int error, lockmode; vm_page_assert_locked(m); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); error = 0; vp = NULL; mp = NULL; /* * The object is already known NOT to be dead. 
It * is possible for the vget() to block the whole * pageout daemon, but the new low-memory handling * code should prevent it. * * We can't wait forever for the vnode lock, we might * deadlock due to a vn_read() getting stuck in * vm_wait while holding this vnode. We skip the * vnode if we can't get it in a reasonable amount * of time. */ if (object->type == OBJT_VNODE) { vm_page_unlock(m); vp = object->handle; if (vp->v_type == VREG && vn_start_write(vp, &mp, V_NOWAIT) != 0) { mp = NULL; error = EDEADLK; goto unlock_all; } KASSERT(mp != NULL, ("vp %p with NULL v_mount", vp)); vm_object_reference_locked(object); pindex = m->pindex; VM_OBJECT_WUNLOCK(object); lockmode = MNT_SHARED_WRITES(vp->v_mount) ? LK_SHARED : LK_EXCLUSIVE; if (vget(vp, lockmode | LK_TIMELOCK, curthread)) { vp = NULL; error = EDEADLK; goto unlock_mp; } VM_OBJECT_WLOCK(object); vm_page_lock(m); /* * While the object and page were unlocked, the page * may have been: * (1) moved to a different queue, * (2) reallocated to a different object, * (3) reallocated to a different offset, or * (4) cleaned. */ if (!vm_page_in_laundry(m) || m->object != object || m->pindex != pindex || m->dirty == 0) { vm_page_unlock(m); error = ENXIO; goto unlock_all; } /* * The page may have been busied or held while the object * and page locks were released. */ if (vm_page_busied(m) || m->hold_count != 0) { vm_page_unlock(m); error = EBUSY; goto unlock_all; } } /* * If a page is dirty, then it is either being washed * (but not yet cleaned) or it is still in the * laundry. If it is still in the laundry, then we * start the cleaning operation. */ if ((*numpagedout = vm_pageout_cluster(m)) == 0) error = EIO; unlock_all: VM_OBJECT_WUNLOCK(object); unlock_mp: vm_page_lock_assert(m, MA_NOTOWNED); if (mp != NULL) { if (vp != NULL) vput(vp); vm_object_deallocate(object); vn_finished_write(mp); } return (error); } /* * Attempt to launder the specified number of pages. * * Returns the number of pages successfully laundered. */ static int vm_pageout_launder(struct vm_domain *vmd, int launder) { vm_page_t m, next; struct vm_pagequeue *pq; vm_object_t object; int act_delta, error, maxscan, numpagedout, starting_target; int vnodes_skipped; boolean_t pageout_ok, queue_locked, shortfall; starting_target = launder; vnodes_skipped = 0; /* * Scan the laundry queue for pages eligible to be laundered. We stop * once the target number of dirty pages have been laundered, or once * we've reached the end of the queue. A single iteration of this loop * may cause more than one page to be laundered because of clustering. * * maxscan ensures that we don't re-examine requeued pages. Any * additional pages written as part of a cluster are subtracted from * maxscan since they must be taken from the laundry queue. 
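Put differently, the scan budget and the laundering target are separate counters: maxscan caps how many queue entries are examined, launder counts down as pages are actually written, and a cluster of N pages charges N against the target but only N - 1 extra against the scan budget, since the cluster head was already counted by the loop itself. A stripped-down sketch of that accounting, with do_launder_one() as a hypothetical stand-in for vm_pageout_clean(), matching the loop that follows:

#include <stdio.h>

/* Hypothetical: launder one page, return how many pages its cluster wrote. */
static int
do_launder_one(int page)
{
	(void)page;
	return (3);		/* pretend every attempt clusters three pages */
}

static int
launder_queue(int queue_len, int target)
{
	int maxscan = queue_len;	/* don't re-examine requeued pages */
	int launder = target;
	int page = 0;

	while (maxscan-- > 0 && launder > 0) {
		int numpagedout = do_launder_one(page++);

		launder -= numpagedout;
		maxscan -= numpagedout - 1;	/* cluster members came off this queue too */
	}
	return (target - launder);	/* pages laundered; clustering may overshoot */
}

int
main(void)
{
	printf("laundered %d pages\n", launder_queue(100, 10));
	return (0);
}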
*/ pq = &vmd->vmd_pagequeues[PQ_LAUNDRY]; maxscan = pq->pq_cnt; shortfall = vm_laundry_target() > 0; vm_pagequeue_lock(pq); queue_locked = TRUE; for (m = TAILQ_FIRST(&pq->pq_pl); m != NULL && maxscan-- > 0 && launder > 0; m = next) { vm_pagequeue_assert_locked(pq); KASSERT(queue_locked, ("unlocked laundry queue")); KASSERT(vm_page_in_laundry(m), ("page %p has an inconsistent queue", m)); next = TAILQ_NEXT(m, plinks.q); if ((m->flags & PG_MARKER) != 0) continue; KASSERT((m->flags & PG_FICTITIOUS) == 0, ("PG_FICTITIOUS page %p cannot be in laundry queue", m)); KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("VPO_UNMANAGED page %p cannot be in laundry queue", m)); if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) { vm_page_unlock(m); continue; } object = m->object; if ((!VM_OBJECT_TRYWLOCK(object) && (!vm_pageout_fallback_object_lock(m, &next) || m->hold_count != 0)) || vm_page_busied(m)) { VM_OBJECT_WUNLOCK(object); vm_page_unlock(m); continue; } /* * We unlock the laundry queue, invalidating the * 'next' pointer. Use our marker to remember our * place. */ TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_laundry_marker, plinks.q); vm_pagequeue_unlock(pq); queue_locked = FALSE; /* * Invalid pages can be easily freed. They cannot be * mapped; vm_page_free() asserts this. */ if (m->valid == 0) goto free_page; /* * If the page has been referenced and the object is not dead, * reactivate or requeue the page depending on whether the * object is mapped. */ if ((m->aflags & PGA_REFERENCED) != 0) { vm_page_aflag_clear(m, PGA_REFERENCED); act_delta = 1; } else act_delta = 0; if (object->ref_count != 0) act_delta += pmap_ts_referenced(m); else { KASSERT(!pmap_page_is_mapped(m), ("page %p is mapped", m)); } if (act_delta != 0) { if (object->ref_count != 0) { vm_page_activate(m); /* * Increase the activation count if the page * was referenced while in the laundry queue. * This makes it less likely that the page will * be returned prematurely to the inactive * queue. */ m->act_count += act_delta + ACT_ADVANCE; /* * If this was a background laundering, count * activated pages towards our target. The * purpose of background laundering is to ensure * that pages are eventually cycled through the * laundry queue, and an activation is a valid * way out. */ if (!shortfall) launder--; goto drop_page; } else if ((object->flags & OBJ_DEAD) == 0) goto requeue_page; } /* * If the page appears to be clean at the machine-independent * layer, then remove all of its mappings from the pmap in * anticipation of freeing it. If, however, any of the page's * mappings allow write access, then the page may still be * modified until the last of those mappings are removed. */ if (object->ref_count != 0) { vm_page_test_dirty(m); if (m->dirty == 0) pmap_remove_all(m); } /* * Clean pages are freed, and dirty pages are paged out unless * they belong to a dead object. Requeueing dirty pages from * dead objects is pointless, as they are being paged out and * freed by the thread that destroyed the object. 
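The disposition just described reduces to a small decision table: free clean pages, skip dirty pages of dead objects, requeue dirty swap-backed pages when swap pageouts are administratively disabled, and launder everything else. A hypothetical helper expressing it (classify() and the action names are invented; disable_swap_pageouts corresponds to the vm.disable_swapspace_pageouts tunable defined earlier):

#include <stdbool.h>
#include <stdio.h>

enum action { FREE_PAGE, SKIP_PAGE, REQUEUE_PAGE, LAUNDER_PAGE };

/*
 * Disposition of a page found during the laundry scan, mirroring the
 * logic that follows this comment.
 */
static enum action
classify(bool dirty, bool object_dead, bool swap_backed, bool disable_swap_pageouts)
{
	if (!dirty)
		return (FREE_PAGE);
	if (object_dead)
		return (SKIP_PAGE);
	if (swap_backed && disable_swap_pageouts)
		return (REQUEUE_PAGE);
	return (LAUNDER_PAGE);
}

int
main(void)
{
	printf("%d\n", classify(true, false, true, true));	/* REQUEUE_PAGE */
	return (0);
}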
*/ if (m->dirty == 0) { free_page: vm_page_free(m); PCPU_INC(cnt.v_dfree); } else if ((object->flags & OBJ_DEAD) == 0) { if (object->type != OBJT_SWAP && object->type != OBJT_DEFAULT) pageout_ok = TRUE; else if (disable_swap_pageouts) pageout_ok = FALSE; else pageout_ok = TRUE; if (!pageout_ok) { requeue_page: vm_pagequeue_lock(pq); queue_locked = TRUE; vm_page_requeue_locked(m); goto drop_page; } error = vm_pageout_clean(m, &numpagedout); if (error == 0) { launder -= numpagedout; maxscan -= numpagedout - 1; } else if (error == EDEADLK) { pageout_lock_miss++; vnodes_skipped++; } goto relock_queue; } drop_page: vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); relock_queue: if (!queue_locked) { vm_pagequeue_lock(pq); queue_locked = TRUE; } next = TAILQ_NEXT(&vmd->vmd_laundry_marker, plinks.q); TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q); } vm_pagequeue_unlock(pq); /* * Wakeup the sync daemon if we skipped a vnode in a writeable object * and we didn't launder enough pages. */ if (vnodes_skipped > 0 && launder > 0) (void)speedup_syncer(); return (starting_target - launder); } /* * Perform the work of the laundry thread: periodically wake up and determine * whether any pages need to be laundered. If so, determine the number of pages * that need to be laundered, and launder them. */ static void vm_pageout_laundry_worker(void *arg) { struct vm_domain *domain; uint64_t ninact, nlaundry; u_int wakeups, gen; int cycle, domidx, launder; int shortfall, prev_shortfall, target; domidx = (uintptr_t)arg; domain = &vm_dom[domidx]; KASSERT(domain->vmd_segs != 0, ("domain without segments")); vm_pageout_init_marker(&domain->vmd_laundry_marker, PQ_LAUNDRY); cycle = 0; gen = 0; shortfall = prev_shortfall = 0; target = 0; /* * The pageout laundry worker is never done, so loop forever. */ for (;;) { KASSERT(target >= 0, ("negative target %d", target)); launder = 0; /* * First determine whether we need to launder pages to meet a * shortage of free pages. */ if (vm_laundering_needed()) { shortfall = vm_laundry_target() + vm_pageout_deficit; /* * If we're in shortfall and we haven't yet started a * laundering cycle to get us out of it, begin a run. * If we're still in shortfall despite a previous * laundering run, start a new one. */ if (prev_shortfall == 0 || cycle == 0) { target = shortfall; cycle = VM_LAUNDER_RATE; } prev_shortfall = shortfall; } if (prev_shortfall > 0) { /* * We entered shortfall at some point in the recent * past. If we have reached our target, or the * laundering run is finished and we're not currently in * shortfall, we have no immediate need to launder * pages. Otherwise keep laundering. */ if (vm_laundry_target() <= 0 || cycle == 0) { shortfall = prev_shortfall = target = 0; } else { launder = target / cycle; goto dolaundry; } } /* * There's no immediate need to launder any pages; see if we * meet the conditions to perform background laundering: * * 1. The ratio of dirty to clean inactive pages exceeds the * background laundering threshold and the pagedaemon has * recently been woken up, or * 2. we haven't yet reached the target of the current * background laundering run. */ ninact = vm_cnt.v_inactive_count; nlaundry = vm_cnt.v_laundry_count; wakeups = VM_METER_PCPU_CNT(v_pdwakeups); if (target == 0 && ninact > 0 && wakeups != gen && nlaundry * bkgrd_launder_ratio >= ninact) { gen = wakeups; /* * The pagedaemon has woken up at least once since the * last background laundering run and we're above the * dirty page threshold. 
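To make the trigger and the target arithmetic concrete with hypothetical numbers: with bkgrd_launder_ratio = 50 and 40,000 inactive pages, 700 laundry pages do not start a background run (700 * 50 < 40,000) while 900 do, and the per-run target is then scaled from the pagedaemon's own target exactly as in the code that follows this comment. A small sketch of that arithmetic (all figures invented):

#include <stdio.h>

#define	VM_LAUNDER_INTERVAL	10	/* laundry-thread wakeups per second, from above */

int
main(void)
{
	unsigned int v_free_target = 60000, pageout_wakeup_thresh = 20000;
	unsigned int ninact = 40000, nlaundry = 900;
	unsigned int bkgrd_launder_ratio = 50, bkgrd_launder_max = 2048;
	unsigned int target, launder;

	if (nlaundry * bkgrd_launder_ratio < ninact) {
		printf("no background laundering needed\n");
		return (0);
	}

	/* Scale the pagedaemon's target by the queue-length ratio, /10 fudge. */
	target = v_free_target - pageout_wakeup_thresh;
	target = nlaundry * target / ninact / 10;
	if (target == 0)
		target = 1;
	if (target > bkgrd_launder_max)
		target = bkgrd_launder_max;

	launder = target / VM_LAUNDER_INTERVAL;	/* pages per wakeup over one second */
	printf("target %u pages, %u per wakeup\n", target, launder);
	return (0);
}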
Launder some pages to balance * the inactive and laundry queues. We attempt to * finish within one second. */ cycle = VM_LAUNDER_INTERVAL; /* * Set our target to that of the pagedaemon, scaled by * the relative lengths of the inactive and laundry * queues. Divide by a fudge factor as well: we don't * want to reclaim dirty pages at the same rate as clean * pages. */ target = vm_cnt.v_free_target - vm_pageout_wakeup_thresh; target = nlaundry * (u_int)target / ninact / 10; if (target == 0) target = 1; /* * Make sure we don't exceed the background laundering * threshold. */ target = min(target, bkgrd_launder_max); } if (target > 0 && cycle != 0) launder = target / cycle; dolaundry: if (launder > 0) target -= min(vm_pageout_launder(domain, launder), target); tsleep(&vm_cnt.v_laundry_count, PVM, "laundr", hz / VM_LAUNDER_INTERVAL); cycle--; } } /* * vm_pageout_scan does the dirty work for the pageout daemon. * * pass 0 - Update active LRU/deactivate pages * pass 1 - Free inactive pages */ static void vm_pageout_scan(struct vm_domain *vmd, int pass) { vm_page_t m, next; struct vm_pagequeue *pq; vm_object_t object; long min_scan; int act_delta, addl_page_shortage, deficit, maxscan; int page_shortage, scan_tick, scanned, starting_page_shortage; boolean_t queue_locked; /* * If we need to reclaim memory ask kernel caches to return * some. We rate limit to avoid thrashing. */ if (vmd == &vm_dom[0] && pass > 0 && (time_uptime - lowmem_uptime) >= lowmem_period) { /* * Decrease registered cache sizes. */ SDT_PROBE0(vm, , , vm__lowmem_scan); EVENTHANDLER_INVOKE(vm_lowmem, 0); /* * We do this explicitly after the caches have been * drained above. */ uma_reclaim(); lowmem_uptime = time_uptime; } /* * The addl_page_shortage is the number of temporarily * stuck pages in the inactive queue. In other words, the * number of pages from the inactive count that should be * discounted in setting the target for the active queue scan. */ addl_page_shortage = 0; /* * Calculate the number of pages that we want to free. */ if (pass > 0) { deficit = atomic_readandclear_int(&vm_pageout_deficit); page_shortage = vm_paging_target() + deficit; } else page_shortage = deficit = 0; starting_page_shortage = page_shortage; /* * Start scanning the inactive queue for pages that we can free. The * scan will stop when we reach the target or we have scanned the * entire queue. (Note that m->act_count is not used to make * decisions for the inactive queue, only for the active queue.) */ pq = &vmd->vmd_pagequeues[PQ_INACTIVE]; maxscan = pq->pq_cnt; vm_pagequeue_lock(pq); queue_locked = TRUE; for (m = TAILQ_FIRST(&pq->pq_pl); m != NULL && maxscan-- > 0 && page_shortage > 0; m = next) { vm_pagequeue_assert_locked(pq); KASSERT(queue_locked, ("unlocked inactive queue")); KASSERT(vm_page_inactive(m), ("Inactive queue %p", m)); PCPU_INC(cnt.v_pdpages); next = TAILQ_NEXT(m, plinks.q); /* * skip marker pages */ if (m->flags & PG_MARKER) continue; KASSERT((m->flags & PG_FICTITIOUS) == 0, ("Fictitious page %p cannot be in inactive queue", m)); KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("Unmanaged page %p cannot be in inactive queue", m)); /* * The page or object lock acquisitions fail if the * page was removed from the queue or moved to a * different position within the queue. In either * case, addl_page_shortage should not be incremented. */ if (!vm_pageout_page_lock(m, &next)) goto unlock_page; else if (m->hold_count != 0) { /* * Held pages are essentially stuck in the * queue. So, they ought to be discounted * from the inactive count. 
See the * calculation of the page_shortage for the * loop over the active queue below. */ addl_page_shortage++; goto unlock_page; } object = m->object; if (!VM_OBJECT_TRYWLOCK(object)) { if (!vm_pageout_fallback_object_lock(m, &next)) goto unlock_object; else if (m->hold_count != 0) { addl_page_shortage++; goto unlock_object; } } if (vm_page_busied(m)) { /* * Don't mess with busy pages. Leave them at * the front of the queue. Most likely, they * are being paged out and will leave the * queue shortly after the scan finishes. So, * they ought to be discounted from the * inactive count. */ addl_page_shortage++; unlock_object: VM_OBJECT_WUNLOCK(object); unlock_page: vm_page_unlock(m); continue; } KASSERT(m->hold_count == 0, ("Held page %p", m)); /* * We unlock the inactive page queue, invalidating the * 'next' pointer. Use our marker to remember our * place. */ TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, plinks.q); vm_page_dequeue_locked(m); vm_pagequeue_unlock(pq); queue_locked = FALSE; /* * Invalid pages can be easily freed. They cannot be * mapped, vm_page_free() asserts this. */ if (m->valid == 0) goto free_page; /* * If the page has been referenced and the object is not dead, * reactivate or requeue the page depending on whether the * object is mapped. */ if ((m->aflags & PGA_REFERENCED) != 0) { vm_page_aflag_clear(m, PGA_REFERENCED); act_delta = 1; } else act_delta = 0; if (object->ref_count != 0) { act_delta += pmap_ts_referenced(m); } else { KASSERT(!pmap_page_is_mapped(m), ("vm_pageout_scan: page %p is mapped", m)); } if (act_delta != 0) { if (object->ref_count != 0) { vm_page_activate(m); /* * Increase the activation count if the page * was referenced while in the inactive queue. * This makes it less likely that the page will * be returned prematurely to the inactive * queue. */ m->act_count += act_delta + ACT_ADVANCE; goto drop_page; } else if ((object->flags & OBJ_DEAD) == 0) { vm_pagequeue_lock(pq); queue_locked = TRUE; m->queue = PQ_INACTIVE; TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_inc(pq); goto drop_page; } } /* * If the page appears to be clean at the machine-independent * layer, then remove all of its mappings from the pmap in * anticipation of freeing it. If, however, any of the page's * mappings allow write access, then the page may still be * modified until the last of those mappings are removed. */ if (object->ref_count != 0) { vm_page_test_dirty(m); if (m->dirty == 0) pmap_remove_all(m); } /* * Clean pages can be freed, but dirty pages must be sent back * to the laundry, unless they belong to a dead object. * Requeueing dirty pages from dead objects is pointless, as * they are being paged out and freed by the thread that * destroyed the object. */ if (m->dirty == 0) { free_page: vm_page_free(m); PCPU_INC(cnt.v_dfree); --page_shortage; } else if ((object->flags & OBJ_DEAD) == 0) vm_page_launder(m); drop_page: vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); if (!queue_locked) { vm_pagequeue_lock(pq); queue_locked = TRUE; } next = TAILQ_NEXT(&vmd->vmd_marker, plinks.q); TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q); } vm_pagequeue_unlock(pq); /* * Wakeup the laundry thread(s) if we didn't free the targeted number * of pages. */ if (page_shortage > 0) wakeup(&vm_cnt.v_laundry_count); #if !defined(NO_SWAPPING) /* * Wakeup the swapout daemon if we didn't free the targeted number of * pages. 
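* The request goes through vm_req_vmdaemon(VM_SWAP_NORMAL), which only sets a flag and, at most about once per second, wakes vm_daemon; the actual swapout_procs() call happens in that thread (see vm_daemon below).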
*/ if (vm_swap_enabled && page_shortage > 0) vm_req_vmdaemon(VM_SWAP_NORMAL); #endif /* * If the inactive queue scan fails repeatedly to meet its * target, kill the largest process. */ vm_pageout_mightbe_oom(vmd, page_shortage, starting_page_shortage); /* * Compute the number of pages we want to try to move from the * active queue to either the inactive or laundry queue. * * When scanning active pages, we make clean pages count more heavily * towards the page shortage than dirty pages. This is because dirty * pages must be laundered before they can be reused and thus have less * utility when attempting to quickly alleviate a shortage. However, * this weighting also causes the scan to deactivate dirty pages more * aggressively, improving the effectiveness of clustering and * ensuring that they can eventually be reused. */ page_shortage = vm_cnt.v_inactive_target - (vm_cnt.v_inactive_count + vm_cnt.v_laundry_count / act_scan_laundry_weight) + vm_paging_target() + deficit + addl_page_shortage; page_shortage *= act_scan_laundry_weight; pq = &vmd->vmd_pagequeues[PQ_ACTIVE]; vm_pagequeue_lock(pq); maxscan = pq->pq_cnt; /* * If we're just idle polling, attempt to visit every * active page within 'update_period' seconds. */ scan_tick = ticks; if (vm_pageout_update_period != 0) { min_scan = pq->pq_cnt; min_scan *= scan_tick - vmd->vmd_last_active_scan; min_scan /= hz * vm_pageout_update_period; } else min_scan = 0; if (min_scan > 0 || (page_shortage > 0 && maxscan > 0)) vmd->vmd_last_active_scan = scan_tick; /* * Scan the active queue for pages that can be deactivated. Update * the per-page activity counter and use it to identify deactivation * candidates. */ for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned < min_scan || (page_shortage > 0 && scanned < maxscan)); m = next, scanned++) { KASSERT(m->queue == PQ_ACTIVE, ("vm_pageout_scan: page %p isn't active", m)); next = TAILQ_NEXT(m, plinks.q); if ((m->flags & PG_MARKER) != 0) continue; KASSERT((m->flags & PG_FICTITIOUS) == 0, ("Fictitious page %p cannot be in active queue", m)); KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("Unmanaged page %p cannot be in active queue", m)); if (!vm_pageout_page_lock(m, &next)) { vm_page_unlock(m); continue; } /* * The count for pagedaemon pages is done after checking the * page for eligibility... */ PCPU_INC(cnt.v_pdpages); /* * Check to see "how much" the page has been used. */ if ((m->aflags & PGA_REFERENCED) != 0) { vm_page_aflag_clear(m, PGA_REFERENCED); act_delta = 1; } else act_delta = 0; /* * Unlocked object ref count check. Two races are possible. * 1) The ref was transitioning to zero and we saw non-zero, * the pmap bits will be checked unnecessarily. * 2) The ref was transitioning to one and we saw zero. * The page lock prevents a new reference to this page so * we need not check the reference bits. */ if (m->object->ref_count != 0) act_delta += pmap_ts_referenced(m); /* * Advance or decay the act_count based on recent usage. */ if (act_delta != 0) { m->act_count += ACT_ADVANCE + act_delta; if (m->act_count > ACT_MAX) m->act_count = ACT_MAX; } else m->act_count -= min(m->act_count, ACT_DECLINE); /* * Move this page to the tail of the active, inactive or laundry * queue depending on usage. */ if (m->act_count == 0) { /* Dequeue to avoid later lock recursion. */ vm_page_dequeue_locked(m); #if 0 /* * This requires the object write lock. It might be a * good idea during a page shortage, but might also * cause contention with a concurrent attempt to launder * pages from this object.
*/ if (m->object->ref_count != 0) vm_page_test_dirty(m); #endif /* * When not short for inactive pages, let dirty pages go * through the inactive queue before moving to the * laundry queues. This gives them some extra time to * be reactivated, potentially avoiding an expensive * pageout. During a page shortage, the inactive queue * is necessarily small, so we may move dirty pages * directly to the laundry queue. */ if (page_shortage <= 0) vm_page_deactivate(m); else { if (m->dirty == 0) { vm_page_deactivate(m); page_shortage -= act_scan_laundry_weight; } else { vm_page_launder(m); page_shortage--; } } } else vm_page_requeue_locked(m); vm_page_unlock(m); } vm_pagequeue_unlock(pq); #if !defined(NO_SWAPPING) /* - * Idle process swapout -- run once per second. + * Idle process swapout -- run once per second when we are reclaiming + * pages. */ - if (vm_swap_idle_enabled) { + if (vm_swap_idle_enabled && pass > 0) { static long lsec; if (time_second != lsec) { vm_req_vmdaemon(VM_SWAP_IDLE); lsec = time_second; } } #endif } static int vm_pageout_oom_vote; /* * The pagedaemon threads randomly select one to perform the * OOM. Trying to kill processes before all pagedaemons * have failed to reach the free page target is premature. */ static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage, int starting_page_shortage) { int old_vote; if (starting_page_shortage <= 0 || starting_page_shortage != page_shortage) vmd->vmd_oom_seq = 0; else vmd->vmd_oom_seq++; if (vmd->vmd_oom_seq < vm_pageout_oom_seq) { if (vmd->vmd_oom) { vmd->vmd_oom = FALSE; atomic_subtract_int(&vm_pageout_oom_vote, 1); } return; } /* * Do not follow the call sequence until OOM condition is * cleared. */ vmd->vmd_oom_seq = 0; if (vmd->vmd_oom) return; vmd->vmd_oom = TRUE; old_vote = atomic_fetchadd_int(&vm_pageout_oom_vote, 1); if (old_vote != vm_ndomains - 1) return; /* * The current pagedaemon thread is the last in the quorum to * start OOM. Initiate the selection and signaling of the * victim. */ vm_pageout_oom(VM_OOM_MEM); /* * After one round of OOM terror, recall our vote. On the * next pass, current pagedaemon would vote again if the low * memory condition is still there, due to vmd_oom being * false. */ vmd->vmd_oom = FALSE; atomic_subtract_int(&vm_pageout_oom_vote, 1); } /* * The OOM killer is the page daemon's action of last resort when * memory allocation requests have been stalled for a prolonged period * of time because it cannot reclaim memory. This function computes * the approximate number of physical pages that could be reclaimed if * the specified address space is destroyed. * * Private, anonymous memory owned by the address space is the * principal resource that we expect to recover after an OOM kill. * Since the physical pages mapped by the address space's COW entries * are typically shared pages, they are unlikely to be released and so * they are not counted. * * To get to the point where the page daemon runs the OOM killer, its * efforts to write back vnode-backed pages may have stalled. This * could be caused by a memory allocation deadlock in the write path * that might be resolved by an OOM kill. Therefore, physical pages * belonging to vnode-backed objects are counted, because they might * be freed without being written out first if the address space holds * the last reference to an unlinked vnode. * * Similarly, physical pages belonging to OBJT_PHYS objects are * counted because the address space might hold the last reference to * the object.
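* As a concrete consequence of the loop below, a copy-on-write entry (MAP_ENTRY_NEEDS_COPY) whose backing object is still shared (ref_count != 1) contributes nothing to the estimate, since killing this process would not free those pages.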
*/ static long vm_pageout_oom_pagecount(struct vmspace *vmspace) { vm_map_t map; vm_map_entry_t entry; vm_object_t obj; long res; map = &vmspace->vm_map; KASSERT(!map->system_map, ("system map")); sx_assert(&map->lock, SA_LOCKED); res = 0; for (entry = map->header.next; entry != &map->header; entry = entry->next) { if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) continue; obj = entry->object.vm_object; if (obj == NULL) continue; if ((entry->eflags & MAP_ENTRY_NEEDS_COPY) != 0 && obj->ref_count != 1) continue; switch (obj->type) { case OBJT_DEFAULT: case OBJT_SWAP: case OBJT_PHYS: case OBJT_VNODE: res += obj->resident_page_count; break; } } return (res); } void vm_pageout_oom(int shortage) { struct proc *p, *bigproc; vm_offset_t size, bigsize; struct thread *td; struct vmspace *vm; /* * We keep the process bigproc locked once we find it to keep anyone * from messing with it; however, there is a possibility of * deadlock if process B is bigproc and one of its child processes * attempts to propagate a signal to B while we are waiting for A's * lock while walking this list. To avoid this, we don't block on * the process lock but just skip a process if it is already locked. */ bigproc = NULL; bigsize = 0; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { int breakout; PROC_LOCK(p); /* * If this is a system, protected or killed process, skip it. */ if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC | P_PROTECTED | P_SYSTEM | P_WEXIT)) != 0 || p->p_pid == 1 || P_KILLED(p) || (p->p_pid < 48 && swap_pager_avail != 0)) { PROC_UNLOCK(p); continue; } /* * If the process is in a non-running type state, * don't touch it. Check all the threads individually. */ breakout = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (!TD_ON_RUNQ(td) && !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td) && !TD_IS_SUSPENDED(td) && !TD_IS_SWAPPED(td)) { thread_unlock(td); breakout = 1; break; } thread_unlock(td); } if (breakout) { PROC_UNLOCK(p); continue; } /* * get the process size */ vm = vmspace_acquire_ref(p); if (vm == NULL) { PROC_UNLOCK(p); continue; } _PHOLD_LITE(p); PROC_UNLOCK(p); sx_sunlock(&allproc_lock); if (!vm_map_trylock_read(&vm->vm_map)) { vmspace_free(vm); sx_slock(&allproc_lock); PRELE(p); continue; } size = vmspace_swap_count(vm); if (shortage == VM_OOM_MEM) size += vm_pageout_oom_pagecount(vm); vm_map_unlock_read(&vm->vm_map); vmspace_free(vm); sx_slock(&allproc_lock); /* * If this process is bigger than the biggest one, * remember it. */ if (size > bigsize) { if (bigproc != NULL) PRELE(bigproc); bigproc = p; bigsize = size; } else { PRELE(p); } } sx_sunlock(&allproc_lock); if (bigproc != NULL) { if (vm_panic_on_oom != 0) panic("out of swap space"); PROC_LOCK(bigproc); killproc(bigproc, "out of swap space"); sched_nice(bigproc, PRIO_MIN); _PRELE(bigproc); PROC_UNLOCK(bigproc); wakeup(&vm_cnt.v_free_count); } } static void vm_pageout_worker(void *arg) { struct vm_domain *domain; int domidx; domidx = (uintptr_t)arg; domain = &vm_dom[domidx]; /* * XXXKIB It could be useful to bind pageout daemon threads to * the cores belonging to the domain, from which vm_page_array * is allocated. */ KASSERT(domain->vmd_segs != 0, ("domain without segments")); domain->vmd_last_active_scan = ticks; vm_pageout_init_marker(&domain->vmd_marker, PQ_INACTIVE); vm_pageout_init_marker(&domain->vmd_inacthead, PQ_INACTIVE); TAILQ_INSERT_HEAD(&domain->vmd_pagequeues[PQ_INACTIVE].pq_pl, &domain->vmd_inacthead, plinks.q); /* * The pageout daemon worker is never done, so loop forever.
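* Each iteration picks a pass level for vm_pageout_scan(): pass 0 only updates the active LRU, while pass >= 1 also tries to free inactive pages, escalating when an earlier scan failed to meet its target.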
*/ while (TRUE) { mtx_lock(&vm_page_queue_free_mtx); /* * Generally, after a level >= 1 scan, if there are enough * free pages to wakeup the waiters, then they are already * awake. A call to vm_page_free() during the scan awakened * them. However, in the following case, this wakeup serves * to bound the amount of time that a thread might wait. * Suppose a thread's call to vm_page_alloc() fails, but * before that thread calls VM_WAIT, enough pages are freed by * other threads to alleviate the free page shortage. The * thread will, nonetheless, wait until another page is freed * or this wakeup is performed. */ if (vm_pages_needed && !vm_page_count_min()) { vm_pages_needed = false; wakeup(&vm_cnt.v_free_count); } /* * Do not clear vm_pageout_wanted until we reach our target. * Otherwise, we may be awakened over and over again, wasting * CPU time. */ if (vm_pageout_wanted && !vm_paging_needed()) vm_pageout_wanted = false; /* * Might the page daemon receive a wakeup call? */ if (vm_pageout_wanted) { /* * No. Either vm_pageout_wanted was set by another * thread during the previous scan, which must have * been a level 0 scan, or vm_pageout_wanted was * already set and the scan failed to free enough * pages. If we haven't yet performed a level >= 2 * scan (unlimited dirty cleaning), then upgrade the * level and scan again now. Otherwise, sleep a bit * and try again later. */ mtx_unlock(&vm_page_queue_free_mtx); if (domain->vmd_pass > 1) pause("psleep", hz / 2); domain->vmd_pass++; } else { /* * Yes. Sleep until pages need to be reclaimed or * have their reference stats updated. */ if (mtx_sleep(&vm_pageout_wanted, &vm_page_queue_free_mtx, PDROP | PVM, "psleep", hz) == 0) { PCPU_INC(cnt.v_pdwakeups); domain->vmd_pass = 1; } else domain->vmd_pass = 0; } vm_pageout_scan(domain, domain->vmd_pass); } } /* * vm_pageout_init initialises basic pageout daemon settings. */ static void vm_pageout_init(void) { /* * Initialize some paging parameters. */ vm_cnt.v_interrupt_free_min = 2; if (vm_cnt.v_page_count < 2000) vm_pageout_page_count = 8; /* * v_free_reserved needs to include enough for the largest * swap pager structures plus enough for any pv_entry structs * when paging. */ if (vm_cnt.v_page_count > 1024) vm_cnt.v_free_min = 4 + (vm_cnt.v_page_count - 1024) / 200; else vm_cnt.v_free_min = 4; vm_cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE + vm_cnt.v_interrupt_free_min; vm_cnt.v_free_reserved = vm_pageout_page_count + vm_cnt.v_pageout_free_min + (vm_cnt.v_page_count / 768); vm_cnt.v_free_severe = vm_cnt.v_free_min / 2; vm_cnt.v_free_target = 4 * vm_cnt.v_free_min + vm_cnt.v_free_reserved; vm_cnt.v_free_min += vm_cnt.v_free_reserved; vm_cnt.v_free_severe += vm_cnt.v_free_reserved; vm_cnt.v_inactive_target = (3 * vm_cnt.v_free_target) / 2; if (vm_cnt.v_inactive_target > vm_cnt.v_free_count / 3) vm_cnt.v_inactive_target = vm_cnt.v_free_count / 3; /* * Set the default wakeup threshold to be 10% above the minimum * page limit. This keeps the steady state out of shortfall. */ vm_pageout_wakeup_thresh = (vm_cnt.v_free_min / 10) * 11; /* * Set interval in seconds for active scan. We want to visit each * page at least once every ten minutes. This is to prevent worst * case paging behaviors with stale active LRU. */ if (vm_pageout_update_period == 0) vm_pageout_update_period = 600; /* XXX does not really belong here */ if (vm_page_max_wired == 0) vm_page_max_wired = vm_cnt.v_free_count / 3; } /* * vm_pageout is the high level pageout daemon. 
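* It creates the laundry thread for domain 0, a pageout worker for each additional NUMA domain, and the uma_reclaim helper thread, and then runs the domain-0 pageout worker itself.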
*/ static void vm_pageout(void) { int error; #ifdef VM_NUMA_ALLOC int i; #endif swap_pager_swap_init(); error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL, 0, 0, "laundry: dom0"); if (error != 0) panic("starting laundry for domain 0, error %d", error); #ifdef VM_NUMA_ALLOC for (i = 1; i < vm_ndomains; i++) { error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i, curproc, NULL, 0, 0, "dom%d", i); if (error != 0) { panic("starting pageout for domain %d, error %d\n", i, error); } } #endif error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL, 0, 0, "uma"); if (error != 0) panic("starting uma_reclaim helper, error %d\n", error); vm_pageout_worker((void *)(uintptr_t)0); } /* * Unless the free page queue lock is held by the caller, this function * should be regarded as advisory. Specifically, the caller should * not msleep() on &vm_cnt.v_free_count following this function unless * the free page queue lock is held until the msleep() is performed. */ void pagedaemon_wakeup(void) { if (!vm_pageout_wanted && curthread->td_proc != pageproc) { vm_pageout_wanted = true; wakeup(&vm_pageout_wanted); } } #if !defined(NO_SWAPPING) static void vm_req_vmdaemon(int req) { static int lastrun = 0; mtx_lock(&vm_daemon_mtx); vm_pageout_req_swapout |= req; if ((ticks > (lastrun + hz)) || (ticks < lastrun)) { wakeup(&vm_daemon_needed); lastrun = ticks; } mtx_unlock(&vm_daemon_mtx); } static void vm_daemon(void) { struct rlimit rsslim; struct proc *p; struct thread *td; struct vmspace *vm; int breakout, swapout_flags, tryagain, attempts; #ifdef RACCT uint64_t rsize, ravailable; #endif while (TRUE) { mtx_lock(&vm_daemon_mtx); msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep", #ifdef RACCT racct_enable ? hz : 0 #else 0 #endif ); swapout_flags = vm_pageout_req_swapout; vm_pageout_req_swapout = 0; mtx_unlock(&vm_daemon_mtx); if (swapout_flags) swapout_procs(swapout_flags); /* * scan the processes for exceeding their rlimits or if * process is swapped out -- deactivate pages */ tryagain = 0; attempts = 0; again: attempts++; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { vm_pindex_t limit, size; /* * if this is a system process or if we have already * looked at this process, skip it. */ PROC_LOCK(p); if (p->p_state != PRS_NORMAL || p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) { PROC_UNLOCK(p); continue; } /* * if the process is in a non-running type state, * don't touch it. */ breakout = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (!TD_ON_RUNQ(td) && !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td) && !TD_IS_SUSPENDED(td)) { thread_unlock(td); breakout = 1; break; } thread_unlock(td); } if (breakout) { PROC_UNLOCK(p); continue; } /* * get a limit */ lim_rlimit_proc(p, RLIMIT_RSS, &rsslim); limit = OFF_TO_IDX( qmin(rsslim.rlim_cur, rsslim.rlim_max)); /* * let processes that are swapped out really be * swapped out set the limit to nothing (will force a * swap-out.) */ if ((p->p_flag & P_INMEM) == 0) limit = 0; /* XXX */ vm = vmspace_acquire_ref(p); _PHOLD_LITE(p); PROC_UNLOCK(p); if (vm == NULL) { PRELE(p); continue; } sx_sunlock(&allproc_lock); size = vmspace_resident_count(vm); if (size >= limit) { vm_pageout_map_deactivate_pages( &vm->vm_map, limit); } #ifdef RACCT if (racct_enable) { rsize = IDX_TO_OFF(size); PROC_LOCK(p); racct_set(p, RACCT_RSS, rsize); ravailable = racct_get_available(p, RACCT_RSS); PROC_UNLOCK(p); if (rsize > ravailable) { /* * Don't be overly aggressive; this * might be an innocent process, * and the limit could've been exceeded * by some memory hog. 
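* (With purely illustrative numbers: a 400 MB resident set whose RACCT limit leaves only 100 MB available is trimmed to at most 300 MB below, i.e. no more than a quarter of it is deactivated in one pass.)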
Don't try * to deactivate more than 1/4th * of process' resident set size. */ if (attempts <= 8) { if (ravailable < rsize - (rsize / 4)) { ravailable = rsize - (rsize / 4); } } vm_pageout_map_deactivate_pages( &vm->vm_map, OFF_TO_IDX(ravailable)); /* Update RSS usage after paging out. */ size = vmspace_resident_count(vm); rsize = IDX_TO_OFF(size); PROC_LOCK(p); racct_set(p, RACCT_RSS, rsize); PROC_UNLOCK(p); if (rsize > ravailable) tryagain = 1; } } #endif vmspace_free(vm); sx_slock(&allproc_lock); PRELE(p); } sx_sunlock(&allproc_lock); if (tryagain != 0 && attempts <= 10) goto again; } } #endif /* !defined(NO_SWAPPING) */ Index: user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/add/option-f_size_mismatch.t =================================================================== --- user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/add/option-f_size_mismatch.t (revision 303641) +++ user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/add/option-f_size_mismatch.t (revision 303642) @@ -1,436 +1,434 @@ #!/bin/sh # $FreeBSD$ dir=`dirname $0` . ${dir}/../../misc.sh -[ "${os}" = "FreeBSD" ] && die "panics FreeBSD; see bug # 194586" - echo "1..100" disks_create 7 disks_create 1 64M files_create 7 files_create 1 64M names_create 1 expect_ok ${ZPOOL} create ${name0} mirror ${disk0} ${disk1} expect_fl ${ZPOOL} add ${name0} mirror ${disk7} ${disk2} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${disk0} ${disk1} expect_ok ${ZPOOL} add -f ${name0} mirror ${disk7} ${disk2} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk7} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${file0} ${file1} expect_fl ${ZPOOL} add ${name0} mirror ${file7} ${file2} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${file0} ${file1} expect_ok ${ZPOOL} add -f ${name0} mirror ${file7} ${file2} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file7} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${disk0} 
${disk1} ${disk2} expect_fl ${ZPOOL} add ${name0} raidz1 ${disk3} ${disk7} ${disk4} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${disk0} ${disk1} ${disk2} expect_ok ${ZPOOL} add -f ${name0} raidz1 ${disk3} ${disk7} ${disk4} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${disk3} ONLINE 0 0 0" echo " ${disk7} ONLINE 0 0 0" echo " ${disk4} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${file0} ${file1} ${file2} expect_fl ${ZPOOL} add ${name0} raidz1 ${file3} ${file7} ${file4} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${file0} ${file1} ${file2} expect_ok ${ZPOOL} add -f ${name0} raidz1 ${file3} ${file7} ${file4} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo " ${file7} ONLINE 0 0 0" echo " ${file4} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${disk0} ${disk1} ${disk2} ${disk3} expect_fl ${ZPOOL} add ${name0} raidz2 ${disk4} ${disk5} ${disk6} ${disk7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo " ${disk3} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${disk0} ${disk1} ${disk2} ${disk3} expect_ok ${ZPOOL} add -f ${name0} raidz2 ${disk4} ${disk5} ${disk6} ${disk7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo " ${disk3} ONLINE 0 0 
0" echo " raidz2 ONLINE 0 0 0" echo " ${disk4} ONLINE 0 0 0" echo " ${disk5} ONLINE 0 0 0" echo " ${disk6} ONLINE 0 0 0" echo " ${disk7} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${file0} ${file1} ${file2} ${file3} expect_fl ${ZPOOL} add ${name0} raidz2 ${file4} ${file5} ${file6} ${file7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${file0} ${file1} ${file2} ${file3} expect_ok ${ZPOOL} add -f ${name0} raidz2 ${file4} ${file5} ${file6} ${file7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${file4} ONLINE 0 0 0" echo " ${file5} ONLINE 0 0 0" echo " ${file6} ONLINE 0 0 0" echo " ${file7} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${disk0} add_msg="# TODO Sun CR 6726091, Lustre bug 16873" expect_fl ${ZPOOL} add ${name0} log mirror ${disk1} ${disk7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} add_msg="" expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${disk0} expect_ok ${ZPOOL} add -f ${name0} log mirror ${disk1} ${disk7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk7} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} add_msg="# TODO Sun CR 6726091, Lustre bug 16873" expect_fl ${ZPOOL} add ${name0} log mirror ${file1} ${file7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} add_msg="" expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} expect_ok ${ZPOOL} add -f ${name0} log mirror ${file1} ${file7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 
0" echo " ${file0} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file7} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${disk0} log mirror ${disk1} ${disk2} add_msg="# TODO Sun CR 6726091, Lustre bug 16873" expect_fl ${ZPOOL} add ${name0} log mirror ${disk3} ${disk7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} add_msg="" expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${disk0} log mirror ${disk1} ${disk2} expect_ok ${ZPOOL} add -f ${name0} log mirror ${disk3} ${disk7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk3} ONLINE 0 0 0" echo " ${disk7} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} log mirror ${file1} ${file2} add_msg="# TODO Sun CR 6726091, Lustre bug 16873" expect_fl ${ZPOOL} add ${name0} log mirror ${file3} ${file7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} add_msg="" expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} log mirror ${file1} ${file2} expect_ok ${ZPOOL} add -f ${name0} log mirror ${file3} ${file7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo " ${file7} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} disks_destroy files_destroy Index: user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/add/option-f_type_mismatch.t =================================================================== --- user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/add/option-f_type_mismatch.t (revision 303641) +++ user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/add/option-f_type_mismatch.t (revision 303642) @@ -1,410 +1,408 @@ #!/bin/sh # $FreeBSD$ dir=`dirname $0` . 
${dir}/../../misc.sh -[ "${os}" = "FreeBSD" ] && die "panics FreeBSD; see bug # 194587" - echo "1..100" disks_create 7 files_create 7 names_create 1 expect_ok ${ZPOOL} create ${name0} ${disk0} expect_fl ${ZPOOL} add ${name0} ${file0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${disk0} expect_ok ${ZPOOL} add -f ${name0} ${file0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} expect_fl ${ZPOOL} add ${name0} ${disk0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} expect_ok ${ZPOOL} add -f ${name0} ${disk0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${disk0} ${disk1} expect_fl ${ZPOOL} add ${name0} mirror ${disk2} ${file0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${disk0} ${disk1} expect_ok ${ZPOOL} add -f ${name0} mirror ${disk2} ${file0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${file0} ${file1} expect_fl ${ZPOOL} add ${name0} mirror ${disk0} ${file2} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy 
${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${file0} ${file1} expect_ok ${ZPOOL} add -f ${name0} mirror ${disk0} ${file2} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${disk0} ${disk1} ${disk2} expect_fl ${ZPOOL} add ${name0} raidz1 ${disk3} ${file0} ${disk4} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${disk0} ${disk1} ${disk2} expect_ok ${ZPOOL} add -f ${name0} raidz1 ${disk3} ${file0} ${disk4} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${disk3} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${disk4} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${file0} ${file1} ${file2} expect_fl ${ZPOOL} add ${name0} raidz1 ${file3} ${disk0} ${file4} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${file0} ${file1} ${file2} expect_ok ${ZPOOL} add -f ${name0} raidz1 ${file3} ${disk0} ${file4} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${file4} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${disk0} ${disk1} ${disk2} ${disk3} expect_fl ${ZPOOL} add ${name0} raidz2 ${disk4} ${file0} ${disk5} ${disk6} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 
0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo " ${disk3} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${disk0} ${disk1} ${disk2} ${disk3} expect_ok ${ZPOOL} add -f ${name0} raidz2 ${disk4} ${file0} ${disk5} ${disk6} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo " ${disk3} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${disk4} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${disk5} ONLINE 0 0 0" echo " ${disk6} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${file0} ${file1} ${file2} ${file3} expect_fl ${ZPOOL} add ${name0} raidz2 ${file4} ${disk0} ${file5} ${file6} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${file0} ${file1} ${file2} ${file3} expect_ok ${ZPOOL} add -f ${name0} raidz2 ${file4} ${disk0} ${file5} ${file6} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${file4} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${file5} ONLINE 0 0 0" echo " ${file6} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${disk0} add_msg="# TODO Sun CR 6726091, Lustre bug 16873" expect_fl ${ZPOOL} add ${name0} log mirror ${disk1} ${file0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} add_msg="" expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${disk0} expect_ok ${ZPOOL} add -f ${name0} log mirror ${disk1} ${file0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} add_msg="# TODO Sun CR 
6726091, Lustre bug 16873" expect_fl ${ZPOOL} add ${name0} log mirror ${file1} ${disk0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} add_msg="" expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} expect_ok ${ZPOOL} add -f ${name0} log mirror ${file1} ${disk0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} disks_destroy files_destroy Index: user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/create/files.t =================================================================== --- user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/create/files.t (revision 303641) +++ user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/create/files.t (revision 303642) @@ -1,191 +1,189 @@ #!/bin/sh # $FreeBSD$ dir=`dirname $0` . ${dir}/../../misc.sh -[ "${os}" = "FreeBSD" ] && die "panics FreeBSD; see bug # 194589" - echo "1..59" files_create 5 names_create 1 expect_ok ${ZPOOL} create ${name0} ${file0} expect_ok ${ZPOOL} status -x ${name0} expect "pool '${name0}' is healthy" ${ZPOOL} status -x ${name0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} ${file1} ${file2} ${file3} ${file4} expect_ok ${ZPOOL} status -x ${name0} expect "pool '${name0}' is healthy" ${ZPOOL} status -x ${name0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo " ${file4} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${file0} ${file1} expect_ok ${ZPOOL} status -x ${name0} expect "pool '${name0}' is healthy" ${ZPOOL} status -x ${name0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${file0} ${file1} ${file2} expect_ok ${ZPOOL} status -x ${name0} expect "pool '${name0}' is healthy" ${ZPOOL} status -x ${name0} exp=`( echo " pool: 
${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${file0} ${file1} ${file2} ${file3} expect_ok ${ZPOOL} status -x ${name0} expect "pool '${name0}' is healthy" ${ZPOOL} status -x ${name0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${file0} ${file1} spare ${file2} ${file3} expect_ok ${ZPOOL} status -x ${name0} expect "pool '${name0}' is healthy" ${ZPOOL} status -x ${name0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " spares" echo " ${file2} AVAIL " echo " ${file3} AVAIL " echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${file0} ${file1} log ${file2} ${file3} expect_ok ${ZPOOL} status -x ${name0} expect "pool '${name0}' is healthy" ${ZPOOL} status -x ${name0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${file0} ${file1} log mirror ${file2} ${file3} expect_ok ${ZPOOL} status -x ${name0} expect "pool '${name0}' is healthy" ${ZPOOL} status -x ${name0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} create ${name0} mirror ${file0} ${file1} cache ${file2} ${file3} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} files_destroy Index: user/alc/PQ_LAUNDRY/usr.bin/at/at.c 
=================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/at/at.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/at/at.c (revision 303642) @@ -1,913 +1,913 @@ /* * at.c : Put file into atrun queue * Copyright (C) 1993, 1994 Thomas Koenig * * Atrun & Atq modifications * Copyright (C) 1993 David Parsons * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the author(s) may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define _USE_BSD 1 /* System Headers */ #include #include #include #include #include #include #include #include #include #ifndef __FreeBSD__ #include #endif #ifdef __FreeBSD__ #include #endif #include #include #include #include #include #include #include #include /* Local headers */ #include "at.h" #include "panic.h" #include "parsetime.h" #include "perm.h" #define MAIN #include "privs.h" /* Macros */ #ifndef ATJOB_DIR #define ATJOB_DIR "/usr/spool/atjobs/" #endif #ifndef LFILE #define LFILE ATJOB_DIR ".lockfile" #endif #ifndef ATJOB_MX #define ATJOB_MX 255 #endif #define ALARMC 10 /* Number of seconds to wait for timeout */ #define SIZE 255 #define TIMESIZE 50 enum { ATQ, ATRM, AT, BATCH, CAT }; /* what program we want to run */ /* File scope variables */ static const char *no_export[] = { "TERM", "TERMCAP", "DISPLAY", "_" }; static int send_mail = 0; static char *atinput = NULL; /* where to get input from */ static char atqueue = 0; /* which queue to examine for jobs (atq) */ /* External variables */ extern char **environ; int fcreated; char atfile[] = ATJOB_DIR "12345678901234"; char atverify = 0; /* verify time instead of queuing job */ char *namep; /* Function declarations */ static void sigc(int signo); static void alarmc(int signo); static char *cwdname(void); static void writefile(time_t runtimer, char queue); static void list_jobs(long *, int); static long nextjob(void); static time_t ttime(const char *arg); static int in_job_list(long, long *, int); static long *get_job_list(int, char *[], int *); /* Signal catching functions */ static void sigc(int signo __unused) { /* If the user presses ^C, remove the spool file and exit */ if (fcreated) { PRIV_START unlink(atfile); PRIV_END } _exit(EXIT_FAILURE); } static void alarmc(int signo __unused) { char buf[1024]; /* Time out after some seconds. 
*/ strlcpy(buf, namep, sizeof(buf)); strlcat(buf, ": file locking timed out\n", sizeof(buf)); write(STDERR_FILENO, buf, strlen(buf)); sigc(0); } /* Local functions */ static char *cwdname(void) { /* Read in the current directory; the name will be overwritten on * subsequent calls. */ static char *ptr = NULL; static size_t size = SIZE; if (ptr == NULL) if ((ptr = malloc(size)) == NULL) errx(EXIT_FAILURE, "virtual memory exhausted"); while (1) { if (ptr == NULL) panic("out of memory"); if (getcwd(ptr, size-1) != NULL) return ptr; if (errno != ERANGE) perr("cannot get directory"); free (ptr); size += SIZE; if ((ptr = malloc(size)) == NULL) errx(EXIT_FAILURE, "virtual memory exhausted"); } } static long nextjob(void) { long jobno; FILE *fid; if ((fid = fopen(ATJOB_DIR ".SEQ", "r+")) != NULL) { if (fscanf(fid, "%5lx", &jobno) == 1) { rewind(fid); jobno = (1+jobno) % 0xfffff; /* 2^20 jobs enough? */ fprintf(fid, "%05lx\n", jobno); } else jobno = EOF; fclose(fid); return jobno; } else if ((fid = fopen(ATJOB_DIR ".SEQ", "w")) != NULL) { fprintf(fid, "%05lx\n", jobno = 1); fclose(fid); return 1; } return EOF; } static void writefile(time_t runtimer, char queue) { /* This does most of the work if at or batch are invoked for writing a job. */ long jobno; char *ap, *ppos, *mailname; struct passwd *pass_entry; struct stat statbuf; int fdes, lockdes, fd2; FILE *fp, *fpin; struct sigaction act; char **atenv; int ch; mode_t cmask; struct flock lock; #ifdef __FreeBSD__ (void) setlocale(LC_TIME, ""); #endif /* Install the signal handler for SIGINT; terminate after removing the * spool file if necessary */ act.sa_handler = sigc; sigemptyset(&(act.sa_mask)); act.sa_flags = 0; sigaction(SIGINT, &act, NULL); ppos = atfile + strlen(ATJOB_DIR); /* Loop over all possible file names for running something at this * particular time, see if a file is there; the first empty slot at any * particular time is used. Lock the file LFILE first to make sure * we're alone when doing this. */ PRIV_START if ((lockdes = open(LFILE, O_WRONLY | O_CREAT, S_IWUSR | S_IRUSR)) < 0) perr("cannot open lockfile " LFILE); lock.l_type = F_WRLCK; lock.l_whence = SEEK_SET; lock.l_start = 0; lock.l_len = 0; act.sa_handler = alarmc; sigemptyset(&(act.sa_mask)); act.sa_flags = 0; /* Set an alarm so a timeout occurs after ALARMC seconds, in case * something is seriously broken. */ sigaction(SIGALRM, &act, NULL); alarm(ALARMC); fcntl(lockdes, F_SETLKW, &lock); alarm(0); if ((jobno = nextjob()) == EOF) perr("cannot generate job number"); sprintf(ppos, "%c%5lx%8lx", queue, jobno, (unsigned long) (runtimer/60)); for(ap=ppos; *ap != '\0'; ap ++) if (*ap == ' ') *ap = '0'; if (stat(atfile, &statbuf) != 0) if (errno != ENOENT) perr("cannot access " ATJOB_DIR); /* Create the file. The x bit is only going to be set after it has * been completely written out, to make sure it is not executed in the * meantime. To make sure they do not get deleted, turn off their r * bit. Yes, this is a kluge. */ cmask = umask(S_IRUSR | S_IWUSR | S_IXUSR); if ((fdes = creat(atfile, O_WRONLY)) == -1) perr("cannot create atjob file"); if ((fd2 = dup(fdes)) <0) perr("error in dup() of job file"); if(fchown(fd2, real_uid, real_gid) != 0) perr("cannot give away file"); PRIV_END /* We no longer need suid root; now we just need to be able to write * to the directory, if necessary. */ REDUCE_PRIV(DAEMON_UID, DAEMON_GID) /* We've successfully created the file; let's set the flag so it * gets removed in case of an interrupt or error. 
*/ fcreated = 1; /* Now we can release the lock, so other people can access it */ lock.l_type = F_UNLCK; lock.l_whence = SEEK_SET; lock.l_start = 0; lock.l_len = 0; fcntl(lockdes, F_SETLKW, &lock); close(lockdes); if((fp = fdopen(fdes, "w")) == NULL) panic("cannot reopen atjob file"); /* Get the userid to mail to, first by trying getlogin(), * then from LOGNAME, finally from getpwuid(). */ mailname = getlogin(); if (mailname == NULL) mailname = getenv("LOGNAME"); if ((mailname == NULL) || (mailname[0] == '\0') || (strlen(mailname) >= MAXLOGNAME) || (getpwnam(mailname)==NULL)) { pass_entry = getpwuid(real_uid); if (pass_entry != NULL) mailname = pass_entry->pw_name; } if (atinput != (char *) NULL) { fpin = freopen(atinput, "r", stdin); if (fpin == NULL) perr("cannot open input file"); } fprintf(fp, "#!/bin/sh\n# atrun uid=%ld gid=%ld\n# mail %*s %d\n", (long) real_uid, (long) real_gid, MAXLOGNAME - 1, mailname, send_mail); /* Write out the umask at the time of invocation */ fprintf(fp, "umask %lo\n", (unsigned long) cmask); /* Write out the environment. Anything that may look like a * special character to the shell is quoted, except for \n, which is * done with a pair of "'s. Don't export the no_export list (such * as TERM or DISPLAY) because we don't want these. */ for (atenv= environ; *atenv != NULL; atenv++) { int export = 1; char *eqp; eqp = strchr(*atenv, '='); if (eqp == NULL) eqp = *atenv; else { size_t i; - for (i=0; i&2\n\t exit 1\n}\n"); while((ch = getchar()) != EOF) fputc(ch, fp); fprintf(fp, "\n"); if (ferror(fp)) panic("output error"); if (ferror(stdin)) panic("input error"); fclose(fp); /* Set the x bit so that we're ready to start executing */ if (fchmod(fd2, S_IRUSR | S_IWUSR | S_IXUSR) < 0) perr("cannot give away file"); close(fd2); fprintf(stderr, "Job %ld will be executed using /bin/sh\n", jobno); } static int in_job_list(long job, long *joblist, int len) { int i; for (i = 0; i < len; i++) if (job == joblist[i]) return 1; return 0; } static void list_jobs(long *joblist, int len) { /* List all a user's jobs in the queue, by looping through ATJOB_DIR, * or everybody's if we are root */ struct passwd *pw; DIR *spool; struct dirent *dirent; struct stat buf; struct tm runtime; unsigned long ctm; char queue; long jobno; time_t runtimer; char timestr[TIMESIZE]; int first=1; #ifdef __FreeBSD__ (void) setlocale(LC_TIME, ""); #endif PRIV_START if (chdir(ATJOB_DIR) != 0) perr("cannot change to " ATJOB_DIR); if ((spool = opendir(".")) == NULL) perr("cannot open " ATJOB_DIR); /* Loop over every file in the directory */ while((dirent = readdir(spool)) != NULL) { if (stat(dirent->d_name, &buf) != 0) perr("cannot stat in " ATJOB_DIR); /* See it's a regular file and has its x bit turned on and * is the user's */ if (!S_ISREG(buf.st_mode) || ((buf.st_uid != real_uid) && ! (real_uid == 0)) || !(S_IXUSR & buf.st_mode || atverify)) continue; if(sscanf(dirent->d_name, "%c%5lx%8lx", &queue, &jobno, &ctm)!=3) continue; /* If jobs are given, only list those jobs */ if (joblist && !in_job_list(jobno, joblist, len)) continue; if (atqueue && (queue != atqueue)) continue; runtimer = 60*(time_t) ctm; runtime = *localtime(&runtimer); strftime(timestr, TIMESIZE, "%+", &runtime); if (first) { printf("Date\t\t\t\tOwner\t\tQueue\tJob#\n"); first=0; } pw = getpwuid(buf.st_uid); printf("%s\t%-16s%c%s\t%ld\n", timestr, pw ? pw->pw_name : "???", queue, (S_IXUSR & buf.st_mode) ? 
"":"(done)", jobno); } PRIV_END closedir(spool); } static void process_jobs(int argc, char **argv, int what) { /* Delete every argument (job - ID) given */ int i; int rc; int nofJobs; int nofDone; int statErrno; struct stat buf; DIR *spool; struct dirent *dirent; unsigned long ctm; char queue; long jobno; nofJobs = argc - optind; nofDone = 0; PRIV_START if (chdir(ATJOB_DIR) != 0) perr("cannot change to " ATJOB_DIR); if ((spool = opendir(".")) == NULL) perr("cannot open " ATJOB_DIR); PRIV_END /* Loop over every file in the directory */ while((dirent = readdir(spool)) != NULL) { PRIV_START rc = stat(dirent->d_name, &buf); statErrno = errno; PRIV_END /* There's a race condition between readdir above and stat here: * another atrm process could have removed the file from the spool * directory under our nose. If this happens, stat will set errno to * ENOENT, which we shouldn't treat as fatal. */ if (rc != 0) { if (statErrno == ENOENT) continue; else perr("cannot stat in " ATJOB_DIR); } if(sscanf(dirent->d_name, "%c%5lx%8lx", &queue, &jobno, &ctm)!=3) continue; for (i=optind; i < argc; i++) { if (atoi(argv[i]) == jobno) { if ((buf.st_uid != real_uid) && !(real_uid == 0)) errx(EXIT_FAILURE, "%s: not owner", argv[i]); switch (what) { case ATRM: PRIV_START if (unlink(dirent->d_name) != 0) perr(dirent->d_name); PRIV_END break; case CAT: { FILE *fp; int ch; PRIV_START fp = fopen(dirent->d_name,"r"); PRIV_END if (!fp) { perr("cannot open file"); } while((ch = getc(fp)) != EOF) { putchar(ch); } fclose(fp); } break; default: errx(EXIT_FAILURE, "internal error, process_jobs = %d", what); } /* All arguments have been processed */ if (++nofDone == nofJobs) goto end; } } } end: closedir(spool); } /* delete_jobs */ #define ATOI2(ar) ((ar)[0] - '0') * 10 + ((ar)[1] - '0'); (ar) += 2; static time_t ttime(const char *arg) { /* * This is pretty much a copy of stime_arg1() from touch.c. I changed * the return value and the argument list because it's more convenient * (IMO) to do everything in one place. - Joe Halpin */ struct timeval tv[2]; time_t now; struct tm *t; int yearset; char *p; if (gettimeofday(&tv[0], NULL)) panic("Cannot get current time"); /* Start with the current time. */ now = tv[0].tv_sec; if ((t = localtime(&now)) == NULL) panic("localtime"); /* [[CC]YY]MMDDhhmm[.SS] */ if ((p = strchr(arg, '.')) == NULL) t->tm_sec = 0; /* Seconds defaults to 0. */ else { if (strlen(p + 1) != 2) goto terr; *p++ = '\0'; t->tm_sec = ATOI2(p); } yearset = 0; switch(strlen(arg)) { case 12: /* CCYYMMDDhhmm */ t->tm_year = ATOI2(arg); t->tm_year *= 100; yearset = 1; /* FALLTHROUGH */ case 10: /* YYMMDDhhmm */ if (yearset) { yearset = ATOI2(arg); t->tm_year += yearset; } else { yearset = ATOI2(arg); t->tm_year = yearset + 2000; } t->tm_year -= 1900; /* Convert to UNIX time. */ /* FALLTHROUGH */ case 8: /* MMDDhhmm */ t->tm_mon = ATOI2(arg); --t->tm_mon; /* Convert from 01-12 to 00-11 */ t->tm_mday = ATOI2(arg); t->tm_hour = ATOI2(arg); t->tm_min = ATOI2(arg); break; default: goto terr; } t->tm_isdst = -1; /* Figure out DST. 
*/ tv[0].tv_sec = tv[1].tv_sec = mktime(t); if (tv[0].tv_sec != -1) return tv[0].tv_sec; else terr: panic( "out of range or illegal time specification: [[CC]YY]MMDDhhmm[.SS]"); } static long * get_job_list(int argc, char *argv[], int *joblen) { int i, len; long *joblist; char *ep; joblist = NULL; len = argc; if (len > 0) { if ((joblist = malloc(len * sizeof(*joblist))) == NULL) panic("out of memory"); for (i = 0; i < argc; i++) { errno = 0; if ((joblist[i] = strtol(argv[i], &ep, 10)) < 0 || ep == argv[i] || *ep != '\0' || errno) panic("invalid job number"); } } *joblen = len; return joblist; } int main(int argc, char **argv) { int c; char queue = DEFAULT_AT_QUEUE; char queue_set = 0; char *pgm; int program = AT; /* our default program */ const char *options = "q:f:t:rmvldbc"; /* default options for at */ time_t timer; long *joblist; int joblen; joblist = NULL; joblen = 0; timer = -1; RELINQUISH_PRIVS /* Eat any leading paths */ if ((pgm = strrchr(argv[0], '/')) == NULL) pgm = argv[0]; else pgm++; namep = pgm; /* find out what this program is supposed to do */ if (strcmp(pgm, "atq") == 0) { program = ATQ; options = "q:v"; } else if (strcmp(pgm, "atrm") == 0) { program = ATRM; options = ""; } else if (strcmp(pgm, "batch") == 0) { program = BATCH; options = "f:q:mv"; } /* process whatever options we can process */ opterr=1; while ((c=getopt(argc, argv, options)) != -1) switch (c) { case 'v': /* verify time settings */ atverify = 1; break; case 'm': /* send mail when job is complete */ send_mail = 1; break; case 'f': atinput = optarg; break; case 'q': /* specify queue */ if (strlen(optarg) > 1) usage(); atqueue = queue = *optarg; if (!(islower(queue)||isupper(queue))) usage(); queue_set = 1; break; case 'd': warnx("-d is deprecated; use -r instead"); /* fall through to 'r' */ case 'r': if (program != AT) usage(); program = ATRM; options = ""; break; case 't': if (program != AT) usage(); timer = ttime(optarg); break; case 'l': if (program != AT) usage(); program = ATQ; options = "q:"; break; case 'b': if (program != AT) usage(); program = BATCH; options = "f:q:mv"; break; case 'c': program = CAT; options = ""; break; default: usage(); break; } /* end of options eating */ /* select our program */ if(!check_permission()) errx(EXIT_FAILURE, "you do not have permission to use this program"); switch (program) { case ATQ: REDUCE_PRIV(DAEMON_UID, DAEMON_GID) if (queue_set == 0) joblist = get_job_list(argc - optind, argv + optind, &joblen); list_jobs(joblist, joblen); break; case ATRM: REDUCE_PRIV(DAEMON_UID, DAEMON_GID) process_jobs(argc, argv, ATRM); break; case CAT: process_jobs(argc, argv, CAT); break; case AT: /* * If timer is > -1, then the user gave the time with -t. In that * case, it's already been set. If not, set it now. 
*/ if (timer == -1) timer = parsetime(argc, argv); if (atverify) { struct tm *tm = localtime(&timer); fprintf(stderr, "%s\n", asctime(tm)); } writefile(timer, queue); break; case BATCH: if (queue_set) queue = toupper(queue); else queue = DEFAULT_BATCH_QUEUE; if (argc > optind) timer = parsetime(argc, argv); else timer = time(NULL); if (atverify) { struct tm *tm = localtime(&timer); fprintf(stderr, "%s\n", asctime(tm)); } writefile(timer, queue); break; default: panic("internal error"); break; } exit(EXIT_SUCCESS); } Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.common =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.common (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.common (revision 303642) @@ -1,105 +1,105 @@ /* * òÏÓÓÉÊÓËÉÅ ÐÒÁÚÄÎÉËÉ * * $FreeBSD$ */ #ifndef _ru_RU_KOI8_R_common_ #define _ru_RU_KOI8_R_common_ LANG=ru_RU.KOI8-R -12 ÑÎ× äÅÎØ ÒÁÂÏÔÎÉËÁ ÐÒÏËÕÒÁÔÕÒÙ -13 ÑÎ× äÅÎØ ÒÏÓÓÉÊÓËÏÊ ÐÅÞÁÔÉ -14 ÑÎ× óÔÁÒÙÊ îÏ×ÙÊ ÇÏÄ -21 ÑÎ× äÅÎØ ÉÎÖÅÎÅÒÎÙÈ ×ÏÊÓË -25 ÑÎ× ôÁÔØÑÎÉÎ ÄÅÎØ. óÔÕÄÅÎÞÅÓËÉÊ ÐÒÁÚÄÎÉË - 8 ÆÅ× äÅÎØ ÒÏÓÓÉÊÓËÏÊ ÎÁÕËÉ -10 ÆÅ× äÅÎØ ÄÉÐÌÏÍÁÔÉÞÅÓËÏÇÏ ÒÁÂÏÔÎÉËÁ - 1 ÍÁÒ ÷ÓÅÍÉÒÎÙÊ ÄÅÎØ ÇÒÁÖÄÁÎÓËÏÊ ÏÂÏÒÏÎÙ +12 ÑÎ×. äÅÎØ ÒÁÂÏÔÎÉËÁ ÐÒÏËÕÒÁÔÕÒÙ +13 ÑÎ×. äÅÎØ ÒÏÓÓÉÊÓËÏÊ ÐÅÞÁÔÉ +14 ÑÎ×. óÔÁÒÙÊ îÏ×ÙÊ ÇÏÄ +21 ÑÎ×. äÅÎØ ÉÎÖÅÎÅÒÎÙÈ ×ÏÊÓË +25 ÑÎ×. ôÁÔØÑÎÉÎ ÄÅÎØ. óÔÕÄÅÎÞÅÓËÉÊ ÐÒÁÚÄÎÉË + 8 ÆÅ×Ò. äÅÎØ ÒÏÓÓÉÊÓËÏÊ ÎÁÕËÉ +10 ÆÅ×Ò. äÅÎØ ÄÉÐÌÏÍÁÔÉÞÅÓËÏÇÏ ÒÁÂÏÔÎÉËÁ + 1 ÍÁÒÔÁ ÷ÓÅÍÉÒÎÙÊ ÄÅÎØ ÇÒÁÖÄÁÎÓËÏÊ ÏÂÏÒÏÎÙ 03/SunSecond äÅÎØ ÒÁÂÏÔÎÉËÏ× ÇÅÏÄÅÚÉÉ É ËÁÒÔÏÇÒÁÆÉÉ -11 ÍÁÒ äÅÎØ ÒÁÂÏÔÎÉËÁ ÏÒÇÁÎÏ× ÎÁÒËÏËÏÎÔÒÏÌÑ -18 ÍÁÒ äÅÎØ ÎÁÌÏÇÏ×ÏÊ ÐÏÌÉÃÉÉ +11 ÍÁÒÔÁ äÅÎØ ÒÁÂÏÔÎÉËÁ ÏÒÇÁÎÏ× ÎÁÒËÏËÏÎÔÒÏÌÑ +18 ÍÁÒÔÁ äÅÎØ ÎÁÌÏÇÏ×ÏÊ ÐÏÌÉÃÉÉ 03/SunThird äÅÎØ ÒÁÂÏÔÎÉËÏ× ÔÏÒÇÏ×ÌÉ, ÂÙÔÏ×ÏÇÏ ÏÂÓÌÕÖÉ×ÁÎÉÑ ÎÁÓÅÌÅÎÉÑ É ÖÉÌÉÝÎÏ-ËÏÍÍÕÎÁÌØÎÏÇÏ ÈÏÚÑÊÓÔ×Á -27 ÍÁÒ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÔÅÁÔÒÁ -27 ÍÁÒ äÅÎØ ×ÎÕÔÒÅÎÎÉÈ ×ÏÊÓË - 1 ÁÐÒ äÅÎØ ÓÍÅÈÁ - 2 ÁÐÒ äÅÎØ ÅÄÉÎÅÎÉÑ ÎÁÒÏÄÏ× +27 ÍÁÒÔÁ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÔÅÁÔÒÁ +27 ÍÁÒÔÁ äÅÎØ ×ÎÕÔÒÅÎÎÉÈ ×ÏÊÓË + 1 ÁÐÒ. äÅÎØ ÓÍÅÈÁ + 2 ÁÐÒ. äÅÎØ ÅÄÉÎÅÎÉÑ ÎÁÒÏÄÏ× 04/SunFirst äÅÎØ ÇÅÏÌÏÇÁ -12 ÁÐÒ äÅÎØ ËÏÓÍÏÎÁ×ÔÉËÉ +12 ÁÐÒ. äÅÎØ ËÏÓÍÏÎÁ×ÔÉËÉ 04/SunSecond äÅÎØ ×ÏÊÓË ÐÒÏÔÉ×Ï×ÏÚÄÕÛÎÏÊ ÏÂÏÒÏÎÙ -26 ÁÐÒ äÅÎØ ÐÁÍÑÔÉ ÐÏÇÉÂÛÉÈ × ÒÁÄÉÁÃÉÏÎÎÙÈ Á×ÁÒÉÑÈ É ËÁÔÁÓÔÒÏÆÁÈ -30 ÁÐÒ äÅÎØ ÐÏÖÁÒÎÏÊ ÏÈÒÁÎÙ - 7 ÍÁÊ äÅÎØ ÒÁÄÉÏ -17 ÍÁÊ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÔÅÌÅËÏÍÍÕÎÉËÁÃÉÊ -18 ÍÁÊ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÍÕÚÅÅ× -24 ÍÁÊ äÅÎØ ÓÌÁ×ÑÎÓËÏÊ ÐÉÓØÍÅÎÎÏÓÔÉ É ËÕÌØÔÕÒÙ -26 ÍÁÊ äÅÎØ ÒÏÓÓÉÊÓËÏÇÏ ÐÒÅÄÐÒÉÎÉÍÁÔÅÌØÓÔ×Á -27 ÍÁÊ ïÂÝÅÒÏÓÓÉÊÓËÉÊ ÄÅÎØ ÂÉÂÌÉÏÔÅË -28 ÍÁÊ äÅÎØ ÐÏÇÒÁÎÉÞÎÉËÁ -30 ÍÁÊ äÅÎØ ÐÏÖÁÒÎÏÊ ÏÈÒÁÎÙ -31 ÍÁÊ äÅÎØ òÏÓÓÉÊÓËÏÊ áÄ×ÏËÁÔÕÒÙ +26 ÁÐÒ. äÅÎØ ÐÁÍÑÔÉ ÐÏÇÉÂÛÉÈ × ÒÁÄÉÁÃÉÏÎÎÙÈ Á×ÁÒÉÑÈ É ËÁÔÁÓÔÒÏÆÁÈ +30 ÁÐÒ. 
äÅÎØ ÐÏÖÁÒÎÏÊ ÏÈÒÁÎÙ + 7 ÍÁÑ äÅÎØ ÒÁÄÉÏ +17 ÍÁÑ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÔÅÌÅËÏÍÍÕÎÉËÁÃÉÊ +18 ÍÁÑ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÍÕÚÅÅ× +24 ÍÁÑ äÅÎØ ÓÌÁ×ÑÎÓËÏÊ ÐÉÓØÍÅÎÎÏÓÔÉ É ËÕÌØÔÕÒÙ +26 ÍÁÑ äÅÎØ ÒÏÓÓÉÊÓËÏÇÏ ÐÒÅÄÐÒÉÎÉÍÁÔÅÌØÓÔ×Á +27 ÍÁÑ ïÂÝÅÒÏÓÓÉÊÓËÉÊ ÄÅÎØ ÂÉÂÌÉÏÔÅË +28 ÍÁÑ äÅÎØ ÐÏÇÒÁÎÉÞÎÉËÁ +30 ÍÁÑ äÅÎØ ÐÏÖÁÒÎÏÊ ÏÈÒÁÎÙ +31 ÍÁÑ äÅÎØ òÏÓÓÉÊÓËÏÊ áÄ×ÏËÁÔÕÒÙ 05/SunLast äÅÎØ ÈÉÍÉËÁ - 1 ÉÀÎ äÅÎØ ÚÁÝÉÔÙ ÄÅÔÅÊ - 5 ÉÀÎ äÅÎØ ÜËÏÌÏÇÁ - 6 ÉÀÎ ðÕÛËÉÎÓËÉÊ ÄÅÎØ - 8 ÉÀÎ äÅÎØ ÓÏÃÉÁÌØÎÏÇÏ ÒÁÂÏÔÎÉËÁ + 1 ÉÀÎÑ äÅÎØ ÚÁÝÉÔÙ ÄÅÔÅÊ + 5 ÉÀÎÑ äÅÎØ ÜËÏÌÏÇÁ + 6 ÉÀÎÑ ðÕÛËÉÎÓËÉÊ ÄÅÎØ + 8 ÉÀÎÑ äÅÎØ ÓÏÃÉÁÌØÎÏÇÏ ÒÁÂÏÔÎÉËÁ 06/SunSecond äÅÎØ ÒÁÂÏÔÎÉËÏ× ÌÅÇËÏÊ ÐÒÏÍÙÛÌÅÎÎÏÓÔÉ 06/SunThird äÅÎØ ÍÅÄÉÃÉÎÓËÏÇÏ ÒÁÂÏÔÎÉËÁ -22 ÉÀÎ äÅÎØ ÐÁÍÑÔÉ É ÓËÏÒÂÉ (îÁÞÁÌÏ ÷ÅÌÉËÏÊ ïÔÅÞÅÓÔ×ÅÎÎÏÊ ÷ÏÊÎÙ, 1941 ÇÏÄ) -27 ÉÀÎ äÅÎØ ÍÏÌÏÄÅÖÉ -29 ÉÀÎ äÅÎØ ÐÁÒÔÉÚÁÎ É ÐÏÄÐÏÌØÝÉËÏ× +22 ÉÀÎÑ äÅÎØ ÐÁÍÑÔÉ É ÓËÏÒÂÉ (îÁÞÁÌÏ ÷ÅÌÉËÏÊ ïÔÅÞÅÓÔ×ÅÎÎÏÊ ÷ÏÊÎÙ, 1941 ÇÏÄ) +27 ÉÀÎÑ äÅÎØ ÍÏÌÏÄÅÖÉ +29 ÉÀÎÑ äÅÎØ ÐÁÒÔÉÚÁÎ É ÐÏÄÐÏÌØÝÉËÏ× 06/SatLast äÅÎØ ÉÚÏÂÒÅÔÁÔÅÌÑ É ÒÁÃÉÏÎÁÌÉÚÁÔÏÒÁ 07/SunFirst äÅÎØ ÒÁÂÏÔÎÉËÏ× ÍÏÒÓËÏÇÏ É ÒÅÞÎÏÇÏ ÆÌÏÔÁ 07/SunSecond äÅÎØ ÒÙÂÁËÁ 07/SunSecond äÅÎØ ÒÏÓÓÉÊÓËÏÊ ÐÏÞÔÙ 07/SunThird äÅÎØ ÍÅÔÁÌÌÕÒÇÁ 07/SunLast äÅÎØ ÷ÏÅÎÎÏ-íÏÒÓËÏÇÏ æÌÏÔÁ -28 ÉÀÌ äÅÎØ ËÒÅÝÅÎÉÑ òÕÓÉ - 6 Á×Ç äÅÎØ ÖÅÌÅÚÎÏÄÏÒÏÖÎÙÈ ×ÏÊÓË +28 ÉÀÌÑ äÅÎØ ËÒÅÝÅÎÉÑ òÕÓÉ + 6 Á×Ç. äÅÎØ ÖÅÌÅÚÎÏÄÏÒÏÖÎÙÈ ×ÏÊÓË 08/SunFirst äÅÎØ ÖÅÌÅÚÎÏÄÏÒÏÖÎÉËÁ -12 Á×Ç äÅÎØ ×ÏÅÎÎÏ-×ÏÚÄÕÛÎÙÈ ÓÉÌ +12 Á×Ç. äÅÎØ ×ÏÅÎÎÏ-×ÏÚÄÕÛÎÙÈ ÓÉÌ 08/SunSecond äÅÎØ ÓÔÒÏÉÔÅÌÑ 08/SunThird äÅÎØ ÷ÏÚÄÕÛÎÏÇÏ æÌÏÔÁ -22 Á×Ç äÅÎØ ÇÏÓÕÄÁÒÓÔ×ÅÎÎÏÇÏ ÆÌÁÇÁ -27 Á×Ç äÅÎØ ËÉÎÏ +22 Á×Ç. äÅÎØ ÇÏÓÕÄÁÒÓÔ×ÅÎÎÏÇÏ ÆÌÁÇÁ +27 Á×Ç. äÅÎØ ËÉÎÏ 08/SunLast äÅÎØ ÛÁÈÔÅÒÁ - 1 ÓÅÎ äÅÎØ ÚÎÁÎÉÊ - 2 ÓÅÎ äÅÎØ ÒÏÓÓÉÊÓËÏÊ Ç×ÁÒÄÉÉ - 3 ÓÅÎ äÅÎØ ÓÏÌÉÄÁÒÎÏÓÔÉ × ÂÏÒØÂÅ Ó ÔÅÒÒÏÒÉÚÍÏÍ - 4 ÓÅÎ äÅÎØ ÓÐÅÃÉÁÌÉÓÔÁ ÐÏ ÑÄÅÒÎÏÍÕ ÏÂÅÓÐÅÞÅÎÉÀ + 1 ÓÅÎÔ. äÅÎØ ÚÎÁÎÉÊ + 2 ÓÅÎÔ. äÅÎØ ÒÏÓÓÉÊÓËÏÊ Ç×ÁÒÄÉÉ + 3 ÓÅÎÔ. äÅÎØ ÓÏÌÉÄÁÒÎÏÓÔÉ × ÂÏÒØÂÅ Ó ÔÅÒÒÏÒÉÚÍÏÍ + 4 ÓÅÎÔ. äÅÎØ ÓÐÅÃÉÁÌÉÓÔÁ ÐÏ ÑÄÅÒÎÏÍÕ ÏÂÅÓÐÅÞÅÎÉÀ 09/SunFirst äÅÎØ ÒÁÂÏÔÎÉËÏ× ÎÅÆÔÑÎÏÊ É ÇÁÚÏ×ÏÊ ÐÒÏÍÙÛÌÅÎÎÏÓÔÉ 09/SunSecond äÅÎØ ÔÁÎËÉÓÔÁ 09/SunThird äÅÎØ ÒÁÂÏÔÎÉËÏ× ÌÅÓÁ -28 ÓÅÎ äÅÎØ ÒÁÂÏÔÎÉËÁ ÁÔÏÍÎÏÊ ÐÒÏÍÙÛÌÅÎÎÏÓÔÉ +28 ÓÅÎÔ. äÅÎØ ÒÁÂÏÔÎÉËÁ ÁÔÏÍÎÏÊ ÐÒÏÍÙÛÌÅÎÎÏÓÔÉ 09/SunLast äÅÎØ ÍÁÛÉÎÏÓÔÒÏÉÔÅÌÑ - 1 ÏËÔ äÅÎØ ÐÏÖÉÌÙÈ ÌÀÄÅÊ - 1 ÏËÔ äÅÎØ ÓÕÈÏÐÕÔÎÙÈ ×ÏÊÓË - 4 ÏËÔ äÅÎØ ËÏÓÍÉÞÅÓËÉÈ ×ÏÊÓË - 5 ÏËÔ äÅÎØ ÕÞÉÔÅÌÑ -14 ÏËÔ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÓÔÁÎÄÁÒÔÉÚÁÃÉÉ + 1 ÏËÔ. äÅÎØ ÐÏÖÉÌÙÈ ÌÀÄÅÊ + 1 ÏËÔ. äÅÎØ ÓÕÈÏÐÕÔÎÙÈ ×ÏÊÓË + 4 ÏËÔ. äÅÎØ ËÏÓÍÉÞÅÓËÉÈ ×ÏÊÓË + 5 ÏËÔ. äÅÎØ ÕÞÉÔÅÌÑ +14 ÏËÔ. íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÓÔÁÎÄÁÒÔÉÚÁÃÉÉ 10/SunSecond äÅÎØ ÒÁÂÏÔÎÉËÏ× ÓÅÌØÓËÏÇÏ ÈÏÚÑÊÓÔ×Á É ÐÅÒÅÒÁÂÁÔÙ×ÁÀÝÅÊ ÐÒÏÍÙÛÌÅÎÎÏÓÔÉ 10/SunThird äÅÎØ ÒÁÂÏÔÎÉËÏ× ÄÏÒÏÖÎÏÇÏ ÈÏÚÑÊÓÔ×Á -24 ÏËÔ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ïïî -25 ÏËÔ äÅÎØ ÔÁÍÏÖÅÎÎÉËÁ -30 ÏËÔ äÅÎØ ÐÁÍÑÔÉ ÖÅÒÔ× ÐÏÌÉÔÉÞÅÓËÉÈ ÒÅÐÒÅÓÓÉÊ +24 ÏËÔ. íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ïïî +25 ÏËÔ. äÅÎØ ÔÁÍÏÖÅÎÎÉËÁ +30 ÏËÔ. äÅÎØ ÐÁÍÑÔÉ ÖÅÒÔ× ÐÏÌÉÔÉÞÅÓËÉÈ ÒÅÐÒÅÓÓÉÊ 10/SunLast äÅÎØ ÒÁÂÏÔÎÉËÏ× Á×ÔÏÍÏÂÉÌØÎÏÇÏ ÔÒÁÎÓÐÏÒÔÁ - 7 ÎÏÑ äÅÎØ ÏËÔÑÂÒØÓËÏÊ ÒÅ×ÏÌÀÃÉÉ 1917 ÇÏÄÁ - 9 ÎÏÑ ÷ÓÅÍÉÒÎÙÊ ÄÅÎØ ËÁÞÅÓÔ×Á -10 ÎÏÑ äÅÎØ ÍÉÌÉÃÉÉ -16 ÎÏÑ äÅÎØ ÍÏÒÓËÏÊ ÐÅÈÏÔÙ -17 ÎÏÑ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÓÔÕÄÅÎÔÏ× -19 ÎÏÑ äÅÎØ ÒÁËÅÔÎÙÈ ×ÏÊÓË É ÁÒÔÉÌÌÅÒÉÉ -21 ÎÏÑ äÅÎØ ÒÁÂÏÔÎÉËÏ× ÎÁÌÏÇÏ×ÙÈ ÏÒÇÁÎÏ× -26 ÎÏÑ ÷ÓÅÍÉÒÎÙÊ ÄÅÎØ ÉÎÆÏÒÍÁÃÉÉ + 7 ÎÏÑÂ. äÅÎØ ÏËÔÑÂÒØÓËÏÊ ÒÅ×ÏÌÀÃÉÉ 1917 ÇÏÄÁ + 9 ÎÏÑÂ. ÷ÓÅÍÉÒÎÙÊ ÄÅÎØ ËÁÞÅÓÔ×Á +10 ÎÏÑÂ. äÅÎØ ÍÉÌÉÃÉÉ +16 ÎÏÑÂ. äÅÎØ ÍÏÒÓËÏÊ ÐÅÈÏÔÙ +17 ÎÏÑÂ. íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÓÔÕÄÅÎÔÏ× +19 ÎÏÑÂ. äÅÎØ ÒÁËÅÔÎÙÈ ×ÏÊÓË É ÁÒÔÉÌÌÅÒÉÉ +21 ÎÏÑÂ. äÅÎØ ÒÁÂÏÔÎÉËÏ× ÎÁÌÏÇÏ×ÙÈ ÏÒÇÁÎÏ× +26 ÎÏÑÂ. 
÷ÓÅÍÉÒÎÙÊ ÄÅÎØ ÉÎÆÏÒÍÁÃÉÉ 11/SunLast äÅÎØ ÍÁÔÅÒÉ - 1 ÄÅË ÷ÓÅÍÉÒÎÙÊ ÄÅÎØ ÂÏÒØÂÙ ÓÏ óðéäÏÍ - 3 ÄÅË äÅÎØ ÀÒÉÓÔÁ - 9 ÄÅË äÅÎØ çÅÒÏÅ× ïÔÅÞÅÓÔ×Á -12 ÄÅË äÅÎØ ëÏÎÓÔÉÔÕÃÉÉ -17 ÄÅË äÅÎØ ÒÁËÅÔÎÙÈ ×ÏÊÓË ÓÔÒÁÔÅÇÉÞÅÓËÏÇÏ ÎÁÚÎÁÞÅÎÉÑ -20 ÄÅË äÅÎØ ÒÁÂÏÔÎÉËÁ ÏÒÇÁÎÏ× ÂÅÚÏÐÁÓÎÏÓÔÉ -22 ÄÅË äÅÎØ ÜÎÅÒÇÅÔÉËÁ -27 ÄÅË äÅÎØ ÓÐÁÓÁÔÅÌÑ + 1 ÄÅË. ÷ÓÅÍÉÒÎÙÊ ÄÅÎØ ÂÏÒØÂÙ ÓÏ óðéäÏÍ + 3 ÄÅË. äÅÎØ ÀÒÉÓÔÁ + 9 ÄÅË. äÅÎØ çÅÒÏÅ× ïÔÅÞÅÓÔ×Á +12 ÄÅË. äÅÎØ ëÏÎÓÔÉÔÕÃÉÉ +17 ÄÅË. äÅÎØ ÒÁËÅÔÎÙÈ ×ÏÊÓË ÓÔÒÁÔÅÇÉÞÅÓËÏÇÏ ÎÁÚÎÁÞÅÎÉÑ +20 ÄÅË. äÅÎØ ÒÁÂÏÔÎÉËÁ ÏÒÇÁÎÏ× ÂÅÚÏÐÁÓÎÏÓÔÉ +22 ÄÅË. äÅÎØ ÜÎÅÒÇÅÔÉËÁ +27 ÄÅË. äÅÎØ ÓÐÁÓÁÔÅÌÑ #endif /* !_ru_RU_KOI8_R_common_ */ Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.holiday =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.holiday (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.holiday (revision 303642) @@ -1,25 +1,25 @@ /* * òÏÓÓÉÊÓËÉÅ ÐÒÁÚÄÎÉËÉ (ÎÅÒÁÂÏÞÉÅ "ËÒÁÓÎÙÅ" ÄÎÉ) * * $FreeBSD$ */ #ifndef _ru_RU_KOI8_R_holiday_ #define _ru_RU_KOI8_R_holiday_ LANG=ru_RU.KOI8-R - 1 ÑÎ× îÏ×ÙÊ ÇÏÄ - 2 ÑÎ× îÏ×ÏÇÏÄÎÉÅ ËÁÎÉËÕÌÙ - 3 ÑÎ× îÏ×ÏÇÏÄÎÉÅ ËÁÎÉËÕÌÙ - 4 ÑÎ× îÏ×ÏÇÏÄÎÉÅ ËÁÎÉËÕÌÙ - 5 ÑÎ× îÏ×ÏÇÏÄÎÉÅ ËÁÎÉËÕÌÙ - 7 ÑÎ× òÏÖÄÅÓÔ×Ï èÒÉÓÔÏ×Ï -23 ÆÅ× äÅÎØ ÚÁÝÉÔÎÉËÁ ïÔÅÞÅÓÔ×Á - 8 ÍÁÒ íÅÖÄÕÎÁÒÏÄÎÙÊ ÖÅÎÓËÉÊ ÄÅÎØ - 1 ÍÁÊ ðÒÁÚÄÎÉË ÷ÅÓÎÙ É ôÒÕÄÁ - 9 ÍÁÊ äÅÎØ ðÏÂÅÄÙ -12 ÉÀÎ äÅÎØ òÏÓÓÉÉ - 4 ÎÏÑ äÅÎØ ÎÁÒÏÄÎÏÇÏ ÅÄÉÎÓÔ×Á + 1 ÑÎ×. îÏ×ÙÊ ÇÏÄ + 2 ÑÎ×. îÏ×ÏÇÏÄÎÉÅ ËÁÎÉËÕÌÙ + 3 ÑÎ×. îÏ×ÏÇÏÄÎÉÅ ËÁÎÉËÕÌÙ + 4 ÑÎ×. îÏ×ÏÇÏÄÎÉÅ ËÁÎÉËÕÌÙ + 5 ÑÎ×. îÏ×ÏÇÏÄÎÉÅ ËÁÎÉËÕÌÙ + 7 ÑÎ×. òÏÖÄÅÓÔ×Ï èÒÉÓÔÏ×Ï +23 ÆÅ×Ò. äÅÎØ ÚÁÝÉÔÎÉËÁ ïÔÅÞÅÓÔ×Á + 8 ÍÁÒÔÁ íÅÖÄÕÎÁÒÏÄÎÙÊ ÖÅÎÓËÉÊ ÄÅÎØ + 1 ÍÁÑ ðÒÁÚÄÎÉË ÷ÅÓÎÙ É ôÒÕÄÁ + 9 ÍÁÑ äÅÎØ ðÏÂÅÄÙ +12 ÉÀÎÑ äÅÎØ òÏÓÓÉÉ + 4 ÎÏÑÂ. äÅÎØ ÎÁÒÏÄÎÏÇÏ ÅÄÉÎÓÔ×Á #endif /* !_ru_RU_KOI8_R_holiday_ */ Property changes on: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.holiday ___________________________________________________________________ Added: fbsd:notbinary ## -0,0 +1 ## +yes \ No newline at end of property Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.military =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.military (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.military (revision 303642) @@ -1,28 +1,28 @@ /* * äÎÉ ×ÏÉÎÓËÏÊ ÓÌÁ×Ù òÏÓÓÉÉ * * $FreeBSD$ */ #ifndef _ru_RU_KOI8_R_military_ #define _ru_RU_KOI8_R_military_ LANG=ru_RU.KOI8-R -27 ÑÎ× äÅÎØ ÓÎÑÔÉÑ ÂÌÏËÁÄÙ ÇÏÒÏÄÁ ìÅÎÉÎÇÒÁÄÁ (1944 ÇÏÄ) - 2 ÆÅ× äÅÎØ ÒÁÚÇÒÏÍÁ ÓÏ×ÅÔÓËÉÍÉ ×ÏÊÓËÁÍÉ ÎÅÍÅÃËÏ-ÆÁÛÉÓÔÓËÉÈ ×ÏÊÓË × óÔÁÌÉÎÇÒÁÄÓËÏÊ ÂÉÔ×Å (1943 ÇÏÄ) -23 ÆÅ× äÅÎØ ÐÏÂÅÄÙ ëÒÁÓÎÏÊ áÒÍÉÉ ÎÁÄ ËÁÊÚÅÒÏ×ÓËÉÍÉ ×ÏÊÓËÁÍÉ çÅÒÍÁÎÉÉ (1918 ÇÏÄ) -18 ÁÐÒ äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÉÈ ×ÏÉÎÏ× ËÎÑÚÑ áÌÅËÓÁÎÄÒÁ îÅ×ÓËÏÇÏ ÎÁÄ ÎÅÍÅÃËÉÍÉ ÒÙÃÁÒÑÍÉ ÎÁ þÕÄÓËÏÍ ÏÚÅÒÅ (ìÅÄÏ×ÏÅ ÐÏÂÏÉÝÅ, 1242 ÇÏÄ) -10 ÉÀÌ äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÏÊ ÁÒÍÉÉ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ ðÅÔÒÁ ðÅÒ×ÏÇÏ ÎÁÄ Û×ÅÄÁÍÉ × ðÏÌÔÁ×ÓËÏÍ ÓÒÁÖÅÎÉÉ (1709 ÇÏÄ) - 9 Á×Ç äÅÎØ ÐÅÒ×ÏÊ × ÒÏÓÓÉÊÓËÏÊ ÉÓÔÏÒÉÉ ÍÏÒÓËÏÊ ÐÏÂÅÄÙ ÒÕÓÓËÏÇÏ ÆÌÏÔÁ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ ðÅÔÒÁ ðÅÒ×ÏÇÏ ÎÁÄ Û×ÅÄÁÍÉ Õ ÍÙÓÁ çÁÎÇÕÔ (1714 ÇÏÄ) -23 Á×Ç äÅÎØ ÒÁÚÇÒÏÍÁ ÓÏ×ÅÔÓËÉÍÉ ×ÏÊÓËÁÍÉ ÎÅÍÅÃËÏ-ÆÁÛÉÓÔÓËÉÈ ×ÏÊÓË × ëÕÒÓËÏÊ ÂÉÔ×Å (1943 ÇÏÄ) - 2 ÓÅÎ äÅÎØ ÏËÏÎÞÁÎÉÑ ÷ÔÏÒÏÊ ÍÉÒÏ×ÏÊ ×ÏÊÎÙ (1945 ÇÏÄ) - 8 ÓÅÎ äÅÎØ âÏÒÏÄÉÎÓËÏÇÏ ÓÒÁÖÅÎÉÑ ÒÕÓÓËÏÊ ÁÒÍÉÉ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ í.é. ëÕÔÕÚÏ×Á Ó ÆÒÁÎÃÕÚÓËÏÊ ÁÒÍÉÅÊ (1812 ÇÏÄ) -11 ÓÅÎ äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÏÊ ÜÓËÁÄÒÙ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ æ.æ. 
õÛÁËÏ×Á ÎÁÄ ÔÕÒÅÃËÏÊ ÜÓËÁÄÒÏÊ Õ ÍÙÓÁ ôÅÎÄÒÁ (1790 ÇÏÄ) -21 ÓÅÎ äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÉÈ ÐÏÌËÏ× ×Ï ÇÌÁ×Å Ó ×ÅÌÉËÉÍ ËÎÑÚÅÍ äÍÉÔÒÉÅÍ äÏÎÓËÉÍ ÎÁÄ ÍÏÎÇÏÌÏ-ÔÁÔÁÒÓËÉÍÉ ×ÏÊÓËÁÍÉ × ëÕÌÉËÏ×ÓËÏÊ ÂÉÔ×Å (1380 ÇÏÄ) - 7 ÎÏÑ äÅÎØ ÏÓ×ÏÂÏÖÄÅÎÉÑ íÏÓË×Ù ÓÉÌÁÍÉ ÎÁÒÏÄÎÏÇÏ ÏÐÏÌÞÅÎÉÑ ÐÏÄ ÒÕËÏ×ÏÄÓÔ×ÏÍ ëÕÚØÍÙ íÉÎÉÎÁ É äÍÉÔÒÉÑ ðÏÖÁÒÓËÏÇÏ ÏÔ ÐÏÌØÓËÉÈ ÉÎÔÅÒ×ÅÎÔÏ× (1612 ÇÏÄ) - 1 ÄÅË äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÏÊ ÜÓËÁÄÒÙ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ ð.ó. îÁÈÉÍÏ×Á ÎÁÄ ÔÕÒÅÃËÏÊ ÜÓËÁÄÒÏÊ Õ ÍÙÓÁ óÉÎÏÐ (1853 ÇÏÄ) - 5 ÄÅË äÅÎØ ÎÁÞÁÌÁ ËÏÎÔÒÎÁÓÔÕÐÌÅÎÉÑ ÓÏ×ÅÔÓËÉÈ ×ÏÊÓË ÐÒÏÔÉ× ÎÅÍÅÃËÏ-ÆÁÛÉÓÔÓËÉÈ ×ÏÊÓË × ÂÉÔ×Å ÐÏÄ íÏÓË×ÏÊ (1941 ÇÏÄ) -24 ÄÅË äÅÎØ ×ÚÑÔÉÑ ÔÕÒÅÃËÏÊ ËÒÅÐÏÓÔÉ éÚÍÁÉÌ ÒÕÓÓËÉÍÉ ×ÏÊÓËÁÍÉ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ á.÷. óÕ×ÏÒÏ×Á (1790 ÇÏÄ) +27 ÑÎ×. äÅÎØ ÓÎÑÔÉÑ ÂÌÏËÁÄÙ ÇÏÒÏÄÁ ìÅÎÉÎÇÒÁÄÁ (1944 ÇÏÄ) + 2 ÆÅ×Ò. äÅÎØ ÒÁÚÇÒÏÍÁ ÓÏ×ÅÔÓËÉÍÉ ×ÏÊÓËÁÍÉ ÎÅÍÅÃËÏ-ÆÁÛÉÓÔÓËÉÈ ×ÏÊÓË × óÔÁÌÉÎÇÒÁÄÓËÏÊ ÂÉÔ×Å (1943 ÇÏÄ) +23 ÆÅ×Ò. äÅÎØ ÐÏÂÅÄÙ ëÒÁÓÎÏÊ áÒÍÉÉ ÎÁÄ ËÁÊÚÅÒÏ×ÓËÉÍÉ ×ÏÊÓËÁÍÉ çÅÒÍÁÎÉÉ (1918 ÇÏÄ) +18 ÁÐÒ. äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÉÈ ×ÏÉÎÏ× ËÎÑÚÑ áÌÅËÓÁÎÄÒÁ îÅ×ÓËÏÇÏ ÎÁÄ ÎÅÍÅÃËÉÍÉ ÒÙÃÁÒÑÍÉ ÎÁ þÕÄÓËÏÍ ÏÚÅÒÅ (ìÅÄÏ×ÏÅ ÐÏÂÏÉÝÅ, 1242 ÇÏÄ) +10 ÉÀÌÑ äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÏÊ ÁÒÍÉÉ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ ðÅÔÒÁ ðÅÒ×ÏÇÏ ÎÁÄ Û×ÅÄÁÍÉ × ðÏÌÔÁ×ÓËÏÍ ÓÒÁÖÅÎÉÉ (1709 ÇÏÄ) + 9 Á×Ç. äÅÎØ ÐÅÒ×ÏÊ × ÒÏÓÓÉÊÓËÏÊ ÉÓÔÏÒÉÉ ÍÏÒÓËÏÊ ÐÏÂÅÄÙ ÒÕÓÓËÏÇÏ ÆÌÏÔÁ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ ðÅÔÒÁ ðÅÒ×ÏÇÏ ÎÁÄ Û×ÅÄÁÍÉ Õ ÍÙÓÁ çÁÎÇÕÔ (1714 ÇÏÄ) +23 Á×Ç. äÅÎØ ÒÁÚÇÒÏÍÁ ÓÏ×ÅÔÓËÉÍÉ ×ÏÊÓËÁÍÉ ÎÅÍÅÃËÏ-ÆÁÛÉÓÔÓËÉÈ ×ÏÊÓË × ëÕÒÓËÏÊ ÂÉÔ×Å (1943 ÇÏÄ) + 2 ÓÅÎÔ. äÅÎØ ÏËÏÎÞÁÎÉÑ ÷ÔÏÒÏÊ ÍÉÒÏ×ÏÊ ×ÏÊÎÙ (1945 ÇÏÄ) + 8 ÓÅÎÔ. äÅÎØ âÏÒÏÄÉÎÓËÏÇÏ ÓÒÁÖÅÎÉÑ ÒÕÓÓËÏÊ ÁÒÍÉÉ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ í.é. ëÕÔÕÚÏ×Á Ó ÆÒÁÎÃÕÚÓËÏÊ ÁÒÍÉÅÊ (1812 ÇÏÄ) +11 ÓÅÎÔ. äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÏÊ ÜÓËÁÄÒÙ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ æ.æ. õÛÁËÏ×Á ÎÁÄ ÔÕÒÅÃËÏÊ ÜÓËÁÄÒÏÊ Õ ÍÙÓÁ ôÅÎÄÒÁ (1790 ÇÏÄ) +21 ÓÅÎÔ. äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÉÈ ÐÏÌËÏ× ×Ï ÇÌÁ×Å Ó ×ÅÌÉËÉÍ ËÎÑÚÅÍ äÍÉÔÒÉÅÍ äÏÎÓËÉÍ ÎÁÄ ÍÏÎÇÏÌÏ-ÔÁÔÁÒÓËÉÍÉ ×ÏÊÓËÁÍÉ × ëÕÌÉËÏ×ÓËÏÊ ÂÉÔ×Å (1380 ÇÏÄ) + 4 ÎÏÑÂ. äÅÎØ ÏÓ×ÏÂÏÖÄÅÎÉÑ íÏÓË×Ù ÓÉÌÁÍÉ ÎÁÒÏÄÎÏÇÏ ÏÐÏÌÞÅÎÉÑ ÐÏÄ ÒÕËÏ×ÏÄÓÔ×ÏÍ ëÕÚØÍÙ íÉÎÉÎÁ É äÍÉÔÒÉÑ ðÏÖÁÒÓËÏÇÏ ÏÔ ÐÏÌØÓËÉÈ ÉÎÔÅÒ×ÅÎÔÏ× (1612 ÇÏÄ) + 1 ÄÅË. äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÏÊ ÜÓËÁÄÒÙ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ ð.ó. îÁÈÉÍÏ×Á ÎÁÄ ÔÕÒÅÃËÏÊ ÜÓËÁÄÒÏÊ Õ ÍÙÓÁ óÉÎÏÐ (1853 ÇÏÄ) + 5 ÄÅË. äÅÎØ ÎÁÞÁÌÁ ËÏÎÔÒÎÁÓÔÕÐÌÅÎÉÑ ÓÏ×ÅÔÓËÉÈ ×ÏÊÓË ÐÒÏÔÉ× ÎÅÍÅÃËÏ-ÆÁÛÉÓÔÓËÉÈ ×ÏÊÓË × ÂÉÔ×Å ÐÏÄ íÏÓË×ÏÊ (1941 ÇÏÄ) +24 ÄÅË. äÅÎØ ×ÚÑÔÉÑ ÔÕÒÅÃËÏÊ ËÒÅÐÏÓÔÉ éÚÍÁÉÌ ÒÕÓÓËÉÍÉ ×ÏÊÓËÁÍÉ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ á.÷. óÕ×ÏÒÏ×Á (1790 ÇÏÄ) #endif /* !_ru_RU_KOI8_R_military_ */ Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.orthodox =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.orthodox (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.orthodox (revision 303642) @@ -1,36 +1,36 @@ /* * ðÒÁ×ÏÓÌÁ×ÎÙÅ ÐÒÁÚÄÎÉËÉ * * $FreeBSD$ */ #ifndef _ru_RU_KOI8_R_orthodox_ #define _ru_RU_KOI8_R_orthodox_ LANG=ru_RU.KOI8-R Paskha=ðÁÓÈÁ -21 ÓÅÎ òÏÖÄÅÓÔ×Ï ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ -27 ÓÅÎ ÷ÏÚÄ×ÉÖÅÎÉÅ ëÒÅÓÔÁ çÏÓÐÏÄÎÑ -14 ÏËÔ ðÏËÒÏ× ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ - 4 ÄÅË ÷×ÅÄÅÎÉÅ ×Ï ÈÒÁÍ ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ - 7 ÑÎ× òÏÖÄÅÓÔ×Ï èÒÉÓÔÏ×Ï -14 ÑÎ× ïÂÒÅÚÁÎÉÅ çÏÓÐÏÄÎÅ -19 ÑÎ× âÏÇÏÑ×ÌÅÎÉÅ ÉÌÉ ëÒÅÝÅÎÉÅ çÏÓÐÏÄÎÅ -15 ÆÅ× óÒÅÔÅÎÉÅ çÏÓÐÏÄÎÅ +21 ÓÅÎÔ. òÏÖÄÅÓÔ×Ï ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ +27 ÓÅÎÔ. ÷ÏÚÄ×ÉÖÅÎÉÅ ëÒÅÓÔÁ çÏÓÐÏÄÎÑ +14 ÏËÔ. ðÏËÒÏ× ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ + 4 ÄÅË. ÷×ÅÄÅÎÉÅ ×Ï ÈÒÁÍ ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ + 7 ÑÎ×. òÏÖÄÅÓÔ×Ï èÒÉÓÔÏ×Ï +14 ÑÎ×. ïÂÒÅÚÁÎÉÅ çÏÓÐÏÄÎÅ +19 ÑÎ×. âÏÇÏÑ×ÌÅÎÉÅ ÉÌÉ ëÒÅÝÅÎÉÅ çÏÓÐÏÄÎÅ +15 ÆÅ×Ò. óÒÅÔÅÎÉÅ çÏÓÐÏÄÎÅ ðÁÓÈÁ-48 ÷ÅÌÉËÉÊ ðÏÓÔ ðÁÓÈÁ-7 ÷ÈÏÄ çÏÓÐÏÄÅÎØ × éÅÒÕÓÁÌÉÍ. 
÷ÅÒÂÎÏÅ ÷ÏÓËÒÅÓÅÎØÅ ðÁÓÈÁ-3 ÷ÅÌÉËÉÊ þÅÔ×ÅÒÇ ðÁÓÈÁ-2 óÔÒÁÓÔÎÁÑ ðÑÔÎÉÃÁ ðÁÓÈÁ ÷ÏÓËÒÅÓÅÎÉÅ èÒÉÓÔÏ×Ï ðÁÓÈÁ+39 ÷ÏÚÎÅÓÅÎÉÅ ðÁÓÈÁ+49 äÅÎØ ó×ÑÔÏÊ ôÒÏÉÃÙ. ðÑÔÉÄÅÓÑÔÎÉÃÁ - 7 ÁÐÒ âÌÁÇÏ×ÅÝÅÎÉÅ ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ - 7 ÉÀÌ òÏÖÄÅÓÔ×Ï éÏÁÎÎÁ ðÒÅÄÔÅÞÉ -12 ÉÀÌ äÅÎØ Ó×ÑÔÙÈ ÐÅÒ×Ï×ÅÒÈÏ×ÎÙÈ ÁÐÏÓÔÏÌÏ× ðÅÔÒÁ É ðÁ×ÌÁ -19 Á×Ç ðÒÅÏÂÒÁÖÅÎÉÅ çÏÓÐÏÄÎÅ -28 Á×Ç õÓÐÅÎÉÅ ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ -11 ÓÅÎ õÓÅËÎÏ×ÅÎÉÅ ÇÌÁ×Ù éÏÁÎÎÁ ðÒÅÄÔÅÞÉ + 7 ÁÐÒ. âÌÁÇÏ×ÅÝÅÎÉÅ ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ + 7 ÉÀÌÑ òÏÖÄÅÓÔ×Ï éÏÁÎÎÁ ðÒÅÄÔÅÞÉ +12 ÉÀÌÑ äÅÎØ Ó×ÑÔÙÈ ÐÅÒ×Ï×ÅÒÈÏ×ÎÙÈ ÁÐÏÓÔÏÌÏ× ðÅÔÒÁ É ðÁ×ÌÁ +19 Á×Ç. ðÒÅÏÂÒÁÖÅÎÉÅ çÏÓÐÏÄÎÅ +28 Á×Ç. õÓÐÅÎÉÅ ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ +11 ÓÅÎÔ. õÓÅËÎÏ×ÅÎÉÅ ÇÌÁ×Ù éÏÁÎÎÁ ðÒÅÄÔÅÞÉ #endif /* !_ru_RU_KOI8_R_orthodox_ */ Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.pagan =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.pagan (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.pagan (revision 303642) @@ -1,42 +1,42 @@ /* * ñÚÙÞÅÓËÉÅ ÐÒÁÚÄÎÉËÉ * * $FreeBSD$ */ #ifndef _ru_RU_KOI8_R_pagan_ #define _ru_RU_KOI8_R_pagan_ LANG=ru_RU.KOI8-R Paskha=ðÁÓÈÁ -21 ÄÅË* úÉÍÎÅÅ ÓÏÌÎÃÅÓÔÏÑÎÉÅ -25 ÄÅË ëÏÌÑÄÁ (ÓÄ×ÉÎÕÔÏÅ ÚÉÍÎÅÅ ÓÏÌÎÃÅÓÔÏÑÎÉÅ) - 6 ÑÎ× äÅÎØ ëÁÝÅÑ É ÷ÅÌÅÓÁ -24 ÆÅ× äÅÎØ ÷ÅÌÅÓÁ -29 ÆÅ× äÅÎØ ëÁÝÅÑ - 1 ÍÁÒ äÅÎØ íÁÒÅÎÙ -14 ÍÁÒ îÏ×ÙÊ çÏÄ, ï×ÓÅÎØ ÍÁÌÙÊ +21 ÄÅË.* úÉÍÎÅÅ ÓÏÌÎÃÅÓÔÏÑÎÉÅ +25 ÄÅË. ëÏÌÑÄÁ (ÓÄ×ÉÎÕÔÏÅ ÚÉÍÎÅÅ ÓÏÌÎÃÅÓÔÏÑÎÉÅ) + 6 ÑÎ×. äÅÎØ ëÁÝÅÑ É ÷ÅÌÅÓÁ +24 ÆÅ×Ò. äÅÎØ ÷ÅÌÅÓÁ +29 ÆÅ×Ò. äÅÎØ ëÁÝÅÑ + 1 ÍÁÒÔÁ äÅÎØ íÁÒÅÎÙ +14 ÍÁÒÔÁ îÏ×ÙÊ çÏÄ, ï×ÓÅÎØ ÍÁÌÙÊ ðÁÓÈÁ-55 íÁÓÌÅÎÉÃÁ ðÁÓÈÁ+7 ëÒÁÓÎÁÑ çÏÒËÁ ðÁÓÈÁ+16 òÁÄÕÎÉÃÁ -20 ÍÁÒ* ÷ÅÓÅÎÎÉÅ ÒÁ×ÎÏÄÅÎÓÔ×ÉÅ - 7 ÁÐÒ äÅÎØ íÁÒÅÎÙ (ÓÄ×ÉÎÕÔÏÅ ×ÅÓÅÎÎÅÅ ÒÁ×ÎÏÄÅÎÓÔ×ÉÅ) - 6 ÍÁÊ äÅÎØ äÁÖØÂÏÇÁ, ï×ÓÅÎØ ÂÏÌØÛÏÊ -22 ÍÁÊ ñÒÉÌÉÎ äÅÎØ -15 ÉÀÎ äÅÎØ ôÒÉÇÌÁ×Á -21 ÉÀÎ* ìÅÔÎÅÅ ÓÏÌÎÃÅÓÔÏÑÎÉÅ - 1 ÉÀÌ òÕÓÁÌØÎÁÑ îÅÄÅÌÑ - 7 ÉÀÌ ëÕÐÁÌÁ (ÓÄ×ÉÎÕÔÏÅ ÌÅÔÎÅÅ ÓÏÌÎÃÅÓÔÏÑÎÉÅ) -27 ÉÀÌ ïÔÂÏÒ ÖÅÒÔ× ðÅÒÕÎÕ, ÒÕÓÁÌÉÉ - 2 Á×Ç ðÅÒÕÎÏ× äÅÎØ -21 Á×Ç äÅÎØ óÔÒÉÂÏÇÁ -28 Á×Ç õÓÐÅÎÉÅ úÌÁÔÏÇÏÒËÉ -14 ÓÅÎ äÅÎØ ÷ÏÌÈÁ úÍÅÅ×ÉÞÁ -22 ÓÅÎ* ðÏ×ÏÒÏÔ Ë ÚÉÍÅ (ÏÓÅÎÎÅÅ ÒÁ×ÎÏÄÅÎÓÔ×ÉÅ) -10 ÎÏÑ äÅÎØ íÁËÏÛÉ -21 ÎÏÑ äÅÎØ ó×ÁÒÏÇÁ É óÅÍÁÒÇÌÁ - 9 ÄÅË äÅÎØ äÁÖØÂÏÇÁ É íÁÒÅÎÙ +20 ÍÁÒÔÁ* ÷ÅÓÅÎÎÅÅ ÒÁ×ÎÏÄÅÎÓÔ×ÉÅ + 7 ÁÐÒ. äÅÎØ íÁÒÅÎÙ (ÓÄ×ÉÎÕÔÏÅ ×ÅÓÅÎÎÅÅ ÒÁ×ÎÏÄÅÎÓÔ×ÉÅ) + 6 ÍÁÑ äÅÎØ äÁÖØÂÏÇÁ, ï×ÓÅÎØ ÂÏÌØÛÏÊ +22 ÍÁÑ ñÒÉÌÉÎ äÅÎØ +15 ÉÀÎÑ äÅÎØ ôÒÉÇÌÁ×Á +21 ÉÀÎÑ* ìÅÔÎÅÅ ÓÏÌÎÃÅÓÔÏÑÎÉÅ + 1 ÉÀÌÑ òÕÓÁÌØÎÁÑ îÅÄÅÌÑ + 7 ÉÀÌÑ ëÕÐÁÌÁ (ÓÄ×ÉÎÕÔÏÅ ÌÅÔÎÅÅ ÓÏÌÎÃÅÓÔÏÑÎÉÅ) +27 ÉÀÌÑ ïÔÂÏÒ ÖÅÒÔ× ðÅÒÕÎÕ, ÒÕÓÁÌÉÉ + 2 Á×Ç. ðÅÒÕÎÏ× äÅÎØ +21 Á×Ç. äÅÎØ óÔÒÉÂÏÇÁ +28 Á×Ç. õÓÐÅÎÉÅ úÌÁÔÏÇÏÒËÉ +14 ÓÅÎÔ. äÅÎØ ÷ÏÌÈÁ úÍÅÅ×ÉÞÁ +22 ÓÅÎÔ.* ðÏ×ÏÒÏÔ Ë ÚÉÍÅ (ÏÓÅÎÎÅÅ ÒÁ×ÎÏÄÅÎÓÔ×ÉÅ) +10 ÎÏÑÂ. äÅÎØ íÁËÏÛÉ +21 ÎÏÑÂ. äÅÎØ ó×ÁÒÏÇÁ É óÅÍÁÒÇÌÁ + 9 ÄÅË. äÅÎØ äÁÖØÂÏÇÁ É íÁÒÅÎÙ #endif /* !_ru_RU_KOI8_R_pagan_ */ Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.common =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.common (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.common (revision 303642) @@ -1,105 +1,105 @@ /* * РоÑÑийÑкие праздники * * $FreeBSD$ */ #ifndef _ru_RU_UTF_8_common_ #define _ru_RU_UTF_8_common_ LANG=ru_RU.UTF-8 12 Ñнв. День работника прокуратуры 13 Ñнв. День роÑÑийÑкой печати 14 Ñнв. Старый Ðовый год 21 Ñнв. День инженерных войÑк 25 Ñнв. ТатьÑнин день. СтуденчеÑкий праздник 8 февр. День роÑÑийÑкой науки 10 февр. 
День дипломатичеÑкого работника 1 марта Ð’Ñемирный день гражданÑкой обороны 03/SunSecond День работников геодезии и картографии 11 марта День работника органов Ð½Ð°Ñ€ÐºÐ¾ÐºÐ¾Ð½Ñ‚Ñ€Ð¾Ð»Ñ 18 марта День налоговой полиции 03/SunThird День работников торговли, бытового обÑÐ»ÑƒÐ¶Ð¸Ð²Ð°Ð½Ð¸Ñ Ð½Ð°ÑÐµÐ»ÐµÐ½Ð¸Ñ Ð¸ жилищно-коммунального хозÑйÑтва 27 марта Международный день театра 27 марта День внутренних войÑк 1 апр. День Ñмеха 2 апр. День ÐµÐ´Ð¸Ð½ÐµÐ½Ð¸Ñ Ð½Ð°Ñ€Ð¾Ð´Ð¾Ð² 04/SunFirst День геолога 12 апр. День коÑмонавтики 04/SunSecond День войÑк противовоздушной обороны 26 апр. День памÑти погибших в радиационных авариÑÑ… и катаÑтрофах 30 апр. День пожарной охраны 7 Ð¼Ð°Ñ Ð”ÐµÐ½ÑŒ радио 17 Ð¼Ð°Ñ ÐœÐµÐ¶Ð´ÑƒÐ½Ð°Ñ€Ð¾Ð´Ð½Ñ‹Ð¹ день телекоммуникаций 18 Ð¼Ð°Ñ ÐœÐµÐ¶Ð´ÑƒÐ½Ð°Ñ€Ð¾Ð´Ð½Ñ‹Ð¹ день музеев 24 Ð¼Ð°Ñ Ð”ÐµÐ½ÑŒ ÑлавÑнÑкой пиÑьменноÑти и культуры 26 Ð¼Ð°Ñ Ð”ÐµÐ½ÑŒ роÑÑийÑкого предпринимательÑтва 27 Ð¼Ð°Ñ ÐžÐ±Ñ‰ÐµÑ€Ð¾ÑÑийÑкий день библиотек 28 Ð¼Ð°Ñ Ð”ÐµÐ½ÑŒ пограничника 30 Ð¼Ð°Ñ Ð”ÐµÐ½ÑŒ пожарной охраны 31 Ð¼Ð°Ñ Ð”ÐµÐ½ÑŒ РоÑÑийÑкой Ðдвокатуры 05/SunLast День химика 1 Ð¸ÑŽÐ½Ñ Ð”ÐµÐ½ÑŒ защиты детей 5 Ð¸ÑŽÐ½Ñ Ð”ÐµÐ½ÑŒ Ñколога 6 Ð¸ÑŽÐ½Ñ ÐŸÑƒÑˆÐºÐ¸Ð½Ñкий день 8 Ð¸ÑŽÐ½Ñ Ð”ÐµÐ½ÑŒ Ñоциального работника 06/SunSecond День работников легкой промышленноÑти 06/SunThird День медицинÑкого работника 22 Ð¸ÑŽÐ½Ñ Ð”ÐµÐ½ÑŒ памÑти и Ñкорби (Ðачало Великой ОтечеÑтвенной Войны, 1941 год) 27 Ð¸ÑŽÐ½Ñ Ð”ÐµÐ½ÑŒ молодежи 29 Ð¸ÑŽÐ½Ñ Ð”ÐµÐ½ÑŒ партизан и подпольщиков 06/SatLast День Ð¸Ð·Ð¾Ð±Ñ€ÐµÑ‚Ð°Ñ‚ÐµÐ»Ñ Ð¸ рационализатора 07/SunFirst День работников морÑкого и речного флота 07/SunSecond День рыбака 07/SunSecond День роÑÑийÑкой почты 07/SunThird День металлурга 07/SunLast День Военно-МорÑкого Флота 28 Ð¸ÑŽÐ»Ñ Ð”ÐµÐ½ÑŒ ÐºÑ€ÐµÑ‰ÐµÐ½Ð¸Ñ Ð ÑƒÑи 6 авг. День железнодорожных войÑк 08/SunFirst День железнодорожника 12 авг. День военно-воздушных Ñил 08/SunSecond День ÑÑ‚Ñ€Ð¾Ð¸Ñ‚ÐµÐ»Ñ 08/SunThird День Воздушного Флота 22 авг. День гоÑударÑтвенного флага 27 авг. День кино 08/SunLast День шахтера 1 Ñент. День знаний 2 Ñент. День роÑÑийÑкой гвардии 3 Ñент. День ÑолидарноÑти в борьбе Ñ Ñ‚ÐµÑ€Ñ€Ð¾Ñ€Ð¸Ð·Ð¼Ð¾Ð¼ 4 Ñент. День ÑпециалиÑта по Ñдерному обеÑпечению 09/SunFirst День работников нефтÑной и газовой промышленноÑти 09/SunSecond День танкиÑта 09/SunThird День работников леÑа 28 Ñент. День работника атомной промышленноÑти 09/SunLast День машиноÑÑ‚Ñ€Ð¾Ð¸Ñ‚ÐµÐ»Ñ 1 окт. День пожилых людей 1 окт. День Ñухопутных войÑк 4 окт. День коÑмичеÑких войÑк 5 окт. День ÑƒÑ‡Ð¸Ñ‚ÐµÐ»Ñ 14 окт. Международный день Ñтандартизации 10/SunSecond День работников ÑельÑкого хозÑйÑтва и перерабатывающей промышленноÑти 10/SunThird День работников дорожного хозÑйÑтва 24 окт. Международный день ООР25 окт. День таможенника 30 окт. День памÑти жертв политичеÑких репреÑÑий 10/SunLast День работников автомобильного транÑпорта - 7 ноÑб. День окт.ÑбрьÑкой революции 1917 года + 7 ноÑб. День октÑбрьÑкой революции 1917 года 9 ноÑб. Ð’Ñемирный день качеÑтва 10 ноÑб. День милиции 16 ноÑб. День морÑкой пехоты 17 ноÑб. Международный день Ñтудентов 19 ноÑб. День ракетных войÑк и артиллерии 21 ноÑб. День работников налоговых органов 26 ноÑб. Ð’Ñемирный день информации 11/SunLast День матери 1 дек. Ð’Ñемирный день борьбы Ñо СПИДом 3 дек. День юриÑта 9 дек. День Героев ОтечеÑтва 12 дек. День КонÑтитуции 17 дек. День ракетных войÑк ÑтратегичеÑкого Ð½Ð°Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ 20 дек. День работника органов безопаÑноÑти 22 дек. День Ñнергетика 27 дек. 
День ÑпаÑÐ°Ñ‚ÐµÐ»Ñ #endif /* !_ru_RU_UTF_8_common_ */ Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.military =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.military (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.military (revision 303642) @@ -1,28 +1,28 @@ /* * Дни воинÑкой Ñлавы РоÑÑии * * $FreeBSD$ */ #ifndef _ru_RU_UTF_8_military_ #define _ru_RU_UTF_8_military_ LANG=ru_RU.UTF-8 27 Ñнв. День ÑнÑÑ‚Ð¸Ñ Ð±Ð»Ð¾ÐºÐ°Ð´Ñ‹ города Ленинграда (1944 год) 2 февр. День разгрома ÑоветÑкими войÑками немецко-фашиÑÑ‚Ñких войÑк в СталинградÑкой битве (1943 год) 23 февр. День победы КраÑной Ðрмии над кайзеровÑкими войÑками Германии (1918 год) 18 апр. День победы руÑÑких воинов кнÑÐ·Ñ ÐлекÑандра ÐевÑкого над немецкими рыцарÑми на ЧудÑком озере (Ледовое побоище, 1242 год) 10 Ð¸ÑŽÐ»Ñ Ð”ÐµÐ½ÑŒ победы руÑÑкой армии под командованием Петра Первого над шведами в ПолтавÑком Ñражении (1709 год) 9 авг. День первой в роÑÑийÑкой иÑтории морÑкой победы руÑÑкого флота под командованием Петра Первого над шведами у мыÑа Гангут (1714 год) 23 авг. День разгрома ÑоветÑкими войÑками немецко-фашиÑÑ‚Ñких войÑк в КурÑкой битве (1943 год) 2 Ñент. День Ð¾ÐºÐ¾Ð½Ñ‡Ð°Ð½Ð¸Ñ Ð’Ñ‚Ð¾Ñ€Ð¾Ð¹ мировой войны (1945 год) 8 Ñент. День БородинÑкого ÑÑ€Ð°Ð¶ÐµÐ½Ð¸Ñ Ñ€ÑƒÑÑкой армии под командованием Ðœ.И. Кутузова Ñ Ñ„Ñ€Ð°Ð½Ñ†ÑƒÐ·Ñкой армией (1812 год) 11 Ñент. День победы руÑÑкой ÑÑкадры под командованием Ф.Ф. Ушакова над турецкой ÑÑкадрой у мыÑа Тендра (1790 год) 21 Ñент. День победы руÑÑких полков во главе Ñ Ð²ÐµÐ»Ð¸ÐºÐ¸Ð¼ кнÑзем Дмитрием ДонÑким над монголо-татарÑкими войÑками в КуликовÑкой битве (1380 год) - 7 ноÑб. День оÑÐ²Ð¾Ð±Ð¾Ð¶Ð´ÐµÐ½Ð¸Ñ ÐœÐ¾Ñквы Ñилами народного Ð¾Ð¿Ð¾Ð»Ñ‡ÐµÐ½Ð¸Ñ Ð¿Ð¾Ð´ руководÑтвом Кузьмы Минина и Ð”Ð¼Ð¸Ñ‚Ñ€Ð¸Ñ ÐŸÐ¾Ð¶Ð°Ñ€Ñкого от польÑких интервентов (1612 год) + 4 ноÑб. День оÑÐ²Ð¾Ð±Ð¾Ð¶Ð´ÐµÐ½Ð¸Ñ ÐœÐ¾Ñквы Ñилами народного Ð¾Ð¿Ð¾Ð»Ñ‡ÐµÐ½Ð¸Ñ Ð¿Ð¾Ð´ руководÑтвом Кузьмы Минина и Ð”Ð¼Ð¸Ñ‚Ñ€Ð¸Ñ ÐŸÐ¾Ð¶Ð°Ñ€Ñкого от польÑких интервентов (1612 год) 1 дек. День победы руÑÑкой ÑÑкадры под командованием П.С. Ðахимова над турецкой ÑÑкадрой у мыÑа Синоп (1853 год) 5 дек. День начала контрнаÑÑ‚ÑƒÐ¿Ð»ÐµÐ½Ð¸Ñ ÑоветÑких войÑк против немецко-фашиÑÑ‚Ñких войÑк в битве под МоÑквой (1941 год) 24 дек. День взÑÑ‚Ð¸Ñ Ñ‚ÑƒÑ€ÐµÑ†ÐºÐ¾Ð¹ крепоÑти Измаил руÑÑкими войÑками под командованием Ð.Ð’. Суворова (1790 год) #endif /* !_ru_RU_UTF_8_military_ */ Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.pagan =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.pagan (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.pagan (revision 303642) @@ -1,42 +1,42 @@ /* * ЯзычеÑкие праздники * * $FreeBSD$ */ #ifndef _ru_RU_UTF_8_pagan_ #define _ru_RU_UTF_8_pagan_ LANG=ru_RU.UTF-8 Paskha=ПаÑха 21 дек.* Зимнее ÑолнцеÑтоÑние 25 дек. КолÑда (Ñдвинутое зимнее ÑолнцеÑтоÑние) 6 Ñнв. День ÐšÐ°Ñ‰ÐµÑ Ð¸ ВелеÑа 24 февр. День ВелеÑа 29 февр. День ÐšÐ°Ñ‰ÐµÑ 1 марта День Марены 14 марта Ðовый Год, ОвÑень малый ПаÑха-55 МаÑленица ПаÑха+7 КраÑÐ½Ð°Ñ Ð“Ð¾Ñ€ÐºÐ° ПаÑха+16 Радуница 20 марта* ВеÑеннее равноденÑтвие 7 апр. 
День Марены (Ñдвинутое веÑеннее равноденÑтвие) 6 Ð¼Ð°Ñ Ð”ÐµÐ½ÑŒ Дажьбога, ОвÑень большой 22 Ð¼Ð°Ñ Ð¯Ñ€Ð¸Ð»Ð¸Ð½ День 15 Ð¸ÑŽÐ½Ñ Ð”ÐµÐ½ÑŒ Триглава 21 июнÑ* Летнее ÑолнцеÑтоÑние 1 Ð¸ÑŽÐ»Ñ Ð ÑƒÑÐ°Ð»ÑŒÐ½Ð°Ñ ÐÐµÐ´ÐµÐ»Ñ 7 Ð¸ÑŽÐ»Ñ ÐšÑƒÐ¿Ð°Ð»Ð° (Ñдвинутое летнее ÑолнцеÑтоÑние) 27 Ð¸ÑŽÐ»Ñ ÐžÑ‚Ð±Ð¾Ñ€ жертв Перуну, руÑалии 2 авг. Перунов День 21 авг. День Стрибога 28 авг. УÑпение Златогорки 14 Ñент. День Волха Змеевича 22 Ñент.* Поворот к зиме (оÑеннее равноденÑтвие) 10 ноÑб. День Макоши -21 ноÑб. День Сварога и Семартагла +21 ноÑб. День Сварога и Семаргла 9 дек. День Дажьбога и Марены #endif /* !_ru_RU_UTF_8_pagan_ */ Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/io.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/io.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/io.c (revision 303642) @@ -1,503 +1,502 @@ /*- * Copyright (c) 1989, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1989, 1993\n\ The Regents of the University of California. 
All rights reserved.\n"; #endif #if 0 #ifndef lint static char sccsid[] = "@(#)calendar.c 8.3 (Berkeley) 3/25/94"; #endif #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include #include "pathnames.h" #include "calendar.h" enum { T_OK = 0, T_ERR, T_PROCESS, }; const char *calendarFile = "calendar"; /* default calendar file */ static const char *calendarHomes[] = {".calendar", _PATH_INCLUDE}; /* HOME */ static const char *calendarNoMail = "nomail";/* don't send mail if file exists */ static char path[MAXPATHLEN]; struct fixs neaster, npaskha, ncny, nfullmoon, nnewmoon; struct fixs nmarequinox, nsepequinox, njunsolstice, ndecsolstice; static int cal_parse(FILE *in, FILE *out); static StringList *definitions = NULL; static struct event *events[MAXCOUNT]; static char *extradata[MAXCOUNT]; static void trimlr(char **buf) { char *walk = *buf; char *last; while (isspace(*walk)) walk++; if (*walk != '\0') { last = walk + strlen(walk) - 1; while (last > walk && isspace(*last)) last--; *(last+1) = 0; } *buf = walk; } static FILE * cal_fopen(const char *file) { FILE *fp; char *home = getenv("HOME"); unsigned int i; if (home == NULL || *home == '\0') { warnx("Cannot get home directory"); return (NULL); } if (chdir(home) != 0) { warnx("Cannot enter home directory"); return (NULL); } - for (i = 0; i < sizeof(calendarHomes)/sizeof(calendarHomes[0]) ; i++) { + for (i = 0; i < nitems(calendarHomes); i++) { if (chdir(calendarHomes[i]) != 0) continue; if ((fp = fopen(file, "r")) != NULL) return (fp); } warnx("can't open calendar file \"%s\"", file); return (NULL); } static int token(char *line, FILE *out, bool *skip) { char *walk, c, a; if (strncmp(line, "endif", 5) == 0) { *skip = false; return (T_OK); } if (*skip) return (T_OK); if (strncmp(line, "include", 7) == 0) { walk = line + 7; trimlr(&walk); if (*walk == '\0') { warnx("Expecting arguments after #include"); return (T_ERR); } if (*walk != '<' && *walk != '\"') { warnx("Expecting '<' or '\"' after #include"); return (T_ERR); } a = *walk; walk++; c = walk[strlen(walk) - 1]; switch(c) { case '>': if (a != '<') { warnx("Unterminated include expecting '\"'"); return (T_ERR); } break; case '\"': if (a != '\"') { warnx("Unterminated include expecting '>'"); return (T_ERR); } break; default: warnx("Unterminated include expecting '%c'", a == '<' ?
'>' : '\"' ); return (T_ERR); } walk[strlen(walk) - 1] = '\0'; if (cal_parse(cal_fopen(walk), out)) return (T_ERR); return (T_OK); } if (strncmp(line, "define", 6) == 0) { if (definitions == NULL) definitions = sl_init(); walk = line + 6; trimlr(&walk); if (*walk == '\0') { warnx("Expecting arguments after #define"); return (T_ERR); } sl_add(definitions, strdup(walk)); return (T_OK); } if (strncmp(line, "ifndef", 6) == 0) { walk = line + 6; trimlr(&walk); if (*walk == '\0') { warnx("Expecting arguments after #ifndef"); return (T_ERR); } if (definitions != NULL && sl_find(definitions, walk) != NULL) *skip = true; return (T_OK); } return (T_PROCESS); } #define REPLACE(string, slen, struct_) \ if (strncasecmp(buf, (string), (slen)) == 0 && buf[(slen)]) { \ if (struct_.name != NULL) \ free(struct_.name); \ if ((struct_.name = strdup(buf + (slen))) == NULL) \ errx(1, "cannot allocate memory"); \ struct_.len = strlen(buf + (slen)); \ continue; \ } static int cal_parse(FILE *in, FILE *out) { char *line = NULL; char *buf; size_t linecap = 0; ssize_t linelen; ssize_t l; static int d_first = -1; static int count = 0; int i; int month[MAXCOUNT]; int day[MAXCOUNT]; int year[MAXCOUNT]; bool skip = false; char dbuf[80]; char *pp, p; struct tm tm; int flags; /* Unused */ tm.tm_sec = 0; tm.tm_min = 0; tm.tm_hour = 0; tm.tm_wday = 0; if (in == NULL) return (1); while ((linelen = getline(&line, &linecap, in)) > 0) { if (*line == '#') { switch (token(line+1, out, &skip)) { case T_ERR: free(line); return (1); case T_OK: continue; case T_PROCESS: break; default: break; } } if (skip) continue; buf = line; for (l = linelen; l > 0 && isspace((unsigned char)buf[l - 1]); l--) ; buf[l] = '\0'; if (buf[0] == '\0') continue; /* Parse special definitions: LANG, Easter, Paskha etc */ if (strncmp(buf, "LANG=", 5) == 0) { (void)setlocale(LC_ALL, buf + 5); d_first = (*nl_langinfo(D_MD_ORDER) == 'd'); setnnames(); continue; } REPLACE("Easter=", 7, neaster); REPLACE("Paskha=", 7, npaskha); REPLACE("ChineseNewYear=", 15, ncny); REPLACE("NewMoon=", 8, nnewmoon); REPLACE("FullMoon=", 9, nfullmoon); REPLACE("MarEquinox=", 11, nmarequinox); REPLACE("SepEquinox=", 11, nsepequinox); REPLACE("JunSolstice=", 12, njunsolstice); REPLACE("DecSolstice=", 12, ndecsolstice); if (strncmp(buf, "SEQUENCE=", 9) == 0) { setnsequences(buf + 9); continue; } /* * If the line starts with a tab, the data has to be * added to the previous line */ if (buf[0] == '\t') { for (i = 0; i < count; i++) event_continue(events[i], buf); continue; } /* Get rid of leading spaces (non-standard) */ while (isspace((unsigned char)buf[0])) memcpy(buf, buf + 1, strlen(buf)); /* No tab in the line, then not a valid line */ if ((pp = strchr(buf, '\t')) == NULL) continue; /* Trim spaces in front of the tab */ while (isspace((unsigned char)pp[-1])) pp--; p = *pp; *pp = '\0'; if ((count = parsedaymonth(buf, year, month, day, &flags, extradata)) == 0) continue; *pp = p; if (count < 0) { /* Show error status based on return value */ if (debug) fprintf(stderr, "Ignored: %s\n", buf); if (count == -1) continue; count = -count + 1; } /* Find the last tab */ while (pp[1] == '\t') pp++; if (d_first < 0) d_first = (*nl_langinfo(D_MD_ORDER) == 'd'); for (i = 0; i < count; i++) { tm.tm_mon = month[i] - 1; tm.tm_mday = day[i]; tm.tm_year = year[i] - 1900; (void)strftime(dbuf, sizeof(dbuf), d_first ? "%e %b" : "%b %e", &tm); if (debug) fprintf(stderr, "got %s\n", pp); events[i] = event_add(year[i], month[i], day[i], dbuf, ((flags &= F_VARIABLE) != 0) ? 
1 : 0, pp, extradata[i]); } } free(line); fclose(in); return (0); } void cal(void) { FILE *fpin; FILE *fpout; int i; for (i = 0; i < MAXCOUNT; i++) extradata[i] = (char *)calloc(1, 20); if ((fpin = opencalin()) == NULL) return; if ((fpout = opencalout()) == NULL) { fclose(fpin); return; } if (cal_parse(fpin, fpout)) return; event_print_all(fpout); closecal(fpout); } FILE * opencalin(void) { struct stat sbuf; FILE *fpin; /* open up calendar file */ if ((fpin = fopen(calendarFile, "r")) == NULL) { if (doall) { if (chdir(calendarHomes[0]) != 0) return (NULL); if (stat(calendarNoMail, &sbuf) == 0) return (NULL); if ((fpin = fopen(calendarFile, "r")) == NULL) return (NULL); } else { fpin = cal_fopen(calendarFile); } } return (fpin); } FILE * opencalout(void) { int fd; /* not reading all calendar files, just set output to stdout */ if (!doall) return (stdout); /* set output to a temporary file, so if no output don't send mail */ snprintf(path, sizeof(path), "%s/_calXXXXXX", _PATH_TMP); if ((fd = mkstemp(path)) < 0) return (NULL); return (fdopen(fd, "w+")); } void closecal(FILE *fp) { uid_t uid; struct stat sbuf; int nread, pdes[2], status; char buf[1024]; if (!doall) return; rewind(fp); if (fstat(fileno(fp), &sbuf) || !sbuf.st_size) goto done; if (pipe(pdes) < 0) goto done; switch (fork()) { case -1: /* error */ (void)close(pdes[0]); (void)close(pdes[1]); goto done; case 0: /* child -- set stdin to pipe output */ if (pdes[0] != STDIN_FILENO) { (void)dup2(pdes[0], STDIN_FILENO); (void)close(pdes[0]); } (void)close(pdes[1]); uid = geteuid(); if (setuid(getuid()) < 0) { warnx("setuid failed"); _exit(1); } if (setgid(getegid()) < 0) { warnx("setgid failed"); _exit(1); } if (setuid(uid) < 0) { warnx("setuid failed"); _exit(1); } execl(_PATH_SENDMAIL, "sendmail", "-i", "-t", "-F", "\"Reminder Service\"", (char *)NULL); warn(_PATH_SENDMAIL); _exit(1); } /* parent -- write to pipe input */ (void)close(pdes[0]); write(pdes[1], "From: \"Reminder Service\" <", 26); write(pdes[1], pw->pw_name, strlen(pw->pw_name)); write(pdes[1], ">\nTo: <", 7); write(pdes[1], pw->pw_name, strlen(pw->pw_name)); write(pdes[1], ">\nSubject: ", 11); write(pdes[1], dayname, strlen(dayname)); write(pdes[1], "'s Calendar\nPrecedence: bulk\n\n", 30); while ((nread = read(fileno(fp), buf, sizeof(buf))) > 0) (void)write(pdes[1], buf, nread); (void)close(pdes[1]); done: (void)fclose(fp); (void)unlink(path); while (wait(&status) >= 0); } Index: user/alc/PQ_LAUNDRY/usr.bin/checknr/checknr.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/checknr/checknr.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/checknr/checknr.c (revision 303642) @@ -1,647 +1,646 @@ /* * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #if 0 #ifndef lint static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); /* * checknr: check an nroff/troff input file for matching macro calls. * we also attempt to match size and font changes, but only the embedded * kind. These must end in \s0 and \fP resp. Maybe more sophistication * later but for now think of these restrictions as contributions to * structured typesetting. */ #include -#define _WITH_GETLINE #include #include #include #include #define MAXSTK 100 /* Stack size */ #define MAXBR 100 /* Max number of bracket pairs known */ #define MAXCMDS 600 /* Max number of commands known */ static void addcmd(char *); static void addmac(const char *); static int binsrch(const char *); static void checkknown(const char *); static void chkcmd(const char *, const char *); static void complain(int); static int eq(const char *, const char *); static void nomatch(const char *); static void pe(int); static void process(FILE *); static void prop(int); static void usage(void); /* * The stack on which we remember what we've seen so far. */ static struct stkstr { int opno; /* number of opening bracket */ int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */ int parm; /* parm to size, font, etc */ int lno; /* line number */ } stk[MAXSTK]; static int stktop; /* * The kinds of opening and closing brackets. */ static struct brstr { const char *opbr; const char *clbr; } br[MAXBR] = { /* A few bare bones troff commands */ #define SZ 0 {"sz", "sz"}, /* also \s */ #define FT 1 {"ft", "ft"}, /* also \f */ /* the -mm package */ {"AL", "LE"}, {"AS", "AE"}, {"BL", "LE"}, {"BS", "BE"}, {"DF", "DE"}, {"DL", "LE"}, {"DS", "DE"}, {"FS", "FE"}, {"ML", "LE"}, {"NS", "NE"}, {"RL", "LE"}, {"VL", "LE"}, /* the -ms package */ {"AB", "AE"}, {"BD", "DE"}, {"CD", "DE"}, {"DS", "DE"}, {"FS", "FE"}, {"ID", "DE"}, {"KF", "KE"}, {"KS", "KE"}, {"LD", "DE"}, {"LG", "NL"}, {"QS", "QE"}, {"RS", "RE"}, {"SM", "NL"}, {"XA", "XE"}, {"XS", "XE"}, /* The -me package */ {"(b", ")b"}, {"(c", ")c"}, {"(d", ")d"}, {"(f", ")f"}, {"(l", ")l"}, {"(q", ")q"}, {"(x", ")x"}, {"(z", ")z"}, /* The -mdoc package */ {"Ao", "Ac"}, {"Bd", "Ed"}, {"Bk", "Ek"}, {"Bo", "Bc"}, {"Do", "Dc"}, {"Fo", "Fc"}, {"Oo", "Oc"}, {"Po", "Pc"}, {"Qo", "Qc"}, {"Rs", "Re"}, {"So", "Sc"}, {"Xo", "Xc"}, /* Things needed by preprocessors */ {"EQ", "EN"}, {"TS", "TE"}, /* Refer */ {"[", "]"}, {0, 0} }; /* * All commands known to nroff, plus macro packages. * Used so we can complain about unrecognized commands. 
*/ static const char *knowncmds[MAXCMDS] = { "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q", "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B", "B" , "B1", "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT", "Cd", "Cm", "D", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc", "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM", "EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er", "Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx", "H", "H" , "HC", "HD", "HM", "HO", "HU", "I", "I" , "ID", "IE", "IH", "IM", "IP", "IX", "IZ", "Ic", "In", "It", "KD", "KE", "KF", "KQ", "KS", "LB", "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF", "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd", "Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op", "Os", "Ot", "Ox", "P", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY", "Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql", "Qo", "Qq", "R", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "Re", "Rs", "S", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy", "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt", "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo", "Xr", "[", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "\\{", "\\}", "]", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "chop", "cs", "ct", "cu", "da", "de", "di", "dl", "dn", "do", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el", "em", "eo", "ep", "ev", "evc", "ex", "fallback", "fc", "feature", "fi", "fl", "flig", "fo", "fp", "ft", "ftr", "fz", "fzoom", "hc", "he", "hidechar", "hl", "hp", "ht", "hw", "hx", "hy", "hylang", "i", "i" , "ie", "if", "ig", "in", "ip", "it", "ix", "kern", "kernafter", "kernbefore", "kernpair", "lc", "lc_ctype", "lg", "lhang", "li", "ll", "ln", "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx", "of", "oh", "os", "pa", "papersize", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps", "q", "q" , "r", "r" , "rb", "rd", "re", "recursionlimit", "return", "rhang", "rm", "rn", "ro", "rr", "rs", "rt", "sb", "sc", "sh", "shift", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "track", "u", "uf", "uh", "ul", "vs", "wh", "xflag", "xp", "yr", 0 }; static int lineno; /* current line number in input file */ static const char *cfilename; /* name of current file */ static int nfiles; /* number of files to process */ static int fflag; /* -f: ignore \f */ static int sflag; /* -s: ignore \s */ static int ncmds; /* size of knowncmds */ static int slot; /* slot in knowncmds found by binsrch */ int main(int argc, char **argv) { FILE *f; int i; char *cp; 
char b1[4]; /* Figure out how many known commands there are */ while (knowncmds[ncmds]) ncmds++; while (argc > 1 && argv[1][0] == '-') { switch(argv[1][1]) { /* -a: add pairs of macros */ case 'a': i = strlen(argv[1]) - 2; if (i % 6 != 0) usage(); /* look for empty macro slots */ for (i=0; br[i].opbr; i++) ; for (cp=argv[1]+3; cp[-1]; cp += 6) { char *tmp; if (i >= MAXBR) errx(1, "too many pairs"); if ((tmp = malloc(3)) == NULL) err(1, "malloc"); strlcpy(tmp, cp, 3); br[i].opbr = tmp; if ((tmp = malloc(3)) == NULL) err(1, "malloc"); strlcpy(tmp, cp+3, 3); br[i].clbr = tmp; addmac(br[i].opbr); /* knows pairs are also known cmds */ addmac(br[i].clbr); i++; } break; /* -c: add known commands */ case 'c': i = strlen(argv[1]) - 2; if (i % 3 != 0) usage(); for (cp=argv[1]+3; cp[-1]; cp += 3) { if (cp[2] && cp[2] != '.') usage(); strncpy(b1, cp, 2); b1[2] = '\0'; addmac(b1); } break; /* -f: ignore font changes */ case 'f': fflag = 1; break; /* -s: ignore size changes */ case 's': sflag = 1; break; default: usage(); } argc--; argv++; } nfiles = argc - 1; if (nfiles > 0) { for (i = 1; i < argc; i++) { cfilename = argv[i]; f = fopen(cfilename, "r"); if (f == NULL) warn("%s", cfilename); else { process(f); fclose(f); } } } else { cfilename = "stdin"; process(stdin); } exit(0); } static void usage(void) { fprintf(stderr, "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n"); exit(1); } static void process(FILE *f) { int i, n; char mac[64]; /* The current macro or nroff command */ char *line; size_t linecap; int pl; line = NULL; linecap = 0; stktop = -1; for (lineno = 1; getline(&line, &linecap, f) > 0; lineno++) { if (line[0] == '.') { /* * find and isolate the macro/command name. */ strncpy(mac, line+1, 4); if (isspace(mac[0])) { pe(lineno); printf("Empty command\n"); } else if (isspace(mac[1])) { mac[1] = 0; } else if (isspace(mac[2])) { mac[2] = 0; } else if (mac[0] != '\\' || mac[1] != '\"') { pe(lineno); printf("Command too long\n"); } /* * Is it a known command? */ checkknown(mac); /* * Should we add it? */ if (eq(mac, "de")) addcmd(line); chkcmd(line, mac); } /* * At this point we process the line looking * for \s and \f. */ for (i = 0; line[i]; i++) if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) { if (!sflag && line[++i] == 's') { pl = line[++i]; if (isdigit(pl)) { n = pl - '0'; pl = ' '; } else n = 0; while (isdigit(line[++i])) n = 10 * n + line[i] - '0'; i--; if (n == 0) { if (stktop >= 0 && stk[stktop].opno == SZ) { stktop--; } else { pe(lineno); printf("unmatched \\s0\n"); } } else { stk[++stktop].opno = SZ; stk[stktop].pl = pl; stk[stktop].parm = n; stk[stktop].lno = lineno; } } else if (!fflag && line[i] == 'f') { n = line[++i]; if (n == 'P') { if (stktop >= 0 && stk[stktop].opno == FT) { stktop--; } else { pe(lineno); printf("unmatched \\fP\n"); } } else { stk[++stktop].opno = FT; stk[stktop].pl = 1; stk[stktop].parm = n; stk[stktop].lno = lineno; } } } } free(line); /* * We've hit the end and look at all this stuff that hasn't been * matched yet! Complain, complain. 
*/ for (i = stktop; i >= 0; i--) { complain(i); } } static void complain(int i) { pe(stk[i].lno); printf("Unmatched "); prop(i); printf("\n"); } static void prop(int i) { if (stk[i].pl == 0) printf(".%s", br[stk[i].opno].opbr); else switch(stk[i].opno) { case SZ: printf("\\s%c%d", stk[i].pl, stk[i].parm); break; case FT: printf("\\f%c", stk[i].parm); break; default: printf("Bug: stk[%d].opno = %d = .%s, .%s", i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr); } } static void chkcmd(const char *line __unused, const char *mac) { int i; /* * Check to see if it matches top of stack. */ if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) stktop--; /* OK. Pop & forget */ else { /* No. Maybe it's an opener */ for (i=0; br[i].opbr; i++) { if (eq(mac, br[i].opbr)) { /* Found. Push it. */ stktop++; stk[stktop].opno = i; stk[stktop].pl = 0; stk[stktop].parm = 0; stk[stktop].lno = lineno; break; } /* * Maybe it's an unmatched closer. * NOTE: this depends on the fact * that none of the closers can be * openers too. */ if (eq(mac, br[i].clbr)) { nomatch(mac); break; } } } } static void nomatch(const char *mac) { int i, j; /* * Look for a match further down on stack * If we find one, it suggests that the stuff in * between is supposed to match itself. */ for (j=stktop; j>=0; j--) if (eq(mac,br[stk[j].opno].clbr)) { /* Found. Make a good diagnostic. */ if (j == stktop-2) { /* * Check for special case \fx..\fR and don't * complain. */ if (stk[j+1].opno==FT && stk[j+1].parm!='R' && stk[j+2].opno==FT && stk[j+2].parm=='R') { stktop = j -1; return; } /* * We have two unmatched frobs. Chances are * they were intended to match, so we mention * them together. */ pe(stk[j+1].lno); prop(j+1); printf(" does not match %d: ", stk[j+2].lno); prop(j+2); printf("\n"); } else for (i=j+1; i <= stktop; i++) { complain(i); } stktop = j-1; return; } /* Didn't find one. Throw this away. */ pe(lineno); printf("Unmatched .%s\n", mac); } /* eq: are two strings equal? */ static int eq(const char *s1, const char *s2) { return (strcmp(s1, s2) == 0); } /* print the first part of an error message, given the line number */ static void pe(int linen) { if (nfiles > 1) printf("%s: ", cfilename); printf("%d: ", linen); } static void checkknown(const char *mac) { if (eq(mac, ".")) return; if (binsrch(mac) >= 0) return; if (mac[0] == '\\' && mac[1] == '"') /* comments */ return; pe(lineno); printf("Unknown command: .%s\n", mac); } /* * We have a .de xx line in "line". Add xx to the list of known commands. */ static void addcmd(char *line) { char *mac; /* grab the macro being defined */ mac = line+4; while (isspace(*mac)) mac++; if (*mac == 0) { pe(lineno); printf("illegal define: %s\n", line); return; } mac[2] = 0; if (isspace(mac[1]) || mac[1] == '\\') mac[1] = 0; if (ncmds >= MAXCMDS) { printf("Only %d known commands allowed\n", MAXCMDS); exit(1); } addmac(mac); } /* * Add mac to the list. We should really have some kind of tree * structure here but this is a quick-and-dirty job and I just don't * have time to mess with it. (I wonder if this will come back to haunt * me someday?) Anyway, I claim that .de is fairly rare in user * nroff programs, and the register loop below is pretty fast. 
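 * addmac() keeps knowncmds sorted: binsrch() leaves the insertion point in
 * 'slot', the entries from slot through ncmds-1 are shifted up one place,
 * and a strdup()'d copy of the new name is dropped into the hole, so the
 * binary search keeps working as macros are added.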
*/ static void addmac(const char *mac) { const char **src, **dest, **loc; if (binsrch(mac) >= 0){ /* it's OK to redefine something */ #ifdef DEBUG printf("binsrch(%s) -> already in table\n", mac); #endif return; } /* binsrch sets slot as a side effect */ #ifdef DEBUG printf("binsrch(%s) -> %d\n", mac, slot); #endif loc = &knowncmds[slot]; src = &knowncmds[ncmds-1]; dest = src+1; while (dest > loc) *dest-- = *src--; if ((*loc = strdup(mac)) == NULL) err(1, "strdup"); ncmds++; #ifdef DEBUG printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds); #endif } /* * Do a binary search in knowncmds for mac. * If found, return the index. If not, return -1. */ static int binsrch(const char *mac) { const char *p; /* pointer to current cmd in list */ int d; /* difference if any */ int mid; /* mid point in binary search */ int top, bot; /* boundaries of bin search, inclusive */ top = ncmds-1; bot = 0; while (top >= bot) { mid = (top+bot)/2; p = knowncmds[mid]; d = p[0] - mac[0]; if (d == 0) d = p[1] - mac[1]; if (d == 0) return (mid); if (d < 0) bot = mid + 1; else top = mid - 1; } slot = bot; /* place it would have gone */ return (-1); } Index: user/alc/PQ_LAUNDRY/usr.bin/comm/comm.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/comm/comm.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/comm/comm.c (revision 303642) @@ -1,249 +1,248 @@ /* * Copyright (c) 1989, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Case Larsen. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1989, 1993, 1994\n\ The Regents of the University of California. 
All rights reserved.\n"; #endif #if 0 #ifndef lint static char sccsid[] = "From: @(#)comm.c 8.4 (Berkeley) 5/4/95"; #endif #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include #include static int iflag; static const char *tabs[] = { "", "\t", "\t\t" }; static FILE *file(const char *); static wchar_t *convert(const char *); static void show(FILE *, const char *, const char *, char **, size_t *); static void usage(void); int main(int argc, char *argv[]) { int comp, read1, read2; int ch, flag1, flag2, flag3; FILE *fp1, *fp2; const char *col1, *col2, *col3; size_t line1len, line2len; char *line1, *line2; ssize_t n1, n2; wchar_t *tline1, *tline2; const char **p; (void) setlocale(LC_ALL, ""); flag1 = flag2 = flag3 = 1; while ((ch = getopt(argc, argv, "123i")) != -1) switch(ch) { case '1': flag1 = 0; break; case '2': flag2 = 0; break; case '3': flag3 = 0; break; case 'i': iflag = 1; break; case '?': default: usage(); } argc -= optind; argv += optind; if (argc != 2) usage(); fp1 = file(argv[0]); fp2 = file(argv[1]); /* for each column printed, add another tab offset */ p = tabs; col1 = col2 = col3 = NULL; if (flag1) col1 = *p++; if (flag2) col2 = *p++; if (flag3) col3 = *p; line1len = line2len = 0; line1 = line2 = NULL; n1 = n2 = -1; for (read1 = read2 = 1;;) { /* read next line, check for EOF */ if (read1) { n1 = getline(&line1, &line1len, fp1); if (n1 < 0 && ferror(fp1)) err(1, "%s", argv[0]); if (n1 > 0 && line1[n1 - 1] == '\n') line1[n1 - 1] = '\0'; } if (read2) { n2 = getline(&line2, &line2len, fp2); if (n2 < 0 && ferror(fp2)) err(1, "%s", argv[1]); if (n2 > 0 && line2[n2 - 1] == '\n') line2[n2 - 1] = '\0'; } /* if one file done, display the rest of the other file */ if (n1 < 0) { if (n2 >= 0 && col2 != NULL) show(fp2, argv[1], col2, &line2, &line2len); break; } if (n2 < 0) { if (n1 >= 0 && col1 != NULL) show(fp1, argv[0], col1, &line1, &line1len); break; } tline2 = NULL; if ((tline1 = convert(line1)) != NULL) tline2 = convert(line2); if (tline1 == NULL || tline2 == NULL) comp = strcmp(line1, line2); else comp = wcscoll(tline1, tline2); if (tline1 != NULL) free(tline1); if (tline2 != NULL) free(tline2); /* lines are the same */ if (!comp) { read1 = read2 = 1; if (col3 != NULL) (void)printf("%s%s\n", col3, line1); continue; } /* lines are different */ if (comp < 0) { read1 = 1; read2 = 0; if (col1 != NULL) (void)printf("%s%s\n", col1, line1); } else { read1 = 0; read2 = 1; if (col2 != NULL) (void)printf("%s%s\n", col2, line2); } } exit(0); } static wchar_t * convert(const char *str) { size_t n; wchar_t *buf, *p; if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1) return (NULL); if (SIZE_MAX / sizeof(*buf) < n + 1) errx(1, "conversion buffer length overflow"); if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL) err(1, "malloc"); if (mbstowcs(buf, str, n + 1) != n) errx(1, "internal mbstowcs() error"); if (iflag) { for (p = buf; *p != L'\0'; p++) *p = towlower(*p); } return (buf); } static void show(FILE *fp, const char *fn, const char *offset, char **bufp, size_t *buflenp) { ssize_t n; do { (void)printf("%s%s\n", offset, *bufp); if ((n = getline(bufp, buflenp, fp)) < 0) break; if (n > 0 && (*bufp)[n - 1] == '\n') (*bufp)[n - 1] = '\0'; } while (1); if (ferror(fp)) err(1, "%s", fn); } static FILE * file(const char *name) { FILE *fp; if (!strcmp(name, "-")) return (stdin); if ((fp = fopen(name, "r")) == NULL) { err(1, "%s", name); } return (fp); } static void usage(void) { (void)fprintf(stderr, "usage: comm 
[-123i] file1 file2\n"); exit(1); } Index: user/alc/PQ_LAUNDRY/usr.bin/cpuset/cpuset.1 =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/cpuset/cpuset.1 (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/cpuset/cpuset.1 (revision 303642) @@ -1,197 +1,197 @@ .\" Copyright (c) 2008 Christian Brueffer .\" Copyright (c) 2008 Jeffrey Roberson .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd January 8, 2015 +.Dd July 29, 2016 .Dt CPUSET 1 .Os .Sh NAME .Nm cpuset .Nd "configure processor sets" .Sh SYNOPSIS .Nm .Op Fl l Ar cpu-list .Op Fl s Ar setid .Ar cmd ... .Nm .Op Fl l Ar cpu-list .Op Fl s Ar setid .Fl p Ar pid .Nm .Op Fl c .Op Fl l Ar cpu-list .Fl C .Fl p Ar pid .Nm .Op Fl c .Op Fl l Ar cpu-list .Op Fl j Ar jailid | Fl p Ar pid | Fl t Ar tid | Fl s Ar setid | Fl x Ar irq .Nm .Fl g .Op Fl cir -.Op Fl d Ar domain | j Ar jailid | Fl p Ar pid | Fl t Ar tid | Fl s Ar setid | Fl x Ar irq +.Op Fl d Ar domain | Fl j Ar jailid | Fl p Ar pid | Fl t Ar tid | Fl s Ar setid | Fl x Ar irq .Sh DESCRIPTION The .Nm command can be used to assign processor sets to processes, run commands constrained to a given set or list of processors, and query information about processor binding, sets, and available processors in the system. .Pp .Nm requires a target to modify or query. The target may be specified as a command, process id, thread id, a cpuset id, an irq, a jail id, or a NUMA domain. Using .Fl g the target's set id or mask may be queried. Using .Fl l or .Fl s the target's CPU mask or set id may be set. If no target is specified, .Nm operates on itself. Not all combinations of operations and targets are supported. For example, you may not set the id of an existing set or query and launch a command at the same time. .Pp There are two sets applicable to each process and one private mask per thread. Every process in the system belongs to a cpuset. By default processes are started in set 1. The mask or id may be queried using .Fl c . Each thread also has a private mask of CPUs it is allowed to run on that must be a subset of the assigned set. And finally, there is a root set, numbered 0, that is immutable. 
This last set is the list of all possible CPUs in the system and is queried using .Fl r . .Pp When running a command it may join a set specified with .Fl s otherwise a new set is created. In addition, a mask for the command may be specified using .Fl l . When used in conjunction with .Fl c the mask modifies the supplied or created set rather than the private mask for the thread. .Pp The options are as follows: .Bl -tag -width ".Fl l Ar cpu-list" .It Fl C Create a new cpuset and assign the target process to that set. .It Fl c The requested operation should reference the cpuset available via the target specifier. .It Fl d Ar domain Specifies a NUMA domain id as the target of the operation. .It Fl g Causes .Nm to print either a list of valid CPUs or, using .Fl i , the id of the target. .It Fl i When used with the .Fl g option print the id rather than the valid mask of the target. .It Fl j Ar jailid Specifies a jail id as the target of the operation. .It Fl l Ar cpu-list Specifies a list of CPUs to apply to a target. Specification may include numbers separated by '-' for ranges and commas separating individual numbers. A special list of .Dq all may be specified in which case the list includes all CPUs from the root set. .It Fl p Ar pid Specifies a pid as the target of the operation. .It Fl s Ar setid Specifies a set id as the target of the operation. .It Fl r The requested operation should reference the root set available via the target specifier. .It Fl t Ar tid Specifies a thread id as the target of the operation. .It Fl x Ar irq Specifies an irq as the target of the operation. .El .Sh EXIT STATUS .Ex -std .Sh EXAMPLES Create a new group with CPUs 0-4 inclusive and run .Pa /bin/sh on it: .Dl cpuset -c -l 0-4 /bin/sh .Pp Query the mask of CPUs the .Aq sh pid is allowed to run on: .Dl cpuset -g -p .Pp Restrict .Pa /bin/sh to run on CPUs 0 and 2 while its group is still allowed to run on CPUs 0-4: .Dl cpuset -l 0,2 -p .Pp Modify the cpuset .Pa /bin/sh belongs to restricting it to CPUs 0 and 2: .Dl cpuset -l 0,2 -c -p .Pp Modify the cpuset all threads are in by default to contain only the first 4 CPUs, leaving the rest idle: .Dl cpuset -l 0-3 -s 1 .Pp Print the id of the cpuset .Pa /bin/sh is in: .Dl cpuset -g -i -p .Pp Move the .Ar pid into the specified cpuset .Ar setid so it may be managed with other pids in that set: .Dl cpuset -s -p .Pp Create a new cpuset that is restricted to CPUs 0 and 2 and move .Ar pid into the new set: .Dl cpuset -C -c -l 0,2 -p .Sh SEE ALSO .Xr cpuset 2 .Sh HISTORY The .Nm command first appeared in .Fx 7.1 . .Sh AUTHORS .An Jeffrey Roberson Aq Mt jeff@FreeBSD.org Index: user/alc/PQ_LAUNDRY/usr.bin/grep/grep.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/grep/grep.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/grep/grep.c (revision 303642) @@ -1,748 +1,747 @@ /* $NetBSD: grep.c,v 1.6 2011/04/18 03:48:23 joerg Exp $ */ /* $FreeBSD$ */ /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */ /*- * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav * Copyright (C) 2008-2009 Gabor Kovesdan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include "fastmatch.h" #include "grep.h" #ifndef WITHOUT_NLS #include nl_catd catalog; #endif /* * Default messags to use when NLS is disabled or no catalogue * is found. */ const char *errstr[] = { "", /* 1*/ "(standard input)", /* 2*/ "cannot read bzip2 compressed file", /* 3*/ "unknown %s option", /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n", /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n", /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n", /* 7*/ "\t[--null] [pattern] [file ...]\n", /* 8*/ "Binary file %s matches\n", /* 9*/ "%s (BSD grep) %s\n", }; /* Flags passed to regcomp() and regexec() */ int cflags = REG_NOSUB; int eflags = REG_STARTEND; /* Shortcut for matching all cases like empty regex */ bool matchall; /* Searching patterns */ unsigned int patterns; static unsigned int pattern_sz; struct pat *pattern; regex_t *r_pattern; fastmatch_t *fg_pattern; /* Filename exclusion/inclusion patterns */ unsigned int fpatterns, dpatterns; static unsigned int fpattern_sz, dpattern_sz; struct epat *dpattern, *fpattern; /* For regex errors */ char re_error[RE_ERROR_BUF + 1]; /* Command-line flags */ unsigned long long Aflag; /* -A x: print x lines trailing each match */ unsigned long long Bflag; /* -B x: print x lines leading each match */ bool Hflag; /* -H: always print file name */ bool Lflag; /* -L: only show names of files with no matches */ bool bflag; /* -b: show block numbers for each match */ bool cflag; /* -c: only show a count of matching lines */ bool hflag; /* -h: don't print filename headers */ bool iflag; /* -i: ignore case */ bool lflag; /* -l: only show names of files with matches */ bool mflag; /* -m x: stop reading the files after x matches */ long long mcount; /* count for -m */ long long mlimit; /* requested value for -m */ bool nflag; /* -n: show line numbers in front of matching lines */ bool oflag; /* -o: print only matching part */ bool qflag; /* -q: quiet mode (don't output anything) */ bool sflag; /* -s: silent mode (ignore errors) */ bool vflag; /* -v: only show non-matching lines */ bool wflag; /* -w: pattern must start and end on word boundaries */ bool xflag; /* -x: pattern must match entire line */ bool lbflag; /* --line-buffered */ bool nullflag; /* --null */ char *label; /* --label */ const char *color; /* --color */ int 
grepbehave = GREP_BASIC; /* -EFGP: type of the regex */ int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */ int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */ int devbehave = DEV_READ; /* -D: handling of devices */ int dirbehave = DIR_READ; /* -dRr: handling of directories */ int linkbehave = LINK_READ; /* -OpS: handling of symlinks */ bool dexclude, dinclude; /* --exclude-dir and --include-dir */ bool fexclude, finclude; /* --exclude and --include */ enum { BIN_OPT = CHAR_MAX + 1, COLOR_OPT, HELP_OPT, MMAP_OPT, LINEBUF_OPT, LABEL_OPT, NULL_OPT, R_EXCLUDE_OPT, R_INCLUDE_OPT, R_DEXCLUDE_OPT, R_DINCLUDE_OPT }; static inline const char *init_color(const char *); /* Housekeeping */ bool first = true; /* flag whether we are processing the first match */ bool prev; /* flag whether or not the previous line matched */ int tail; /* lines left to print */ bool file_err; /* file reading error */ /* * Prints usage information and returns 2. */ static void usage(void) { fprintf(stderr, getstr(4), getprogname()); fprintf(stderr, "%s", getstr(5)); fprintf(stderr, "%s", getstr(6)); fprintf(stderr, "%s", getstr(7)); exit(2); } static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXy"; static const struct option long_options[] = { {"binary-files", required_argument, NULL, BIN_OPT}, {"help", no_argument, NULL, HELP_OPT}, {"mmap", no_argument, NULL, MMAP_OPT}, {"line-buffered", no_argument, NULL, LINEBUF_OPT}, {"label", required_argument, NULL, LABEL_OPT}, {"null", no_argument, NULL, NULL_OPT}, {"color", optional_argument, NULL, COLOR_OPT}, {"colour", optional_argument, NULL, COLOR_OPT}, {"exclude", required_argument, NULL, R_EXCLUDE_OPT}, {"include", required_argument, NULL, R_INCLUDE_OPT}, {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT}, {"include-dir", required_argument, NULL, R_DINCLUDE_OPT}, {"after-context", required_argument, NULL, 'A'}, {"text", no_argument, NULL, 'a'}, {"before-context", required_argument, NULL, 'B'}, {"byte-offset", no_argument, NULL, 'b'}, {"context", optional_argument, NULL, 'C'}, {"count", no_argument, NULL, 'c'}, {"devices", required_argument, NULL, 'D'}, {"directories", required_argument, NULL, 'd'}, {"extended-regexp", no_argument, NULL, 'E'}, {"regexp", required_argument, NULL, 'e'}, {"fixed-strings", no_argument, NULL, 'F'}, {"file", required_argument, NULL, 'f'}, {"basic-regexp", no_argument, NULL, 'G'}, {"no-filename", no_argument, NULL, 'h'}, {"with-filename", no_argument, NULL, 'H'}, {"ignore-case", no_argument, NULL, 'i'}, {"bz2decompress", no_argument, NULL, 'J'}, {"files-with-matches", no_argument, NULL, 'l'}, {"files-without-match", no_argument, NULL, 'L'}, {"max-count", required_argument, NULL, 'm'}, {"lzma", no_argument, NULL, 'M'}, {"line-number", no_argument, NULL, 'n'}, {"only-matching", no_argument, NULL, 'o'}, {"quiet", no_argument, NULL, 'q'}, {"silent", no_argument, NULL, 'q'}, {"recursive", no_argument, NULL, 'r'}, {"no-messages", no_argument, NULL, 's'}, {"binary", no_argument, NULL, 'U'}, {"unix-byte-offsets", no_argument, NULL, 'u'}, {"invert-match", no_argument, NULL, 'v'}, {"version", no_argument, NULL, 'V'}, {"word-regexp", no_argument, NULL, 'w'}, {"line-regexp", no_argument, NULL, 'x'}, {"xz", no_argument, NULL, 'X'}, {"decompress", no_argument, NULL, 'Z'}, {NULL, no_argument, NULL, 0} }; /* * Adds a searching pattern to the internal array. 
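 * An empty pattern turns on the matchall shortcut and discards any patterns
 * collected so far.  Otherwise the array grows geometrically (doubled, plus
 * one so the very first allocation works while pattern_sz is still 0), a
 * trailing newline is stripped, and the pattern is copied and
 * NUL-terminated because 'pat' itself may not be.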
*/ static void add_pattern(char *pat, size_t len) { /* Do not add further pattern is we already match everything */ if (matchall) return; /* Check if we can do a shortcut */ if (len == 0) { matchall = true; for (unsigned int i = 0; i < patterns; i++) { free(pattern[i].pat); } pattern = grep_realloc(pattern, sizeof(struct pat)); pattern[0].pat = NULL; pattern[0].len = 0; patterns = 1; return; } /* Increase size if necessary */ if (patterns == pattern_sz) { pattern_sz *= 2; pattern = grep_realloc(pattern, ++pattern_sz * sizeof(struct pat)); } if (len > 0 && pat[len - 1] == '\n') --len; /* pat may not be NUL-terminated */ pattern[patterns].pat = grep_malloc(len + 1); memcpy(pattern[patterns].pat, pat, len); pattern[patterns].len = len; pattern[patterns].pat[len] = '\0'; ++patterns; } /* * Adds a file include/exclude pattern to the internal array. */ static void add_fpattern(const char *pat, int mode) { /* Increase size if necessary */ if (fpatterns == fpattern_sz) { fpattern_sz *= 2; fpattern = grep_realloc(fpattern, ++fpattern_sz * sizeof(struct epat)); } fpattern[fpatterns].pat = grep_strdup(pat); fpattern[fpatterns].mode = mode; ++fpatterns; } /* * Adds a directory include/exclude pattern to the internal array. */ static void add_dpattern(const char *pat, int mode) { /* Increase size if necessary */ if (dpatterns == dpattern_sz) { dpattern_sz *= 2; dpattern = grep_realloc(dpattern, ++dpattern_sz * sizeof(struct epat)); } dpattern[dpatterns].pat = grep_strdup(pat); dpattern[dpatterns].mode = mode; ++dpatterns; } /* * Reads searching patterns from a file and adds them with add_pattern(). */ static void read_patterns(const char *fn) { struct stat st; FILE *f; char *line; size_t len; ssize_t rlen; if ((f = fopen(fn, "r")) == NULL) err(2, "%s", fn); if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) { fclose(f); return; } len = 0; line = NULL; while ((rlen = getline(&line, &len, f)) != -1) add_pattern(line, line[0] == '\n' ? 0 : (size_t)rlen); free(line); if (ferror(f)) err(2, "%s", fn); fclose(f); } static inline const char * init_color(const char *d) { char *c; c = getenv("GREP_COLOR"); return (c != NULL && c[0] != '\0' ? c : d); } int main(int argc, char *argv[]) { char **aargv, **eargv, *eopts; char *ep; const char *pn; unsigned long long l; unsigned int aargc, eargc, i; int c, lastc, needpattern, newarg, prevoptind; setlocale(LC_ALL, ""); #ifndef WITHOUT_NLS catalog = catopen("grep", NL_CAT_LOCALE); #endif /* Check what is the program name of the binary. In this way we can have all the funcionalities in one binary without the need of scripting and using ugly hacks. 
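   A leading "bz", "xz", "lz" or "z" in the program name selects the matching
   decompression filter, and a remaining leading 'e' or 'f' selects extended
   or fixed-string matching, so for example "bzegrep" means bzip2 input with
   -E semantics.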
*/ pn = getprogname(); if (pn[0] == 'b' && pn[1] == 'z') { filebehave = FILE_BZIP; pn += 2; } else if (pn[0] == 'x' && pn[1] == 'z') { filebehave = FILE_XZ; pn += 2; } else if (pn[0] == 'l' && pn[1] == 'z') { filebehave = FILE_LZMA; pn += 2; } else if (pn[0] == 'z') { filebehave = FILE_GZIP; pn += 1; } switch (pn[0]) { case 'e': grepbehave = GREP_EXTENDED; break; case 'f': grepbehave = GREP_FIXED; break; } lastc = '\0'; newarg = 1; prevoptind = 1; needpattern = 1; eopts = getenv("GREP_OPTIONS"); /* support for extra arguments in GREP_OPTIONS */ eargc = 0; if (eopts != NULL && eopts[0] != '\0') { char *str; /* make an estimation of how many extra arguments we have */ for (unsigned int j = 0; j < strlen(eopts); j++) if (eopts[j] == ' ') eargc++; eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1)); eargc = 0; /* parse extra arguments */ while ((str = strsep(&eopts, " ")) != NULL) if (str[0] != '\0') eargv[eargc++] = grep_strdup(str); aargv = (char **)grep_calloc(eargc + argc + 1, sizeof(char *)); aargv[0] = argv[0]; for (i = 0; i < eargc; i++) aargv[i + 1] = eargv[i]; for (int j = 1; j < argc; j++, i++) aargv[i + 1] = argv[j]; aargc = eargc + argc; } else { aargv = argv; aargc = argc; } while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) != -1)) { switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (newarg || !isdigit(lastc)) Aflag = 0; else if (Aflag > LLONG_MAX / 10) { errno = ERANGE; err(2, NULL); } Aflag = Bflag = (Aflag * 10) + (c - '0'); break; case 'C': if (optarg == NULL) { Aflag = Bflag = 2; break; } /* FALLTHROUGH */ case 'A': /* FALLTHROUGH */ case 'B': errno = 0; l = strtoull(optarg, &ep, 10); if (((errno == ERANGE) && (l == ULLONG_MAX)) || ((errno == EINVAL) && (l == 0))) err(2, NULL); else if (ep[0] != '\0') { errno = EINVAL; err(2, NULL); } if (c == 'A') Aflag = l; else if (c == 'B') Bflag = l; else Aflag = Bflag = l; break; case 'a': binbehave = BINFILE_TEXT; break; case 'b': bflag = true; break; case 'c': cflag = true; break; case 'D': if (strcasecmp(optarg, "skip") == 0) devbehave = DEV_SKIP; else if (strcasecmp(optarg, "read") == 0) devbehave = DEV_READ; else errx(2, getstr(3), "--devices"); break; case 'd': if (strcasecmp("recurse", optarg) == 0) { Hflag = true; dirbehave = DIR_RECURSE; } else if (strcasecmp("skip", optarg) == 0) dirbehave = DIR_SKIP; else if (strcasecmp("read", optarg) == 0) dirbehave = DIR_READ; else errx(2, getstr(3), "--directories"); break; case 'E': grepbehave = GREP_EXTENDED; break; case 'e': { char *token; char *string = optarg; while ((token = strsep(&string, "\n")) != NULL) add_pattern(token, strlen(token)); } needpattern = 0; break; case 'F': grepbehave = GREP_FIXED; break; case 'f': read_patterns(optarg); needpattern = 0; break; case 'G': grepbehave = GREP_BASIC; break; case 'H': Hflag = true; break; case 'h': Hflag = false; hflag = true; break; case 'I': binbehave = BINFILE_SKIP; break; case 'i': case 'y': iflag = true; cflags |= REG_ICASE; break; case 'J': #ifdef WITHOUT_BZIP2 errno = EOPNOTSUPP; err(2, "bzip2 support was disabled at compile-time"); #endif filebehave = FILE_BZIP; break; case 'L': lflag = false; Lflag = true; break; case 'l': Lflag = false; lflag = true; break; case 'm': mflag = true; errno = 0; mlimit = mcount = strtoll(optarg, &ep, 10); if (((errno == ERANGE) && (mcount == LLONG_MAX)) || ((errno == EINVAL) && (mcount == 0))) err(2, NULL); else if (ep[0] != '\0') { errno = EINVAL; err(2, NULL); } break; case 'M': filebehave = FILE_LZMA; 
break; case 'n': nflag = true; break; case 'O': linkbehave = LINK_EXPLICIT; break; case 'o': oflag = true; cflags &= ~REG_NOSUB; break; case 'p': linkbehave = LINK_SKIP; break; case 'q': qflag = true; break; case 'S': linkbehave = LINK_READ; break; case 'R': case 'r': dirbehave = DIR_RECURSE; Hflag = true; break; case 's': sflag = true; break; case 'U': binbehave = BINFILE_BIN; break; case 'u': case MMAP_OPT: filebehave = FILE_MMAP; break; case 'V': printf(getstr(9), getprogname(), VERSION); exit(0); case 'v': vflag = true; break; case 'w': wflag = true; cflags &= ~REG_NOSUB; break; case 'x': xflag = true; cflags &= ~REG_NOSUB; break; case 'X': filebehave = FILE_XZ; break; case 'Z': filebehave = FILE_GZIP; break; case BIN_OPT: if (strcasecmp("binary", optarg) == 0) binbehave = BINFILE_BIN; else if (strcasecmp("without-match", optarg) == 0) binbehave = BINFILE_SKIP; else if (strcasecmp("text", optarg) == 0) binbehave = BINFILE_TEXT; else errx(2, getstr(3), "--binary-files"); break; case COLOR_OPT: color = NULL; if (optarg == NULL || strcasecmp("auto", optarg) == 0 || strcasecmp("tty", optarg) == 0 || strcasecmp("if-tty", optarg) == 0) { char *term; term = getenv("TERM"); if (isatty(STDOUT_FILENO) && term != NULL && strcasecmp(term, "dumb") != 0) color = init_color("01;31"); } else if (strcasecmp("always", optarg) == 0 || strcasecmp("yes", optarg) == 0 || strcasecmp("force", optarg) == 0) { color = init_color("01;31"); } else if (strcasecmp("never", optarg) != 0 && strcasecmp("none", optarg) != 0 && strcasecmp("no", optarg) != 0) errx(2, getstr(3), "--color"); cflags &= ~REG_NOSUB; break; case LABEL_OPT: label = optarg; break; case LINEBUF_OPT: lbflag = true; break; case NULL_OPT: nullflag = true; break; case R_INCLUDE_OPT: finclude = true; add_fpattern(optarg, INCL_PAT); break; case R_EXCLUDE_OPT: fexclude = true; add_fpattern(optarg, EXCL_PAT); break; case R_DINCLUDE_OPT: dinclude = true; add_dpattern(optarg, INCL_PAT); break; case R_DEXCLUDE_OPT: dexclude = true; add_dpattern(optarg, EXCL_PAT); break; case HELP_OPT: default: usage(); } lastc = c; newarg = optind != prevoptind; prevoptind = optind; } aargc -= optind; aargv += optind; /* Empty pattern file matches nothing */ if (!needpattern && (patterns == 0)) exit(1); /* Fail if we don't have any pattern */ if (aargc == 0 && needpattern) usage(); /* Process patterns from command line */ if (aargc != 0 && needpattern) { char *token; char *string = *aargv; while ((token = strsep(&string, "\n")) != NULL) add_pattern(token, strlen(token)); --aargc; ++aargv; } switch (grepbehave) { case GREP_BASIC: break; case GREP_FIXED: /* XXX: header mess, REG_LITERAL not defined in gnu/regex.h */ cflags |= 0020; break; case GREP_EXTENDED: cflags |= REG_EXTENDED; break; default: /* NOTREACHED */ usage(); } fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); /* Check if cheating is allowed (always is for fgrep). 
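	   Each pattern is offered to the fastmatch engine first; only the ones
	   fastncomp() rejects are compiled with regcomp(), so plain literal
	   strings never pay for the full regex machinery.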
*/ for (i = 0; i < patterns; ++i) { if (fastncomp(&fg_pattern[i], pattern[i].pat, pattern[i].len, cflags) != 0) { /* Fall back to full regex library */ c = regcomp(&r_pattern[i], pattern[i].pat, cflags); if (c != 0) { regerror(c, &r_pattern[i], re_error, RE_ERROR_BUF); errx(2, "%s", re_error); } } } if (lbflag) setlinebuf(stdout); if ((aargc == 0 || aargc == 1) && !Hflag) hflag = true; if (aargc == 0) exit(!procfile("-")); if (dirbehave == DIR_RECURSE) c = grep_tree(aargv); else for (c = 0; aargc--; ++aargv) { if ((finclude || fexclude) && !file_matching(*aargv)) continue; c+= procfile(*aargv); } #ifndef WITHOUT_NLS catclose(catalog); #endif /* Find out the correct return value according to the results and the command line option. */ exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1)); } Index: user/alc/PQ_LAUNDRY/usr.bin/gzip/gzip.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/gzip/gzip.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/gzip/gzip.c (revision 303642) @@ -1,2174 +1,2174 @@ /* $NetBSD: gzip.c,v 1.109 2015/10/27 07:36:18 mrg Exp $ */ /*- * Copyright (c) 1997, 1998, 2003, 2004, 2006 Matthew R. Green * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include #ifndef lint __COPYRIGHT("@(#) Copyright (c) 1997, 1998, 2003, 2004, 2006\ Matthew R. Green. All rights reserved."); __FBSDID("$FreeBSD$"); #endif /* not lint */ /* * gzip.c -- GPL free gzip using zlib. 
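 * (zlib supplies only the raw deflate/inflate engine; the gzip framing,
 * i.e. member headers, CRC/length trailers and concatenated members, is
 * produced and parsed by this file itself.)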
* * RFC 1950 covers the zlib format * RFC 1951 covers the deflate format * RFC 1952 covers the gzip format * * TODO: * - use mmap where possible * - make bzip2/compress -v/-t/-l support work as well as possible */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* what type of file are we dealing with */ enum filetype { FT_GZIP, #ifndef NO_BZIP2_SUPPORT FT_BZIP2, #endif #ifndef NO_COMPRESS_SUPPORT FT_Z, #endif #ifndef NO_PACK_SUPPORT FT_PACK, #endif #ifndef NO_XZ_SUPPORT FT_XZ, #endif FT_LAST, FT_UNKNOWN }; #ifndef NO_BZIP2_SUPPORT #include #define BZ2_SUFFIX ".bz2" #define BZIP2_MAGIC "\102\132\150" #endif #ifndef NO_COMPRESS_SUPPORT #define Z_SUFFIX ".Z" #define Z_MAGIC "\037\235" #endif #ifndef NO_PACK_SUPPORT #define PACK_MAGIC "\037\036" #endif #ifndef NO_XZ_SUPPORT #include #define XZ_SUFFIX ".xz" #define XZ_MAGIC "\3757zXZ" #endif #define GZ_SUFFIX ".gz" #define BUFLEN (64 * 1024) #define GZIP_MAGIC0 0x1F #define GZIP_MAGIC1 0x8B #define GZIP_OMAGIC1 0x9E #define GZIP_TIMESTAMP (off_t)4 #define GZIP_ORIGNAME (off_t)10 #define HEAD_CRC 0x02 #define EXTRA_FIELD 0x04 #define ORIG_NAME 0x08 #define COMMENT 0x10 #define OS_CODE 3 /* Unix */ typedef struct { const char *zipped; int ziplen; const char *normal; /* for unzip - must not be longer than zipped */ } suffixes_t; static suffixes_t suffixes[] = { #define SUFFIX(Z, N) {Z, sizeof Z - 1, N} SUFFIX(GZ_SUFFIX, ""), /* Overwritten by -S .xxx */ #ifndef SMALL SUFFIX(GZ_SUFFIX, ""), SUFFIX(".z", ""), SUFFIX("-gz", ""), SUFFIX("-z", ""), SUFFIX("_z", ""), SUFFIX(".taz", ".tar"), SUFFIX(".tgz", ".tar"), #ifndef NO_BZIP2_SUPPORT SUFFIX(BZ2_SUFFIX, ""), SUFFIX(".tbz", ".tar"), SUFFIX(".tbz2", ".tar"), #endif #ifndef NO_COMPRESS_SUPPORT SUFFIX(Z_SUFFIX, ""), #endif #ifndef NO_XZ_SUPPORT SUFFIX(XZ_SUFFIX, ""), #endif SUFFIX(GZ_SUFFIX, ""), /* Overwritten by -S "" */ #endif /* SMALL */ #undef SUFFIX }; -#define NUM_SUFFIXES (sizeof suffixes / sizeof suffixes[0]) +#define NUM_SUFFIXES (nitems(suffixes)) #define SUFFIX_MAXLEN 30 static const char gzip_version[] = "FreeBSD gzip 20150413"; #ifndef SMALL static const char gzip_copyright[] = \ " Copyright (c) 1997, 1998, 2003, 2004, 2006 Matthew R. Green\n" " All rights reserved.\n" "\n" " Redistribution and use in source and binary forms, with or without\n" " modification, are permitted provided that the following conditions\n" " are met:\n" " 1. Redistributions of source code must retain the above copyright\n" " notice, this list of conditions and the following disclaimer.\n" " 2. 
Redistributions in binary form must reproduce the above copyright\n" " notice, this list of conditions and the following disclaimer in the\n" " documentation and/or other materials provided with the distribution.\n" "\n" " THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\n" " IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\n" " OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\n" " IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\n" " INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,\n" " BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n" " LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED\n" " AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n" " OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\n" " OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\n" " SUCH DAMAGE."; #endif static int cflag; /* stdout mode */ static int dflag; /* decompress mode */ static int lflag; /* list mode */ static int numflag = 6; /* gzip -1..-9 value */ #ifndef SMALL static int fflag; /* force mode */ static int kflag; /* don't delete input files */ static int nflag; /* don't save name/timestamp */ static int Nflag; /* don't restore name/timestamp */ static int qflag; /* quiet mode */ static int rflag; /* recursive mode */ static int tflag; /* test */ static int vflag; /* verbose mode */ static const char *remove_file = NULL; /* file to be removed upon SIGINT */ #else #define qflag 0 #define tflag 0 #endif static int exit_value = 0; /* exit value */ static char *infile; /* name of file coming in */ static void maybe_err(const char *fmt, ...) __printflike(1, 2) __dead2; #if !defined(NO_BZIP2_SUPPORT) || !defined(NO_PACK_SUPPORT) || \ !defined(NO_XZ_SUPPORT) static void maybe_errx(const char *fmt, ...) __printflike(1, 2) __dead2; #endif static void maybe_warn(const char *fmt, ...) __printflike(1, 2); static void maybe_warnx(const char *fmt, ...) 
__printflike(1, 2); static enum filetype file_gettype(u_char *); #ifdef SMALL #define gz_compress(if, of, sz, fn, tm) gz_compress(if, of, sz) #endif static off_t gz_compress(int, int, off_t *, const char *, uint32_t); static off_t gz_uncompress(int, int, char *, size_t, off_t *, const char *); static off_t file_compress(char *, char *, size_t); static off_t file_uncompress(char *, char *, size_t); static void handle_pathname(char *); static void handle_file(char *, struct stat *); static void handle_stdin(void); static void handle_stdout(void); static void print_ratio(off_t, off_t, FILE *); static void print_list(int fd, off_t, const char *, time_t); static void usage(void) __dead2; static void display_version(void) __dead2; #ifndef SMALL static void display_license(void); static void sigint_handler(int); #endif static const suffixes_t *check_suffix(char *, int); static ssize_t read_retry(int, void *, size_t); #ifdef SMALL #define unlink_input(f, sb) unlink(f) #else static off_t cat_fd(unsigned char *, size_t, off_t *, int fd); static void prepend_gzip(char *, int *, char ***); static void handle_dir(char *); static void print_verbage(const char *, const char *, off_t, off_t); static void print_test(const char *, int); static void copymodes(int fd, const struct stat *, const char *file); static int check_outfile(const char *outfile); #endif #ifndef NO_BZIP2_SUPPORT static off_t unbzip2(int, int, char *, size_t, off_t *); #endif #ifndef NO_COMPRESS_SUPPORT static FILE *zdopen(int); static off_t zuncompress(FILE *, FILE *, char *, size_t, off_t *); #endif #ifndef NO_PACK_SUPPORT static off_t unpack(int, int, char *, size_t, off_t *); #endif #ifndef NO_XZ_SUPPORT static off_t unxz(int, int, char *, size_t, off_t *); #endif #ifdef SMALL #define getopt_long(a,b,c,d,e) getopt(a,b,c) #else static const struct option longopts[] = { { "stdout", no_argument, 0, 'c' }, { "to-stdout", no_argument, 0, 'c' }, { "decompress", no_argument, 0, 'd' }, { "uncompress", no_argument, 0, 'd' }, { "force", no_argument, 0, 'f' }, { "help", no_argument, 0, 'h' }, { "keep", no_argument, 0, 'k' }, { "list", no_argument, 0, 'l' }, { "no-name", no_argument, 0, 'n' }, { "name", no_argument, 0, 'N' }, { "quiet", no_argument, 0, 'q' }, { "recursive", no_argument, 0, 'r' }, { "suffix", required_argument, 0, 'S' }, { "test", no_argument, 0, 't' }, { "verbose", no_argument, 0, 'v' }, { "version", no_argument, 0, 'V' }, { "fast", no_argument, 0, '1' }, { "best", no_argument, 0, '9' }, { "ascii", no_argument, 0, 'a' }, { "license", no_argument, 0, 'L' }, { NULL, no_argument, 0, 0 }, }; #endif int main(int argc, char **argv) { const char *progname = getprogname(); #ifndef SMALL char *gzip; int len; #endif int ch; #ifndef SMALL if ((gzip = getenv("GZIP")) != NULL) prepend_gzip(gzip, &argc, &argv); signal(SIGINT, sigint_handler); #endif /* * XXX * handle being called `gunzip', `zcat' and `gzcat' */ if (strcmp(progname, "gunzip") == 0) dflag = 1; else if (strcmp(progname, "zcat") == 0 || strcmp(progname, "gzcat") == 0) dflag = cflag = 1; #ifdef SMALL #define OPT_LIST "123456789cdhlV" #else #define OPT_LIST "123456789acdfhklLNnqrS:tVv" #endif while ((ch = getopt_long(argc, argv, OPT_LIST, longopts, NULL)) != -1) { switch (ch) { case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': numflag = ch - '0'; break; case 'c': cflag = 1; break; case 'd': dflag = 1; break; case 'l': lflag = 1; dflag = 1; break; case 'V': display_version(); /* NOTREACHED */ #ifndef SMALL case 'a': fprintf(stderr, "%s: option 
--ascii ignored on this system\n", progname); break; case 'f': fflag = 1; break; case 'k': kflag = 1; break; case 'L': display_license(); /* NOT REACHED */ case 'N': nflag = 0; Nflag = 1; break; case 'n': nflag = 1; Nflag = 0; break; case 'q': qflag = 1; break; case 'r': rflag = 1; break; case 'S': len = strlen(optarg); if (len != 0) { if (len > SUFFIX_MAXLEN) errx(1, "incorrect suffix: '%s': too long", optarg); suffixes[0].zipped = optarg; suffixes[0].ziplen = len; } else { suffixes[NUM_SUFFIXES - 1].zipped = ""; suffixes[NUM_SUFFIXES - 1].ziplen = 0; } break; case 't': cflag = 1; tflag = 1; dflag = 1; break; case 'v': vflag = 1; break; #endif default: usage(); /* NOTREACHED */ } } argv += optind; argc -= optind; if (argc == 0) { if (dflag) /* stdin mode */ handle_stdin(); else /* stdout mode */ handle_stdout(); } else { do { handle_pathname(argv[0]); } while (*++argv); } #ifndef SMALL if (qflag == 0 && lflag && argc > 1) print_list(-1, 0, "(totals)", 0); #endif exit(exit_value); } /* maybe print a warning */ void maybe_warn(const char *fmt, ...) { va_list ap; if (qflag == 0) { va_start(ap, fmt); vwarn(fmt, ap); va_end(ap); } if (exit_value == 0) exit_value = 1; } /* ... without an errno. */ void maybe_warnx(const char *fmt, ...) { va_list ap; if (qflag == 0) { va_start(ap, fmt); vwarnx(fmt, ap); va_end(ap); } if (exit_value == 0) exit_value = 1; } /* maybe print an error */ void maybe_err(const char *fmt, ...) { va_list ap; if (qflag == 0) { va_start(ap, fmt); vwarn(fmt, ap); va_end(ap); } exit(2); } #if !defined(NO_BZIP2_SUPPORT) || !defined(NO_PACK_SUPPORT) || \ !defined(NO_XZ_SUPPORT) /* ... without an errno. */ void maybe_errx(const char *fmt, ...) { va_list ap; if (qflag == 0) { va_start(ap, fmt); vwarnx(fmt, ap); va_end(ap); } exit(2); } #endif #ifndef SMALL /* split up $GZIP and prepend it to the argument list */ static void prepend_gzip(char *gzip, int *argc, char ***argv) { char *s, **nargv, **ac; int nenvarg = 0, i; /* scan how many arguments there are */ for (s = gzip;;) { while (*s == ' ' || *s == '\t') s++; if (*s == 0) goto count_done; nenvarg++; while (*s != ' ' && *s != '\t') if (*s++ == 0) goto count_done; } count_done: /* punt early */ if (nenvarg == 0) return; *argc += nenvarg; ac = *argv; nargv = (char **)malloc((*argc + 1) * sizeof(char *)); if (nargv == NULL) maybe_err("malloc"); /* stash this away */ *argv = nargv; /* copy the program name first */ i = 0; nargv[i++] = *(ac++); /* take a copy of $GZIP and add it to the array */ s = strdup(gzip); if (s == NULL) maybe_err("strdup"); for (;;) { /* Skip whitespaces. */ while (*s == ' ' || *s == '\t') s++; if (*s == 0) goto copy_done; nargv[i++] = s; /* Find the end of this argument. */ while (*s != ' ' && *s != '\t') if (*s++ == 0) /* Argument followed by NUL. */ goto copy_done; /* Terminate by overwriting ' ' or '\t' with NUL. */ *s++ = 0; } copy_done: /* copy the original arguments and a NULL */ while (*ac) nargv[i++] = *(ac++); nargv[i] = NULL; } #endif /* compress input to output. 
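   The 10-byte gzip member header (magic bytes, deflate method, flags, the
   mtime in little-endian order, the XFL hint and the OS code) plus the
   optional NUL-terminated original name is written by hand, the payload is
   raw-deflated via deflateInit2(..., -MAX_WBITS, ...), and an 8-byte trailer
   carrying the CRC-32 and the input length mod 2^32 is appended.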
Return bytes read, -1 on error */ static off_t gz_compress(int in, int out, off_t *gsizep, const char *origname, uint32_t mtime) { z_stream z; char *outbufp, *inbufp; off_t in_tot = 0, out_tot = 0; ssize_t in_size; int i, error; uLong crc; #ifdef SMALL static char header[] = { GZIP_MAGIC0, GZIP_MAGIC1, Z_DEFLATED, 0, 0, 0, 0, 0, 0, OS_CODE }; #endif outbufp = malloc(BUFLEN); inbufp = malloc(BUFLEN); if (outbufp == NULL || inbufp == NULL) { maybe_err("malloc failed"); goto out; } memset(&z, 0, sizeof z); z.zalloc = Z_NULL; z.zfree = Z_NULL; z.opaque = 0; #ifdef SMALL memcpy(outbufp, header, sizeof header); i = sizeof header; #else if (nflag != 0) { mtime = 0; origname = ""; } i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c%c%c%c%c%s", GZIP_MAGIC0, GZIP_MAGIC1, Z_DEFLATED, *origname ? ORIG_NAME : 0, mtime & 0xff, (mtime >> 8) & 0xff, (mtime >> 16) & 0xff, (mtime >> 24) & 0xff, numflag == 1 ? 4 : numflag == 9 ? 2 : 0, OS_CODE, origname); if (i >= BUFLEN) /* this need PATH_MAX > BUFLEN ... */ maybe_err("snprintf"); if (*origname) i++; #endif z.next_out = (unsigned char *)outbufp + i; z.avail_out = BUFLEN - i; error = deflateInit2(&z, numflag, Z_DEFLATED, (-MAX_WBITS), 8, Z_DEFAULT_STRATEGY); if (error != Z_OK) { maybe_warnx("deflateInit2 failed"); in_tot = -1; goto out; } crc = crc32(0L, Z_NULL, 0); for (;;) { if (z.avail_out == 0) { if (write(out, outbufp, BUFLEN) != BUFLEN) { maybe_warn("write"); out_tot = -1; goto out; } out_tot += BUFLEN; z.next_out = (unsigned char *)outbufp; z.avail_out = BUFLEN; } if (z.avail_in == 0) { in_size = read(in, inbufp, BUFLEN); if (in_size < 0) { maybe_warn("read"); in_tot = -1; goto out; } if (in_size == 0) break; crc = crc32(crc, (const Bytef *)inbufp, (unsigned)in_size); in_tot += in_size; z.next_in = (unsigned char *)inbufp; z.avail_in = in_size; } error = deflate(&z, Z_NO_FLUSH); if (error != Z_OK && error != Z_STREAM_END) { maybe_warnx("deflate failed"); in_tot = -1; goto out; } } /* clean up */ for (;;) { size_t len; ssize_t w; error = deflate(&z, Z_FINISH); if (error != Z_OK && error != Z_STREAM_END) { maybe_warnx("deflate failed"); in_tot = -1; goto out; } len = (char *)z.next_out - outbufp; w = write(out, outbufp, len); if (w == -1 || (size_t)w != len) { maybe_warn("write"); out_tot = -1; goto out; } out_tot += len; z.next_out = (unsigned char *)outbufp; z.avail_out = BUFLEN; if (error == Z_STREAM_END) break; } if (deflateEnd(&z) != Z_OK) { maybe_warnx("deflateEnd failed"); in_tot = -1; goto out; } i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c%c%c", (int)crc & 0xff, (int)(crc >> 8) & 0xff, (int)(crc >> 16) & 0xff, (int)(crc >> 24) & 0xff, (int)in_tot & 0xff, (int)(in_tot >> 8) & 0xff, (int)(in_tot >> 16) & 0xff, (int)(in_tot >> 24) & 0xff); if (i != 8) maybe_err("snprintf"); if (write(out, outbufp, i) != i) { maybe_warn("write"); in_tot = -1; } else out_tot += i; out: if (inbufp != NULL) free(inbufp); if (outbufp != NULL) free(outbufp); if (gsizep) *gsizep = out_tot; return in_tot; } /* * uncompress input to output then close the input. return the * uncompressed size written, and put the compressed sized read * into `*gsizep'. 
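 * The member header is parsed one byte at a time through the GZSTATE_*
 * state machine, so input arriving in arbitrary chunks (e.g. from a pipe)
 * and multiple concatenated gzip members are handled without seeking; the
 * CRC-32 and length stored in each member's trailer are checked against
 * the running totals.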
*/ static off_t gz_uncompress(int in, int out, char *pre, size_t prelen, off_t *gsizep, const char *filename) { z_stream z; char *outbufp, *inbufp; off_t out_tot = -1, in_tot = 0; uint32_t out_sub_tot = 0; enum { GZSTATE_MAGIC0, GZSTATE_MAGIC1, GZSTATE_METHOD, GZSTATE_FLAGS, GZSTATE_SKIPPING, GZSTATE_EXTRA, GZSTATE_EXTRA2, GZSTATE_EXTRA3, GZSTATE_ORIGNAME, GZSTATE_COMMENT, GZSTATE_HEAD_CRC1, GZSTATE_HEAD_CRC2, GZSTATE_INIT, GZSTATE_READ, GZSTATE_CRC, GZSTATE_LEN, } state = GZSTATE_MAGIC0; int flags = 0, skip_count = 0; int error = Z_STREAM_ERROR, done_reading = 0; uLong crc = 0; ssize_t wr; int needmore = 0; #define ADVANCE() { z.next_in++; z.avail_in--; } if ((outbufp = malloc(BUFLEN)) == NULL) { maybe_err("malloc failed"); goto out2; } if ((inbufp = malloc(BUFLEN)) == NULL) { maybe_err("malloc failed"); goto out1; } memset(&z, 0, sizeof z); z.avail_in = prelen; z.next_in = (unsigned char *)pre; z.avail_out = BUFLEN; z.next_out = (unsigned char *)outbufp; z.zalloc = NULL; z.zfree = NULL; z.opaque = 0; in_tot = prelen; out_tot = 0; for (;;) { if ((z.avail_in == 0 || needmore) && done_reading == 0) { ssize_t in_size; if (z.avail_in > 0) { memmove(inbufp, z.next_in, z.avail_in); } z.next_in = (unsigned char *)inbufp; in_size = read(in, z.next_in + z.avail_in, BUFLEN - z.avail_in); if (in_size == -1) { maybe_warn("failed to read stdin"); goto stop_and_fail; } else if (in_size == 0) { done_reading = 1; } z.avail_in += in_size; needmore = 0; in_tot += in_size; } if (z.avail_in == 0) { if (done_reading && state != GZSTATE_MAGIC0) { maybe_warnx("%s: unexpected end of file", filename); goto stop_and_fail; } goto stop; } switch (state) { case GZSTATE_MAGIC0: if (*z.next_in != GZIP_MAGIC0) { if (in_tot > 0) { maybe_warnx("%s: trailing garbage " "ignored", filename); exit_value = 2; goto stop; } maybe_warnx("input not gziped (MAGIC0)"); goto stop_and_fail; } ADVANCE(); state++; out_sub_tot = 0; crc = crc32(0L, Z_NULL, 0); break; case GZSTATE_MAGIC1: if (*z.next_in != GZIP_MAGIC1 && *z.next_in != GZIP_OMAGIC1) { maybe_warnx("input not gziped (MAGIC1)"); goto stop_and_fail; } ADVANCE(); state++; break; case GZSTATE_METHOD: if (*z.next_in != Z_DEFLATED) { maybe_warnx("unknown compression method"); goto stop_and_fail; } ADVANCE(); state++; break; case GZSTATE_FLAGS: flags = *z.next_in; ADVANCE(); skip_count = 6; state++; break; case GZSTATE_SKIPPING: if (skip_count > 0) { skip_count--; ADVANCE(); } else state++; break; case GZSTATE_EXTRA: if ((flags & EXTRA_FIELD) == 0) { state = GZSTATE_ORIGNAME; break; } skip_count = *z.next_in; ADVANCE(); state++; break; case GZSTATE_EXTRA2: skip_count |= ((*z.next_in) << 8); ADVANCE(); state++; break; case GZSTATE_EXTRA3: if (skip_count > 0) { skip_count--; ADVANCE(); } else state++; break; case GZSTATE_ORIGNAME: if ((flags & ORIG_NAME) == 0) { state++; break; } if (*z.next_in == 0) state++; ADVANCE(); break; case GZSTATE_COMMENT: if ((flags & COMMENT) == 0) { state++; break; } if (*z.next_in == 0) state++; ADVANCE(); break; case GZSTATE_HEAD_CRC1: if (flags & HEAD_CRC) skip_count = 2; else skip_count = 0; state++; break; case GZSTATE_HEAD_CRC2: if (skip_count > 0) { skip_count--; ADVANCE(); } else state++; break; case GZSTATE_INIT: if (inflateInit2(&z, -MAX_WBITS) != Z_OK) { maybe_warnx("failed to inflateInit"); goto stop_and_fail; } state++; break; case GZSTATE_READ: error = inflate(&z, Z_FINISH); switch (error) { /* Z_BUF_ERROR goes with Z_FINISH... 
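			   zlib reports it whenever it cannot make progress,
			   which under Z_FINISH usually just means the input
			   buffer ran dry; as long as there is still output
			   space and end of input has not been reached we loop
			   around and read more, and only otherwise fall
			   through and treat it like Z_OK.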
*/ case Z_BUF_ERROR: if (z.avail_out > 0 && !done_reading) continue; case Z_STREAM_END: case Z_OK: break; case Z_NEED_DICT: maybe_warnx("Z_NEED_DICT error"); goto stop_and_fail; case Z_DATA_ERROR: maybe_warnx("data stream error"); goto stop_and_fail; case Z_STREAM_ERROR: maybe_warnx("internal stream error"); goto stop_and_fail; case Z_MEM_ERROR: maybe_warnx("memory allocation error"); goto stop_and_fail; default: maybe_warn("unknown error from inflate(): %d", error); } wr = BUFLEN - z.avail_out; if (wr != 0) { crc = crc32(crc, (const Bytef *)outbufp, (unsigned)wr); if ( #ifndef SMALL /* don't write anything with -t */ tflag == 0 && #endif write(out, outbufp, wr) != wr) { maybe_warn("error writing to output"); goto stop_and_fail; } out_tot += wr; out_sub_tot += wr; } if (error == Z_STREAM_END) { inflateEnd(&z); state++; } z.next_out = (unsigned char *)outbufp; z.avail_out = BUFLEN; break; case GZSTATE_CRC: { uLong origcrc; if (z.avail_in < 4) { if (!done_reading) { needmore = 1; continue; } maybe_warnx("truncated input"); goto stop_and_fail; } origcrc = ((unsigned)z.next_in[0] & 0xff) | ((unsigned)z.next_in[1] & 0xff) << 8 | ((unsigned)z.next_in[2] & 0xff) << 16 | ((unsigned)z.next_in[3] & 0xff) << 24; if (origcrc != crc) { maybe_warnx("invalid compressed" " data--crc error"); goto stop_and_fail; } } z.avail_in -= 4; z.next_in += 4; if (!z.avail_in && done_reading) { goto stop; } state++; break; case GZSTATE_LEN: { uLong origlen; if (z.avail_in < 4) { if (!done_reading) { needmore = 1; continue; } maybe_warnx("truncated input"); goto stop_and_fail; } origlen = ((unsigned)z.next_in[0] & 0xff) | ((unsigned)z.next_in[1] & 0xff) << 8 | ((unsigned)z.next_in[2] & 0xff) << 16 | ((unsigned)z.next_in[3] & 0xff) << 24; if (origlen != out_sub_tot) { maybe_warnx("invalid compressed" " data--length error"); goto stop_and_fail; } } z.avail_in -= 4; z.next_in += 4; if (error < 0) { maybe_warnx("decompression error"); goto stop_and_fail; } state = GZSTATE_MAGIC0; break; } continue; stop_and_fail: out_tot = -1; stop: break; } if (state > GZSTATE_INIT) inflateEnd(&z); free(inbufp); out1: free(outbufp); out2: if (gsizep) *gsizep = in_tot; return (out_tot); } #ifndef SMALL /* * set the owner, mode, flags & utimes using the given file descriptor. * file is only used in possible warning messages. */ static void copymodes(int fd, const struct stat *sbp, const char *file) { struct timespec times[2]; struct stat sb; /* * If we have no info on the input, give this file some * default values and return.. */ if (sbp == NULL) { mode_t mask = umask(022); (void)fchmod(fd, DEFFILEMODE & ~mask); (void)umask(mask); return; } sb = *sbp; /* if the chown fails, remove set-id bits as-per compress(1) */ if (fchown(fd, sb.st_uid, sb.st_gid) < 0) { if (errno != EPERM) maybe_warn("couldn't fchown: %s", file); sb.st_mode &= ~(S_ISUID|S_ISGID); } /* we only allow set-id and the 9 normal permission bits */ sb.st_mode &= S_ISUID | S_ISGID | S_IRWXU | S_IRWXG | S_IRWXO; if (fchmod(fd, sb.st_mode) < 0) maybe_warn("couldn't fchmod: %s", file); times[0] = sb.st_atim; times[1] = sb.st_mtim; if (futimens(fd, times) < 0) maybe_warn("couldn't futimens: %s", file); /* only try flags if they exist already */ if (sb.st_flags != 0 && fchflags(fd, sb.st_flags) < 0) maybe_warn("couldn't fchflags: %s", file); } #endif /* what sort of file is this? 
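   Only the first four bytes of the file have been read, so the decision is
   made by comparing them against each known magic number: 1f 8b (or the old
   1f 9e) for gzip, "BZh" plus a digit for bzip2, 1f 9d for compress, 1f 1e
   for pack, and the first four bytes of the xz magic.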
*/ static enum filetype file_gettype(u_char *buf) { if (buf[0] == GZIP_MAGIC0 && (buf[1] == GZIP_MAGIC1 || buf[1] == GZIP_OMAGIC1)) return FT_GZIP; else #ifndef NO_BZIP2_SUPPORT if (memcmp(buf, BZIP2_MAGIC, 3) == 0 && buf[3] >= '0' && buf[3] <= '9') return FT_BZIP2; else #endif #ifndef NO_COMPRESS_SUPPORT if (memcmp(buf, Z_MAGIC, 2) == 0) return FT_Z; else #endif #ifndef NO_PACK_SUPPORT if (memcmp(buf, PACK_MAGIC, 2) == 0) return FT_PACK; else #endif #ifndef NO_XZ_SUPPORT if (memcmp(buf, XZ_MAGIC, 4) == 0) /* XXX: We only have 4 bytes */ return FT_XZ; else #endif return FT_UNKNOWN; } #ifndef SMALL /* check the outfile is OK. */ static int check_outfile(const char *outfile) { struct stat sb; int ok = 1; if (lflag == 0 && stat(outfile, &sb) == 0) { if (fflag) unlink(outfile); else if (isatty(STDIN_FILENO)) { char ans[10] = { 'n', '\0' }; /* default */ fprintf(stderr, "%s already exists -- do you wish to " "overwrite (y or n)? " , outfile); (void)fgets(ans, sizeof(ans) - 1, stdin); if (ans[0] != 'y' && ans[0] != 'Y') { fprintf(stderr, "\tnot overwriting\n"); ok = 0; } else unlink(outfile); } else { maybe_warnx("%s already exists -- skipping", outfile); ok = 0; } } return ok; } static void unlink_input(const char *file, const struct stat *sb) { struct stat nsb; if (kflag) return; if (stat(file, &nsb) != 0) /* Must be gone already */ return; if (nsb.st_dev != sb->st_dev || nsb.st_ino != sb->st_ino) /* Definitely a different file */ return; unlink(file); } static void sigint_handler(int signo __unused) { if (remove_file != NULL) unlink(remove_file); _exit(2); } #endif static const suffixes_t * check_suffix(char *file, int xlate) { const suffixes_t *s; int len = strlen(file); char *sp; for (s = suffixes; s != suffixes + NUM_SUFFIXES; s++) { /* if it doesn't fit in "a.suf", don't bother */ if (s->ziplen >= len) continue; sp = file + len - s->ziplen; if (strcmp(s->zipped, sp) != 0) continue; if (xlate) strcpy(sp, s->normal); return s; } return NULL; } /* * compress the given file: create a corresponding .gz file and remove the * original. */ static off_t file_compress(char *file, char *outfile, size_t outsize) { int in; int out; off_t size, insize; #ifndef SMALL struct stat isb, osb; const suffixes_t *suff; #endif in = open(file, O_RDONLY); if (in == -1) { maybe_warn("can't open %s", file); return (-1); } #ifndef SMALL if (fstat(in, &isb) != 0) { maybe_warn("couldn't stat: %s", file); close(in); return (-1); } #endif if (cflag == 0) { #ifndef SMALL if (isb.st_nlink > 1 && fflag == 0) { maybe_warnx("%s has %d other link%s -- skipping", file, isb.st_nlink - 1, (isb.st_nlink - 1) == 1 ? "" : "s"); close(in); return (-1); } if (fflag == 0 && (suff = check_suffix(file, 0)) && suff->zipped[0] != 0) { maybe_warnx("%s already has %s suffix -- unchanged", file, suff->zipped); close(in); return (-1); } #endif /* Add (usually) .gz to filename */ if ((size_t)snprintf(outfile, outsize, "%s%s", file, suffixes[0].zipped) >= outsize) memcpy(outfile + outsize - suffixes[0].ziplen - 1, suffixes[0].zipped, suffixes[0].ziplen + 1); #ifndef SMALL if (check_outfile(outfile) == 0) { close(in); return (-1); } #endif } if (cflag == 0) { out = open(outfile, O_WRONLY | O_CREAT | O_EXCL, 0600); if (out == -1) { maybe_warn("could not create output: %s", outfile); fclose(stdin); return (-1); } #ifndef SMALL remove_file = outfile; #endif } else out = STDOUT_FILENO; insize = gz_compress(in, out, &size, basename(file), (uint32_t)isb.st_mtime); (void)close(in); /* * If there was an error, insize will be -1. 
* If we compressed to stdout, just return the size. * Otherwise stat the file and check it is the correct size. * We only blow away the file if we can stat the output and it * has the expected size. */ if (cflag != 0) return (insize == -1 ? -1 : size); #ifndef SMALL if (fstat(out, &osb) != 0) { maybe_warn("couldn't stat: %s", outfile); goto bad_outfile; } if (osb.st_size != size) { maybe_warnx("output file: %s wrong size (%ju != %ju), deleting", outfile, (uintmax_t)osb.st_size, (uintmax_t)size); goto bad_outfile; } copymodes(out, &isb, outfile); remove_file = NULL; #endif if (close(out) == -1) maybe_warn("couldn't close output"); /* output is good, ok to delete input */ unlink_input(file, &isb); return (size); #ifndef SMALL bad_outfile: if (close(out) == -1) maybe_warn("couldn't close output"); maybe_warnx("leaving original %s", file); unlink(outfile); return (size); #endif } /* uncompress the given file and remove the original */ static off_t file_uncompress(char *file, char *outfile, size_t outsize) { struct stat isb, osb; off_t size; ssize_t rbytes; unsigned char header1[4]; enum filetype method; int fd, ofd, zfd = -1; #ifndef SMALL ssize_t rv; time_t timestamp = 0; char name[PATH_MAX + 1]; #endif /* gather the old name info */ fd = open(file, O_RDONLY); if (fd < 0) { maybe_warn("can't open %s", file); goto lose; } strlcpy(outfile, file, outsize); if (check_suffix(outfile, 1) == NULL && !(cflag || lflag)) { maybe_warnx("%s: unknown suffix -- ignored", file); goto lose; } rbytes = read(fd, header1, sizeof header1); if (rbytes != sizeof header1) { /* we don't want to fail here. */ #ifndef SMALL if (fflag) goto lose; #endif if (rbytes == -1) maybe_warn("can't read %s", file); else goto unexpected_EOF; goto lose; } method = file_gettype(header1); #ifndef SMALL if (fflag == 0 && method == FT_UNKNOWN) { maybe_warnx("%s: not in gzip format", file); goto lose; } #endif #ifndef SMALL if (method == FT_GZIP && Nflag) { unsigned char ts[4]; /* timestamp */ rv = pread(fd, ts, sizeof ts, GZIP_TIMESTAMP); if (rv >= 0 && rv < (ssize_t)(sizeof ts)) goto unexpected_EOF; if (rv == -1) { if (!fflag) maybe_warn("can't read %s", file); goto lose; } timestamp = ts[3] << 24 | ts[2] << 16 | ts[1] << 8 | ts[0]; if (header1[3] & ORIG_NAME) { rbytes = pread(fd, name, sizeof(name) - 1, GZIP_ORIGNAME); if (rbytes < 0) { maybe_warn("can't read %s", file); goto lose; } if (name[0] != '\0') { char *dp, *nf; /* Make sure that name is NUL-terminated */ name[rbytes] = '\0'; /* strip saved directory name */ nf = strrchr(name, '/'); if (nf == NULL) nf = name; else nf++; /* preserve original directory name */ dp = strrchr(file, '/'); if (dp == NULL) dp = file; else dp++; snprintf(outfile, outsize, "%.*s%.*s", (int) (dp - file), file, (int) rbytes, nf); } } } #endif lseek(fd, 0, SEEK_SET); if (cflag == 0 || lflag) { if (fstat(fd, &isb) != 0) goto lose; #ifndef SMALL if (isb.st_nlink > 1 && lflag == 0 && fflag == 0) { maybe_warnx("%s has %d other links -- skipping", file, isb.st_nlink - 1); goto lose; } if (nflag == 0 && timestamp) isb.st_mtime = timestamp; if (check_outfile(outfile) == 0) goto lose; #endif } if (cflag == 0 && lflag == 0) { zfd = open(outfile, O_WRONLY|O_CREAT|O_EXCL, 0600); if (zfd == STDOUT_FILENO) { /* We won't close STDOUT_FILENO later... 
*/ zfd = dup(zfd); close(STDOUT_FILENO); } if (zfd == -1) { maybe_warn("can't open %s", outfile); goto lose; } #ifndef SMALL remove_file = outfile; #endif } else zfd = STDOUT_FILENO; switch (method) { #ifndef NO_BZIP2_SUPPORT case FT_BZIP2: /* XXX */ if (lflag) { maybe_warnx("no -l with bzip2 files"); goto lose; } size = unbzip2(fd, zfd, NULL, 0, NULL); break; #endif #ifndef NO_COMPRESS_SUPPORT case FT_Z: { FILE *in, *out; /* XXX */ if (lflag) { maybe_warnx("no -l with Lempel-Ziv files"); goto lose; } if ((in = zdopen(fd)) == NULL) { maybe_warn("zdopen for read: %s", file); goto lose; } out = fdopen(dup(zfd), "w"); if (out == NULL) { maybe_warn("fdopen for write: %s", outfile); fclose(in); goto lose; } size = zuncompress(in, out, NULL, 0, NULL); /* need to fclose() if ferror() is true... */ if (ferror(in) | fclose(in)) { maybe_warn("failed infile fclose"); unlink(outfile); (void)fclose(out); } if (fclose(out) != 0) { maybe_warn("failed outfile fclose"); unlink(outfile); goto lose; } break; } #endif #ifndef NO_PACK_SUPPORT case FT_PACK: if (lflag) { maybe_warnx("no -l with packed files"); goto lose; } size = unpack(fd, zfd, NULL, 0, NULL); break; #endif #ifndef NO_XZ_SUPPORT case FT_XZ: if (lflag) { maybe_warnx("no -l with xz files"); goto lose; } size = unxz(fd, zfd, NULL, 0, NULL); break; #endif #ifndef SMALL case FT_UNKNOWN: if (lflag) { maybe_warnx("no -l for unknown filetypes"); goto lose; } size = cat_fd(NULL, 0, NULL, fd); break; #endif default: if (lflag) { print_list(fd, isb.st_size, outfile, isb.st_mtime); close(fd); return -1; /* XXX */ } size = gz_uncompress(fd, zfd, NULL, 0, NULL, file); break; } if (close(fd) != 0) maybe_warn("couldn't close input"); if (zfd != STDOUT_FILENO && close(zfd) != 0) maybe_warn("couldn't close output"); if (size == -1) { if (cflag == 0) unlink(outfile); maybe_warnx("%s: uncompress failed", file); return -1; } /* if testing, or we uncompressed to stdout, this is all we need */ #ifndef SMALL if (tflag) return size; #endif /* if we are uncompressing to stdin, don't remove the file. */ if (cflag) return size; /* * if we create a file... */ /* * if we can't stat the file don't remove the file. 
 */
	ofd = open(outfile, O_RDWR, 0);
	if (ofd == -1) {
		maybe_warn("couldn't open (leaving original): %s", outfile);
		return -1;
	}
	if (fstat(ofd, &osb) != 0) {
		maybe_warn("couldn't stat (leaving original): %s", outfile);
		close(ofd);
		return -1;
	}
	if (osb.st_size != size) {
		maybe_warnx("stat gave different size: %ju != %ju (leaving original)",
		    (uintmax_t)size, (uintmax_t)osb.st_size);
		close(ofd);
		unlink(outfile);
		return -1;
	}
#ifndef SMALL
	copymodes(ofd, &isb, outfile);
	remove_file = NULL;
#endif
	close(ofd);
	unlink_input(file, &isb);
	return size;

unexpected_EOF:
	maybe_warnx("%s: unexpected end of file", file);
lose:
	/* release whichever descriptors are still open */
	if (fd != -1)
		close(fd);
	if (zfd != -1 && zfd != STDOUT_FILENO)
		close(zfd);
	return -1;
}

#ifndef SMALL
static off_t
cat_fd(unsigned char * prepend, size_t count, off_t *gsizep, int fd)
{
	char buf[BUFLEN];
	off_t in_tot;
	ssize_t w;

	in_tot = count;
	w = write(STDOUT_FILENO, prepend, count);
	if (w == -1 || (size_t)w != count) {
		maybe_warn("write to stdout");
		return -1;
	}
	for (;;) {
		ssize_t rv;

		rv = read(fd, buf, sizeof buf);
		if (rv == 0)
			break;
		if (rv < 0) {
			maybe_warn("read from fd %d", fd);
			break;
		}
		if (write(STDOUT_FILENO, buf, rv) != rv) {
			maybe_warn("write to stdout");
			break;
		}
		in_tot += rv;
	}

	if (gsizep)
		*gsizep = in_tot;
	return (in_tot);
}
#endif

static void
handle_stdin(void)
{
	unsigned char header1[4];
	off_t usize, gsize;
	enum filetype method;
	ssize_t bytes_read;
#ifndef NO_COMPRESS_SUPPORT
	FILE *in;
#endif

#ifndef SMALL
	if (fflag == 0 && lflag == 0 && isatty(STDIN_FILENO)) {
		maybe_warnx("standard input is a terminal -- ignoring");
		return;
	}
#endif

	if (lflag) {
		struct stat isb;

		/* XXX could read the whole file, etc. */
		if (fstat(STDIN_FILENO, &isb) < 0) {
			maybe_warn("fstat");
			return;
		}
		print_list(STDIN_FILENO, isb.st_size, "stdout", isb.st_mtime);
		return;
	}

	bytes_read = read_retry(STDIN_FILENO, header1, sizeof header1);
	if (bytes_read == -1) {
		maybe_warn("can't read stdin");
		return;
	} else if (bytes_read != sizeof(header1)) {
		maybe_warnx("(stdin): unexpected end of file");
		return;
	}

	method = file_gettype(header1);
	switch (method) {
	default:
#ifndef SMALL
		if (fflag == 0) {
			maybe_warnx("unknown compression format");
			return;
		}
		usize = cat_fd(header1, sizeof header1, &gsize, STDIN_FILENO);
		break;
#endif
	case FT_GZIP:
		usize = gz_uncompress(STDIN_FILENO, STDOUT_FILENO,
		    (char *)header1, sizeof header1, &gsize, "(stdin)");
		break;
#ifndef NO_BZIP2_SUPPORT
	case FT_BZIP2:
		usize = unbzip2(STDIN_FILENO, STDOUT_FILENO,
		    (char *)header1, sizeof header1, &gsize);
		break;
#endif
#ifndef NO_COMPRESS_SUPPORT
	case FT_Z:
		if ((in = zdopen(STDIN_FILENO)) == NULL) {
			maybe_warnx("zopen of stdin");
			return;
		}
		usize = zuncompress(in, stdout, (char *)header1,
		    sizeof header1, &gsize);
		fclose(in);
		break;
#endif
#ifndef NO_PACK_SUPPORT
	case FT_PACK:
		usize = unpack(STDIN_FILENO, STDOUT_FILENO,
		    (char *)header1, sizeof header1, &gsize);
		break;
#endif
#ifndef NO_XZ_SUPPORT
	case FT_XZ:
		usize = unxz(STDIN_FILENO, STDOUT_FILENO,
		    (char *)header1, sizeof header1, &gsize);
		break;
#endif
	}

#ifndef SMALL
	if (vflag && !tflag && usize != -1 && gsize != -1)
		print_verbage(NULL, NULL, usize, gsize);
	if (vflag && tflag)
		print_test("(stdin)", usize != -1);
#endif
}

static void
handle_stdout(void)
{
	off_t gsize, usize;
	struct stat sb;
	time_t systime;
	uint32_t mtime;
	int ret;

#ifndef SMALL
	if (fflag == 0 && isatty(STDOUT_FILENO)) {
		maybe_warnx("standard output is a terminal -- ignoring");
		return;
	}
#endif
	/* If stdin is a file use its mtime, otherwise use current time */
	ret = fstat(STDIN_FILENO, &sb);

#ifndef SMALL
	if (ret < 0) {
maybe_warn("Can't stat stdin"); return; } #endif if (S_ISREG(sb.st_mode)) mtime = (uint32_t)sb.st_mtime; else { systime = time(NULL); #ifndef SMALL if (systime == -1) { maybe_warn("time"); return; } #endif mtime = (uint32_t)systime; } usize = gz_compress(STDIN_FILENO, STDOUT_FILENO, &gsize, "", mtime); #ifndef SMALL if (vflag && !tflag && usize != -1 && gsize != -1) print_verbage(NULL, NULL, usize, gsize); #endif } /* do what is asked for, for the path name */ static void handle_pathname(char *path) { char *opath = path, *s = NULL; ssize_t len; int slen; struct stat sb; /* check for stdout/stdin */ if (path[0] == '-' && path[1] == '\0') { if (dflag) handle_stdin(); else handle_stdout(); return; } retry: if (stat(path, &sb) != 0 || (fflag == 0 && cflag == 0 && lstat(path, &sb) != 0)) { /* lets try .gz if we're decompressing */ if (dflag && s == NULL && errno == ENOENT) { len = strlen(path); slen = suffixes[0].ziplen; s = malloc(len + slen + 1); if (s == NULL) maybe_err("malloc"); memcpy(s, path, len); memcpy(s + len, suffixes[0].zipped, slen + 1); path = s; goto retry; } maybe_warn("can't stat: %s", opath); goto out; } if (S_ISDIR(sb.st_mode)) { #ifndef SMALL if (rflag) handle_dir(path); else #endif maybe_warnx("%s is a directory", path); goto out; } if (S_ISREG(sb.st_mode)) handle_file(path, &sb); else maybe_warnx("%s is not a regular file", path); out: if (s) free(s); } /* compress/decompress a file */ static void handle_file(char *file, struct stat *sbp) { off_t usize, gsize; char outfile[PATH_MAX]; infile = file; if (dflag) { usize = file_uncompress(file, outfile, sizeof(outfile)); #ifndef SMALL if (vflag && tflag) print_test(file, usize != -1); #endif if (usize == -1) return; gsize = sbp->st_size; } else { gsize = file_compress(file, outfile, sizeof(outfile)); if (gsize == -1) return; usize = sbp->st_size; } #ifndef SMALL if (vflag && !tflag) print_verbage(file, (cflag) ? NULL : outfile, usize, gsize); #endif } #ifndef SMALL /* this is used with -r to recursively descend directories */ static void handle_dir(char *dir) { char *path_argv[2]; FTS *fts; FTSENT *entry; path_argv[0] = dir; path_argv[1] = 0; fts = fts_open(path_argv, FTS_PHYSICAL | FTS_NOCHDIR, NULL); if (fts == NULL) { warn("couldn't fts_open %s", dir); return; } while ((entry = fts_read(fts))) { switch(entry->fts_info) { case FTS_D: case FTS_DP: continue; case FTS_DNR: case FTS_ERR: case FTS_NS: maybe_warn("%s", entry->fts_path); continue; case FTS_F: handle_file(entry->fts_path, entry->fts_statp); } } (void)fts_close(fts); } #endif /* print a ratio - size reduction as a fraction of uncompressed size */ static void print_ratio(off_t in, off_t out, FILE *where) { int percent10; /* 10 * percent */ off_t diff; char buff[8]; int len; diff = in - out/2; if (diff <= 0) /* * Output is more than double size of input! print -99.9% * Quite possibly we've failed to get the original size. */ percent10 = -999; else { /* * We only need 12 bits of result from the final division, * so reduce the values until a 32bit division will suffice. */ while (in > 0x100000) { diff >>= 1; in >>= 1; } if (in != 0) percent10 = ((u_int)diff * 2000) / (u_int)in - 1000; else percent10 = 0; } len = snprintf(buff, sizeof buff, "%2.2d.", percent10); /* Move the '.' to before the last digit */ buff[len - 1] = buff[len - 2]; buff[len - 2] = '.'; fprintf(where, "%5s%%", buff); } #ifndef SMALL /* print compression statistics, and the new name (if there is one!) 
*/ static void print_verbage(const char *file, const char *nfile, off_t usize, off_t gsize) { if (file) fprintf(stderr, "%s:%s ", file, strlen(file) < 7 ? "\t\t" : "\t"); print_ratio(usize, gsize, stderr); if (nfile) fprintf(stderr, " -- replaced with %s", nfile); fprintf(stderr, "\n"); fflush(stderr); } /* print test results */ static void print_test(const char *file, int ok) { if (exit_value == 0 && ok == 0) exit_value = 1; fprintf(stderr, "%s:%s %s\n", file, strlen(file) < 7 ? "\t\t" : "\t", ok ? "OK" : "NOT OK"); fflush(stderr); } #endif /* print a file's info ala --list */ /* eg: compressed uncompressed ratio uncompressed_name 354841 1679360 78.8% /usr/pkgsrc/distfiles/libglade-2.0.1.tar */ static void print_list(int fd, off_t out, const char *outfile, time_t ts) { static int first = 1; #ifndef SMALL static off_t in_tot, out_tot; uint32_t crc = 0; #endif off_t in = 0, rv; if (first) { #ifndef SMALL if (vflag) printf("method crc date time "); #endif if (qflag == 0) printf(" compressed uncompressed " "ratio uncompressed_name\n"); } first = 0; /* print totals? */ #ifndef SMALL if (fd == -1) { in = in_tot; out = out_tot; } else #endif { /* read the last 4 bytes - this is the uncompressed size */ rv = lseek(fd, (off_t)(-8), SEEK_END); if (rv != -1) { unsigned char buf[8]; uint32_t usize; rv = read(fd, (char *)buf, sizeof(buf)); if (rv == -1) maybe_warn("read of uncompressed size"); else if (rv != sizeof(buf)) maybe_warnx("read of uncompressed size"); else { usize = buf[4] | buf[5] << 8 | buf[6] << 16 | buf[7] << 24; in = (off_t)usize; #ifndef SMALL crc = buf[0] | buf[1] << 8 | buf[2] << 16 | buf[3] << 24; #endif } } } #ifndef SMALL if (vflag && fd == -1) printf(" "); else if (vflag) { char *date = ctime(&ts); /* skip the day, 1/100th second, and year */ date += 4; date[12] = 0; printf("%5s %08x %11s ", "defla"/*XXX*/, crc, date); } in_tot += in; out_tot += out; #else (void)&ts; /* XXX */ #endif printf("%12llu %12llu ", (unsigned long long)out, (unsigned long long)in); print_ratio(in, out, stdout); printf(" %s\n", outfile); } /* display the usage of NetBSD gzip */ static void usage(void) { fprintf(stderr, "%s\n", gzip_version); fprintf(stderr, #ifdef SMALL "usage: %s [-" OPT_LIST "] [ [ ...]]\n", #else "usage: %s [-123456789acdfhklLNnqrtVv] [-S .suffix] [ [ ...]]\n" " -1 --fast fastest (worst) compression\n" " -2 .. 
-8 set compression level\n" " -9 --best best (slowest) compression\n" " -c --stdout write to stdout, keep original files\n" " --to-stdout\n" " -d --decompress uncompress files\n" " --uncompress\n" " -f --force force overwriting & compress links\n" " -h --help display this help\n" " -k --keep don't delete input files during operation\n" " -l --list list compressed file contents\n" " -N --name save or restore original file name and time stamp\n" " -n --no-name don't save original file name or time stamp\n" " -q --quiet output no warnings\n" " -r --recursive recursively compress files in directories\n" " -S .suf use suffix .suf instead of .gz\n" " --suffix .suf\n" " -t --test test compressed file\n" " -V --version display program version\n" " -v --verbose print extra statistics\n", #endif getprogname()); exit(0); } #ifndef SMALL /* display the license information of FreeBSD gzip */ static void display_license(void) { fprintf(stderr, "%s (based on NetBSD gzip 20150113)\n", gzip_version); fprintf(stderr, "%s\n", gzip_copyright); exit(0); } #endif /* display the version of NetBSD gzip */ static void display_version(void) { fprintf(stderr, "%s\n", gzip_version); exit(0); } #ifndef NO_BZIP2_SUPPORT #include "unbzip2.c" #endif #ifndef NO_COMPRESS_SUPPORT #include "zuncompress.c" #endif #ifndef NO_PACK_SUPPORT #include "unpack.c" #endif #ifndef NO_XZ_SUPPORT #include "unxz.c" #endif static ssize_t read_retry(int fd, void *buf, size_t sz) { char *cp = buf; size_t left = MIN(sz, (size_t) SSIZE_MAX); while (left > 0) { ssize_t ret; ret = read(fd, cp, left); if (ret == -1) { return ret; } else if (ret == 0) { break; /* EOF */ } cp += ret; left -= ret; } return sz - left; } Index: user/alc/PQ_LAUNDRY/usr.bin/indent/args.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/indent/args.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/indent/args.c (revision 303642) @@ -1,327 +1,325 @@ -/* +/*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)args.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); /* * Argument scanning and profile reading code. Default parameters are set * here as well. */ #include #include #include #include #include #include #include "indent_globs.h" #include "indent.h" /* profile types */ #define PRO_SPECIAL 1 /* special case */ #define PRO_BOOL 2 /* boolean */ #define PRO_INT 3 /* integer */ #define PRO_FONT 4 /* troff font */ /* profile specials for booleans */ #define ON 1 /* turn it on */ #define OFF 0 /* turn it off */ /* profile specials for specials */ #define IGN 1 /* ignore it */ #define CLI 2 /* case label indent (float) */ #define STDIN 3 /* use stdin */ #define KEY 4 /* type (keyword) */ static void scan_profile(FILE *); const char *option_source = "?"; /* * N.B.: because of the way the table here is scanned, options whose names are * substrings of other options must occur later; that is, with -lp vs -l, -lp * must be first. Also, while (most) booleans occur more than once, the last * default value is the one actually assigned. */ struct pro { const char *p_name; /* name, e.g. -bl, -cli */ int p_type; /* type (int, bool, special) */ int p_default; /* the default value (if int) */ int p_special; /* depends on type */ int *p_obj; /* the associated variable */ } pro[] = { {"T", PRO_SPECIAL, 0, KEY, 0}, {"bacc", PRO_BOOL, false, ON, &blanklines_around_conditional_compilation}, {"badp", PRO_BOOL, false, ON, &blanklines_after_declarations_at_proctop}, {"bad", PRO_BOOL, false, ON, &blanklines_after_declarations}, {"bap", PRO_BOOL, false, ON, &blanklines_after_procs}, {"bbb", PRO_BOOL, false, ON, &blanklines_before_blockcomments}, {"bc", PRO_BOOL, true, OFF, &ps.leave_comma}, {"bl", PRO_BOOL, true, OFF, &btype_2}, {"br", PRO_BOOL, true, ON, &btype_2}, {"bs", PRO_BOOL, false, ON, &Bill_Shannon}, {"cdb", PRO_BOOL, true, ON, &comment_delimiter_on_blankline}, {"cd", PRO_INT, 0, 0, &ps.decl_com_ind}, {"ce", PRO_BOOL, true, ON, &cuddle_else}, {"ci", PRO_INT, 0, 0, &continuation_indent}, {"cli", PRO_SPECIAL, 0, CLI, 0}, {"c", PRO_INT, 33, 0, &ps.com_ind}, {"di", PRO_INT, 16, 0, &ps.decl_indent}, {"dj", PRO_BOOL, false, ON, &ps.ljust_decl}, {"d", PRO_INT, 0, 0, &ps.unindent_displace}, {"eei", PRO_BOOL, false, ON, &extra_expression_indent}, {"ei", PRO_BOOL, true, ON, &ps.else_if}, {"fbc", PRO_FONT, 0, 0, (int *) &blkcomf}, {"fbs", PRO_BOOL, true, ON, &function_brace_split}, {"fbx", PRO_FONT, 0, 0, (int *) &boxcomf}, {"fb", PRO_FONT, 0, 0, (int *) &bodyf}, {"fc1", PRO_BOOL, true, ON, &format_col1_comments}, {"fcb", PRO_BOOL, true, ON, &format_block_comments}, {"fc", PRO_FONT, 0, 0, (int *) &scomf}, {"fk", PRO_FONT, 0, 0, (int *) &keywordf}, {"fs", PRO_FONT, 0, 0, (int *) &stringf}, {"ip", PRO_BOOL, true, ON, &ps.indent_parameters}, {"i", PRO_INT, 8, 0, &ps.ind_size}, {"lc", PRO_INT, 0, 0, &block_comment_max_col}, {"ldi", PRO_INT, -1, 0, &ps.local_decl_indent}, {"lp", PRO_BOOL, true, ON, &lineup_to_parens}, {"l", 
PRO_INT, 78, 0, &max_col}, {"nbacc", PRO_BOOL, false, OFF, &blanklines_around_conditional_compilation}, {"nbadp", PRO_BOOL, false, OFF, &blanklines_after_declarations_at_proctop}, {"nbad", PRO_BOOL, false, OFF, &blanklines_after_declarations}, {"nbap", PRO_BOOL, false, OFF, &blanklines_after_procs}, {"nbbb", PRO_BOOL, false, OFF, &blanklines_before_blockcomments}, {"nbc", PRO_BOOL, true, ON, &ps.leave_comma}, {"nbs", PRO_BOOL, false, OFF, &Bill_Shannon}, {"ncdb", PRO_BOOL, true, OFF, &comment_delimiter_on_blankline}, {"nce", PRO_BOOL, true, OFF, &cuddle_else}, {"ndj", PRO_BOOL, false, OFF, &ps.ljust_decl}, {"neei", PRO_BOOL, false, OFF, &extra_expression_indent}, {"nei", PRO_BOOL, true, OFF, &ps.else_if}, {"nfbs", PRO_BOOL, true, OFF, &function_brace_split}, {"nfc1", PRO_BOOL, true, OFF, &format_col1_comments}, {"nfcb", PRO_BOOL, true, OFF, &format_block_comments}, {"nip", PRO_BOOL, true, OFF, &ps.indent_parameters}, {"nlp", PRO_BOOL, true, OFF, &lineup_to_parens}, {"npcs", PRO_BOOL, false, OFF, &proc_calls_space}, {"npro", PRO_SPECIAL, 0, IGN, 0}, {"npsl", PRO_BOOL, true, OFF, &procnames_start_line}, {"nps", PRO_BOOL, false, OFF, &pointer_as_binop}, {"nsc", PRO_BOOL, true, OFF, &star_comment_cont}, {"nsob", PRO_BOOL, false, OFF, &swallow_optional_blanklines}, {"nut", PRO_BOOL, true, OFF, &use_tabs}, {"nv", PRO_BOOL, false, OFF, &verbose}, {"pcs", PRO_BOOL, false, ON, &proc_calls_space}, {"psl", PRO_BOOL, true, ON, &procnames_start_line}, {"ps", PRO_BOOL, false, ON, &pointer_as_binop}, {"sc", PRO_BOOL, true, ON, &star_comment_cont}, {"sob", PRO_BOOL, false, ON, &swallow_optional_blanklines}, {"st", PRO_SPECIAL, 0, STDIN, 0}, {"ta", PRO_BOOL, false, ON, &auto_typedefs}, {"troff", PRO_BOOL, false, ON, &troff}, {"ut", PRO_BOOL, true, ON, &use_tabs}, {"v", PRO_BOOL, false, ON, &verbose}, /* whew! */ {0, 0, 0, 0, 0} }; /* * set_profile reads $HOME/.indent.pro and ./.indent.pro and handles arguments * given in these files. */ void set_profile(void) { FILE *f; char fname[PATH_MAX]; static char prof[] = ".indent.pro"; snprintf(fname, sizeof(fname), "%s/%s", getenv("HOME"), prof); if ((f = fopen(option_source = fname, "r")) != NULL) { scan_profile(f); (void) fclose(f); } if ((f = fopen(option_source = prof, "r")) != NULL) { scan_profile(f); (void) fclose(f); } option_source = "Command line"; } static void scan_profile(FILE *f) { int comment, i; char *p; char buf[BUFSIZ]; while (1) { p = buf; comment = 0; while ((i = getc(f)) != EOF) { if (i == '*' && !comment && p > buf && p[-1] == '/') { comment = p - buf; *p++ = i; } else if (i == '/' && comment && p > buf && p[-1] == '*') { p = buf + comment - 1; comment = 0; } else if (isspace(i)) { if (p > buf && !comment) break; } else { *p++ = i; } } if (p != buf) { *p++ = 0; if (verbose) printf("profile: %s\n", buf); set_option(buf); } else if (i == EOF) return; } } -const char *param_start; - -static int +static const char * eqin(const char *s1, const char *s2) { while (*s1) { if (*s1++ != *s2++) - return (false); + return (NULL); } - param_start = s2; - return (true); + return (s2); } /* * Set the defaults. 
*/ void set_defaults(void) { struct pro *p; /* * Because ps.case_indent is a float, we can't initialize it from the * table: */ ps.case_indent = 0.0; /* -cli0.0 */ for (p = pro; p->p_name; p++) if (p->p_type != PRO_SPECIAL && p->p_type != PRO_FONT) *p->p_obj = p->p_default; } void set_option(char *arg) { - struct pro *p; + struct pro *p; + const char *param_start; arg++; /* ignore leading "-" */ for (p = pro; p->p_name; p++) - if (*p->p_name == *arg && eqin(p->p_name, arg)) + if (*p->p_name == *arg && (param_start = eqin(p->p_name, arg)) != NULL) goto found; errx(1, "%s: unknown parameter \"%s\"", option_source, arg - 1); found: switch (p->p_type) { case PRO_SPECIAL: switch (p->p_special) { case IGN: break; case CLI: if (*param_start == 0) goto need_param; ps.case_indent = atof(param_start); break; case STDIN: if (input == NULL) input = stdin; if (output == NULL) output = stdout; break; case KEY: if (*param_start == 0) goto need_param; { char *str = strdup(param_start); if (str == NULL) err(1, NULL); addkey(str, 4); } break; default: errx(1, "set_option: internal error: p_special %d", p->p_special); } break; case PRO_BOOL: if (p->p_special == OFF) *p->p_obj = false; else *p->p_obj = true; break; case PRO_INT: if (!isdigit(*param_start)) { need_param: errx(1, "%s: ``%s'' requires a parameter", option_source, arg - 1); } *p->p_obj = atoi(param_start); break; case PRO_FONT: parsefont((struct fstate *) p->p_obj, param_start); break; default: errx(1, "set_option: internal error: p_type %d", p->p_type); } } Index: user/alc/PQ_LAUNDRY/usr.bin/indent/indent.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/indent/indent.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/indent/indent.c (revision 303642) @@ -1,1240 +1,1247 @@ -/* +/*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1976 Board of Trustees of the University of Illinois. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1985 Sun Microsystems, Inc.\n\ @(#) Copyright (c) 1976 Board of Trustees of the University of Illinois.\n\ @(#) Copyright (c) 1980, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #if 0 #ifndef lint static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" static void bakcopy(void); +static void indent_declaration(int, int); const char *in_name = "Standard Input"; /* will always point to name of input * file */ const char *out_name = "Standard Output"; /* will always point to name * of output file */ char bakfile[MAXPATHLEN] = ""; int main(int argc, char **argv) { int dec_ind; /* current indentation for declarations */ int di_stack[20]; /* a stack of structure indentation levels */ int flushed_nl; /* used when buffering up comments to remember * that a newline was passed over */ int force_nl; /* when true, code must be broken */ int hd_type = 0; /* used to store type of stmt for if (...), * for (...), etc */ int i; /* local loop counter */ int scase; /* set to true when we see a case, so we will * know what to do with the following colon */ int sp_sw; /* when true, we are in the expression of * if(...), while(...), etc. */ int squest; /* when this is positive, we have seen a ? 
* without the matching : in a ?: * construct */ const char *t_ptr; /* used for copying tokens */ int tabs_to_var; /* true if using tabs to indent to var name */ int type_code; /* the type of token, returned by lexi */ int last_else = 0; /* true iff last keyword was an else */ /*-----------------------------------------------*\ | INITIALIZATION | \*-----------------------------------------------*/ found_err = 0; ps.p_stack[0] = stmt; /* this is the parser's stack */ ps.last_nl = true; /* this is true if the last thing scanned was * a newline */ ps.last_token = semicolon; combuf = (char *) malloc(bufsize); if (combuf == NULL) err(1, NULL); labbuf = (char *) malloc(bufsize); if (labbuf == NULL) err(1, NULL); codebuf = (char *) malloc(bufsize); if (codebuf == NULL) err(1, NULL); tokenbuf = (char *) malloc(bufsize); if (tokenbuf == NULL) err(1, NULL); l_com = combuf + bufsize - 5; l_lab = labbuf + bufsize - 5; l_code = codebuf + bufsize - 5; l_token = tokenbuf + bufsize - 5; combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and * comment buffers */ combuf[1] = codebuf[1] = labbuf[1] = '\0'; ps.else_if = 1; /* Default else-if special processing to on */ s_lab = e_lab = labbuf + 1; s_code = e_code = codebuf + 1; s_com = e_com = combuf + 1; s_token = e_token = tokenbuf + 1; in_buffer = (char *) malloc(10); if (in_buffer == NULL) err(1, NULL); in_buffer_limit = in_buffer + 8; buf_ptr = buf_end = in_buffer; line_no = 1; had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; sp_sw = force_nl = false; ps.in_or_st = false; ps.bl_line = true; dec_ind = 0; di_stack[ps.dec_nest = 0] = 0; ps.want_blank = ps.in_stmt = ps.ind_stmt = false; scase = ps.pcase = false; squest = 0; sc_end = NULL; bp_save = NULL; be_save = NULL; output = NULL; tabs_to_var = 0; /*--------------------------------------------------*\ | COMMAND LINE SCAN | \*--------------------------------------------------*/ #ifdef undef max_col = 78; /* -l78 */ lineup_to_parens = 1; /* -lp */ ps.ljust_decl = 0; /* -ndj */ ps.com_ind = 33; /* -c33 */ star_comment_cont = 1; /* -sc */ ps.ind_size = 8; /* -i8 */ verbose = 0; ps.decl_indent = 16; /* -di16 */ ps.local_decl_indent = -1; /* if this is not set to some nonnegative value * by an arg, we will set this equal to * ps.decl_ind */ ps.indent_parameters = 1; /* -ip */ ps.decl_com_ind = 0; /* if this is not set to some positive value * by an arg, we will set this equal to * ps.com_ind */ btype_2 = 1; /* -br */ cuddle_else = 1; /* -ce */ ps.unindent_displace = 0; /* -d0 */ ps.case_indent = 0; /* -cli0 */ format_block_comments = 1; /* -fcb */ format_col1_comments = 1; /* -fc1 */ procnames_start_line = 1; /* -psl */ proc_calls_space = 0; /* -npcs */ comment_delimiter_on_blankline = 1; /* -cdb */ ps.leave_comma = 1; /* -nbc */ #endif for (i = 1; i < argc; ++i) if (strcmp(argv[i], "-npro") == 0) break; set_defaults(); if (i >= argc) set_profile(); for (i = 1; i < argc; ++i) { /* * look thru args (if any) for changes to defaults */ if (argv[i][0] != '-') {/* no flag on parameter */ if (input == NULL) { /* we must have the input file */ in_name = argv[i]; /* remember name of input file */ input = fopen(in_name, "r"); if (input == NULL) /* check for open error */ err(1, "%s", in_name); continue; } else if (output == NULL) { /* we have the output file */ out_name = argv[i]; /* remember name of output file */ if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite * the file */ errx(1, "input and output files must be different"); } output = fopen(out_name, "w"); if (output == NULL) 
/* check for create error */ err(1, "%s", out_name); continue; } errx(1, "unknown parameter: %s", argv[i]); } else set_option(argv[i]); } /* end of for */ if (input == NULL) input = stdin; if (output == NULL) { if (troff || input == stdin) output = stdout; else { out_name = in_name; bakcopy(); } } if (ps.com_ind <= 1) ps.com_ind = 2; /* dont put normal comments before column 2 */ if (troff) { if (bodyf.font[0] == 0) parsefont(&bodyf, "R"); if (scomf.font[0] == 0) parsefont(&scomf, "I"); if (blkcomf.font[0] == 0) blkcomf = scomf, blkcomf.size += 2; if (boxcomf.font[0] == 0) boxcomf = blkcomf; if (stringf.font[0] == 0) parsefont(&stringf, "L"); if (keywordf.font[0] == 0) parsefont(&keywordf, "B"); writefdef(&bodyf, 'B'); writefdef(&scomf, 'C'); writefdef(&blkcomf, 'L'); writefdef(&boxcomf, 'X'); writefdef(&stringf, 'S'); writefdef(&keywordf, 'K'); } if (block_comment_max_col <= 0) block_comment_max_col = max_col; if (ps.local_decl_indent < 0) /* if not specified by user, set this */ ps.local_decl_indent = ps.decl_indent; if (ps.decl_com_ind <= 0) /* if not specified by user, set this */ ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind; if (continuation_indent == 0) continuation_indent = ps.ind_size; fill_buffer(); /* get first batch of stuff into input buffer */ parse(semicolon); { char *p = buf_ptr; int col = 1; while (1) { if (*p == ' ') col++; else if (*p == '\t') col = ((col - 1) & ~7) + 9; else break; p++; } if (col > ps.ind_size) ps.ind_level = ps.i_l_follow = col / ps.ind_size; } if (troff) { const char *p = in_name, *beg = in_name; while (*p) if (*p++ == '/') beg = p; fprintf(output, ".Fn \"%s\"\n", beg); } /* * START OF MAIN LOOP */ while (1) { /* this is the main loop. it will go until we * reach eof */ int is_procname; type_code = lexi(); /* lexi reads one token. The actual * characters read are stored in "token". lexi * returns a code indicating the type of token */ is_procname = ps.procname[0]; /* * The following code moves everything following an if (), while (), * else, etc. up to the start of the following stmt to a buffer. This * allows proper handling of both kinds of brace placement. 
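	 *
	 * (Editorial sketch, not part of the original source: if the input
	 * has a comment or a newline between "if (cond)" and the "{" that
	 * follows it, those tokens are stashed in save_com; when -br
	 * (btype_2) is in effect the "{" can then be emitted right after
	 * "if (cond)" and the saved comment is replayed afterwards from the
	 * buffer instead of blocking the cuddled brace.)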
*/ flushed_nl = false; while (ps.search_brace) { /* if we scanned an if(), while(), * etc., we might need to copy stuff * into a buffer we must loop, copying * stuff into save_com, until we find * the start of the stmt which follows * the if, or whatever */ switch (type_code) { case newline: ++line_no; if (sc_end != NULL) goto sw_buffer; /* dump comment, if any */ flushed_nl = true; case form_feed: break; /* form feeds and newlines found here will be * ignored */ case lbrace: /* this is a brace that starts the compound * stmt */ if (sc_end == NULL) { /* ignore buffering if a comment wasn't * stored up */ ps.search_brace = false; goto check_type; } if (btype_2) { save_com[0] = '{'; /* we either want to put the brace * right after the if */ goto sw_buffer; /* go to common code to get out of * this loop */ } case comment: /* we have a comment, so we must copy it into * the buffer */ if (!flushed_nl || sc_end != NULL) { - if (sc_end == NULL) { /* if this is the first comment, we - * must set up the buffer */ + if (sc_end == NULL) { /* if this is the first comment, we + * must set up the buffer */ save_com[0] = save_com[1] = ' '; sc_end = &(save_com[2]); } else { *sc_end++ = '\n'; /* add newline between * comments */ *sc_end++ = ' '; --line_no; } *sc_end++ = '/'; /* copy in start of comment */ *sc_end++ = '*'; for (;;) { /* loop until we get to the end of the comment */ *sc_end = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); if (*sc_end++ == '*' && *buf_ptr == '/') break; /* we are at end of comment */ if (sc_end >= &(save_com[sc_size])) { /* check for temp buffer * overflow */ diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever"); fflush(output); exit(1); } } *sc_end++ = '/'; /* add ending slash */ if (++buf_ptr >= buf_end) /* get past / in buffer */ fill_buffer(); break; } default: /* it is the start of a normal statement */ if (flushed_nl) /* if we flushed a newline, make sure it is * put back */ force_nl = true; if ((type_code == sp_paren && *token == 'i' && last_else && ps.else_if) || (type_code == sp_nparen && *token == 'e' && e_code != s_code && e_code[-1] == '}')) force_nl = false; if (sc_end == NULL) { /* ignore buffering if comment wasn't * saved up */ ps.search_brace = false; goto check_type; } if (force_nl) { /* if we should insert a nl here, put it into * the buffer */ force_nl = false; --line_no; /* this will be re-increased when the nl is * read from the buffer */ *sc_end++ = '\n'; *sc_end++ = ' '; if (verbose && !flushed_nl) /* print error msg if the line * was not already broken */ diag2(0, "Line broken"); flushed_nl = false; } for (t_ptr = token; *t_ptr; ++t_ptr) *sc_end++ = *t_ptr; /* copy token into temp buffer */ ps.procname[0] = 0; sw_buffer: ps.search_brace = false; /* stop looking for start of * stmt */ bp_save = buf_ptr; /* save current input buffer */ be_save = buf_end; buf_ptr = save_com; /* fix so that subsequent calls to * lexi will take tokens out of * save_com */ *sc_end++ = ' ';/* add trailing blank, just in case */ buf_end = sc_end; sc_end = NULL; break; } /* end of switch */ if (type_code != 0) /* we must make this check, just in case there * was an unexpected EOF */ type_code = lexi(); /* read another token */ /* if (ps.search_brace) ps.procname[0] = 0; */ if ((is_procname = ps.procname[0]) && flushed_nl && !procnames_start_line && ps.in_decl && type_code == ident) flushed_nl = 0; } /* end of while (search_brace) */ last_else = 0; check_type: if (type_code == 0) { /* we got eof */ if (s_lab != e_lab || s_code != 
e_code || s_com != e_com) /* must dump end of line */ dump_line(); if (ps.tos > 1) /* check for balanced braces */ diag2(1, "Stuff missing from end of file"); if (verbose) { printf("There were %d output lines and %d comments\n", ps.out_lines, ps.out_coms); printf("(Lines with comments)/(Lines with code): %6.3f\n", (1.0 * ps.com_lines) / code_lines); } fflush(output); exit(found_err); } if ( (type_code != comment) && (type_code != newline) && (type_code != preesc) && (type_code != form_feed)) { if (force_nl && (type_code != semicolon) && (type_code != lbrace || !btype_2)) { /* we should force a broken line here */ if (verbose && !flushed_nl) diag2(0, "Line broken"); flushed_nl = false; dump_line(); ps.want_blank = false; /* dont insert blank at line start */ force_nl = false; } ps.in_stmt = true; /* turn on flag which causes an extra level of * indentation. this is turned off by a ; or * '}' */ if (s_com != e_com) { /* the turkey has embedded a comment * in a line. fix it */ *e_code++ = ' '; for (t_ptr = s_com; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; } *e_code++ = ' '; *e_code = '\0'; /* null terminate code sect */ ps.want_blank = false; e_com = s_com; } } else if (type_code != comment) /* preserve force_nl thru a comment */ force_nl = false; /* cancel forced newline after newline, form * feed, etc */ /*-----------------------------------------------------*\ | do switch on type of token scanned | \*-----------------------------------------------------*/ CHECK_SIZE_CODE; switch (type_code) { /* now, decide what to do with the token */ case form_feed: /* found a form feed in line */ ps.use_ff = true; /* a form feed is treated much like a newline */ dump_line(); ps.want_blank = false; break; case newline: if (ps.last_token != comma || ps.p_l_follow > 0 || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) { dump_line(); ps.want_blank = false; } ++line_no; /* keep track of input line number */ break; case lparen: /* got a '(' or '[' */ ++ps.p_l_follow; /* count parens to make Healy happy */ if (ps.want_blank && *token != '[' && (ps.last_token != ident || proc_calls_space || (ps.its_a_keyword && (!ps.sizeof_keyword || Bill_Shannon)))) *e_code++ = ' '; - if (ps.in_decl && !ps.block_init) - if (troff && !ps.dumped_decl_indent && !is_procname && ps.last_token == decl) { - ps.dumped_decl_indent = 1; + ps.want_blank = false; + if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent && + !is_procname) { + /* function pointer declarations */ + if (troff) { sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); e_code += strlen(e_code); } else { - while ((e_code - s_code) < dec_ind) { - CHECK_SIZE_CODE; - *e_code++ = ' '; - } - *e_code++ = token[0]; + indent_declaration(dec_ind, tabs_to_var); } - else + ps.dumped_decl_indent = true; + } + if (!troff) *e_code++ = token[0]; ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code; if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent && ps.paren_indents[0] < 2 * ps.ind_size) ps.paren_indents[0] = 2 * ps.ind_size; - ps.want_blank = false; if (ps.in_or_st && *token == '(' && ps.tos <= 2) { /* * this is a kluge to make sure that declarations will be * aligned right if proc decl has an explicit type on it, i.e. * "int a(x) {..." */ parse(semicolon); /* I said this was a kluge... 
*/ ps.in_or_st = false; /* turn off flag for structure decl or * initialization */ } if (ps.sizeof_keyword) ps.sizeof_mask |= 1 << ps.p_l_follow; break; case rparen: /* got a ')' or ']' */ rparen_count--; if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.sizeof_mask) { ps.last_u_d = true; ps.cast_mask &= (1 << ps.p_l_follow) - 1; ps.want_blank = false; } else ps.want_blank = true; ps.sizeof_mask &= (1 << ps.p_l_follow) - 1; if (--ps.p_l_follow < 0) { ps.p_l_follow = 0; diag3(0, "Extra %c", *token); } if (e_code == s_code) /* if the paren starts the line */ ps.paren_level = ps.p_l_follow; /* then indent it */ *e_code++ = token[0]; if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if * (...), or some such */ sp_sw = false; force_nl = true;/* must force newline after if */ ps.last_u_d = true; /* inform lexi that a following * operator is unary */ ps.in_stmt = false; /* dont use stmt continuation * indentation */ parse(hd_type); /* let parser worry about if, or whatever */ } ps.search_brace = btype_2; /* this should insure that constructs * such as main(){...} and int[]{...} * have their braces put in the right * place */ break; case unary_op: /* this could be any unary operation */ - if (ps.want_blank) - *e_code++ = ' '; - - if (troff && !ps.dumped_decl_indent && ps.in_decl && !is_procname) { - sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); - ps.dumped_decl_indent = 1; - e_code += strlen(e_code); + if (!ps.dumped_decl_indent && ps.in_decl && !is_procname && + !ps.block_init) { + /* pointer declarations */ + if (troff) { + if (ps.want_blank) + *e_code++ = ' '; + sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, + token); + e_code += strlen(e_code); + } + else { + /* if this is a unary op in a declaration, we should + * indent this token */ + for (i = 0; token[i]; ++i) + /* find length of token */; + indent_declaration(dec_ind - i, tabs_to_var); + } + ps.dumped_decl_indent = true; } - else { + else if (ps.want_blank) + *e_code++ = ' '; + { const char *res = token; - if (ps.in_decl && !ps.block_init) { /* if this is a unary op - * in a declaration, we - * should indent this - * token */ - for (i = 0; token[i]; ++i); /* find length of token */ - while ((e_code - s_code) < (dec_ind - i)) { - CHECK_SIZE_CODE; - *e_code++ = ' '; /* pad it */ - } - } if (troff && token[0] == '-' && token[1] == '>') res = "\\(->"; for (t_ptr = res; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; } } ps.want_blank = false; break; case binary_op: /* any binary operation */ if (ps.want_blank) *e_code++ = ' '; { const char *res = token; if (troff) switch (token[0]) { case '<': if (token[1] == '=') res = "\\(<="; break; case '>': if (token[1] == '=') res = "\\(>="; break; case '!': if (token[1] == '=') res = "\\(!="; break; case '|': if (token[1] == '|') res = "\\(br\\(br"; else if (token[1] == 0) res = "\\(br"; break; } for (t_ptr = res; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; /* move the operator */ } } ps.want_blank = true; break; case postop: /* got a trailing ++ or -- */ *e_code++ = token[0]; *e_code++ = token[1]; ps.want_blank = true; break; case question: /* got a ? 
*/ squest++; /* this will be used when a later colon * appears so we can distinguish the * ?: construct */ if (ps.want_blank) *e_code++ = ' '; *e_code++ = '?'; ps.want_blank = true; break; case casestmt: /* got word 'case' or 'default' */ scase = true; /* so we can process the later colon properly */ goto copy_id; case colon: /* got a ':' */ if (squest > 0) { /* it is part of the ?: construct */ --squest; if (ps.want_blank) *e_code++ = ' '; *e_code++ = ':'; ps.want_blank = true; break; } if (ps.in_or_st) { *e_code++ = ':'; ps.want_blank = false; break; } ps.in_stmt = false; /* seeing a label does not imply we are in a * stmt */ for (t_ptr = s_code; *t_ptr; ++t_ptr) *e_lab++ = *t_ptr; /* turn everything so far into a label */ e_code = s_code; *e_lab++ = ':'; *e_lab++ = ' '; *e_lab = '\0'; force_nl = ps.pcase = scase; /* ps.pcase will be used by * dump_line to decide how to * indent the label. force_nl * will force a case n: to be * on a line by itself */ scase = false; ps.want_blank = false; break; case semicolon: /* got a ';' */ - if (ps.dec_nest == 0) { - /* we are not in an initialization or structure declaration */ - ps.in_or_st = false; - } + if (ps.dec_nest == 0) + ps.in_or_st = false;/* we are not in an initialization or + * structure declaration */ scase = false; /* these will only need resetting in an error */ squest = 0; if (ps.last_token == rparen && rparen_count == 0) ps.in_parameter_declaration = 0; ps.cast_mask = 0; ps.sizeof_mask = 0; ps.block_init = 0; ps.block_init_level = 0; ps.just_saw_decl--; - if (ps.in_decl && s_code == e_code && !ps.block_init) - while ((e_code - s_code) < (dec_ind - 1)) { - CHECK_SIZE_CODE; - *e_code++ = ' '; - } + if (ps.in_decl && s_code == e_code && !ps.block_init && + !ps.dumped_decl_indent) { + /* indent stray semicolons in declarations */ + indent_declaration(dec_ind - 1, tabs_to_var); + ps.dumped_decl_indent = true; + } ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level * structure declaration, we * arent any more */ if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { /* * This should be true iff there were unbalanced parens in the * stmt. It is a bit complicated, because the semicolon might * be in a for stmt */ diag2(1, "Unbalanced parens"); ps.p_l_follow = 0; if (sp_sw) { /* this is a check for an if, while, etc. with * unbalanced parens */ sp_sw = false; parse(hd_type); /* dont lose the if, or whatever */ } } *e_code++ = ';'; ps.want_blank = true; ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the * middle of a stmt */ if (!sp_sw) { /* if not if for (;;) */ parse(semicolon); /* let parser know about end of stmt */ force_nl = true;/* force newline after an end of stmt */ } break; case lbrace: /* got a '{' */ ps.in_stmt = false; /* dont indent the {} */ if (!ps.block_init) force_nl = true;/* force other stuff on same line as '{' onto * new line */ else if (ps.block_init_level <= 0) ps.block_init_level = 1; else ps.block_init_level++; if (s_code != e_code && !ps.block_init) { if (!btype_2) { dump_line(); ps.want_blank = false; } else if (ps.in_parameter_declaration && !ps.in_or_st) { ps.i_l_follow = 0; if (function_brace_split) { /* dump the line prior to the * brace ... 
*/ dump_line(); ps.want_blank = false; } else /* add a space between the decl and brace */ ps.want_blank = true; } } if (ps.in_parameter_declaration) prefix_blankline_requested = 0; if (ps.p_l_follow > 0) { /* check for preceding unbalanced * parens */ diag2(1, "Unbalanced parens"); ps.p_l_follow = 0; if (sp_sw) { /* check for unclosed if, for, etc. */ sp_sw = false; parse(hd_type); ps.ind_level = ps.i_l_follow; } } if (s_code == e_code) ps.ind_stmt = false; /* dont put extra indentation on line * with '{' */ if (ps.in_decl && ps.in_or_st) { /* this is either a structure * declaration or an init */ di_stack[ps.dec_nest++] = dec_ind; /* ? dec_ind = 0; */ } else { ps.decl_on_line = false; /* we can't be in the middle of * a declaration, so don't do * special indentation of * comments */ if (blanklines_after_declarations_at_proctop && ps.in_parameter_declaration) postfix_blankline_requested = 1; ps.in_parameter_declaration = 0; } dec_ind = 0; parse(lbrace); /* let parser know about this */ if (ps.want_blank) /* put a blank before '{' if '{' is not at * start of line */ *e_code++ = ' '; ps.want_blank = false; *e_code++ = '{'; ps.just_saw_decl = 0; break; case rbrace: /* got a '}' */ if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be * omitted in * declarations */ parse(semicolon); if (ps.p_l_follow) {/* check for unclosed if, for, else. */ diag2(1, "Unbalanced parens"); ps.p_l_follow = 0; sp_sw = false; } ps.just_saw_decl = 0; ps.block_init_level--; if (s_code != e_code && !ps.block_init) { /* '}' must be first on * line */ if (verbose) diag2(0, "Line broken"); dump_line(); } *e_code++ = '}'; ps.want_blank = true; ps.in_stmt = ps.ind_stmt = false; if (ps.dec_nest > 0) { /* we are in multi-level structure * declaration */ dec_ind = di_stack[--ps.dec_nest]; if (ps.dec_nest == 0 && !ps.in_parameter_declaration) ps.just_saw_decl = 2; ps.in_decl = true; } prefix_blankline_requested = 0; parse(rbrace); /* let parser know about this */ ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead && ps.il[ps.tos] >= ps.ind_level; if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0) postfix_blankline_requested = 1; break; case swstmt: /* got keyword "switch" */ sp_sw = true; hd_type = swstmt; /* keep this for when we have seen the * expression */ goto copy_id; /* go move the token into buffer */ case sp_paren: /* token is if, while, for */ sp_sw = true; /* the interesting stuff is done after the * expression is scanned */ hd_type = (*token == 'i' ? ifstmt : (*token == 'w' ? whilestmt : forstmt)); /* * remember the type of header for later use by parser */ goto copy_id; /* copy the token into line */ case sp_nparen: /* got else, do */ ps.in_stmt = false; if (*token == 'e') { if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) { if (verbose) diag2(0, "Line broken"); dump_line();/* make sure this starts a line */ ps.want_blank = false; } force_nl = true;/* also, following stuff must go onto new line */ last_else = 1; parse(elselit); } else { if (e_code != s_code) { /* make sure this starts a line */ if (verbose) diag2(0, "Line broken"); dump_line(); ps.want_blank = false; } force_nl = true;/* also, following stuff must go onto new line */ last_else = 0; parse(dolit); } goto copy_id; /* move the token into line */ case decl: /* we have a declaration type (int, register, * etc.) 
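				 *
				 * (Editorial note, not part of the original
				 * source: this case is where dec_ind is
				 * chosen -- ps.decl_indent (-di, default 16)
				 * for globals and struct members,
				 * ps.local_decl_indent (-ldi) for locals --
				 * and tabs_to_var records whether
				 * indent_declaration() may use tabs to reach
				 * that column, so the variable names in a
				 * block of declarations line up.)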
*/ parse(decl); /* let parser worry about indentation */ if (ps.last_token == rparen && ps.tos <= 1) { ps.in_parameter_declaration = 1; if (s_code != e_code) { dump_line(); ps.want_blank = 0; } } if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) { ps.ind_level = ps.i_l_follow = 1; ps.ind_stmt = 0; } ps.in_or_st = true; /* this might be a structure or initialization * declaration */ ps.in_decl = ps.decl_on_line = true; if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) ps.just_saw_decl = 2; prefix_blankline_requested = 0; for (i = 0; token[i++];); /* get length of token */ if (ps.ind_level == 0 || ps.dec_nest > 0) { /* global variable or struct member in local variable */ dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i; tabs_to_var = (use_tabs ? ps.decl_indent > 0 : 0); } else { /* local variable */ dec_ind = ps.local_decl_indent > 0 ? ps.local_decl_indent : i; tabs_to_var = (use_tabs ? ps.local_decl_indent > 0 : 0); } goto copy_id; case ident: /* got an identifier or constant */ if (ps.in_decl) { /* if we are in a declaration, we must indent * identifier */ if (is_procname == 0 || !procnames_start_line) { - if (!ps.block_init) { - if (troff && !ps.dumped_decl_indent) { + if (!ps.block_init && !ps.dumped_decl_indent) { + if (troff) { if (ps.want_blank) *e_code++ = ' '; - ps.want_blank = false; sprintf(e_code, "\n.De %dp+\200p\n", dec_ind * 7); - ps.dumped_decl_indent = 1; e_code += strlen(e_code); - } else { - int cur_dec_ind; - int pos, startpos; - - /* - * in order to get the tab math right for - * indentations that are not multiples of 8 we - * need to modify both startpos and dec_ind - * (cur_dec_ind) here by eight minus the - * remainder of the current starting column - * divided by eight. This seems to be a - * properly working fix - */ - startpos = e_code - s_code; - cur_dec_ind = dec_ind; - pos = startpos; - if ((ps.ind_level * ps.ind_size) % 8 != 0) { - pos += (ps.ind_level * ps.ind_size) % 8; - cur_dec_ind += (ps.ind_level * ps.ind_size) % 8; - } - - if (tabs_to_var) { - while ((pos & ~7) + 8 <= cur_dec_ind) { - CHECK_SIZE_CODE; - *e_code++ = '\t'; - pos = (pos & ~7) + 8; - } - } - while (pos < cur_dec_ind) { - CHECK_SIZE_CODE; - *e_code++ = ' '; - pos++; - } - if (ps.want_blank && e_code - s_code == startpos) - *e_code++ = ' '; - ps.want_blank = false; - } + } else + indent_declaration(dec_ind, tabs_to_var); + ps.dumped_decl_indent = true; + ps.want_blank = false; } } else { if (ps.want_blank) *e_code++ = ' '; ps.want_blank = false; if (dec_ind && s_code != e_code) { *e_code = '\0'; dump_line(); } dec_ind = 0; } } else if (sp_sw && ps.p_l_follow == 0) { sp_sw = false; force_nl = true; ps.last_u_d = true; ps.in_stmt = false; parse(hd_type); } copy_id: if (ps.want_blank) *e_code++ = ' '; if (troff && ps.its_a_keyword) { e_code = chfont(&bodyf, &keywordf, e_code); for (t_ptr = token; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = keywordf.allcaps && islower(*t_ptr) ? 
toupper(*t_ptr) : *t_ptr; } e_code = chfont(&keywordf, &bodyf, e_code); } else for (t_ptr = token; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; } ps.want_blank = true; break; case period: /* treat a period kind of like a binary * operation */ *e_code++ = '.'; /* move the period into line */ ps.want_blank = false; /* dont put a blank after a period */ break; case comma: ps.want_blank = (s_code != e_code); /* only put blank after comma * if comma does not start the * line */ - if (ps.in_decl && is_procname == 0 && !ps.block_init) - while ((e_code - s_code) < (dec_ind - 1)) { - CHECK_SIZE_CODE; - *e_code++ = ' '; - } - + if (ps.in_decl && is_procname == 0 && !ps.block_init && + !ps.dumped_decl_indent) { + /* indent leading commas and not the actual identifiers */ + indent_declaration(dec_ind - 1, tabs_to_var); + ps.dumped_decl_indent = true; + } *e_code++ = ','; if (ps.p_l_follow == 0) { if (ps.block_init_level <= 0) ps.block_init = 0; if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8)) force_nl = true; } break; case preesc: /* got the character '#' */ if ((s_com != e_com) || (s_lab != e_lab) || (s_code != e_code)) dump_line(); *e_lab++ = '#'; /* move whole line to 'label' buffer */ { int in_comment = 0; int com_start = 0; char quote = 0; int com_end = 0; while (*buf_ptr == ' ' || *buf_ptr == '\t') { buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } while (*buf_ptr != '\n' || (in_comment && !had_eof)) { CHECK_SIZE_LAB; *e_lab = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); switch (*e_lab++) { case BACKSLASH: if (troff) *e_lab++ = BACKSLASH; if (!in_comment) { *e_lab++ = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } break; case '/': if (*buf_ptr == '*' && !in_comment && !quote) { in_comment = 1; *e_lab++ = *buf_ptr++; com_start = e_lab - s_lab - 2; } break; case '"': if (quote == '"') quote = 0; break; case '\'': if (quote == '\'') quote = 0; break; case '*': if (*buf_ptr == '/' && in_comment) { in_comment = 0; *e_lab++ = *buf_ptr++; com_end = e_lab - s_lab; } break; } } while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) e_lab--; - /* comment on preprocessor line */ if (e_lab - s_lab == com_end && bp_save == NULL) { + /* comment on preprocessor line */ if (sc_end == NULL) /* if this is the first comment, we * must set up the buffer */ sc_end = &(save_com[0]); else { *sc_end++ = '\n'; /* add newline between * comments */ *sc_end++ = ' '; --line_no; } bcopy(s_lab + com_start, sc_end, com_end - com_start); sc_end += com_end - com_start; if (sc_end >= &save_com[sc_size]) abort(); e_lab = s_lab + com_start; while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) e_lab--; bp_save = buf_ptr; /* save current input buffer */ be_save = buf_end; buf_ptr = save_com; /* fix so that subsequent calls to * lexi will take tokens out of * save_com */ *sc_end++ = ' '; /* add trailing blank, just in case */ buf_end = sc_end; sc_end = NULL; } *e_lab = '\0'; /* null terminate line */ ps.pcase = false; } - if (strncmp(s_lab, "#if", 3) == 0) { - if (blanklines_around_conditional_compilation) { - int c; - prefix_blankline_requested++; - while ((c = getc(input)) == '\n'); - ungetc(c, input); - } - if ((size_t)ifdef_level < sizeof(state_stack)/sizeof(state_stack[0])) { + if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */ + if ((size_t)ifdef_level < nitems(state_stack)) { match_state[ifdef_level].tos = -1; state_stack[ifdef_level++] = ps; } else diag2(1, "#if stack overflow"); } - else if (strncmp(s_lab, "#else", 5) == 
0) + else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */ if (ifdef_level <= 0) - diag2(1, "Unmatched #else"); + diag2(1, s_lab[3] == 'i' ? "Unmatched #elif" : "Unmatched #else"); else { match_state[ifdef_level - 1] = ps; ps = state_stack[ifdef_level - 1]; } + } else if (strncmp(s_lab, "#endif", 6) == 0) { if (ifdef_level <= 0) diag2(1, "Unmatched #endif"); - else { + else ifdef_level--; - -#ifdef undef - /* - * This match needs to be more intelligent before the - * message is useful - */ - if (match_state[ifdef_level].tos >= 0 - && bcmp(&ps, &match_state[ifdef_level], sizeof ps)) - diag2(0, "Syntactically inconsistent #ifdef alternatives"); -#endif + } else { + struct directives { + int size; + const char *string; } - if (blanklines_around_conditional_compilation) { - postfix_blankline_requested++; - n_real_blanklines = 0; + recognized[] = { + {7, "include"}, + {6, "define"}, + {5, "undef"}, + {4, "line"}, + {5, "error"}, + {6, "pragma"} + }; + int d = nitems(recognized); + while (--d >= 0) + if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0) + break; + if (d < 0) { + diag2(1, "Unrecognized cpp directive"); + break; } } + if (blanklines_around_conditional_compilation) { + postfix_blankline_requested++; + n_real_blanklines = 0; + } + else { + postfix_blankline_requested = 0; + prefix_blankline_requested = 0; + } break; /* subsequent processing of the newline * character will cause the line to be printed */ case comment: /* we have gotten a / followed by * this is a biggie */ if (flushed_nl) { /* we should force a broken line here */ flushed_nl = false; dump_line(); ps.want_blank = false; /* dont insert blank at line start */ force_nl = false; } pr_comment(); break; } /* end of big switch stmt */ *e_code = '\0'; /* make sure code section is null terminated */ if (type_code != comment && type_code != newline && type_code != preesc) ps.last_token = type_code; } /* end of main while (1) loop */ } /* * copy input file to backup file if in_name is /blah/blah/blah/file, then * backup file will be ".Bfile" then make the backup file the input and * original input file the output */ static void bakcopy(void) { int n, bakchn; char buff[8 * 1024]; const char *p; /* construct file name .Bfile */ for (p = in_name; *p; p++); /* skip to end of string */ while (p > in_name && *p != '/') /* find last '/' */ p--; if (*p == '/') p++; sprintf(bakfile, "%s.BAK", p); /* copy in_name to backup file */ bakchn = creat(bakfile, 0600); if (bakchn < 0) err(1, "%s", bakfile); while ((n = read(fileno(input), buff, sizeof(buff))) > 0) if (write(bakchn, buff, n) != n) err(1, "%s", bakfile); if (n < 0) err(1, "%s", in_name); close(bakchn); fclose(input); /* re-open backup file as the input file */ input = fopen(bakfile, "r"); if (input == NULL) err(1, "%s", bakfile); /* now the original input file will be the output */ output = fopen(in_name, "w"); if (output == NULL) { unlink(bakfile); err(1, "%s", in_name); + } +} + +static void +indent_declaration(int cur_dec_ind, int tabs_to_var) +{ + int pos = e_code - s_code; + char *startpos = e_code; + + /* + * get the tab math right for indentations that are not multiples of 8 + */ + if ((ps.ind_level * ps.ind_size) % 8 != 0) { + pos += (ps.ind_level * ps.ind_size) % 8; + cur_dec_ind += (ps.ind_level * ps.ind_size) % 8; + } + if (tabs_to_var) + while ((pos & ~7) + 8 <= cur_dec_ind) { + CHECK_SIZE_CODE; + *e_code++ = '\t'; + pos = (pos & ~7) + 8; + } + while (pos < cur_dec_ind) { + CHECK_SIZE_CODE; + *e_code++ = ' '; + pos++; + } + if (e_code == startpos 
&& ps.want_blank) {
+        *e_code++ = ' ';
+        ps.want_blank = false;
+    }
+}
Index: user/alc/PQ_LAUNDRY/usr.bin/indent/indent.h
===================================================================
--- user/alc/PQ_LAUNDRY/usr.bin/indent/indent.h (revision 303641)
+++ user/alc/PQ_LAUNDRY/usr.bin/indent/indent.h (revision 303642)
@@ -1,47 +1,48 @@
-/*
+/*-
  * Copyright (c) 2001 Jens Schweikhardt
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if 0
 __FBSDID("$FreeBSD$");
 #endif
 
 void addkey(char *, int);
 int compute_code_target(void);
 int compute_label_target(void);
 int count_spaces(int, char *);
+int count_spaces_until(int, char *, char *);
 int lexi(void);
 void diag2(int, const char *);
 void diag3(int, const char *, int);
 void diag4(int, const char *, int, int);
 void dump_line(void);
 void fill_buffer(void);
 void parse(int);
 void parsefont(struct fstate *, const char *);
 void pr_comment(void);
 void set_defaults(void);
 void set_option(char *);
 void set_profile(void);
 void writefdef(struct fstate *f, int);
Index: user/alc/PQ_LAUNDRY/usr.bin/indent/indent_codes.h
===================================================================
--- user/alc/PQ_LAUNDRY/usr.bin/indent/indent_codes.h (revision 303641)
+++ user/alc/PQ_LAUNDRY/usr.bin/indent/indent_codes.h (revision 303642)
@@ -1,70 +1,70 @@
-/*
+/*-
  * Copyright (c) 1985 Sun Microsystems, Inc.
  * Copyright (c) 1980, 1993
  *    The Regents of the University of California. All rights reserved.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *    This product includes software developed by the University of
  *    California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * @(#)indent_codes.h 8.1 (Berkeley) 6/6/93
  * $FreeBSD$
  */
 
 #define newline 1
 #define lparen 2
 #define rparen 3
 #define unary_op 4
 #define binary_op 5
 #define postop 6
 #define question 7
 #define casestmt 8
 #define colon 9
 #define semicolon 10
 #define lbrace 11
 #define rbrace 12
 #define ident 13
 #define comma 14
 #define comment 15
 #define swstmt 16
 #define preesc 17
 #define form_feed 18
 #define decl 19
 #define sp_paren 20
 #define sp_nparen 21
 #define ifstmt 22
 #define whilestmt 23
 #define forstmt 24
 #define stmt 25
 #define stmtl 26
 #define elselit 27
 #define dolit 28
 #define dohead 29
 #define ifhead 30
 #define elsehead 31
 #define period 32
Index: user/alc/PQ_LAUNDRY/usr.bin/indent/indent_globs.h
===================================================================
--- user/alc/PQ_LAUNDRY/usr.bin/indent/indent_globs.h (revision 303641)
+++ user/alc/PQ_LAUNDRY/usr.bin/indent/indent_globs.h (revision 303642)
@@ -1,330 +1,335 @@
-/*
+/*-
  * Copyright (c) 1985 Sun Microsystems, Inc.
  * Copyright (c) 1980, 1993
  *    The Regents of the University of California. All rights reserved.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *    This product includes software developed by the University of
  *    California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * @(#)indent_globs.h 8.1 (Berkeley) 6/6/93
  * $FreeBSD$
  */
 
 #define BACKSLASH '\\'
 #define bufsize 200		/* size of internal buffers */
 #define sc_size 5000		/* size of save_com buffer */
 #define label_offset 2		/* number of levels a label is placed to left
				 * of code */
 #define tabsize 8		/* the size of a tab */
 #define tabmask 0177770	/* mask used when figuring length of lines
				 * with tabs */
 
 #define false 0
 #define true 1
 
 FILE *input;			/* the fid for the input file */
 FILE *output;			/* the output file */
 
 #define CHECK_SIZE_CODE \
 	if (e_code >= l_code) { \
 	    int nsize = l_code-s_code+400; \
+	    int code_len = e_code-s_code; \
 	    codebuf = (char *) realloc(codebuf, nsize); \
 	    if (codebuf == NULL) \
 		err(1, NULL); \
-	    e_code = codebuf + (e_code-s_code) + 1; \
+	    e_code = codebuf + code_len + 1; \
 	    l_code = codebuf + nsize - 5; \
 	    s_code = codebuf + 1; \
 	}
 
 #define CHECK_SIZE_COM \
 	if (e_com >= l_com) { \
 	    int nsize = l_com-s_com+400; \
+	    int com_len = e_com - s_com; \
+	    int blank_pos = last_bl - s_com; \
 	    combuf = (char *) realloc(combuf, nsize); \
 	    if (combuf == NULL) \
 		err(1, NULL); \
-	    e_com = combuf + (e_com-s_com) + 1; \
-	    last_bl = combuf + (last_bl-s_com) + 1; \
+	    e_com = combuf + com_len + 1; \
+	    last_bl = combuf + blank_pos + 1; \
 	    l_com = combuf + nsize - 5; \
 	    s_com = combuf + 1; \
 	}
 
 #define CHECK_SIZE_LAB \
 	if (e_lab >= l_lab) { \
 	    int nsize = l_lab-s_lab+400; \
+	    int label_len = e_lab - s_lab; \
 	    labbuf = (char *) realloc(labbuf, nsize); \
 	    if (labbuf == NULL) \
 		err(1, NULL); \
-	    e_lab = labbuf + (e_lab-s_lab) + 1; \
+	    e_lab = labbuf + label_len + 1; \
 	    l_lab = labbuf + nsize - 5; \
 	    s_lab = labbuf + 1; \
 	}
 
 #define CHECK_SIZE_TOKEN \
 	if (e_token >= l_token) { \
 	    int nsize = l_token-s_token+400; \
+	    int token_len = e_token - s_token; \
 	    tokenbuf = (char *) realloc(tokenbuf, nsize); \
 	    if (tokenbuf == NULL) \
 		err(1, NULL); \
-	    e_token = tokenbuf + (e_token-s_token) + 1; \
+	    e_token = tokenbuf + token_len + 1; \
 	    l_token = tokenbuf + nsize - 5; \
 	    s_token = tokenbuf + 1; \
 	}
 
 char *labbuf;			/* buffer for label */
 char *s_lab;			/* start ... */
 char *e_lab;			/* .. and end of stored label */
 char *l_lab;			/* limit of label buffer */
 
 char *codebuf;			/* buffer for code section */
 char *s_code;			/* start ... */
 char *e_code;			/* .. and end of stored code */
 char *l_code;			/* limit of code section */
 
 char *combuf;			/* buffer for comments */
 char *s_com;			/* start ... */
 char *e_com;			/* ...
and end of stored comments */ char *l_com; /* limit of comment buffer */ #define token s_token char *tokenbuf; /* the last token scanned */ char *s_token; char *e_token; char *l_token; char *in_buffer; /* input buffer */ char *in_buffer_limit; /* the end of the input buffer */ char *buf_ptr; /* ptr to next character to be taken from * in_buffer */ char *buf_end; /* ptr to first after last char in in_buffer */ char save_com[sc_size]; /* input text is saved here when looking for * the brace after an if, while, etc */ char *sc_end; /* pointer into save_com buffer */ char *bp_save; /* saved value of buf_ptr when taking input * from save_com */ char *be_save; /* similarly saved value of buf_end */ int found_err; int pointer_as_binop; int blanklines_after_declarations; int blanklines_before_blockcomments; int blanklines_after_procs; int blanklines_around_conditional_compilation; int swallow_optional_blanklines; int n_real_blanklines; int prefix_blankline_requested; int postfix_blankline_requested; int break_comma; /* when true and not in parens, break after a * comma */ int btype_2; /* when true, brace should be on same line as * if, while, etc */ float case_ind; /* indentation level to be used for a "case * n:" */ int code_lines; /* count of lines with code */ int had_eof; /* set to true when input is exhausted */ int line_no; /* the current line number. */ int max_col; /* the maximum allowable line length */ int verbose; /* when true, non-essential error messages are * printed */ int cuddle_else; /* true if else should cuddle up to '}' */ int star_comment_cont; /* true iff comment continuation lines should * have stars at the beginning of each line. */ int comment_delimiter_on_blankline; int troff; /* true iff were generating troff input */ int procnames_start_line; /* if true, the names of procedures * being defined get placed in column * 1 (ie. 
a newline is placed between * the type of the procedure and its * name) */ int proc_calls_space; /* If true, procedure calls look like: * foo(bar) rather than foo (bar) */ int format_block_comments; /* true if comments beginning with * `/ * \n' are to be reformatted */ int format_col1_comments; /* If comments which start in column 1 * are to be magically reformatted * (just like comments that begin in * later columns) */ int inhibit_formatting; /* true if INDENT OFF is in effect */ int suppress_blanklines;/* set iff following blanklines should be * suppressed */ int continuation_indent;/* set to the indentation between the edge of * code and continuation lines */ int lineup_to_parens; /* if true, continued code within parens will * be lined up to the open paren */ int Bill_Shannon; /* true iff a blank should always be inserted * after sizeof */ int blanklines_after_declarations_at_proctop; /* This is vaguely * similar to * blanklines_after_decla * rations except that * it only applies to * the first set of * declarations in a * procedure (just after * the first '{') and it * causes a blank line * to be generated even * if there are no * declarations */ int block_comment_max_col; int extra_expression_indent; /* true if continuation lines from the * expression part of "if(e)", * "while(e)", "for(e;e;e)" should be * indented an extra tab stop so that * they don't conflict with the code * that follows */ int function_brace_split; /* split function declaration and * brace onto separate lines */ int use_tabs; /* set true to use tabs for spacing, * false uses all spaces */ int auto_typedefs; /* set true to recognize identifiers * ending in "_t" like typedefs */ /* -troff font state information */ struct fstate { char font[4]; char size; int allcaps:1; } __aligned(sizeof(int)); char *chfont(struct fstate *, struct fstate *, char *); struct fstate keywordf, /* keyword font */ stringf, /* string font */ boxcomf, /* Box comment font */ blkcomf, /* Block comment font */ scomf, /* Same line comment font */ bodyf; /* major body font */ -#define STACKSIZE 150 +#define STACKSIZE 256 struct parser_state { int last_token; struct fstate cfont; /* Current font */ int p_stack[STACKSIZE]; /* this is the parsers stack */ int il[STACKSIZE]; /* this stack stores indentation levels */ float cstk[STACKSIZE];/* used to store case stmt indentation levels */ int box_com; /* set to true when we are in a "boxed" * comment. In that case, the first non-blank * char should be lined up with the / in / followed by * */ int comment_delta, n_comment_delta; int cast_mask; /* indicates which close parens close off * casts */ int sizeof_mask; /* indicates which close parens close off * sizeof''s */ int block_init; /* true iff inside a block initialization */ int block_init_level; /* The level of brace nesting in an * initialization */ int last_nl; /* this is true if the last thing scanned was * a newline */ int in_or_st; /* Will be true iff there has been a * declarator (e.g. int or char) and no left * paren since the last semicolon. 
When true, * a '{' is starting a structure definition or * an initialization list */ int bl_line; /* set to 1 by dump_line if the line is blank */ int col_1; /* set to true if the last token started in * column 1 */ int com_col; /* this is the column in which the current * comment should start */ int com_ind; /* the column in which comments to the right * of code should start */ int com_lines; /* the number of lines with comments, set by * dump_line */ int dec_nest; /* current nesting level for structure or init */ int decl_com_ind; /* the column in which comments after * declarations should be put */ int decl_on_line; /* set to true if this line of code has part * of a declaration on it */ int i_l_follow; /* the level to which ind_level should be set * after the current line is printed */ int in_decl; /* set to true when we are in a declaration * stmt. The processing of braces is then * slightly different */ int in_stmt; /* set to 1 while in a stmt */ int ind_level; /* the current indentation level */ int ind_size; /* the size of one indentation level */ int ind_stmt; /* set to 1 if next line should have an extra * indentation level because we are in the * middle of a stmt */ int last_u_d; /* set to true after scanning a token which * forces a following operator to be unary */ int leave_comma; /* if true, never break declarations after * commas */ int ljust_decl; /* true if declarations should be left * justified */ int out_coms; /* the number of comments processed, set by * pr_comment */ int out_lines; /* the number of lines written, set by * dump_line */ int p_l_follow; /* used to remember how to indent following * statement */ int paren_level; /* parenthesization level. used to indent * within statements */ short paren_indents[20]; /* column positions of each paren */ int pcase; /* set to 1 if the current line label is a * case. It is printed differently from a * regular label */ int search_brace; /* set to true by parse when it is necessary * to buffer up all info up to the start of a * stmt after an if, while, etc */ int unindent_displace; /* comments not to the right of code * will be placed this many * indentation levels to the left of * code */ int use_ff; /* set to one if the current line should be * terminated with a form feed */ int want_blank; /* set to true when the following token should * be prefixed by a blank. (Said prefixing is * ignored in some cases.) */ int else_if; /* True iff else if pairs should be handled * specially */ int decl_indent; /* column to indent declared identifiers to */ int local_decl_indent; /* like decl_indent but for locals */ int its_a_keyword; int sizeof_keyword; int dumped_decl_indent; float case_indent; /* The distance to indent case labels from the * switch statement */ int in_parameter_declaration; int indent_parameters; int tos; /* pointer to top of stack */ char procname[100]; /* The name of the current procedure */ int just_saw_decl; } ps; int ifdef_level; int rparen_count; struct parser_state state_stack[5]; struct parser_state match_state[5]; Index: user/alc/PQ_LAUNDRY/usr.bin/indent/io.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/indent/io.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/indent/io.c (revision 303642) @@ -1,668 +1,662 @@ -/* +/*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)io.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "indent_globs.h" #include "indent.h" int comment_open; static int paren_target; static int pad_output(int current, int target); void dump_line(void) { /* dump_line is the routine that actually * effects the printing of the new source. It * prints the label section, followed by the * code section with the appropriate nesting * level, followed by any comments */ int cur_col, target_col = 1; static int not_first_line; if (ps.procname[0]) { if (troff) { if (comment_open) { comment_open = 0; fprintf(output, ".*/\n"); } fprintf(output, ".Pr \"%s\"\n", ps.procname); } ps.ind_level = 0; ps.procname[0] = 0; } if (s_code == e_code && s_lab == e_lab && s_com == e_com) { if (suppress_blanklines > 0) suppress_blanklines--; else { ps.bl_line = true; n_real_blanklines++; } } else if (!inhibit_formatting) { suppress_blanklines = 0; ps.bl_line = false; if (prefix_blankline_requested && not_first_line) { if (swallow_optional_blanklines) { if (n_real_blanklines == 1) n_real_blanklines = 0; } else { if (n_real_blanklines == 0) n_real_blanklines = 1; } } while (--n_real_blanklines >= 0) putc('\n', output); n_real_blanklines = 0; if (ps.ind_level == 0) ps.ind_stmt = 0; /* this is a class A kludge. 
dont do * additional statement indentation if we are * at bracket level 0 */ if (e_lab != s_lab || e_code != s_code) ++code_lines; /* keep count of lines with code */ if (e_lab != s_lab) { /* print lab, if any */ if (comment_open) { comment_open = 0; fprintf(output, ".*/\n"); } while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) e_lab--; *e_lab = '\0'; cur_col = pad_output(1, compute_label_target()); if (s_lab[0] == '#' && (strncmp(s_lab, "#else", 5) == 0 || strncmp(s_lab, "#endif", 6) == 0)) { char *s = s_lab; if (e_lab[-1] == '\n') e_lab--; do putc(*s++, output); while (s < e_lab && 'a' <= *s && *s<='z'); while ((*s == ' ' || *s == '\t') && s < e_lab) s++; if (s < e_lab) fprintf(output, s[0]=='/' && s[1]=='*' ? "\t%.*s" : "\t/* %.*s */", (int)(e_lab - s), s); } else fprintf(output, "%.*s", (int)(e_lab - s_lab), s_lab); cur_col = count_spaces(cur_col, s_lab); } else cur_col = 1; /* there is no label section */ ps.pcase = false; if (s_code != e_code) { /* print code section, if any */ char *p; if (comment_open) { comment_open = 0; fprintf(output, ".*/\n"); } target_col = compute_code_target(); { int i; for (i = 0; i < ps.p_l_follow; i++) if (ps.paren_indents[i] >= 0) ps.paren_indents[i] = -(ps.paren_indents[i] + target_col); } cur_col = pad_output(cur_col, target_col); for (p = s_code; p < e_code; p++) if (*p == (char) 0200) fprintf(output, "%d", target_col * 7); else putc(*p, output); cur_col = count_spaces(cur_col, s_code); } if (s_com != e_com) { if (troff) { int all_here = 0; char *p; if (e_com[-1] == '/' && e_com[-2] == '*') e_com -= 2, all_here++; while (e_com > s_com && e_com[-1] == ' ') e_com--; *e_com = 0; p = s_com; while (*p == ' ') p++; if (p[0] == '/' && p[1] == '*') p += 2, all_here++; else if (p[0] == '*') p += p[1] == '/' ? 2 : 1; while (*p == ' ') p++; if (*p == 0) goto inhibit_newline; if (comment_open < 2 && ps.box_com) { comment_open = 0; fprintf(output, ".*/\n"); } if (comment_open == 0) { if ('a' <= *p && *p <= 'z') *p = *p + 'A' - 'a'; if (e_com - p < 50 && all_here == 2) { char *follow = p; fprintf(output, "\n.nr C! \\w\1"); while (follow < e_com) { switch (*follow) { case '\n': putc(' ', output); case 1: break; case '\\': putc('\\', output); default: putc(*follow, output); } follow++; } putc(1, output); } fprintf(output, "\n./* %dp %d %dp\n", ps.com_col * 7, (s_code != e_code || s_lab != e_lab) - ps.box_com, target_col * 7); } comment_open = 1 + ps.box_com; while (*p) { if (*p == BACKSLASH) putc(BACKSLASH, output); putc(*p++, output); } } else { /* print comment, if any */ int target = ps.com_col; char *com_st = s_com; target += ps.comment_delta; while (*com_st == '\t') com_st++, target += 8; /* ? */ while (target <= 0) if (*com_st == ' ') target++, com_st++; else if (*com_st == '\t') target = ((target - 1) & ~7) + 9, com_st++; else target = 1; if (cur_col > target) { /* if comment can't fit on this line, * put it on next line */ putc('\n', output); cur_col = 1; ++ps.out_lines; } while (e_com > com_st && isspace(e_com[-1])) e_com--; cur_col = pad_output(cur_col, target); - if (!ps.box_com) { - if (star_comment_cont && (com_st[1] != '*' || e_com <= com_st + 1)) { - if (com_st[1] == ' ' && com_st[0] == ' ' && e_com > com_st + 1) - com_st[1] = '*'; - else - fwrite(" * ", com_st[0] == '\t' ? 2 : com_st[0] == '*' ? 
1 : 3, 1, output); - } - } fwrite(com_st, e_com - com_st, 1, output); ps.comment_delta = ps.n_comment_delta; - cur_col = count_spaces(cur_col, com_st); ++ps.com_lines; /* count lines with comments */ } } if (ps.use_ff) putc('\014', output); else putc('\n', output); inhibit_newline: ++ps.out_lines; if (ps.just_saw_decl == 1 && blanklines_after_declarations) { prefix_blankline_requested = 1; ps.just_saw_decl = 0; } else prefix_blankline_requested = postfix_blankline_requested; postfix_blankline_requested = 0; } ps.decl_on_line = ps.in_decl; /* if we are in the middle of a * declaration, remember that fact for * proper comment indentation */ ps.ind_stmt = ps.in_stmt & ~ps.in_decl; /* next line should be * indented if we have not * completed this stmt and if * we are not in the middle of * a declaration */ ps.use_ff = false; ps.dumped_decl_indent = 0; *(e_lab = s_lab) = '\0'; /* reset buffers */ *(e_code = s_code) = '\0'; - *(e_com = s_com) = '\0'; + *(e_com = s_com = combuf + 1) = '\0'; ps.ind_level = ps.i_l_follow; ps.paren_level = ps.p_l_follow; paren_target = -ps.paren_indents[ps.paren_level - 1]; not_first_line = 1; } int compute_code_target(void) { int target_col = ps.ind_size * ps.ind_level + 1; if (ps.paren_level) if (!lineup_to_parens) target_col += continuation_indent * (2 * continuation_indent == ps.ind_size ? 1 : ps.paren_level); else { int w; int t = paren_target; if ((w = count_spaces(t, s_code) - max_col) > 0 && count_spaces(target_col, s_code) <= max_col) { t -= w + 1; if (t > target_col) target_col = t; } else target_col = t; } else if (ps.ind_stmt) target_col += continuation_indent; return target_col; } int compute_label_target(void) { return ps.pcase ? (int) (case_ind * ps.ind_size) + 1 : *s_lab == '#' ? 1 : ps.ind_size * (ps.ind_level - label_offset) + 1; } /* * Copyright (C) 1976 by the Board of Trustees of the University of Illinois * * All rights reserved * * * NAME: fill_buffer * * FUNCTION: Reads one block of input into input_buffer * * HISTORY: initial coding November 1976 D A Willcox of CAC 1/7/77 A * Willcox of CAC Added check for switch back to partly full input * buffer from temporary buffer * */ void fill_buffer(void) { /* this routine reads stuff from the input */ char *p; int i; FILE *f = input; - if (bp_save != NULL) { /* there is a partly filled input buffer left */ - buf_ptr = bp_save; /* dont read anything, just switch buffers */ + if (bp_save != NULL) { /* there is a partly filled input buffer left */ + buf_ptr = bp_save; /* do not read anything, just switch buffers */ buf_end = be_save; bp_save = be_save = NULL; if (buf_ptr < buf_end) return; /* only return if there is really something in * this buffer */ } for (p = in_buffer;;) { if (p >= in_buffer_limit) { int size = (in_buffer_limit - in_buffer) * 2 + 10; int offset = p - in_buffer; in_buffer = realloc(in_buffer, size); if (in_buffer == NULL) errx(1, "input line too long"); p = in_buffer + offset; in_buffer_limit = in_buffer + size - 2; } if ((i = getc(f)) == EOF) { *p++ = ' '; *p++ = '\n'; had_eof = true; break; } *p++ = i; if (i == '\n') break; } buf_ptr = in_buffer; buf_end = p; if (p[-2] == '/' && p[-3] == '*') { if (in_buffer[3] == 'I' && strncmp(in_buffer, "/**INDENT**", 11) == 0) fill_buffer(); /* flush indent error message */ else { int com = 0; p = in_buffer; while (*p == ' ' || *p == '\t') p++; if (*p == '/' && p[1] == '*') { p += 2; while (*p == ' ' || *p == '\t') p++; if (p[0] == 'I' && p[1] == 'N' && p[2] == 'D' && p[3] == 'E' && p[4] == 'N' && p[5] == 'T') { p += 6; while (*p == ' ' || *p 
== '\t') p++; if (*p == '*') com = 1; else if (*p == 'O') { if (*++p == 'N') p++, com = 1; else if (*p == 'F' && *++p == 'F') p++, com = 2; } while (*p == ' ' || *p == '\t') p++; if (p[0] == '*' && p[1] == '/' && p[2] == '\n' && com) { if (s_com != e_com || s_lab != e_lab || s_code != e_code) dump_line(); if (!(inhibit_formatting = com - 1)) { n_real_blanklines = 0; postfix_blankline_requested = 0; prefix_blankline_requested = 0; suppress_blanklines = 1; } } } } } } if (inhibit_formatting) { p = in_buffer; do putc(*p, output); while (*p++ != '\n'); } } /* * Copyright (C) 1976 by the Board of Trustees of the University of Illinois * * All rights reserved * * * NAME: pad_output * * FUNCTION: Writes tabs and spaces to move the current column up to the desired * position. * * ALGORITHM: Put tabs and/or blanks into pobuf, then write pobuf. * * PARAMETERS: current integer The current column target * nteger The desired column * * RETURNS: Integer value of the new column. (If current >= target, no action is * taken, and current is returned. * * GLOBALS: None * * CALLS: write (sys) * * CALLED BY: dump_line * * HISTORY: initial coding November 1976 D A Willcox of CAC * */ static int pad_output(int current, int target) /* writes tabs and blanks (if necessary) to * get the current output position up to the * target column */ /* current: the current column value */ /* target: position we want it at */ { int curr; /* internal column pointer */ int tcur; if (troff) fprintf(output, "\\h'|%dp'", (target - 1) * 7); else { if (current >= target) return (current); /* line is already long enough */ curr = current; if (use_tabs) { while ((tcur = ((curr - 1) & tabmask) + tabsize + 1) <= target) { putc('\t', output); curr = tcur; } } while (curr++ < target) putc(' ', output); /* pad with final blanks */ } return (target); } /* * Copyright (C) 1976 by the Board of Trustees of the University of Illinois * * All rights reserved * * * NAME: count_spaces * * FUNCTION: Find out where printing of a given string will leave the current * character position on output. * * ALGORITHM: Run thru input string and add appropriate values to current * position. * * RETURNS: Integer value of position after printing "buffer" starting in column * "current". * * HISTORY: initial coding November 1976 D A Willcox of CAC * */ int -count_spaces(int current, char *buffer) +count_spaces_until(int cur, char *buffer, char *end) /* * this routine figures out where the character position will be after * printing the text in buffer starting at column "current" */ { char *buf; /* used to look thru buffer */ - int cur; /* current character counter */ - cur = current; - - for (buf = buffer; *buf != '\0'; ++buf) { + for (buf = buffer; *buf != '\0' && buf != end; ++buf) { switch (*buf) { case '\n': case 014: /* form feed */ cur = 1; break; case '\t': cur = ((cur - 1) & tabmask) + tabsize + 1; break; case 010: /* backspace */ --cur; break; default: ++cur; break; } /* end of switch */ } /* end of for loop */ return (cur); } +int +count_spaces(int cur, char *buffer) +{ + return (count_spaces_until(cur, buffer, NULL)); +} + void diag4(int level, const char *msg, int a, int b) { if (level) found_err = 1; if (output == stdout) { fprintf(stdout, "/**INDENT** %s@%d: ", level == 0 ? "Warning" : "Error", line_no); fprintf(stdout, msg, a, b); fprintf(stdout, " */\n"); } else { fprintf(stderr, "%s@%d: ", level == 0 ? 
"Warning" : "Error", line_no); fprintf(stderr, msg, a, b); fprintf(stderr, "\n"); } } void diag3(int level, const char *msg, int a) { if (level) found_err = 1; if (output == stdout) { fprintf(stdout, "/**INDENT** %s@%d: ", level == 0 ? "Warning" : "Error", line_no); fprintf(stdout, msg, a); fprintf(stdout, " */\n"); } else { fprintf(stderr, "%s@%d: ", level == 0 ? "Warning" : "Error", line_no); fprintf(stderr, msg, a); fprintf(stderr, "\n"); } } void diag2(int level, const char *msg) { if (level) found_err = 1; if (output == stdout) { fprintf(stdout, "/**INDENT** %s@%d: ", level == 0 ? "Warning" : "Error", line_no); fprintf(stdout, "%s", msg); fprintf(stdout, " */\n"); } else { fprintf(stderr, "%s@%d: ", level == 0 ? "Warning" : "Error", line_no); fprintf(stderr, "%s", msg); fprintf(stderr, "\n"); } } void writefdef(struct fstate *f, int nm) { fprintf(output, ".ds f%c %s\n.nr s%c %d\n", nm, f->font, nm, f->size); } char * chfont(struct fstate *of, struct fstate *nf, char *s) { if (of->font[0] != nf->font[0] || of->font[1] != nf->font[1]) { *s++ = '\\'; *s++ = 'f'; if (nf->font[1]) { *s++ = '('; *s++ = nf->font[0]; *s++ = nf->font[1]; } else *s++ = nf->font[0]; } if (nf->size != of->size) { *s++ = '\\'; *s++ = 's'; if (nf->size < of->size) { *s++ = '-'; *s++ = '0' + of->size - nf->size; } else { *s++ = '+'; *s++ = '0' + nf->size - of->size; } } return s; } void parsefont(struct fstate *f, const char *s0) { const char *s = s0; int sizedelta = 0; - bzero(f, sizeof *f); + memset(f, 0, sizeof(struct fstate)); while (*s) { if (isdigit(*s)) f->size = f->size * 10 + *s - '0'; else if (isupper(*s)) if (f->font[0]) f->font[1] = *s; else f->font[0] = *s; else if (*s == 'c') f->allcaps = 1; else if (*s == '+') sizedelta++; else if (*s == '-') sizedelta--; else { errx(1, "bad font specification: %s", s0); } s++; } if (f->font[0] == 0) f->font[0] = 'R'; if (bodyf.size == 0) bodyf.size = 11; if (f->size == 0) f->size = bodyf.size + sizedelta; else if (sizedelta > 0) f->size += bodyf.size; else f->size = bodyf.size - f->size; } Index: user/alc/PQ_LAUNDRY/usr.bin/indent/lexi.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/indent/lexi.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/indent/lexi.c (revision 303642) @@ -1,606 +1,606 @@ -/* +/*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); /* * Here we have the token scanner for indent. It scans off one token and puts * it in the global variable "token". It returns a code, indicating the type * of token scanned. */ #include #include #include #include #include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" #define alphanum 1 #define opchar 3 struct templ { const char *rwd; int rwcode; }; struct templ specials[1000] = { {"switch", 1}, {"case", 2}, {"break", 0}, {"struct", 3}, {"union", 3}, {"enum", 3}, {"default", 2}, {"int", 4}, {"char", 4}, {"float", 4}, {"double", 4}, {"long", 4}, {"short", 4}, {"typedef", 4}, {"unsigned", 4}, {"register", 4}, {"static", 4}, {"global", 4}, {"extern", 4}, {"void", 4}, {"const", 4}, {"volatile", 4}, {"goto", 0}, {"return", 0}, {"if", 5}, {"while", 5}, {"for", 5}, {"else", 6}, {"do", 6}, {"sizeof", 7}, {0, 0} }; char chartype[128] = { /* this is used to facilitate the decision of * what type (alphanumeric, operator) each * character is */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0 }; int lexi(void) { int unary_delim; /* this is set to 1 if the current token * forces a following operator to be unary */ static int last_code; /* the last token type returned */ static int l_struct; /* set to 1 if the last token was 'struct' */ int code; /* internal code to be returned */ char qchar; /* the delimiter character for a string */ e_token = s_token; /* point to start of place to save token */ unary_delim = false; ps.col_1 = ps.last_nl; /* tell world that this token started in * column 1 iff the last thing scanned was nl */ ps.last_nl = false; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ ps.col_1 = false; /* leading blanks imply token is not in column * 1 */ if (++buf_ptr >= buf_end) fill_buffer(); } /* Scan an alphanumeric token */ if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { /* * we have a character or number */ const char *j; /* used for searching thru list of * * reserved words */ struct templ *p; if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' 
&& isdigit(buf_ptr[1]))) { int seendot = 0, seenexp = 0, seensfx = 0; if (*buf_ptr == '0' && (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { *e_token++ = *buf_ptr++; *e_token++ = *buf_ptr++; while (isxdigit(*buf_ptr)) { CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; } } else while (1) { if (*buf_ptr == '.') { if (seendot) break; else seendot++; } CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; if (!isdigit(*buf_ptr) && *buf_ptr != '.') { if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) break; else { seenexp++; seendot++; CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; if (*buf_ptr == '+' || *buf_ptr == '-') *e_token++ = *buf_ptr++; } } } while (1) { if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) { CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; seensfx |= 1; continue; } - if (!(seensfx & 2) && strchr("fFlL", *buf_ptr)) { + if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) { CHECK_SIZE_TOKEN; if (buf_ptr[1] == buf_ptr[0]) *e_token++ = *buf_ptr++; *e_token++ = *buf_ptr++; seensfx |= 2; continue; } break; } } else while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { /* fill_buffer() terminates buffer with newline */ if (*buf_ptr == BACKSLASH) { if (*(buf_ptr + 1) == '\n') { buf_ptr += 2; if (buf_ptr >= buf_end) fill_buffer(); } else break; } CHECK_SIZE_TOKEN; /* copy it over */ *e_token++ = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } *e_token++ = '\0'; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ if (++buf_ptr >= buf_end) fill_buffer(); } ps.its_a_keyword = false; ps.sizeof_keyword = false; if (l_struct && !ps.p_l_follow) { /* if last token was 'struct' and we're not * in parentheses, then this token * should be treated as a declaration */ l_struct = false; last_code = ident; ps.last_u_d = true; return (decl); } ps.last_u_d = l_struct; /* Operator after identifier is binary * unless last token was 'struct' */ l_struct = false; last_code = ident; /* Remember that this is the code we will * return */ if (auto_typedefs) { const char *q = s_token; size_t q_len = strlen(q); /* Check if we have an "_t" in the end */ if (q_len > 2 && (strcmp(q + q_len - 2, "_t") == 0)) { ps.its_a_keyword = true; ps.last_u_d = true; goto found_auto_typedef; } } /* * This loop will check if the token is a keyword. */ for (p = specials; (j = p->rwd) != NULL; p++) { const char *q = s_token; /* point at scanned token */ if (*j++ != *q++ || *j++ != *q++) continue; /* This test depends on the fact that * identifiers are always at least 1 character * long (ie. the first two bytes of the * identifier are always meaningful) */ if (q[-1] == 0) break; /* If its a one-character identifier */ while (*q++ == *j) if (*j++ == 0) goto found_keyword; /* I wish that C had a multi-level * break... 
*/ } if (p->rwd) { /* we have a keyword */ found_keyword: ps.its_a_keyword = true; ps.last_u_d = true; switch (p->rwcode) { case 1: /* it is a switch */ return (swstmt); case 2: /* a case or default */ return (casestmt); case 3: /* a "struct" */ /* * Next time around, we will want to know that we have had a * 'struct' */ l_struct = true; /* FALLTHROUGH */ case 4: /* one of the declaration keywords */ found_auto_typedef: if (ps.p_l_follow) { ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.sizeof_mask; break; /* inside parens: cast, param list or sizeof */ } last_code = decl; return (decl); case 5: /* if, while, for */ return (sp_paren); case 6: /* do, else */ return (sp_nparen); case 7: ps.sizeof_keyword = true; default: /* all others are treated like any other * identifier */ return (ident); } /* end of switch */ } /* end of if (found_it) */ if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { char *tp = buf_ptr; while (tp < buf_end) if (*tp++ == ')' && (*tp == ';' || *tp == ',')) goto not_proc; strncpy(ps.procname, token, sizeof ps.procname - 1); ps.in_parameter_declaration = 1; rparen_count = 1; not_proc:; } /* * The following hack attempts to guess whether or not the current * token is in fact a declaration keyword -- one that has been * typedefd */ if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') && !ps.p_l_follow && !ps.block_init && (ps.last_token == rparen || ps.last_token == semicolon || ps.last_token == decl || ps.last_token == lbrace || ps.last_token == rbrace)) { ps.its_a_keyword = true; ps.last_u_d = true; last_code = decl; return decl; } if (last_code == decl) /* if this is a declared variable, then * following sign is unary */ ps.last_u_d = true; /* will make "int a -1" work */ last_code = ident; return (ident); /* the ident is not in the list */ } /* end of procesing for alpanum character */ /* Scan a non-alphanumeric token */ *e_token++ = *buf_ptr; /* if it is only a one-character token, it is * moved here */ *e_token = '\0'; if (++buf_ptr >= buf_end) fill_buffer(); switch (*token) { case '\n': unary_delim = ps.last_u_d; ps.last_nl = true; /* remember that we just had a newline */ code = (had_eof ? 
0 : newline); /* * if data has been exhausted, the newline is a dummy, and we should * return code to stop */ break; case '\'': /* start of quoted character */ case '"': /* start of string */ qchar = *token; if (troff) { e_token[-1] = '`'; if (qchar == '"') *e_token++ = '`'; e_token = chfont(&bodyf, &stringf, e_token); } do { /* copy the string */ while (1) { /* move one character or [/] */ if (*buf_ptr == '\n') { diag2(1, "Unterminated literal"); goto stop_lit; } CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, * since CHECK_SIZE guarantees that there * are at least 5 entries left */ *e_token = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); if (*e_token == BACKSLASH) { /* if escape, copy extra char */ if (*buf_ptr == '\n') /* check for escaped newline */ ++line_no; if (troff) { *++e_token = BACKSLASH; if (*buf_ptr == BACKSLASH) *++e_token = BACKSLASH; } *++e_token = *buf_ptr++; ++e_token; /* we must increment this again because we * copied two chars */ if (buf_ptr >= buf_end) fill_buffer(); } else break; /* we copied one character */ } /* end of while (1) */ } while (*e_token++ != qchar); if (troff) { e_token = chfont(&stringf, &bodyf, e_token - 1); if (qchar == '"') *e_token++ = '\''; } stop_lit: code = ident; break; case ('('): case ('['): unary_delim = true; code = lparen; break; case (')'): case (']'): code = rparen; break; case '#': unary_delim = ps.last_u_d; code = preesc; break; case '?': unary_delim = true; code = question; break; case (':'): code = colon; unary_delim = true; break; case (';'): unary_delim = true; code = semicolon; break; case ('{'): unary_delim = true; /* * if (ps.in_or_st) ps.block_init = 1; */ /* ? code = ps.block_init ? lparen : lbrace; */ code = lbrace; break; case ('}'): unary_delim = true; /* ? code = ps.block_init ? rparen : rbrace; */ code = rbrace; break; case 014: /* a form feed */ unary_delim = ps.last_u_d; ps.last_nl = true; /* remember this so we can set 'ps.col_1' * right */ code = form_feed; break; case (','): unary_delim = true; code = comma; break; case '.': unary_delim = false; code = period; break; case '-': case '+': /* check for -, +, --, ++ */ code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; if (*buf_ptr == token[0]) { /* check for doubled character */ *e_token++ = *buf_ptr++; /* buffer overflow will be checked at end of loop */ if (last_code == ident || last_code == rparen) { code = (ps.last_u_d ? unary_op : postop); /* check for following ++ or -- */ unary_delim = false; } } else if (*buf_ptr == '=') /* check for operator += */ *e_token++ = *buf_ptr++; else if (*buf_ptr == '>') { /* check for operator -> */ *e_token++ = *buf_ptr++; if (!pointer_as_binop) { unary_delim = false; code = unary_op; ps.want_blank = false; } } break; /* buffer overflow will be checked at end of * switch */ case '=': if (ps.in_or_st) ps.block_init = 1; #ifdef undef if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ e_token[-1] = *buf_ptr++; if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) *e_token++ = *buf_ptr++; *e_token++ = '='; /* Flip =+ to += */ *e_token = 0; } #else if (*buf_ptr == '=') {/* == */ *e_token++ = '='; /* Flip =+ to += */ buf_ptr++; *e_token = 0; } #endif code = binary_op; unary_delim = true; break; /* can drop thru!!! 
*/ case '>': case '<': case '!': /* ops like <, <<, <=, !=, etc */ if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { *e_token++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer(); } if (*buf_ptr == '=') *e_token++ = *buf_ptr++; code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; break; default: if (token[0] == '/' && *buf_ptr == '*') { /* it is start of comment */ *e_token++ = '*'; if (++buf_ptr >= buf_end) fill_buffer(); code = comment; unary_delim = ps.last_u_d; break; } while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { /* * handle ||, &&, etc, and also things as in int *****i */ *e_token++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer(); } code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; } /* end of switch */ if (code != newline) { l_struct = false; last_code = code; } if (buf_ptr >= buf_end) /* check for input buffer empty */ fill_buffer(); ps.last_u_d = unary_delim; *e_token = '\0'; /* null terminate the token */ return (code); } /* * Add the given keyword to the keyword table, using val as the keyword type */ void addkey(char *key, int val) { struct templ *p = specials; while (p->rwd) if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) return; else p++; if (p >= specials + sizeof specials / sizeof specials[0]) return; /* For now, table overflows are silently * ignored */ p->rwd = key; p->rwcode = val; p[1].rwd = NULL; p[1].rwcode = 0; } Index: user/alc/PQ_LAUNDRY/usr.bin/indent/parse.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/indent/parse.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/indent/parse.c (revision 303642) @@ -1,332 +1,336 @@ -/* +/*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #if 0 #ifndef lint static char sccsid[] = "@(#)parse.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); +#include #include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" static void reduce(void); void parse(int tk) /* tk: the code for the construct scanned */ { int i; #ifdef debug printf("%2d - %s\n", tk, token); #endif while (ps.p_stack[ps.tos] == ifhead && tk != elselit) { /* true if we have an if without an else */ ps.p_stack[ps.tos] = stmt; /* apply the if(..) stmt ::= stmt * reduction */ reduce(); /* see if this allows any reduction */ } switch (tk) { /* go on and figure out what to do with the * input */ case decl: /* scanned a declaration word */ ps.search_brace = btype_2; /* indicate that following brace should be on same line */ if (ps.p_stack[ps.tos] != decl) { /* only put one declaration * onto stack */ break_comma = true; /* while in declaration, newline should be * forced after comma */ ps.p_stack[++ps.tos] = decl; ps.il[ps.tos] = ps.i_l_follow; if (ps.ljust_decl) {/* only do if we want left justified * declarations */ ps.ind_level = 0; for (i = ps.tos - 1; i > 0; --i) if (ps.p_stack[i] == decl) ++ps.ind_level; /* indentation is number of * declaration levels deep we are */ ps.i_l_follow = ps.ind_level; } } break; case ifstmt: /* scanned if (...) */ if (ps.p_stack[ps.tos] == elsehead && ps.else_if) /* "else if ..." */ ps.i_l_follow = ps.il[ps.tos]; case dolit: /* 'do' */ case forstmt: /* for (...) */ ps.p_stack[++ps.tos] = tk; ps.il[ps.tos] = ps.ind_level = ps.i_l_follow; ++ps.i_l_follow; /* subsequent statements should be indented 1 */ ps.search_brace = btype_2; break; case lbrace: /* scanned { */ break_comma = false; /* don't break comma in an initial list */ if (ps.p_stack[ps.tos] == stmt || ps.p_stack[ps.tos] == decl || ps.p_stack[ps.tos] == stmtl) ++ps.i_l_follow; /* it is a random, isolated stmt group or a * declaration */ else { if (s_code == e_code) { /* * only do this if there is nothing on the line */ --ps.ind_level; /* * it is a group as part of a while, for, etc. */ if (ps.p_stack[ps.tos] == swstmt && ps.case_indent >= 1) --ps.ind_level; /* * for a switch, brace should be two levels out from the code */ } } ps.p_stack[++ps.tos] = lbrace; ps.il[ps.tos] = ps.ind_level; ps.p_stack[++ps.tos] = stmt; /* allow null stmt between braces */ ps.il[ps.tos] = ps.i_l_follow; break; case whilestmt: /* scanned while (...) */ if (ps.p_stack[ps.tos] == dohead) { /* it is matched with do stmt */ ps.ind_level = ps.i_l_follow = ps.il[ps.tos]; ps.p_stack[++ps.tos] = whilestmt; ps.il[ps.tos] = ps.ind_level = ps.i_l_follow; } else { /* it is a while loop */ ps.p_stack[++ps.tos] = whilestmt; ps.il[ps.tos] = ps.i_l_follow; ++ps.i_l_follow; ps.search_brace = btype_2; } break; case elselit: /* scanned an else */ if (ps.p_stack[ps.tos] != ifhead) diag2(1, "Unmatched 'else'"); else { ps.ind_level = ps.il[ps.tos]; /* indentation for else should * be same as for if */ ps.i_l_follow = ps.ind_level + 1; /* everything following should * be in 1 level */ ps.p_stack[ps.tos] = elsehead; /* remember if with else */ ps.search_brace = btype_2 | ps.else_if; } break; case rbrace: /* scanned a } */ /* stack should have or */ if (ps.p_stack[ps.tos - 1] == lbrace) { ps.ind_level = ps.i_l_follow = ps.il[--ps.tos]; ps.p_stack[ps.tos] = stmt; } else diag2(1, "Statement nesting error"); break; case swstmt: /* had switch (...) 
*/ ps.p_stack[++ps.tos] = swstmt; ps.cstk[ps.tos] = case_ind; /* save current case indent level */ ps.il[ps.tos] = ps.i_l_follow; case_ind = ps.i_l_follow + ps.case_indent; /* cases should be one * level down from * switch */ ps.i_l_follow += ps.case_indent + 1; /* statements should be two * levels in */ ps.search_brace = btype_2; break; case semicolon: /* this indicates a simple stmt */ break_comma = false; /* turn off flag to break after commas in a * declaration */ ps.p_stack[++ps.tos] = stmt; ps.il[ps.tos] = ps.ind_level; break; default: /* this is an error */ diag2(1, "Unknown code to parser"); return; } /* end of switch */ + + if (ps.tos >= STACKSIZE) + errx(1, "Parser stack overflow"); reduce(); /* see if any reduction can be done */ #ifdef debug for (i = 1; i <= ps.tos; ++i) printf("(%d %d)", ps.p_stack[i], ps.il[i]); printf("\n"); #endif return; } /* * NAME: reduce * * FUNCTION: Implements the reduce part of the parsing algorithm * * ALGORITHM: The following reductions are done. Reductions are repeated * until no more are possible. * * Old TOS New TOS * * * do "dostmt" * if "ifstmt" * switch * decl * "ifelse" * for * while * "dostmt" while * * On each reduction, ps.i_l_follow (the indentation for the following line) * is set to the indentation level associated with the old TOS. * * PARAMETERS: None * * RETURNS: Nothing * * GLOBALS: ps.cstk ps.i_l_follow = ps.il ps.p_stack = ps.tos = * * CALLS: None * * CALLED BY: parse * * HISTORY: initial coding November 1976 D A Willcox of CAC * */ /*----------------------------------------------*\ | REDUCTION PHASE | \*----------------------------------------------*/ static void reduce(void) { int i; for (;;) { /* keep looping until there is nothing left to * reduce */ switch (ps.p_stack[ps.tos]) { case stmt: switch (ps.p_stack[ps.tos - 1]) { case stmt: case stmtl: /* stmtl stmt or stmt stmt */ ps.p_stack[--ps.tos] = stmtl; break; case dolit: /* */ ps.p_stack[--ps.tos] = dohead; ps.i_l_follow = ps.il[ps.tos]; break; case ifstmt: /* */ ps.p_stack[--ps.tos] = ifhead; for (i = ps.tos - 1; ( ps.p_stack[i] != stmt && ps.p_stack[i] != stmtl && ps.p_stack[i] != lbrace ); --i); ps.i_l_follow = ps.il[i]; /* * for the time being, we will assume that there is no else on * this if, and set the indentation level accordingly. If an * else is scanned, it will be fixed up later */ break; case swstmt: /* */ case_ind = ps.cstk[ps.tos - 1]; case decl: /* finish of a declaration */ case elsehead: /* < else> */ case forstmt: /* */ case whilestmt: /* */ ps.p_stack[--ps.tos] = stmt; ps.i_l_follow = ps.il[ps.tos]; break; default: /* */ return; } /* end of section for on top of stack */ break; case whilestmt: /* while (...) on top */ if (ps.p_stack[ps.tos - 1] == dohead) { /* it is termination of a do while */ ps.tos -= 2; break; } else return; default: /* anything else on top */ return; } } } Index: user/alc/PQ_LAUNDRY/usr.bin/indent/pr_comment.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/indent/pr_comment.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/indent/pr_comment.c (revision 303642) @@ -1,429 +1,335 @@ -/* +/*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)pr_comment.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include +#include #include "indent_globs.h" #include "indent.h" /* * NAME: * pr_comment * * FUNCTION: * This routine takes care of scanning and printing comments. * * ALGORITHM: * 1) Decide where the comment should be aligned, and if lines should * be broken. * 2) If lines should not be broken and filled, just copy up to end of * comment. * 3) If lines should be filled, then scan thru input_buffer copying * characters to com_buf. Remember where the last blank, tab, or * newline was. When line is filled, print up to last blank and * continue copying. * * HISTORY: * November 1976 D A Willcox of CAC Initial coding * 12/6/76 D A Willcox of CAC Modification to handle * UNIX-style comments * */ /* * this routine processes comments. It makes an attempt to keep comments from * going over the max line length. If a line is too long, it moves everything * from the last blank to the next comment line. Blanks and tabs from the * beginning of the input line are removed */ void pr_comment(void) { int now_col; /* column we are in now */ int adj_max_col; /* Adjusted max_col for when we decide to * spill comments over the right margin */ char *last_bl; /* points to the last blank in the output * buffer */ char *t_ptr; /* used for moving string */ - int unix_comment; /* tri-state variable used to decide if it is - * a unix-style comment. 
0 means only blanks - * since /+*, 1 means regular style comment, 2 - * means unix style comment */ int break_delim = comment_delimiter_on_blankline; int l_just_saw_decl = ps.just_saw_decl; - /* - * int ps.last_nl = 0; true iff the last significant thing - * weve seen is a newline - */ - int one_liner = 1; /* true iff this comment is a one-liner */ adj_max_col = max_col; ps.just_saw_decl = 0; last_bl = NULL; /* no blanks found so far */ ps.box_com = false; /* at first, assume that we are not in * a boxed comment or some other * comment that should not be touched */ ++ps.out_coms; /* keep track of number of comments */ - unix_comment = 1; /* set flag to let us figure out if there is a - * unix-style comment ** DISABLED: use 0 to - * reenable this hack! */ /* Figure where to align and how to treat the comment */ if (ps.col_1 && !format_col1_comments) { /* if comment starts in column * 1 it should not be touched */ ps.box_com = true; + break_delim = false; ps.com_col = 1; } else { if (*buf_ptr == '-' || *buf_ptr == '*' || (*buf_ptr == '\n' && !format_block_comments)) { ps.box_com = true; /* A comment with a '-' or '*' immediately * after the /+* is assumed to be a boxed * comment. A comment with a newline * immediately after the /+* is assumed to * be a block comment and is treated as a * box comment unless format_block_comments * is nonzero (the default). */ - break_delim = 0; + break_delim = false; } if ( /* ps.bl_line && */ (s_lab == e_lab) && (s_code == e_code)) { /* klg: check only if this line is blank */ /* * If this (*and previous lines are*) blank, dont put comment way * out at left */ ps.com_col = (ps.ind_level - ps.unindent_displace) * ps.ind_size + 1; adj_max_col = block_comment_max_col; if (ps.com_col <= 1) ps.com_col = 1 + !format_col1_comments; } else { int target_col; - break_delim = 0; + break_delim = false; if (s_code != e_code) target_col = count_spaces(compute_code_target(), s_code); else { target_col = 1; if (s_lab != e_lab) target_col = count_spaces(compute_label_target(), s_lab); } ps.com_col = ps.decl_on_line || ps.ind_level == 0 ? 
ps.decl_com_ind : ps.com_ind; if (ps.com_col < target_col) ps.com_col = ((target_col + 7) & ~7) + 1; if (ps.com_col + 24 > adj_max_col) adj_max_col = ps.com_col + 24; } } if (ps.box_com) { buf_ptr[-2] = 0; ps.n_comment_delta = 1 - count_spaces(1, in_buffer); buf_ptr[-2] = '/'; } else { ps.n_comment_delta = 0; while (*buf_ptr == ' ' || *buf_ptr == '\t') buf_ptr++; } ps.comment_delta = 0; *e_com++ = '/'; /* put '/' followed by '*' into buffer */ *e_com++ = '*'; if (*buf_ptr != ' ' && !ps.box_com) *e_com++ = ' '; - *e_com = '\0'; - if (troff) { - now_col = 1; - adj_max_col = 80; + /* Don't put a break delimiter if this comment is a one-liner */ + for (t_ptr = buf_ptr; *t_ptr != '\0' && *t_ptr != '\n'; t_ptr++) { + if (t_ptr >= buf_end) + fill_buffer(); + if (t_ptr[0] == '*' && t_ptr[1] == '/') { + break_delim = false; + break; + } } - else - now_col = count_spaces(ps.com_col, s_com); /* figure what column we - * would be in if we - * printed the comment - * now */ + if (break_delim) { + char *t = e_com; + e_com = s_com + 2; + *e_com = 0; + if (blanklines_before_blockcomments) + prefix_blankline_requested = 1; + dump_line(); + e_com = s_com = t; + if (!ps.box_com && star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; + } + + if (troff) + adj_max_col = 80; + /* Start to copy the comment */ while (1) { /* this loop will go until the comment is * copied */ - if (*buf_ptr > 040 && *buf_ptr != '*') - ps.last_nl = 0; CHECK_SIZE_COM; switch (*buf_ptr) { /* this checks for various spcl cases */ case 014: /* check for a form feed */ if (!ps.box_com) { /* in a text comment, break the line here */ ps.use_ff = true; /* fix so dump_line uses a form feed */ dump_line(); last_bl = NULL; - *e_com++ = ' '; - *e_com++ = '*'; - *e_com++ = ' '; - while (*++buf_ptr == ' ' || *buf_ptr == '\t'); + if (!ps.box_com && star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; + while (*++buf_ptr == ' ' || *buf_ptr == '\t') + ; } else { if (++buf_ptr >= buf_end) fill_buffer(); *e_com++ = 014; } break; case '\n': if (had_eof) { /* check for unexpected eof */ printf("Unterminated comment\n"); - *e_com = '\0'; dump_line(); return; } - one_liner = 0; + last_bl = NULL; if (ps.box_com || ps.last_nl) { /* if this is a boxed comment, * we dont ignore the newline */ - if (s_com == e_com) { + if (s_com == e_com) *e_com++ = ' '; - *e_com++ = ' '; - } - *e_com = '\0'; if (!ps.box_com && e_com - s_com > 3) { - if (break_delim == 1 && s_com[0] == '/' - && s_com[1] == '*' && s_com[2] == ' ') { - char *t = e_com; - break_delim = 2; - e_com = s_com + 2; - *e_com = 0; - if (blanklines_before_blockcomments) - prefix_blankline_requested = 1; - dump_line(); - e_com = t; - s_com[0] = s_com[1] = s_com[2] = ' '; - } dump_line(); - CHECK_SIZE_COM; - *e_com++ = ' '; - *e_com++ = ' '; + if (star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; } dump_line(); - now_col = ps.com_col; + if (!ps.box_com && star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; } else { ps.last_nl = 1; - if (unix_comment != 1) { /* we not are in unix_style - * comment */ - if (unix_comment == 0 && s_code == e_code) { - /* - * if it is a UNIX-style comment, ignore the - * requirement that previous line be blank for - * unindention - */ - ps.com_col = (ps.ind_level - ps.unindent_displace) * ps.ind_size + 1; - if (ps.com_col <= 1) - ps.com_col = 2; - } - unix_comment = 2; /* permanently remember that we are in - * this type of comment */ - dump_line(); - ++line_no; - now_col = ps.com_col; - *e_com++ = ' '; - /* - * 
fix so that the star at the start of the line will line - * up - */ - do /* flush leading white space */ - if (++buf_ptr >= buf_end) - fill_buffer(); - while (*buf_ptr == ' ' || *buf_ptr == '\t'); - break; - } if (*(e_com - 1) == ' ' || *(e_com - 1) == '\t') last_bl = e_com - 1; /* * if there was a space at the end of the last line, remember * where it was */ else { /* otherwise, insert one */ last_bl = e_com; CHECK_SIZE_COM; *e_com++ = ' '; - ++now_col; } } ++line_no; /* keep track of input line number */ if (!ps.box_com) { int nstar = 1; do { /* flush any blanks and/or tabs at start of * next line */ if (++buf_ptr >= buf_end) fill_buffer(); if (*buf_ptr == '*' && --nstar >= 0) { if (++buf_ptr >= buf_end) fill_buffer(); if (*buf_ptr == '/') goto end_of_comment; } } while (*buf_ptr == ' ' || *buf_ptr == '\t'); } else if (++buf_ptr >= buf_end) fill_buffer(); break; /* end of case for newline */ case '*': /* must check for possibility of being at end * of comment */ if (++buf_ptr >= buf_end) /* get to next char after * */ fill_buffer(); - if (unix_comment == 0) /* set flag to show we are not in - * unix-style comment */ - unix_comment = 1; - if (*buf_ptr == '/') { /* it is the end!!! */ end_of_comment: if (++buf_ptr >= buf_end) fill_buffer(); - - if (*(e_com - 1) != ' ' && !ps.box_com) { /* insure blank before - * end */ + CHECK_SIZE_COM; + if (break_delim) { + if (e_com > s_com + 3) { + dump_line(); + } + else + s_com = e_com; *e_com++ = ' '; - ++now_col; } - if (break_delim == 1 && !one_liner && s_com[0] == '/' - && s_com[1] == '*' && s_com[2] == ' ') { - char *t = e_com; - break_delim = 2; - e_com = s_com + 2; - *e_com = 0; - if (blanklines_before_blockcomments) - prefix_blankline_requested = 1; - dump_line(); - e_com = t; - s_com[0] = s_com[1] = s_com[2] = ' '; - } - if (break_delim == 2 && e_com > s_com + 3 - /* now_col > adj_max_col - 2 && !ps.box_com */ ) { - *e_com = '\0'; - dump_line(); - now_col = ps.com_col; - } - CHECK_SIZE_COM; - *e_com++ = '*'; - *e_com++ = '/'; - *e_com = '\0'; + if (e_com[-1] != ' ' && !ps.box_com) + *e_com++ = ' '; /* ensure blank before end */ + *e_com++ = '*', *e_com++ = '/', *e_com = '\0'; ps.just_saw_decl = l_just_saw_decl; return; } - else { /* handle isolated '*' */ + else /* handle isolated '*' */ *e_com++ = '*'; - ++now_col; - } break; default: /* we have a random char */ - if (unix_comment == 0 && *buf_ptr != ' ' && *buf_ptr != '\t') - unix_comment = 1; /* we are not in unix-style comment */ - - *e_com = *buf_ptr++; - if (buf_ptr >= buf_end) - fill_buffer(); - - if (*e_com == '\t') /* keep track of column */ - now_col = ((now_col - 1) & tabmask) + tabsize + 1; - else if (*e_com == '\b') /* this is a backspace */ - --now_col; - else - ++now_col; - - if (*e_com == ' ' || *e_com == '\t') - last_bl = e_com; - /* remember we saw a blank */ - - ++e_com; - if (now_col > adj_max_col && !ps.box_com && unix_comment == 1 && e_com[-1] > ' ') { + now_col = count_spaces_until(ps.com_col, s_com, e_com); + do { + *e_com = *buf_ptr++; + if (buf_ptr >= buf_end) + fill_buffer(); + if (*e_com == ' ' || *e_com == '\t') + last_bl = e_com; /* remember we saw a blank */ + ++e_com; + now_col++; + } while (!memchr("*\n\r\b\t", *buf_ptr, 6) && + (now_col <= adj_max_col || !last_bl)); + ps.last_nl = false; + if (now_col > adj_max_col && !ps.box_com && e_com[-1] > ' ') { /* * the comment is too long, it must be broken up */ - if (break_delim == 1 && s_com[0] == '/' - && s_com[1] == '*' && s_com[2] == ' ') { - char *t = e_com; - break_delim = 2; - e_com = s_com + 2; - *e_com = 
0; - if (blanklines_before_blockcomments) - prefix_blankline_requested = 1; + if (last_bl == NULL) { dump_line(); - e_com = t; - s_com[0] = s_com[1] = s_com[2] = ' '; + if (!ps.box_com && star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; + break; } - if (last_bl == NULL) { /* we have seen no blanks */ - last_bl = e_com; /* fake it */ - *e_com++ = ' '; - } - *e_com = '\0'; /* print what we have */ - *last_bl = '\0'; - while (last_bl > s_com && last_bl[-1] < 040) - *--last_bl = 0; + *e_com = '\0'; e_com = last_bl; dump_line(); - - *e_com++ = ' '; /* add blanks for continuation */ - *e_com++ = ' '; - *e_com++ = ' '; - - t_ptr = last_bl + 1; + if (!ps.box_com && star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; + for (t_ptr = last_bl + 1; *t_ptr == ' ' || *t_ptr == '\t'; + t_ptr++) + ; last_bl = NULL; - if (t_ptr >= e_com) { - while (*t_ptr == ' ' || *t_ptr == '\t') - t_ptr++; - while (*t_ptr != '\0') { /* move unprinted part of - * comment down in buffer */ - if (*t_ptr == ' ' || *t_ptr == '\t') - last_bl = e_com; - *e_com++ = *t_ptr++; - } - } - *e_com = '\0'; - now_col = count_spaces(ps.com_col, s_com); /* recompute current - * position */ + while (*t_ptr != '\0') { + if (*t_ptr == ' ' || *t_ptr == '\t') + last_bl = e_com; + *e_com++ = *t_ptr++; + } } break; } } } Index: user/alc/PQ_LAUNDRY/usr.bin/locale/locale.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/locale/locale.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/locale/locale.c (revision 303642) @@ -1,690 +1,690 @@ /*- * Copyright (c) 2002, 2003 Alexey Zelkin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * XXX: implement missing era_* (LC_TIME) keywords (require libc & * nl_langinfo(3) extensions) * * XXX: correctly handle reserved 'charmap' keyword and '-m' option (require * localedef(1) implementation). Currently it's handled via * nl_langinfo(CODESET). 
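
The pr_comment.c rewrite above collapses the old unix-comment special cases into one copy loop that remembers the last blank seen and, once the output passes the adjusted right margin, breaks the comment at that blank (or mid-word if none was seen). The fragment below is a stand-alone sketch of that fill strategy only; wrap_comment() and the hard-coded margin are illustrative choices, not indent's actual interfaces.

#include <stdio.h>
#include <string.h>

/*
 * Print "text" as a block-comment body, prefixing each continuation
 * line with " * " and breaking at the last blank before "maxcol".
 */
static void
wrap_comment(const char *text, size_t maxcol)
{
	size_t len = strlen(text);
	size_t start = 0, brk;

	while (start < len) {
		if (len - start <= maxcol) {
			printf(" * %s\n", text + start);
			break;
		}
		brk = start + maxcol;		/* candidate break point */
		while (brk > start && text[brk] != ' ')
			brk--;			/* back up to the last blank */
		if (brk == start)
			brk = start + maxcol;	/* no blank seen: hard break */
		printf(" * %.*s\n", (int)(brk - start), text + start);
		for (start = brk; text[start] == ' '; start++)
			;			/* skip the blank(s) we broke at */
	}
}

int
main(void)
{
	wrap_comment("this routine processes comments and tries to keep "
	    "them from running past the right margin of the output", 40);
	return (0);
}
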
*/ #include #include #include #include #include #include #include #include #include #include #include #include "setlocale.h" /* Local prototypes */ void init_locales_list(void); void list_charmaps(void); void list_locales(void); const char *lookup_localecat(int); char *kwval_lconv(int); int kwval_lookup(char *, char **, int *, int *); void showdetails(char *); void showkeywordslist(char *substring); void showlocale(void); void usage(void); /* Global variables */ static StringList *locales = NULL; int all_locales = 0; int all_charmaps = 0; int prt_categories = 0; int prt_keywords = 0; int more_params = 0; struct _lcinfo { const char *name; int id; } lcinfo [] = { { "LC_CTYPE", LC_CTYPE }, { "LC_COLLATE", LC_COLLATE }, { "LC_TIME", LC_TIME }, { "LC_NUMERIC", LC_NUMERIC }, { "LC_MONETARY", LC_MONETARY }, { "LC_MESSAGES", LC_MESSAGES } }; #define NLCINFO nitems(lcinfo) /* ids for values not referenced by nl_langinfo() */ #define KW_ZERO 10000 #define KW_GROUPING (KW_ZERO+1) #define KW_INT_CURR_SYMBOL (KW_ZERO+2) #define KW_CURRENCY_SYMBOL (KW_ZERO+3) #define KW_MON_DECIMAL_POINT (KW_ZERO+4) #define KW_MON_THOUSANDS_SEP (KW_ZERO+5) #define KW_MON_GROUPING (KW_ZERO+6) #define KW_POSITIVE_SIGN (KW_ZERO+7) #define KW_NEGATIVE_SIGN (KW_ZERO+8) #define KW_INT_FRAC_DIGITS (KW_ZERO+9) #define KW_FRAC_DIGITS (KW_ZERO+10) #define KW_P_CS_PRECEDES (KW_ZERO+11) #define KW_P_SEP_BY_SPACE (KW_ZERO+12) #define KW_N_CS_PRECEDES (KW_ZERO+13) #define KW_N_SEP_BY_SPACE (KW_ZERO+14) #define KW_P_SIGN_POSN (KW_ZERO+15) #define KW_N_SIGN_POSN (KW_ZERO+16) #define KW_INT_P_CS_PRECEDES (KW_ZERO+17) #define KW_INT_P_SEP_BY_SPACE (KW_ZERO+18) #define KW_INT_N_CS_PRECEDES (KW_ZERO+19) #define KW_INT_N_SEP_BY_SPACE (KW_ZERO+20) #define KW_INT_P_SIGN_POSN (KW_ZERO+21) #define KW_INT_N_SIGN_POSN (KW_ZERO+22) struct _kwinfo { const char *name; int isstr; /* true - string, false - number */ int catid; /* LC_* */ int value_ref; const char *comment; } kwinfo [] = { { "charmap", 1, LC_CTYPE, CODESET, "" }, /* hack */ { "decimal_point", 1, LC_NUMERIC, RADIXCHAR, "" }, { "thousands_sep", 1, LC_NUMERIC, THOUSEP, "" }, { "grouping", 1, LC_NUMERIC, KW_GROUPING, "" }, { "radixchar", 1, LC_NUMERIC, RADIXCHAR, "Same as decimal_point (FreeBSD only)" }, /* compat */ { "thousep", 1, LC_NUMERIC, THOUSEP, "Same as thousands_sep (FreeBSD only)" }, /* compat */ { "int_curr_symbol", 1, LC_MONETARY, KW_INT_CURR_SYMBOL, "" }, { "currency_symbol", 1, LC_MONETARY, KW_CURRENCY_SYMBOL, "" }, { "mon_decimal_point", 1, LC_MONETARY, KW_MON_DECIMAL_POINT, "" }, { "mon_thousands_sep", 1, LC_MONETARY, KW_MON_THOUSANDS_SEP, "" }, { "mon_grouping", 1, LC_MONETARY, KW_MON_GROUPING, "" }, { "positive_sign", 1, LC_MONETARY, KW_POSITIVE_SIGN, "" }, { "negative_sign", 1, LC_MONETARY, KW_NEGATIVE_SIGN, "" }, { "int_frac_digits", 0, LC_MONETARY, KW_INT_FRAC_DIGITS, "" }, { "frac_digits", 0, LC_MONETARY, KW_FRAC_DIGITS, "" }, { "p_cs_precedes", 0, LC_MONETARY, KW_P_CS_PRECEDES, "" }, { "p_sep_by_space", 0, LC_MONETARY, KW_P_SEP_BY_SPACE, "" }, { "n_cs_precedes", 0, LC_MONETARY, KW_N_CS_PRECEDES, "" }, { "n_sep_by_space", 0, LC_MONETARY, KW_N_SEP_BY_SPACE, "" }, { "p_sign_posn", 0, LC_MONETARY, KW_P_SIGN_POSN, "" }, { "n_sign_posn", 0, LC_MONETARY, KW_N_SIGN_POSN, "" }, { "int_p_cs_precedes", 0, LC_MONETARY, KW_INT_P_CS_PRECEDES, "" }, { "int_p_sep_by_space", 0, LC_MONETARY, KW_INT_P_SEP_BY_SPACE, "" }, { "int_n_cs_precedes", 0, LC_MONETARY, KW_INT_N_CS_PRECEDES, "" }, { "int_n_sep_by_space", 0, LC_MONETARY, KW_INT_N_SEP_BY_SPACE, "" }, { "int_p_sign_posn", 0, 
LC_MONETARY, KW_INT_P_SIGN_POSN, "" }, { "int_n_sign_posn", 0, LC_MONETARY, KW_INT_N_SIGN_POSN, "" }, { "d_t_fmt", 1, LC_TIME, D_T_FMT, "" }, { "d_fmt", 1, LC_TIME, D_FMT, "" }, { "t_fmt", 1, LC_TIME, T_FMT, "" }, { "am_str", 1, LC_TIME, AM_STR, "" }, { "pm_str", 1, LC_TIME, PM_STR, "" }, { "t_fmt_ampm", 1, LC_TIME, T_FMT_AMPM, "" }, { "day_1", 1, LC_TIME, DAY_1, "" }, { "day_2", 1, LC_TIME, DAY_2, "" }, { "day_3", 1, LC_TIME, DAY_3, "" }, { "day_4", 1, LC_TIME, DAY_4, "" }, { "day_5", 1, LC_TIME, DAY_5, "" }, { "day_6", 1, LC_TIME, DAY_6, "" }, { "day_7", 1, LC_TIME, DAY_7, "" }, { "abday_1", 1, LC_TIME, ABDAY_1, "" }, { "abday_2", 1, LC_TIME, ABDAY_2, "" }, { "abday_3", 1, LC_TIME, ABDAY_3, "" }, { "abday_4", 1, LC_TIME, ABDAY_4, "" }, { "abday_5", 1, LC_TIME, ABDAY_5, "" }, { "abday_6", 1, LC_TIME, ABDAY_6, "" }, { "abday_7", 1, LC_TIME, ABDAY_7, "" }, { "mon_1", 1, LC_TIME, MON_1, "" }, { "mon_2", 1, LC_TIME, MON_2, "" }, { "mon_3", 1, LC_TIME, MON_3, "" }, { "mon_4", 1, LC_TIME, MON_4, "" }, { "mon_5", 1, LC_TIME, MON_5, "" }, { "mon_6", 1, LC_TIME, MON_6, "" }, { "mon_7", 1, LC_TIME, MON_7, "" }, { "mon_8", 1, LC_TIME, MON_8, "" }, { "mon_9", 1, LC_TIME, MON_9, "" }, { "mon_10", 1, LC_TIME, MON_10, "" }, { "mon_11", 1, LC_TIME, MON_11, "" }, { "mon_12", 1, LC_TIME, MON_12, "" }, { "abmon_1", 1, LC_TIME, ABMON_1, "" }, { "abmon_2", 1, LC_TIME, ABMON_2, "" }, { "abmon_3", 1, LC_TIME, ABMON_3, "" }, { "abmon_4", 1, LC_TIME, ABMON_4, "" }, { "abmon_5", 1, LC_TIME, ABMON_5, "" }, { "abmon_6", 1, LC_TIME, ABMON_6, "" }, { "abmon_7", 1, LC_TIME, ABMON_7, "" }, { "abmon_8", 1, LC_TIME, ABMON_8, "" }, { "abmon_9", 1, LC_TIME, ABMON_9, "" }, { "abmon_10", 1, LC_TIME, ABMON_10, "" }, { "abmon_11", 1, LC_TIME, ABMON_11, "" }, { "abmon_12", 1, LC_TIME, ABMON_12, "" }, { "altmon_1", 1, LC_TIME, ALTMON_1, "(FreeBSD only)" }, { "altmon_2", 1, LC_TIME, ALTMON_2, "(FreeBSD only)" }, { "altmon_3", 1, LC_TIME, ALTMON_3, "(FreeBSD only)" }, { "altmon_4", 1, LC_TIME, ALTMON_4, "(FreeBSD only)" }, { "altmon_5", 1, LC_TIME, ALTMON_5, "(FreeBSD only)" }, { "altmon_6", 1, LC_TIME, ALTMON_6, "(FreeBSD only)" }, { "altmon_7", 1, LC_TIME, ALTMON_7, "(FreeBSD only)" }, { "altmon_8", 1, LC_TIME, ALTMON_8, "(FreeBSD only)" }, { "altmon_9", 1, LC_TIME, ALTMON_9, "(FreeBSD only)" }, { "altmon_10", 1, LC_TIME, ALTMON_10, "(FreeBSD only)" }, { "altmon_11", 1, LC_TIME, ALTMON_11, "(FreeBSD only)" }, { "altmon_12", 1, LC_TIME, ALTMON_12, "(FreeBSD only)" }, { "era", 1, LC_TIME, ERA, "(unavailable)" }, { "era_d_fmt", 1, LC_TIME, ERA_D_FMT, "(unavailable)" }, { "era_d_t_fmt", 1, LC_TIME, ERA_D_T_FMT, "(unavailable)" }, { "era_t_fmt", 1, LC_TIME, ERA_T_FMT, "(unavailable)" }, { "alt_digits", 1, LC_TIME, ALT_DIGITS, "" }, { "d_md_order", 1, LC_TIME, D_MD_ORDER, "(FreeBSD only)" }, /* local */ { "yesexpr", 1, LC_MESSAGES, YESEXPR, "" }, { "noexpr", 1, LC_MESSAGES, NOEXPR, "" }, { "yesstr", 1, LC_MESSAGES, YESSTR, "(POSIX legacy)" }, /* compat */ { "nostr", 1, LC_MESSAGES, NOSTR, "(POSIX legacy)" } /* compat */ }; -#define NKWINFO (sizeof(kwinfo)/sizeof(kwinfo[0])) +#define NKWINFO (nitems(kwinfo)) const char *boguslocales[] = { "UTF-8" }; -#define NBOGUS (sizeof(boguslocales)/sizeof(boguslocales[0])) +#define NBOGUS (nitems(boguslocales)) int main(int argc, char *argv[]) { int ch; int tmp; while ((ch = getopt(argc, argv, "ackms:")) != -1) { switch (ch) { case 'a': all_locales = 1; break; case 'c': prt_categories = 1; break; case 'k': prt_keywords = 1; break; case 'm': all_charmaps = 1; break; default: usage(); } } argc -= 
optind; argv += optind; /* validate arguments */ if (all_locales && all_charmaps) usage(); if ((all_locales || all_charmaps) && argc > 0) usage(); if ((all_locales || all_charmaps) && (prt_categories || prt_keywords)) usage(); /* process '-a' */ if (all_locales) { list_locales(); exit(0); } /* process '-m' */ if (all_charmaps) { list_charmaps(); exit(0); } /* check for special case '-k list' */ tmp = 0; if (prt_keywords && argc > 0) while (tmp < argc) if (strcasecmp(argv[tmp++], "list") == 0) { showkeywordslist(argv[tmp]); exit(0); } /* process '-c', '-k', or command line arguments. */ if (prt_categories || prt_keywords || argc > 0) { if (argc > 0) { setlocale(LC_ALL, ""); while (argc > 0) { showdetails(*argv); argv++; argc--; } } else { uint i; for (i = 0; i < nitems(kwinfo); i++) showdetails ((char *)kwinfo [i].name); } exit(0); } /* no arguments, show current locale state */ showlocale(); return (0); } void usage(void) { printf("Usage: locale [ -a | -m ]\n" " locale -k list [prefix]\n" " locale [ -ck ] [keyword ...]\n"); exit(1); } /* * Output information about all available locales * * XXX actually output of this function does not guarantee that locale * is really available to application, since it can be broken or * inconsistent thus setlocale() will fail. Maybe add '-V' function to * also validate these locales? */ void list_locales(void) { size_t i; init_locales_list(); for (i = 0; i < locales->sl_cur; i++) { printf("%s\n", locales->sl_str[i]); } } /* * qsort() helper function */ static int scmp(const void *s1, const void *s2) { return strcmp(*(const char **)s1, *(const char **)s2); } /* * Output information about all available charmaps * * XXX this function is doing a task in hackish way, i.e. by scaning * list of locales, spliting their codeset part and building list of * them. */ void list_charmaps(void) { size_t i; char *s, *cs; StringList *charmaps; /* initialize StringList */ charmaps = sl_init(); if (charmaps == NULL) err(1, "could not allocate memory"); /* fetch locales list */ init_locales_list(); /* split codesets and build their list */ for (i = 0; i < locales->sl_cur; i++) { s = locales->sl_str[i]; if ((cs = strchr(s, '.')) != NULL) { cs++; if (sl_find(charmaps, cs) == NULL) sl_add(charmaps, cs); } } /* add US-ASCII, if not yet added */ if (sl_find(charmaps, "US-ASCII") == NULL) sl_add(charmaps, "US-ASCII"); /* sort the list */ qsort(charmaps->sl_str, charmaps->sl_cur, sizeof(char *), scmp); /* print results */ for (i = 0; i < charmaps->sl_cur; i++) { printf("%s\n", charmaps->sl_str[i]); } } /* * Retrieve sorted list of system locales (or user locales, if PATH_LOCALE * environment variable is set) */ void init_locales_list(void) { DIR *dirp; struct dirent *dp; size_t i; int bogus; /* why call this function twice ? */ if (locales != NULL) return; /* initialize StringList */ locales = sl_init(); if (locales == NULL) err(1, "could not allocate memory"); /* get actual locales directory name */ if (__detect_path_locale() != 0) err(1, "unable to find locales storage"); /* open locales directory */ dirp = opendir(_PathLocale); if (dirp == NULL) err(1, "could not open directory '%s'", _PathLocale); /* scan directory and store its contents except "." and ".." */ while ((dp = readdir(dirp)) != NULL) { if (*(dp->d_name) == '.') continue; /* exclude "." and ".." 
*/ for (bogus = i = 0; i < NBOGUS; i++) if (strncmp(dp->d_name, boguslocales[i], strlen(boguslocales[i])) == 0) bogus = 1; if (!bogus) sl_add(locales, strdup(dp->d_name)); } closedir(dirp); /* make sure that 'POSIX' and 'C' locales are present in the list. * POSIX 1003.1-2001 requires presence of 'POSIX' name only here, but * we also list 'C' for constistency */ if (sl_find(locales, "POSIX") == NULL) sl_add(locales, "POSIX"); if (sl_find(locales, "C") == NULL) sl_add(locales, "C"); /* make output nicer, sort the list */ qsort(locales->sl_str, locales->sl_cur, sizeof(char *), scmp); } /* * Show current locale status, depending on environment variables */ void showlocale(void) { size_t i; const char *lang, *vval, *eval; setlocale(LC_ALL, ""); lang = getenv("LANG"); if (lang == NULL) { lang = ""; } printf("LANG=%s\n", lang); /* XXX: if LANG is null, then set it to "C" to get implied values? */ for (i = 0; i < NLCINFO; i++) { vval = setlocale(lcinfo[i].id, NULL); eval = getenv(lcinfo[i].name); if (eval != NULL && !strcmp(eval, vval) && strcmp(lang, vval)) { /* * Appropriate environment variable set, its value * is valid and not overridden by LC_ALL * * XXX: possible side effect: if both LANG and * overridden environment variable are set into same * value, then it'll be assumed as 'implied' */ printf("%s=%s\n", lcinfo[i].name, vval); } else { printf("%s=\"%s\"\n", lcinfo[i].name, vval); } } vval = getenv("LC_ALL"); if (vval == NULL) { vval = ""; } printf("LC_ALL=%s\n", vval); } /* * keyword value lookup helper (via localeconv()) */ char * kwval_lconv(int id) { struct lconv *lc; char *rval; rval = NULL; lc = localeconv(); switch (id) { case KW_GROUPING: rval = lc->grouping; break; case KW_INT_CURR_SYMBOL: rval = lc->int_curr_symbol; break; case KW_CURRENCY_SYMBOL: rval = lc->currency_symbol; break; case KW_MON_DECIMAL_POINT: rval = lc->mon_decimal_point; break; case KW_MON_THOUSANDS_SEP: rval = lc->mon_thousands_sep; break; case KW_MON_GROUPING: rval = lc->mon_grouping; break; case KW_POSITIVE_SIGN: rval = lc->positive_sign; break; case KW_NEGATIVE_SIGN: rval = lc->negative_sign; break; case KW_INT_FRAC_DIGITS: rval = &(lc->int_frac_digits); break; case KW_FRAC_DIGITS: rval = &(lc->frac_digits); break; case KW_P_CS_PRECEDES: rval = &(lc->p_cs_precedes); break; case KW_P_SEP_BY_SPACE: rval = &(lc->p_sep_by_space); break; case KW_N_CS_PRECEDES: rval = &(lc->n_cs_precedes); break; case KW_N_SEP_BY_SPACE: rval = &(lc->n_sep_by_space); break; case KW_P_SIGN_POSN: rval = &(lc->p_sign_posn); break; case KW_N_SIGN_POSN: rval = &(lc->n_sign_posn); break; case KW_INT_P_CS_PRECEDES: rval = &(lc->int_p_cs_precedes); break; case KW_INT_P_SEP_BY_SPACE: rval = &(lc->int_p_sep_by_space); break; case KW_INT_N_CS_PRECEDES: rval = &(lc->int_n_cs_precedes); break; case KW_INT_N_SEP_BY_SPACE: rval = &(lc->int_n_sep_by_space); break; case KW_INT_P_SIGN_POSN: rval = &(lc->int_p_sign_posn); break; case KW_INT_N_SIGN_POSN: rval = &(lc->int_n_sign_posn); break; default: break; } return (rval); } /* * keyword value and properties lookup */ int kwval_lookup(char *kwname, char **kwval, int *cat, int *isstr) { int rval; size_t i; rval = 0; for (i = 0; i < NKWINFO; i++) { if (strcasecmp(kwname, kwinfo[i].name) == 0) { rval = 1; *cat = kwinfo[i].catid; *isstr = kwinfo[i].isstr; if (kwinfo[i].value_ref < KW_ZERO) { *kwval = nl_langinfo(kwinfo[i].value_ref); } else { *kwval = kwval_lconv(kwinfo[i].value_ref); } break; } } return (rval); } /* * Show details about requested keyword according to '-k' and/or '-c' * command line 
options specified. */ void showdetails(char *kw) { int isstr, cat, tmpval; char *kwval; if (kwval_lookup(kw, &kwval, &cat, &isstr) == 0) { /* * invalid keyword specified. * XXX: any actions? */ fprintf(stderr, "Unknown keyword: `%s'\n", kw); return; } if (prt_categories) { if (prt_keywords) printf("%-20s ", lookup_localecat(cat)); else printf("%-20s\t%s\n", kw, lookup_localecat(cat)); } if (prt_keywords) { if (isstr) { printf("%s=\"%s\"\n", kw, kwval); } else { tmpval = (char) *kwval; printf("%s=%d\n", kw, tmpval); } } if (!prt_categories && !prt_keywords) { if (isstr) { printf("%s\n", kwval); } else { tmpval = (char) *kwval; printf("%d\n", tmpval); } } } /* * Convert locale category id into string */ const char * lookup_localecat(int cat) { size_t i; for (i = 0; i < NLCINFO; i++) if (lcinfo[i].id == cat) { return (lcinfo[i].name); } return ("UNKNOWN"); } /* * Show list of keywords */ void showkeywordslist(char *substring) { size_t i; #define FMT "%-20s %-12s %-7s %-20s\n" if (substring == NULL) printf("List of available keywords\n\n"); else printf("List of available keywords starting with '%s'\n\n", substring); printf(FMT, "Keyword", "Category", "Type", "Comment"); printf("-------------------- ------------ ------- --------------------\n"); for (i = 0; i < NKWINFO; i++) { if (substring != NULL) { if (strncmp(kwinfo[i].name, substring, strlen(substring)) != 0) continue; } printf(FMT, kwinfo[i].name, lookup_localecat(kwinfo[i].catid), (kwinfo[i].isstr == 0) ? "number" : "string", kwinfo[i].comment); } } Index: user/alc/PQ_LAUNDRY/usr.bin/mandoc/Makefile =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/mandoc/Makefile (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/mandoc/Makefile (revision 303642) @@ -1,91 +1,90 @@ # $FreeBSD$ .include MDOCMLDIR= ${.CURDIR}/../../contrib/mdocml .PATH: ${MDOCMLDIR} PROG= mandoc MAN= mandoc.1 eqn.7 mandoc_char.7 tbl.7 man.7 mdoc.7 # roff.7 MLINKS= mandoc.1 mdocml.1 .if ${MK_MANDOCDB} != no && ${MK_MAN_UTILS} != no MAN+= apropos.1 makewhatis.8 MLINKS+= apropos.1 whatis.1 LINKS= ${BINDIR}/mandoc ${BINDIR}/whatis \ ${BINDIR}/mandoc ${BINDIR}/makewhatis \ ${BINDIR}/mandoc ${BINDIR}/apropos .endif LIBMAN_SRCS= man.c \ man_hash.c \ man_macro.c \ man_validate.c LIBMDOC_SRCS= att.c \ lib.c \ mdoc.c \ mdoc_argv.c \ mdoc_hash.c \ mdoc_macro.c \ mdoc_state.c \ mdoc_validate.c \ st.c \ LIBROFF_SRCS= eqn.c \ roff.c \ tbl.c \ tbl_data.c \ tbl_layout.c \ tbl_opts.c \ LIB_SRCS= ${LIBMAN_SRCS} \ ${LIBMDOC_SRCS} \ ${LIBROFF_SRCS} \ chars.c \ mandoc.c \ mandoc_aux.c \ mandoc_ohash.c \ msec.c \ preconv.c \ read.c HTML_SRCS= eqn_html.c \ html.c \ man_html.c \ mdoc_html.c \ tbl_html.c MAN_SRCS= mdoc_man.c TERM_SRCS= eqn_term.c \ man_term.c \ mdoc_term.c \ term.c \ term_ascii.c \ term_ps.c \ tbl_term.c DB_SRCS= mandocdb.c \ mansearch.c \ mansearch_const.c \ tag.c \ manpath.c SRCS= ${LIB_SRCS} \ ${HTML_SRCS} \ ${MAN_SRCS} \ ${TERM_SRCS} \ main.c \ out.c \ tree.c SRCS+= ${DB_SRCS} WARNS?= 2 CFLAGS+= -DHAVE_CONFIG_H \ - -D_WITH_GETLINE \ -I${.CURDIR}/../../lib/libopenbsd/ \ -I${.CURDIR}/../../contrib/sqlite3 LIBADD= openbsd sqlite3 z .include Index: user/alc/PQ_LAUNDRY/usr.bin/netstat/pfkey.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/netstat/pfkey.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/netstat/pfkey.c (revision 303642) @@ -1,208 +1,207 @@ /* $NetBSD: inet.c,v 1.35.2.1 1999/04/29 14:57:08 perry Exp $ */ /* $KAME: ipsec.c,v 1.25 2001/03/12 09:04:39 itojun Exp $ */ 
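
locale(1) above resolves most keywords through nl_langinfo(3) and reaches the remaining lconv-only members through localeconv(3). A minimal sketch of both lookups, assuming only the standard C/POSIX locale interfaces and the caller's environment for the locale name:

#include <langinfo.h>
#include <locale.h>
#include <stdio.h>

int
main(void)
{
	struct lconv *lc;

	(void)setlocale(LC_ALL, "");		/* honor LANG/LC_* */

	/* keywords that map directly to nl_langinfo() items */
	printf("charmap=%s\n", nl_langinfo(CODESET));
	printf("d_fmt=\"%s\"\n", nl_langinfo(D_FMT));

	/* keywords only reachable through localeconv() */
	lc = localeconv();
	printf("decimal_point=\"%s\"\n", lc->decimal_point);
	printf("frac_digits=%d\n", (int)lc->frac_digits);
	return (0);
}
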
/*- * Copyright (C) 1995, 1996, 1997, 1998, and 1999 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 1983, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
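
Several hunks in this revision (NKWINFO and NBOGUS in locale.c, the pfkey message-name table below, cap_desc_count in procstat) replace open-coded sizeof(a)/sizeof(a[0]) with the nitems() macro from <sys/param.h>. A small sketch of the idiom; the fallback #define is only there so the example also builds where <sys/param.h> does not provide nitems().

#include <sys/param.h>
#include <stdio.h>

#ifndef nitems				/* FreeBSD defines this in <sys/param.h> */
#define	nitems(x)	(sizeof((x)) / sizeof((x)[0]))
#endif

static const char *msgtypenames[] = {
	"reserved", "getspi", "update", "add", "delete",
};

int
main(void)
{
	unsigned int i;

	for (i = 0; i < nitems(msgtypenames); i++)
		printf("%u %s\n", i, msgtypenames[i]);
	return (0);
}
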
*/ #if 0 #ifndef lint static char sccsid[] = "@(#)inet.c 8.5 (Berkeley) 5/24/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #ifdef IPSEC #include #endif #include #include #include #include #include #include #include "netstat.h" #ifdef IPSEC static const char *pfkey_msgtypenames[] = { "reserved", "getspi", "update", "add", "delete", "get", "acquire", "register", "expire", "flush", "dump", "x_promisc", "x_pchange", "x_spdupdate", "x_spdadd", "x_spddelete", "x_spdget", "x_spdacquire", "x_spddump", "x_spdflush", "x_spdsetidx", "x_spdexpire", "x_spddelete2" }; static const char *pfkey_msgtype_names (int); static const char * pfkey_msgtype_names(int x) { - const int max = - sizeof(pfkey_msgtypenames)/sizeof(pfkey_msgtypenames[0]); + const int max = nitems(pfkey_msgtypenames); static char buf[20]; if (x < max && pfkey_msgtypenames[x]) return pfkey_msgtypenames[x]; snprintf(buf, sizeof(buf), "#%d", x); return buf; } void pfkey_stats(u_long off, const char *name, int family __unused, int proto __unused) { struct pfkeystat pfkeystat; unsigned first, type; if (off == 0) return; xo_emit("{T:/%s}:\n", name); xo_open_container(name); kread_counters(off, (char *)&pfkeystat, sizeof(pfkeystat)); #define p(f, m) if (pfkeystat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)pfkeystat.f, plural(pfkeystat.f)) /* userland -> kernel */ p(out_total, "\t{:sent-requests/%ju} " "{N:/request%s sent from userland}\n"); p(out_bytes, "\t{:sent-bytes/%ju} " "{N:/byte%s sent from userland}\n"); for (first = 1, type = 0; type userland */ p(in_total, "\t{:received-requests/%ju} " "{N:/request%s sent to userland}\n"); p(in_bytes, "\t{:received-bytes/%ju} " "{N:/byte%s sent to userland}\n"); for (first = 1, type = 0; type < sizeof(pfkeystat.in_msgtype)/sizeof(pfkeystat.in_msgtype[0]); type++) { if (pfkeystat.in_msgtype[type] <= 0) continue; if (first) { xo_open_list("input-histogram"); xo_emit("\t{T:histogram by message type}:\n"); first = 0; } xo_open_instance("input-histogram"); xo_emit("\t\t{k:type/%s}: {:count/%ju}\n", pfkey_msgtype_names(type), (uintmax_t)pfkeystat.in_msgtype[type]); xo_close_instance("input-histogram"); } if (!first) xo_close_list("input-histogram"); p(in_msgtarget[KEY_SENDUP_ONE], "\t{:received-one-socket/%ju} " "{N:/message%s toward single socket}\n"); p(in_msgtarget[KEY_SENDUP_ALL], "\t{:received-all-sockets/%ju} " "{N:/message%s toward all sockets}\n"); p(in_msgtarget[KEY_SENDUP_REGISTERED], "\t{:received-registered-sockets/%ju} " "{N:/message%s toward registered sockets}\n"); p(in_nomem, "\t{:discarded-no-memory/%ju} " "{N:/message%s with memory allocation failure}\n"); #undef p xo_close_container(name); } #endif /* IPSEC */ Index: user/alc/PQ_LAUNDRY/usr.bin/nl/nl.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/nl/nl.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/nl/nl.c (revision 303642) @@ -1,410 +1,409 @@ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Klaus Klein. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #ifndef lint __COPYRIGHT( "@(#) Copyright (c) 1999\ The NetBSD Foundation, Inc. All rights reserved."); __RCSID("$FreeBSD$"); #endif -#define _WITH_GETLINE #include #include #include #include #include #include #include #include #include #include #include typedef enum { number_all, /* number all lines */ number_nonempty, /* number non-empty lines */ number_none, /* no line numbering */ number_regex /* number lines matching regular expression */ } numbering_type; struct numbering_property { const char * const name; /* for diagnostics */ numbering_type type; /* numbering type */ regex_t expr; /* for type == number_regex */ }; /* line numbering formats */ #define FORMAT_LN "%-*d" /* left justified, leading zeros suppressed */ #define FORMAT_RN "%*d" /* right justified, leading zeros suppressed */ #define FORMAT_RZ "%0*d" /* right justified, leading zeros kept */ #define FOOTER 0 #define BODY 1 #define HEADER 2 #define NP_LAST HEADER static struct numbering_property numbering_properties[NP_LAST + 1] = { { .name = "footer", .type = number_none }, { .name = "body", .type = number_nonempty }, { .name = "header", .type = number_none } }; #define max(a, b) ((a) > (b) ? (a) : (b)) /* * Maximum number of characters required for a decimal representation of a * (signed) int; courtesy of tzcode. */ #define INT_STRLEN_MAXIMUM \ ((sizeof (int) * CHAR_BIT - 1) * 302 / 1000 + 2) static void filter(void); static void parse_numbering(const char *, int); static void usage(void); /* * Dynamically allocated buffer suitable for string representation of ints. */ static char *intbuffer; /* delimiter characters that indicate the start of a logical page section */ static char delim[2 * MB_LEN_MAX]; static int delimlen; /* * Configurable parameters. */ /* line numbering format */ static const char *format = FORMAT_RN; /* increment value used to number logical page lines */ static int incr = 1; /* number of adjacent blank lines to be considered (and numbered) as one */ static unsigned int nblank = 1; /* whether to restart numbering at logical page delimiters */ static int restart = 1; /* characters used in separating the line number and the corrsp. 
text line */ static const char *sep = "\t"; /* initial value used to number logical page lines */ static int startnum = 1; /* number of characters to be used for the line number */ /* should be unsigned but required signed by `*' precision conversion */ static int width = 6; int main(int argc, char *argv[]) { int c; long val; unsigned long uval; char *ep; size_t intbuffersize, clen; char delim1[MB_LEN_MAX] = { '\\' }, delim2[MB_LEN_MAX] = { ':' }; size_t delim1len = 1, delim2len = 1; (void)setlocale(LC_ALL, ""); while ((c = getopt(argc, argv, "pb:d:f:h:i:l:n:s:v:w:")) != -1) { switch (c) { case 'p': restart = 0; break; case 'b': parse_numbering(optarg, BODY); break; case 'd': clen = mbrlen(optarg, MB_CUR_MAX, NULL); if (clen == (size_t)-1 || clen == (size_t)-2) errc(EXIT_FAILURE, EILSEQ, NULL); if (clen != 0) { memcpy(delim1, optarg, delim1len = clen); clen = mbrlen(optarg + delim1len, MB_CUR_MAX, NULL); if (clen == (size_t)-1 || clen == (size_t)-2) errc(EXIT_FAILURE, EILSEQ, NULL); if (clen != 0) { memcpy(delim2, optarg + delim1len, delim2len = clen); if (optarg[delim1len + clen] != '\0') errx(EXIT_FAILURE, "invalid delim argument -- %s", optarg); } } break; case 'f': parse_numbering(optarg, FOOTER); break; case 'h': parse_numbering(optarg, HEADER); break; case 'i': errno = 0; val = strtol(optarg, &ep, 10); if ((ep != NULL && *ep != '\0') || ((val == LONG_MIN || val == LONG_MAX) && errno != 0)) errx(EXIT_FAILURE, "invalid incr argument -- %s", optarg); incr = (int)val; break; case 'l': errno = 0; uval = strtoul(optarg, &ep, 10); if ((ep != NULL && *ep != '\0') || (uval == ULONG_MAX && errno != 0)) errx(EXIT_FAILURE, "invalid num argument -- %s", optarg); nblank = (unsigned int)uval; break; case 'n': if (strcmp(optarg, "ln") == 0) { format = FORMAT_LN; } else if (strcmp(optarg, "rn") == 0) { format = FORMAT_RN; } else if (strcmp(optarg, "rz") == 0) { format = FORMAT_RZ; } else errx(EXIT_FAILURE, "illegal format -- %s", optarg); break; case 's': sep = optarg; break; case 'v': errno = 0; val = strtol(optarg, &ep, 10); if ((ep != NULL && *ep != '\0') || ((val == LONG_MIN || val == LONG_MAX) && errno != 0)) errx(EXIT_FAILURE, "invalid startnum value -- %s", optarg); startnum = (int)val; break; case 'w': errno = 0; val = strtol(optarg, &ep, 10); if ((ep != NULL && *ep != '\0') || ((val == LONG_MIN || val == LONG_MAX) && errno != 0)) errx(EXIT_FAILURE, "invalid width value -- %s", optarg); width = (int)val; if (!(width > 0)) errx(EXIT_FAILURE, "width argument must be > 0 -- %d", width); break; case '?': default: usage(); /* NOTREACHED */ } } argc -= optind; argv += optind; switch (argc) { case 0: break; case 1: if (strcmp(argv[0], "-") != 0 && freopen(argv[0], "r", stdin) == NULL) err(EXIT_FAILURE, "%s", argv[0]); break; default: usage(); /* NOTREACHED */ } /* Generate the delimiter sequence */ memcpy(delim, delim1, delim1len); memcpy(delim + delim1len, delim2, delim2len); delimlen = delim1len + delim2len; /* Allocate a buffer suitable for preformatting line number. */ intbuffersize = max((int)INT_STRLEN_MAXIMUM, width) + 1; /* NUL */ if ((intbuffer = malloc(intbuffersize)) == NULL) err(EXIT_FAILURE, "cannot allocate preformatting buffer"); /* Do the work. 
*/ filter(); exit(EXIT_SUCCESS); /* NOTREACHED */ } static void filter(void) { char *buffer; size_t buffersize; ssize_t linelen; int line; /* logical line number */ int section; /* logical page section */ unsigned int adjblank; /* adjacent blank lines */ int consumed; /* intbuffer measurement */ int donumber = 0, idx; adjblank = 0; line = startnum; section = BODY; buffer = NULL; buffersize = 0; while ((linelen = getline(&buffer, &buffersize, stdin)) > 0) { for (idx = FOOTER; idx <= NP_LAST; idx++) { /* Does it look like a delimiter? */ if (delimlen * (idx + 1) > linelen) break; if (memcmp(buffer + delimlen * idx, delim, delimlen) != 0) break; /* Was this the whole line? */ if (buffer[delimlen * (idx + 1)] == '\n') { section = idx; adjblank = 0; if (restart) line = startnum; goto nextline; } } switch (numbering_properties[section].type) { case number_all: /* * Doing this for number_all only is disputable, but * the standard expresses an explicit dependency on * `-b a' etc. */ if (buffer[0] == '\n' && ++adjblank < nblank) donumber = 0; else donumber = 1, adjblank = 0; break; case number_nonempty: donumber = (buffer[0] != '\n'); break; case number_none: donumber = 0; break; case number_regex: donumber = (regexec(&numbering_properties[section].expr, buffer, 0, NULL, 0) == 0); break; } if (donumber) { /* Note: sprintf() is safe here. */ consumed = sprintf(intbuffer, format, width, line); (void)printf("%s", intbuffer + max(0, consumed - width)); line += incr; } else { (void)printf("%*s", width, ""); } (void)fputs(sep, stdout); (void)fwrite(buffer, linelen, 1, stdout); if (ferror(stdout)) err(EXIT_FAILURE, "output error"); nextline: ; } if (ferror(stdin)) err(EXIT_FAILURE, "input error"); free(buffer); } /* * Various support functions. */ static void parse_numbering(const char *argstr, int section) { int error; char errorbuf[NL_TEXTMAX]; switch (argstr[0]) { case 'a': numbering_properties[section].type = number_all; break; case 'n': numbering_properties[section].type = number_none; break; case 't': numbering_properties[section].type = number_nonempty; break; case 'p': /* If there was a previous expression, throw it away. */ if (numbering_properties[section].type == number_regex) regfree(&numbering_properties[section].expr); else numbering_properties[section].type = number_regex; /* Compile/validate the supplied regular expression. */ if ((error = regcomp(&numbering_properties[section].expr, &argstr[1], REG_NEWLINE|REG_NOSUB)) != 0) { (void)regerror(error, &numbering_properties[section].expr, errorbuf, sizeof (errorbuf)); errx(EXIT_FAILURE, "%s expr: %s -- %s", numbering_properties[section].name, errorbuf, &argstr[1]); } break; default: errx(EXIT_FAILURE, "illegal %s line numbering type -- %s", numbering_properties[section].name, argstr); } } static void usage(void) { (void)fprintf(stderr, "usage: nl [-p] [-b type] [-d delim] [-f type] [-h type] [-i incr] [-l num]\n" " [-n format] [-s sep] [-v startnum] [-w width] [file]\n"); exit(EXIT_FAILURE); } Index: user/alc/PQ_LAUNDRY/usr.bin/procstat/procstat_files.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/procstat/procstat_files.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/procstat/procstat_files.c (revision 303642) @@ -1,576 +1,575 @@ /*- * Copyright (c) 2007-2011 Robert N. M. Watson * Copyright (c) 2015 Allan Jude * All rights reserved. 
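
nl.c above, like sdiff.c and the mandoc Makefile elsewhere in this revision, drops the _WITH_GETLINE guard and calls getline(3) directly, letting it allocate and grow the line buffer as needed. A self-contained sketch of that read loop, assuming stdin as the input stream:

#include <err.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	char *buffer = NULL;		/* getline() allocates and grows this */
	size_t buffersize = 0;
	ssize_t linelen;
	long lineno = 0;

	while ((linelen = getline(&buffer, &buffersize, stdin)) > 0)
		printf("%6ld\t%.*s", ++lineno, (int)linelen, buffer);
	if (ferror(stdin))
		err(EXIT_FAILURE, "input error");
	free(buffer);
	return (0);
}
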
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "procstat.h" static const char * protocol_to_string(int domain, int type, int protocol) { switch (domain) { case AF_INET: case AF_INET6: switch (protocol) { case IPPROTO_TCP: return ("TCP"); case IPPROTO_UDP: return ("UDP"); case IPPROTO_ICMP: return ("ICM"); case IPPROTO_RAW: return ("RAW"); case IPPROTO_SCTP: return ("SCT"); case IPPROTO_DIVERT: return ("IPD"); default: return ("IP?"); } case AF_LOCAL: switch (type) { case SOCK_STREAM: return ("UDS"); case SOCK_DGRAM: return ("UDD"); default: return ("UD?"); } default: return ("?"); } } static void addr_to_string(struct sockaddr_storage *ss, char *buffer, int buflen) { char buffer2[INET6_ADDRSTRLEN]; struct sockaddr_in6 *sin6; struct sockaddr_in *sin; struct sockaddr_un *sun; switch (ss->ss_family) { case AF_LOCAL: sun = (struct sockaddr_un *)ss; if (strlen(sun->sun_path) == 0) strlcpy(buffer, "-", buflen); else strlcpy(buffer, sun->sun_path, buflen); break; case AF_INET: sin = (struct sockaddr_in *)ss; snprintf(buffer, buflen, "%s:%d", inet_ntoa(sin->sin_addr), ntohs(sin->sin_port)); break; case AF_INET6: sin6 = (struct sockaddr_in6 *)ss; if (inet_ntop(AF_INET6, &sin6->sin6_addr, buffer2, sizeof(buffer2)) != NULL) snprintf(buffer, buflen, "%s.%d", buffer2, ntohs(sin6->sin6_port)); else strlcpy(buffer, "-", buflen); break; default: strlcpy(buffer, "", buflen); break; } } static struct cap_desc { uint64_t cd_right; const char *cd_desc; } cap_desc[] = { /* General file I/O. */ { CAP_READ, "rd" }, { CAP_WRITE, "wr" }, { CAP_SEEK, "se" }, { CAP_MMAP, "mm" }, { CAP_CREATE, "cr" }, { CAP_FEXECVE, "fe" }, { CAP_FSYNC, "fy" }, { CAP_FTRUNCATE, "ft" }, /* VFS methods. 
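
addr_to_string() above prints AF_INET6 endpoints with inet_ntop(3), appending the port after a dot, and falls back to "-" when conversion fails. The sketch below shows the same conversion in isolation, using the IPv6 loopback address as sample input:

#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct sockaddr_in6 sin6;
	char buf[INET6_ADDRSTRLEN];

	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_port = htons(22);
	sin6.sin6_addr = in6addr_loopback;

	if (inet_ntop(AF_INET6, &sin6.sin6_addr, buf, sizeof(buf)) != NULL)
		printf("%s.%d\n", buf, ntohs(sin6.sin6_port));
	else
		printf("-\n");
	return (0);
}
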
*/ { CAP_FCHDIR, "cd" }, { CAP_FCHFLAGS, "cf" }, { CAP_FCHMOD, "cm" }, { CAP_FCHOWN, "cn" }, { CAP_FCNTL, "fc" }, { CAP_FLOCK, "fl" }, { CAP_FPATHCONF, "fp" }, { CAP_FSCK, "fk" }, { CAP_FSTAT, "fs" }, { CAP_FSTATFS, "sf" }, { CAP_FUTIMES, "fu" }, { CAP_LINKAT_SOURCE, "ls" }, { CAP_LINKAT_TARGET, "lt" }, { CAP_MKDIRAT, "md" }, { CAP_MKFIFOAT, "mf" }, { CAP_MKNODAT, "mn" }, { CAP_RENAMEAT_SOURCE, "rs" }, { CAP_RENAMEAT_TARGET, "rt" }, { CAP_SYMLINKAT, "sl" }, { CAP_UNLINKAT, "un" }, /* Lookups - used to constrain *at() calls. */ { CAP_LOOKUP, "lo" }, /* Extended attributes. */ { CAP_EXTATTR_GET, "eg" }, { CAP_EXTATTR_SET, "es" }, { CAP_EXTATTR_DELETE, "ed" }, { CAP_EXTATTR_LIST, "el" }, /* Access Control Lists. */ { CAP_ACL_GET, "ag" }, { CAP_ACL_SET, "as" }, { CAP_ACL_DELETE, "ad" }, { CAP_ACL_CHECK, "ac" }, /* Socket operations. */ { CAP_ACCEPT, "at" }, { CAP_BIND, "bd" }, { CAP_CONNECT, "co" }, { CAP_GETPEERNAME, "pn" }, { CAP_GETSOCKNAME, "sn" }, { CAP_GETSOCKOPT, "gs" }, { CAP_LISTEN, "ln" }, { CAP_PEELOFF, "pf" }, { CAP_SETSOCKOPT, "ss" }, { CAP_SHUTDOWN, "sh" }, /* Mandatory Access Control. */ { CAP_MAC_GET, "mg" }, { CAP_MAC_SET, "ms" }, /* Methods on semaphores. */ { CAP_SEM_GETVALUE, "sg" }, { CAP_SEM_POST, "sp" }, { CAP_SEM_WAIT, "sw" }, /* Event monitoring and posting. */ { CAP_EVENT, "ev" }, { CAP_KQUEUE_EVENT, "ke" }, { CAP_KQUEUE_CHANGE, "kc" }, /* Strange and powerful rights that should not be given lightly. */ { CAP_IOCTL, "io" }, { CAP_TTYHOOK, "ty" }, /* Process management via process descriptors. */ { CAP_PDGETPID, "pg" }, { CAP_PDWAIT, "pw" }, { CAP_PDKILL, "pk" }, /* * Rights that allow to use bindat(2) and connectat(2) syscalls on a * directory descriptor. */ { CAP_BINDAT, "ba" }, { CAP_CONNECTAT, "ca" }, /* Aliases and defines that combine multiple rights. */ { CAP_PREAD, "prd" }, { CAP_PWRITE, "pwr" }, { CAP_MMAP_R, "mmr" }, { CAP_MMAP_W, "mmw" }, { CAP_MMAP_X, "mmx" }, { CAP_MMAP_RW, "mrw" }, { CAP_MMAP_RX, "mrx" }, { CAP_MMAP_WX, "mwx" }, { CAP_MMAP_RWX, "mma" }, { CAP_RECV, "re" }, { CAP_SEND, "sd" }, { CAP_SOCK_CLIENT, "scl" }, { CAP_SOCK_SERVER, "ssr" }, }; -static const u_int cap_desc_count = sizeof(cap_desc) / - sizeof(cap_desc[0]); +static const u_int cap_desc_count = nitems(cap_desc); static u_int width_capability(cap_rights_t *rightsp) { u_int count, i, width; count = 0; width = 0; for (i = 0; i < cap_desc_count; i++) { if (cap_rights_is_set(rightsp, cap_desc[i].cd_right)) { width += strlen(cap_desc[i].cd_desc); if (count) width++; count++; } } return (width); } static void print_capability(cap_rights_t *rightsp, u_int capwidth) { u_int count, i, width; count = 0; width = 0; for (i = width_capability(rightsp); i < capwidth; i++) { if (i != 0) xo_emit(" "); else xo_emit("-"); } xo_open_list("capabilities"); for (i = 0; i < cap_desc_count; i++) { if (cap_rights_is_set(rightsp, cap_desc[i].cd_right)) { xo_emit("{D:/%s}{l:capabilities/%s}", count ? "," : "", cap_desc[i].cd_desc); width += strlen(cap_desc[i].cd_desc); if (count) width++; count++; } } xo_close_list("capabilities"); } void procstat_files(struct procstat *procstat, struct kinfo_proc *kipp) { struct sockstat sock; struct filestat_list *head; struct filestat *fst; const char *str; struct vnstat vn; u_int capwidth, width; int error; char src_addr[PATH_MAX]; char dst_addr[PATH_MAX]; /* * To print the header in capability mode, we need to know the width * of the widest capability string. 
Even if we get no processes * back, we will print the header, so we defer aborting due to a lack * of processes until after the header logic. */ capwidth = 0; head = procstat_getfiles(procstat, kipp, 0); if (head != NULL && Cflag) { STAILQ_FOREACH(fst, head, next) { width = width_capability(&fst->fs_cap_rights); if (width > capwidth) capwidth = width; } if (capwidth < strlen("CAPABILITIES")) capwidth = strlen("CAPABILITIES"); } if (!hflag) { if (Cflag) xo_emit("{T:/%5s %-16s %5s %1s %-8s %-*s " "%-3s %-12s}\n", "PID", "COMM", "FD", "T", "FLAGS", capwidth, "CAPABILITIES", "PRO", "NAME"); else xo_emit("{T:/%5s %-16s %5s %1s %1s %-8s " "%3s %7s %-3s %-12s}\n", "PID", "COMM", "FD", "T", "V", "FLAGS", "REF", "OFFSET", "PRO", "NAME"); } if (head == NULL) return; xo_emit("{ek:process_id/%5d/%d}", kipp->ki_pid); xo_emit("{e:command/%-16s/%s}", kipp->ki_comm); xo_open_list("files"); STAILQ_FOREACH(fst, head, next) { xo_open_instance("files"); xo_emit("{dk:process_id/%5d/%d} ", kipp->ki_pid); xo_emit("{d:command/%-16s/%s} ", kipp->ki_comm); if (fst->fs_uflags & PS_FST_UFLAG_CTTY) xo_emit("{P: }{:fd/%s} ", "ctty"); else if (fst->fs_uflags & PS_FST_UFLAG_CDIR) xo_emit("{P: }{:fd/%s} ", "cwd"); else if (fst->fs_uflags & PS_FST_UFLAG_JAIL) xo_emit("{P: }{:fd/%s} ", "jail"); else if (fst->fs_uflags & PS_FST_UFLAG_RDIR) xo_emit("{P: }{:fd/%s} ", "root"); else if (fst->fs_uflags & PS_FST_UFLAG_TEXT) xo_emit("{P: }{:fd/%s} ", "text"); else if (fst->fs_uflags & PS_FST_UFLAG_TRACE) xo_emit("{:fd/%s} ", "trace"); else xo_emit("{:fd/%5d} ", fst->fs_fd); switch (fst->fs_type) { case PS_FST_TYPE_VNODE: str = "v"; xo_emit("{eq:fd_type/vnode}"); break; case PS_FST_TYPE_SOCKET: str = "s"; xo_emit("{eq:fd_type/socket}"); break; case PS_FST_TYPE_PIPE: str = "p"; xo_emit("{eq:fd_type/pipe}"); break; case PS_FST_TYPE_FIFO: str = "f"; xo_emit("{eq:fd_type/fifo}"); break; case PS_FST_TYPE_KQUEUE: str = "k"; xo_emit("{eq:fd_type/kqueue}"); break; case PS_FST_TYPE_CRYPTO: str = "c"; xo_emit("{eq:fd_type/crypto}"); break; case PS_FST_TYPE_MQUEUE: str = "m"; xo_emit("{eq:fd_type/mqueue}"); break; case PS_FST_TYPE_SHM: str = "h"; xo_emit("{eq:fd_type/shm}"); break; case PS_FST_TYPE_PTS: str = "t"; xo_emit("{eq:fd_type/pts}"); break; case PS_FST_TYPE_SEM: str = "e"; xo_emit("{eq:fd_type/sem}"); break; case PS_FST_TYPE_NONE: str = "?"; xo_emit("{eq:fd_type/none}"); break; case PS_FST_TYPE_UNKNOWN: default: str = "?"; xo_emit("{eq:fd_type/unknown}"); break; } xo_emit("{d:fd_type/%1s/%s} ", str); if (!Cflag) { str = "-"; if (fst->fs_type == PS_FST_TYPE_VNODE) { error = procstat_get_vnode_info(procstat, fst, &vn, NULL); switch (vn.vn_type) { case PS_FST_VTYPE_VREG: str = "r"; xo_emit("{eq:vode_type/regular}"); break; case PS_FST_VTYPE_VDIR: str = "d"; xo_emit("{eq:vode_type/directory}"); break; case PS_FST_VTYPE_VBLK: str = "b"; xo_emit("{eq:vode_type/block}"); break; case PS_FST_VTYPE_VCHR: str = "c"; xo_emit("{eq:vode_type/character}"); break; case PS_FST_VTYPE_VLNK: str = "l"; xo_emit("{eq:vode_type/link}"); break; case PS_FST_VTYPE_VSOCK: str = "s"; xo_emit("{eq:vode_type/socket}"); break; case PS_FST_VTYPE_VFIFO: str = "f"; xo_emit("{eq:vode_type/fifo}"); break; case PS_FST_VTYPE_VBAD: str = "x"; xo_emit("{eq:vode_type/revoked_device}"); break; case PS_FST_VTYPE_VNON: str = "?"; xo_emit("{eq:vode_type/non}"); break; case PS_FST_VTYPE_UNKNOWN: default: str = "?"; xo_emit("{eq:vode_type/unknown}"); break; } } xo_emit("{d:vnode_type/%1s/%s} ", str); } xo_emit("{d:/%s}", fst->fs_fflags & PS_FST_FFLAG_READ ? 
"r" : "-"); xo_emit("{d:/%s}", fst->fs_fflags & PS_FST_FFLAG_WRITE ? "w" : "-"); xo_emit("{d:/%s}", fst->fs_fflags & PS_FST_FFLAG_APPEND ? "a" : "-"); xo_emit("{d:/%s}", fst->fs_fflags & PS_FST_FFLAG_ASYNC ? "s" : "-"); xo_emit("{d:/%s}", fst->fs_fflags & PS_FST_FFLAG_SYNC ? "f" : "-"); xo_emit("{d:/%s}", fst->fs_fflags & PS_FST_FFLAG_NONBLOCK ? "n" : "-"); xo_emit("{d:/%s}", fst->fs_fflags & PS_FST_FFLAG_DIRECT ? "d" : "-"); xo_emit("{d:/%s}", fst->fs_fflags & PS_FST_FFLAG_HASLOCK ? "l" : "-"); xo_emit(" "); xo_open_list("fd_flags"); if (fst->fs_fflags & PS_FST_FFLAG_READ) xo_emit("{elq:fd_flags/read}"); if (fst->fs_fflags & PS_FST_FFLAG_WRITE) xo_emit("{elq:fd_flags/write}"); if (fst->fs_fflags & PS_FST_FFLAG_APPEND) xo_emit("{elq:fd_flags/append}"); if (fst->fs_fflags & PS_FST_FFLAG_ASYNC) xo_emit("{elq:fd_flags/async}"); if (fst->fs_fflags & PS_FST_FFLAG_SYNC) xo_emit("{elq:fd_flags/fsync}"); if (fst->fs_fflags & PS_FST_FFLAG_NONBLOCK) xo_emit("{elq:fd_flags/nonblocking}"); if (fst->fs_fflags & PS_FST_FFLAG_DIRECT) xo_emit("{elq:fd_flags/direct_io}"); if (fst->fs_fflags & PS_FST_FFLAG_HASLOCK) xo_emit("{elq:fd_flags/lock_held}"); xo_close_list("fd_flags"); if (!Cflag) { if (fst->fs_ref_count > -1) xo_emit("{:ref_count/%3d/%d} ", fst->fs_ref_count); else xo_emit("{q:ref_count/%3c/%c} ", '-'); if (fst->fs_offset > -1) xo_emit("{:offset/%7jd/%jd} ", (intmax_t)fst->fs_offset); else xo_emit("{q:offset/%7c/%c} ", '-'); } if (Cflag) { print_capability(&fst->fs_cap_rights, capwidth); xo_emit(" "); } switch (fst->fs_type) { case PS_FST_TYPE_SOCKET: error = procstat_get_socket_info(procstat, fst, &sock, NULL); if (error != 0) break; xo_emit("{:protocol/%-3s/%s} ", protocol_to_string(sock.dom_family, sock.type, sock.proto)); /* * While generally we like to print two addresses, * local and peer, for sockets, it turns out to be * more useful to print the first non-nul address for * local sockets, as typically they aren't bound and * connected, and the path strings can get long. */ if (sock.dom_family == AF_LOCAL) { struct sockaddr_un *sun = (struct sockaddr_un *)&sock.sa_local; if (sun->sun_path[0] != 0) addr_to_string(&sock.sa_local, src_addr, sizeof(src_addr)); else addr_to_string(&sock.sa_peer, src_addr, sizeof(src_addr)); xo_emit("{:path/%s}", src_addr); } else { addr_to_string(&sock.sa_local, src_addr, sizeof(src_addr)); addr_to_string(&sock.sa_peer, dst_addr, sizeof(dst_addr)); xo_emit("{:path/%s %s}", src_addr, dst_addr); } break; default: xo_emit("{:protocol/%-3s/%s} ", "-"); xo_emit("{:path/%-18s/%s}", fst->fs_path != NULL ? fst->fs_path : "-"); } xo_emit("\n"); xo_close_instance("files"); } xo_close_list("files"); procstat_freefiles(procstat, head); } Index: user/alc/PQ_LAUNDRY/usr.bin/sdiff/sdiff.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/sdiff/sdiff.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/sdiff/sdiff.c (revision 303642) @@ -1,1189 +1,1188 @@ /* $OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */ /* * Written by Raymond Lai . * Public domain. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include "common.h" #include "extern.h" #define DIFF_PATH "/usr/bin/diff" #define WIDTH 126 /* * Each column must be at least one character wide, plus three * characters between the columns (space, [<|>], space). 
*/ #define WIDTH_MIN 5 /* 3 kilobytes of chars */ #define MAX_CHECK 768 /* A single diff line. */ struct diffline { STAILQ_ENTRY(diffline) diffentries; char *left; char div; char *right; }; static void astrcat(char **, const char *); static void enqueue(char *, char, char *); static char *mktmpcpy(const char *); static int istextfile(FILE *); static void binexec(char *, char *, char *) __dead2; static void freediff(struct diffline *); static void int_usage(void); static int parsecmd(FILE *, FILE *, FILE *); static void printa(FILE *, size_t); static void printc(FILE *, size_t, FILE *, size_t); static void printcol(const char *, size_t *, const size_t); static void printd(FILE *, size_t); static void println(const char *, const char, const char *); static void processq(void); static void prompt(const char *, const char *); static void usage(void) __dead2; static char *xfgets(FILE *); static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead); static size_t line_width; /* width of a line (two columns and divider) */ static size_t width; /* width of each column */ static size_t file1ln, file2ln; /* line number of file1 and file2 */ static int Iflag = 0; /* ignore sets matching regexp */ static int lflag; /* print only left column for identical lines */ static int sflag; /* skip identical lines */ FILE *outfp; /* file to save changes to */ const char *tmpdir; /* TMPDIR or /tmp */ enum { HELP_OPT = CHAR_MAX + 1, NORMAL_OPT, FCASE_SENSITIVE_OPT, FCASE_IGNORE_OPT, FROMFILE_OPT, TOFILE_OPT, UNIDIR_OPT, STRIPCR_OPT, HORIZ_OPT, LEFTC_OPT, SUPCL_OPT, LF_OPT, /* the following groupings must be in sequence */ OLDGF_OPT, NEWGF_OPT, UNCGF_OPT, CHGF_OPT, OLDLF_OPT, NEWLF_OPT, UNCLF_OPT, /* end order-sensitive enums */ TSIZE_OPT, HLINES_OPT, LFILES_OPT, DIFFPROG_OPT, PIPE_FD, /* pid from the diff parent (if applicable) */ DIFF_PID, NOOP_OPT, }; static struct option longopts[] = { /* options only processed in sdiff */ { "left-column", no_argument, NULL, LEFTC_OPT }, { "suppress-common-lines", no_argument, NULL, 's' }, { "width", required_argument, NULL, 'w' }, { "output", required_argument, NULL, 'o' }, { "diff-program", required_argument, NULL, DIFFPROG_OPT }, { "pipe-fd", required_argument, NULL, PIPE_FD }, { "diff-pid", required_argument, NULL, DIFF_PID }, /* Options processed by diff. 
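 *
 * (The short diff switches below are re-packed onto the child diff
 * command line, while the options above are consumed by sdiff itself.
 * As a hypothetical example, "sdiff -w 80 -i old.c new.c" keeps -w,
 * giving each column (80 - 3) / 2 = 38 characters with 3 characters
 * left for the gutter, and merges -i into the combined switch string
 * handed to diff.)
 *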
*/ { "ignore-file-name-case", no_argument, NULL, FCASE_IGNORE_OPT }, { "no-ignore-file-name-case", no_argument, NULL, FCASE_SENSITIVE_OPT }, { "strip-trailing-cr", no_argument, NULL, STRIPCR_OPT }, { "tabsize", required_argument, NULL, TSIZE_OPT }, { "help", no_argument, NULL, HELP_OPT }, { "text", no_argument, NULL, 'a' }, { "ignore-blank-lines", no_argument, NULL, 'B' }, { "ignore-space-change", no_argument, NULL, 'b' }, { "minimal", no_argument, NULL, 'd' }, { "ignore-tab-expansion", no_argument, NULL, 'E' }, { "ignore-matching-lines", required_argument, NULL, 'I' }, { "ignore-case", no_argument, NULL, 'i' }, { "expand-tabs", no_argument, NULL, 't' }, { "speed-large-files", no_argument, NULL, 'H' }, { "ignore-all-space", no_argument, NULL, 'W' }, { NULL, 0, NULL, '\0'} }; static const char *help_msg[] = { "\nusage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n", "\t-l, --left-column, Only print the left column for identical lines.", "\t-o OUTFILE, --output=OUTFILE, nteractively merge file1 and file2 into outfile.", "\t-s, --suppress-common-lines, Skip identical lines.", "\t-w WIDTH, --width=WIDTH, Print a maximum of WIDTH characters on each line.", "\tOptions passed to diff(1) are:", "\t\t-a, --text, Treat file1 and file2 as text files.", "\t\t-b, --ignore-trailing-cr, Ignore trailing blank spaces.", "\t\t-d, --minimal, Minimize diff size.", "\t\t-I RE, --ignore-matching-lines=RE, Ignore changes whose line matches RE.", "\t\t-i, --ignore-case, Do a case-insensitive comparison.", "\t\t-t, --expand-tabs Expand tabs to spaces.", "\t\t-W, --ignore-all-spaces, Ignore all spaces.", "\t\t--speed-large-files, Assume large file with scattered changes.", "\t\t--strip-trailing-cr, Strip trailing carriage return.", "\t\t--ignore-file-name-case, Ignore case of file names.", "\t\t--no-ignore-file-name-case, Do not ignore file name case", "\t\t--tabsize NUM, Change size of tabs (default 8.)", NULL, }; /* * Create temporary file if source_file is not a regular file. * Returns temporary file name if one was malloced, NULL if unnecessary. */ static char * mktmpcpy(const char *source_file) { struct stat sb; ssize_t rcount; int ifd, ofd; u_char buf[BUFSIZ]; char *target_file; /* Open input and output. */ ifd = open(source_file, O_RDONLY, 0); /* File was opened successfully. */ if (ifd != -1) { if (fstat(ifd, &sb) == -1) err(2, "error getting file status from %s", source_file); /* Regular file. */ if (S_ISREG(sb.st_mode)) { close(ifd); return (NULL); } } else { /* If ``-'' does not exist the user meant stdin. */ if (errno == ENOENT && strcmp(source_file, "-") == 0) ifd = STDIN_FILENO; else err(2, "error opening %s", source_file); } /* Not a regular file, so copy input into temporary file. 
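 *
 * The copy matters because both this process and the diff(1) child it
 * forks must read the same bytes, and a pipe or stdin can be read only
 * once.  For instance, in a hypothetical invocation such as
 *
 *	cat old.conf | sdiff - new.conf
 *
 * the data arriving on stdin is drained into ${TMPDIR}/sdiff.XXXXXXXXXX
 * (mkstemp fills in the suffix) and that regular file is what both
 * programs end up reading.
 *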
*/ if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1) err(2, "asprintf"); if ((ofd = mkstemp(target_file)) == -1) { warn("error opening %s", target_file); goto FAIL; } while ((rcount = read(ifd, buf, sizeof(buf))) != -1 && rcount != 0) { ssize_t wcount; wcount = write(ofd, buf, (size_t)rcount); if (-1 == wcount || rcount != wcount) { warn("error writing to %s", target_file); goto FAIL; } } if (rcount == -1) { warn("error reading from %s", source_file); goto FAIL; } close(ifd); close(ofd); return (target_file); FAIL: unlink(target_file); exit(2); } int main(int argc, char **argv) { FILE *diffpipe=NULL, *file1, *file2; size_t diffargc = 0, wflag = WIDTH; int ch, fd[2] = {-1}, status; pid_t pid=0; pid_t ppid =-1; const char *outfile = NULL; struct option *popt; char **diffargv, *diffprog = DIFF_PATH, *filename1, *filename2, *tmp1, *tmp2, *s1, *s2; int i; /* * Process diff flags. */ /* * Allocate memory for diff arguments and NULL. * Each flag has at most one argument, so doubling argc gives an * upper limit of how many diff args can be passed. argv[0], * file1, and file2 won't have arguments so doubling them will * waste some memory; however we need an extra space for the * NULL at the end, so it sort of works out. */ if (!(diffargv = calloc(argc, sizeof(char **) * 2))) err(2, "main"); /* Add first argument, the program name. */ diffargv[diffargc++] = diffprog; /* create a dynamic string for merging single-switch options */ if ( asprintf(&diffargv[diffargc++], "-") < 0 ) err(2, "main"); while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:", longopts, NULL)) != -1) { const char *errstr; switch (ch) { /* only compatible --long-name-form with diff */ case FCASE_IGNORE_OPT: case FCASE_SENSITIVE_OPT: case STRIPCR_OPT: case TSIZE_OPT: case 'S': break; /* combine no-arg single switches */ case 'a': case 'B': case 'b': case 'd': case 'E': case 'i': case 't': case 'H': case 'W': for(popt = longopts; ch != popt->val && popt->name != NULL; popt++); diffargv[1] = realloc(diffargv[1], sizeof(char) * strlen(diffargv[1]) + 2); /* * In diff, the 'W' option is 'w' and the 'w' is 'W'. */ if (ch == 'W') sprintf(diffargv[1], "%sw", diffargv[1]); else sprintf(diffargv[1], "%s%c", diffargv[1], ch); break; case DIFFPROG_OPT: diffargv[0] = diffprog = optarg; break; case 'I': Iflag = 1; diffargv[diffargc++] = "-I"; diffargv[diffargc++] = optarg; break; case 'l': lflag = 1; break; case 'o': outfile = optarg; break; case 's': sflag = 1; break; case 'w': wflag = strtonum(optarg, WIDTH_MIN, INT_MAX, &errstr); if (errstr) errx(2, "width is %s: %s", errstr, optarg); break; case DIFF_PID: ppid = strtonum(optarg, 0, INT_MAX, &errstr); if (errstr) errx(2, "diff pid value is %s: %s", errstr, optarg); break; case HELP_OPT: for (i = 0; help_msg[i] != NULL; i++) printf("%s\n", help_msg[i]); exit(0); break; default: usage(); break; } } /* no single switches were used */ if (strcmp(diffargv[1], "-") == 0 ) { for ( i = 1; i < argc-1; i++) { diffargv[i] = diffargv[i+1]; } diffargv[diffargc-1] = NULL; diffargc--; } argc -= optind; argv += optind; if (argc != 2) usage(); if (outfile && (outfp = fopen(outfile, "w")) == NULL) err(2, "could not open: %s", optarg); if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') tmpdir = _PATH_TMP; filename1 = argv[0]; filename2 = argv[1]; /* * Create temporary files for diff and sdiff to share if file1 * or file2 are not regular files. This allows sdiff and diff * to read the same inputs if one or both inputs are stdin. 
* * If any temporary files were created, their names would be * saved in tmp1 or tmp2. tmp1 should never equal tmp2. */ tmp1 = tmp2 = NULL; /* file1 and file2 are the same, so copy to same temp file. */ if (strcmp(filename1, filename2) == 0) { if ((tmp1 = mktmpcpy(filename1))) filename1 = filename2 = tmp1; /* Copy file1 and file2 into separate temp files. */ } else { if ((tmp1 = mktmpcpy(filename1))) filename1 = tmp1; if ((tmp2 = mktmpcpy(filename2))) filename2 = tmp2; } diffargv[diffargc++] = filename1; diffargv[diffargc++] = filename2; /* Add NULL to end of array to indicate end of array. */ diffargv[diffargc++] = NULL; /* Subtract column divider and divide by two. */ width = (wflag - 3) / 2; /* Make sure line_width can fit in size_t. */ if (width > (SIZE_MAX - 3) / 2) errx(2, "width is too large: %zu", width); line_width = width * 2 + 3; if (ppid == -1 ) { if (pipe(fd)) err(2, "pipe"); switch (pid = fork()) { case 0: /* child */ /* We don't read from the pipe. */ close(fd[0]); if (dup2(fd[1], STDOUT_FILENO) == -1) err(2, "child could not duplicate descriptor"); /* Free unused descriptor. */ close(fd[1]); execvp(diffprog, diffargv); err(2, "could not execute diff: %s", diffprog); break; case -1: err(2, "could not fork"); break; } /* parent */ /* We don't write to the pipe. */ close(fd[1]); /* Open pipe to diff command. */ if ((diffpipe = fdopen(fd[0], "r")) == NULL) err(2, "could not open diff pipe"); } if ((file1 = fopen(filename1, "r")) == NULL) err(2, "could not open %s", filename1); if ((file2 = fopen(filename2, "r")) == NULL) err(2, "could not open %s", filename2); if (!istextfile(file1) || !istextfile(file2)) { /* Close open files and pipe, delete temps */ fclose(file1); fclose(file2); if (diffpipe != NULL) fclose(diffpipe); if (tmp1) if (unlink(tmp1)) warn("Error deleting %s.", tmp1); if (tmp2) if (unlink(tmp2)) warn("Error deleting %s.", tmp2); free(tmp1); free(tmp2); binexec(diffprog, filename1, filename2); } /* Line numbers start at one. */ file1ln = file2ln = 1; /* Read and parse diff output. */ while (parsecmd(diffpipe, file1, file2) != EOF) ; fclose(diffpipe); /* Wait for diff to exit. */ if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) || WEXITSTATUS(status) >= 2) err(2, "diff exited abnormally."); /* Delete and free unneeded temporary files. */ if (tmp1) if (unlink(tmp1)) warn("Error deleting %s.", tmp1); if (tmp2) if (unlink(tmp2)) warn("Error deleting %s.", tmp2); free(tmp1); free(tmp2); filename1 = filename2 = tmp1 = tmp2 = NULL; /* No more diffs, so print common lines. */ if (lflag) while ((s1 = xfgets(file1))) enqueue(s1, ' ', NULL); else for (;;) { s1 = xfgets(file1); s2 = xfgets(file2); if (s1 || s2) enqueue(s1, ' ', s2); else break; } fclose(file1); fclose(file2); /* Process unmodified lines. */ processq(); /* Return diff exit status. */ return (WEXITSTATUS(status)); } /* * When sdiff/zsdiff detects a binary file as input, executes them with * diff/zdiff to maintain the same behavior as GNU sdiff with binary input. */ static void binexec(char *diffprog, char *f1, char *f2) { char *args[] = {diffprog, f1, f2, (char *) 0}; execv(diffprog, args); /* If execv() fails, sdiff's execution will continue below. */ errx(1, "Could not execute diff process.\n"); } /* * Checks whether a file appears to be a text file. 
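 *
 * The check is a simple heuristic: a NUL byte anywhere in the first
 * MAX_CHECK characters marks the stream as binary.  When either input
 * fails it, sdiff abandons the side-by-side display and re-execs plain
 * diff via binexec(), so that, for example, a hypothetical
 *
 *	sdiff old.o new.o
 *
 * produces diff's usual "Binary files ... differ" style of output
 * rather than two garbled columns.
 *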
*/ static int istextfile(FILE *f) { int ch, i; if (f == NULL) return (1); rewind(f); for (i = 0; i <= MAX_CHECK; i++) { ch = fgetc(f); if (ch == '\0') { rewind(f); return (0); } if (ch == EOF) break; } rewind(f); return (1); } /* * Prints an individual column (left or right), taking into account * that tabs are variable-width. Takes a string, the current column * the cursor is on the screen, and the maximum value of the column. * The column value is updated as we go along. */ static void printcol(const char *s, size_t *col, const size_t col_max) { for (; *s && *col < col_max; ++s) { size_t new_col; switch (*s) { case '\t': /* * If rounding to next multiple of eight causes * an integer overflow, just return. */ if (*col > SIZE_MAX - 8) return; /* Round to next multiple of eight. */ new_col = (*col / 8 + 1) * 8; /* * If printing the tab goes past the column * width, don't print it and just quit. */ if (new_col > col_max) return; *col = new_col; break; default: ++(*col); } putchar(*s); } } /* * Prompts user to either choose between two strings or edit one, both, * or neither. */ static void prompt(const char *s1, const char *s2) { char *cmd; /* Print command prompt. */ putchar('%'); /* Get user input. */ for (; (cmd = xfgets(stdin)); free(cmd)) { const char *p; /* Skip leading whitespace. */ for (p = cmd; isspace(*p); ++p) ; switch (*p) { case 'e': /* Skip `e'. */ ++p; if (eparse(p, s1, s2) == -1) goto USAGE; break; case 'l': case '1': /* Choose left column as-is. */ if (s1 != NULL) fprintf(outfp, "%s\n", s1); /* End of command parsing. */ break; case 'q': goto QUIT; case 'r': case '2': /* Choose right column as-is. */ if (s2 != NULL) fprintf(outfp, "%s\n", s2); /* End of command parsing. */ break; case 's': sflag = 1; goto PROMPT; case 'v': sflag = 0; /* FALLTHROUGH */ default: /* Interactive usage help. */ USAGE: int_usage(); PROMPT: putchar('%'); /* Prompt user again. */ continue; } free(cmd); return; } /* * If there was no error, we received an EOF from stdin, so we * should quit. */ QUIT: fclose(outfp); exit(0); } /* * Takes two strings, separated by a column divider. NULL strings are * treated as empty columns. If the divider is the ` ' character, the * second column is not printed (-l flag). In this case, the second * string must be NULL. When the second column is NULL, the divider * does not print the trailing space following the divider character. * * Takes into account that tabs can take multiple columns. */ static void println(const char *s1, const char div, const char *s2) { size_t col; /* Print first column. Skips if s1 == NULL. */ col = 0; if (s1) { /* Skip angle bracket and space. */ printcol(s1, &col, width); } /* Otherwise, we pad this column up to width. */ for (; col < width; ++col) putchar(' '); /* Only print left column. */ if (div == ' ' && !s2) { printf(" (\n"); return; } /* * Print column divider. If there is no second column, we don't * need to add the space for padding. */ if (!s2) { printf(" %c\n", div); return; } printf(" %c ", div); col += 3; /* Skip angle bracket and space. */ printcol(s2, &col, line_width); putchar('\n'); } /* * Reads a line from file and returns as a string. If EOF is reached, * NULL is returned. The returned string must be freed afterwards. 
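 *
 * A minimal sketch of the calling convention (a hypothetical caller,
 * not code from this file):
 *
 *	char *s;
 *
 *	while ((s = xfgets(fp)) != NULL) {
 *		process(s);	(s has its trailing newline removed)
 *		free(s);
 *	}
 *
 * On a read error xfgets() exits via err(), so a NULL return can be
 * treated purely as end of file.
 *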
*/ static char * xfgets(FILE *file) { size_t linecap; ssize_t l; char *s; clearerr(file); linecap = 0; s = NULL; if ((l = getline(&s, &linecap, file)) == -1) { if (ferror(file)) err(2, "error reading file"); return (NULL); } if (s[l-1] == '\n') s[l-1] = '\0'; return (s); } /* * Parse ed commands from diffpipe and print lines from file1 (lines * to change or delete) or file2 (lines to add or change). * Returns EOF or 0. */ static int parsecmd(FILE *diffpipe, FILE *file1, FILE *file2) { size_t file1start, file1end, file2start, file2end, n; /* ed command line and pointer to characters in line */ char *line, *p, *q; const char *errstr; char c, cmd; /* Read ed command. */ if (!(line = xfgets(diffpipe))) return (EOF); p = line; /* Go to character after line number. */ while (isdigit(*p)) ++p; c = *p; *p++ = 0; file1start = strtonum(line, 0, INT_MAX, &errstr); if (errstr) errx(2, "file1 start is %s: %s", errstr, line); /* A range is specified for file1. */ if (c == ',') { q = p; /* Go to character after file2end. */ while (isdigit(*p)) ++p; c = *p; *p++ = 0; file1end = strtonum(q, 0, INT_MAX, &errstr); if (errstr) errx(2, "file1 end is %s: %s", errstr, line); if (file1start > file1end) errx(2, "invalid line range in file1: %s", line); } else file1end = file1start; cmd = c; /* Check that cmd is valid. */ if (!(cmd == 'a' || cmd == 'c' || cmd == 'd')) errx(2, "ed command not recognized: %c: %s", cmd, line); q = p; /* Go to character after line number. */ while (isdigit(*p)) ++p; c = *p; *p++ = 0; file2start = strtonum(q, 0, INT_MAX, &errstr); if (errstr) errx(2, "file2 start is %s: %s", errstr, line); /* * There should either be a comma signifying a second line * number or the line should just end here. */ if (c != ',' && c != '\0') errx(2, "invalid line range in file2: %c: %s", c, line); if (c == ',') { file2end = strtonum(p, 0, INT_MAX, &errstr); if (errstr) errx(2, "file2 end is %s: %s", errstr, line); if (file2start >= file2end) errx(2, "invalid line range in file2: %s", line); } else file2end = file2start; /* Appends happen _after_ stated line. */ if (cmd == 'a') { if (file1start != file1end) errx(2, "append cannot have a file1 range: %s", line); if (file1start == SIZE_MAX) errx(2, "file1 line range too high: %s", line); file1start = ++file1end; } /* * I'm not sure what the deal is with the line numbers for * deletes, though. */ else if (cmd == 'd') { if (file2start != file2end) errx(2, "delete cannot have a file2 range: %s", line); if (file2start == SIZE_MAX) errx(2, "file2 line range too high: %s", line); file2start = ++file2end; } /* * Continue reading file1 and file2 until we reach line numbers * specified by diff. Should only happen with -I flag. */ for (; file1ln < file1start && file2ln < file2start; ++file1ln, ++file2ln) { char *s1, *s2; if (!(s1 = xfgets(file1))) errx(2, "file1 shorter than expected"); if (!(s2 = xfgets(file2))) errx(2, "file2 shorter than expected"); /* If the -l flag was specified, print only left column. */ if (lflag) { free(s2); /* * XXX - If -l and -I are both specified, all * unchanged or ignored lines are shown with a * `(' divider. This matches GNU sdiff, but I * believe it is a bug. Just check out: * gsdiff -l -I '^$' samefile samefile. */ if (Iflag) enqueue(s1, '(', NULL); else enqueue(s1, ' ', NULL); } else enqueue(s1, ' ', s2); } /* Ignore deleted lines. */ for (; file1ln < file1start; ++file1ln) { char *s; if (!(s = xfgets(file1))) errx(2, "file1 shorter than expected"); enqueue(s, '(', NULL); } /* Ignore added lines. 
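 *
 * (The ranges being caught up with here come straight from the
 * normal-format hunk headers parsed above; for an illustrative hunk
 * such as "12,15d11", file1start is 12, file1end 15 and both file2
 * numbers are 11, while "8a9,10" appends file2 lines 9-10 after file1
 * line 8 and "5,7c5,9" replaces file1 lines 5-7 with file2 lines 5-9.)
 *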
*/ for (; file2ln < file2start; ++file2ln) { char *s; if (!(s = xfgets(file2))) errx(2, "file2 shorter than expected"); /* If -l flag was given, don't print right column. */ if (lflag) free(s); else enqueue(NULL, ')', s); } /* Process unmodified or skipped lines. */ processq(); switch (cmd) { case 'a': printa(file2, file2end); n = file2end - file2start + 1; break; case 'c': printc(file1, file1end, file2, file2end); n = file1end - file1start + 1 + 1 + file2end - file2start + 1; break; case 'd': printd(file1, file1end); n = file1end - file1start + 1; break; default: errx(2, "invalid diff command: %c: %s", cmd, line); } free(line); /* Skip to next ed line. */ while (n--) { if (!(line = xfgets(diffpipe))) errx(2, "diff ended early"); free(line); } return (0); } /* * Queues up a diff line. */ static void enqueue(char *left, char div, char *right) { struct diffline *diffp; if (!(diffp = malloc(sizeof(struct diffline)))) err(2, "enqueue"); diffp->left = left; diffp->div = div; diffp->right = right; STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries); } /* * Free a diffline structure and its elements. */ static void freediff(struct diffline *diffp) { free(diffp->left); free(diffp->right); free(diffp); } /* * Append second string into first. Repeated appends to the same string * are cached, making this an O(n) function, where n = strlen(append). */ static void astrcat(char **s, const char *append) { /* Length of string in previous run. */ static size_t offset = 0; size_t newsiz; /* * String from previous run. Compared to *s to see if we are * dealing with the same string. If so, we can use offset. */ static const char *oldstr = NULL; char *newstr; /* * First string is NULL, so just copy append. */ if (!*s) { if (!(*s = strdup(append))) err(2, "astrcat"); /* Keep track of string. */ offset = strlen(*s); oldstr = *s; return; } /* * *s is a string so concatenate. */ /* Did we process the same string in the last run? */ /* * If this is a different string from the one we just processed * cache new string. */ if (oldstr != *s) { offset = strlen(*s); oldstr = *s; } /* Size = strlen(*s) + \n + strlen(append) + '\0'. */ newsiz = offset + 1 + strlen(append) + 1; /* Resize *s to fit new string. */ newstr = realloc(*s, newsiz); if (newstr == NULL) err(2, "astrcat"); *s = newstr; /* *s + offset should be end of string. */ /* Concatenate. */ strlcpy(*s + offset, "\n", newsiz - offset); strlcat(*s + offset, append, newsiz - offset); /* New string length should be exactly newsiz - 1 characters. */ /* Store generated string's values. */ offset = newsiz - 1; oldstr = *s; } /* * Process diff set queue, printing, prompting, and saving each diff * line stored in queue. */ static void processq(void) { struct diffline *diffp; char divc, *left, *right; /* Don't process empty queue. */ if (STAILQ_EMPTY(&diffhead)) return; /* Remember the divider. */ divc = STAILQ_FIRST(&diffhead)->div; left = NULL; right = NULL; /* * Go through set of diffs, concatenating each line in left or * right column into two long strings, `left' and `right'. */ STAILQ_FOREACH(diffp, &diffhead, diffentries) { /* * Print changed lines if -s was given, * print all lines if -s was not given. */ if (!sflag || diffp->div == '|' || diffp->div == '<' || diffp->div == '>') println(diffp->left, diffp->div, diffp->right); /* Append new lines to diff set. */ if (diffp->left) astrcat(&left, diffp->left); if (diffp->right) astrcat(&right, diffp->right); } /* Empty queue and free each diff line and its elements. 
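 *
 * (At this point `left' and `right' hold every line of the set joined
 * by embedded newlines: a toy sequence such as astrcat(&s, "foo")
 * followed by astrcat(&s, "bar") leaves s as "foo\nbar", and the static
 * offset cache in astrcat() avoids rescanning the growing string on
 * each append.)
 *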
*/ while (!STAILQ_EMPTY(&diffhead)) { diffp = STAILQ_FIRST(&diffhead); STAILQ_REMOVE_HEAD(&diffhead, diffentries); freediff(diffp); } /* Write to outfp, prompting user if lines are different. */ if (outfp) switch (divc) { case ' ': case '(': case ')': fprintf(outfp, "%s\n", left); break; case '|': case '<': case '>': prompt(left, right); break; default: errx(2, "invalid divider: %c", divc); } /* Free left and right. */ free(left); free(right); } /* * Print lines following an (a)ppend command. */ static void printa(FILE *file, size_t line2) { char *line; for (; file2ln <= line2; ++file2ln) { if (!(line = xfgets(file))) errx(2, "append ended early"); enqueue(NULL, '>', line); } processq(); } /* * Print lines following a (c)hange command, from file1ln to file1end * and from file2ln to file2end. */ static void printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end) { struct fileline { STAILQ_ENTRY(fileline) fileentries; char *line; }; STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead); /* Read lines to be deleted. */ for (; file1ln <= file1end; ++file1ln) { struct fileline *linep; char *line1; /* Read lines from both. */ if (!(line1 = xfgets(file1))) errx(2, "error reading file1 in delete in change"); /* Add to delete queue. */ if (!(linep = malloc(sizeof(struct fileline)))) err(2, "printc"); linep->line = line1; STAILQ_INSERT_TAIL(&delqhead, linep, fileentries); } /* Process changed lines.. */ for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end; ++file2ln) { struct fileline *del; char *add; /* Get add line. */ if (!(add = xfgets(file2))) errx(2, "error reading add in change"); del = STAILQ_FIRST(&delqhead); enqueue(del->line, '|', add); STAILQ_REMOVE_HEAD(&delqhead, fileentries); /* * Free fileline structure but not its elements since * they are queued up. */ free(del); } processq(); /* Process remaining lines to add. */ for (; file2ln <= file2end; ++file2ln) { char *add; /* Get add line. */ if (!(add = xfgets(file2))) errx(2, "error reading add in change"); enqueue(NULL, '>', add); } processq(); /* Process remaining lines to delete. */ while (!STAILQ_EMPTY(&delqhead)) { struct fileline *filep; filep = STAILQ_FIRST(&delqhead); enqueue(filep->line, '<', NULL); STAILQ_REMOVE_HEAD(&delqhead, fileentries); free(filep); } processq(); } /* * Print deleted lines from file, from file1ln to file1end. */ static void printd(FILE *file1, size_t file1end) { char *line1; /* Print out lines file1ln to line2. */ for (; file1ln <= file1end; ++file1ln) { if (!(line1 = xfgets(file1))) errx(2, "file1 ended early in delete"); enqueue(line1, '<', NULL); } processq(); } /* * Interactive mode usage. */ static void int_usage(void) { puts("e:\tedit blank diff\n" "eb:\tedit both diffs concatenated\n" "el:\tedit left diff\n" "er:\tedit right diff\n" "l | 1:\tchoose left diff\n" "r | 2:\tchoose right diff\n" "s:\tsilent mode--don't print identical lines\n" "v:\tverbose mode--print identical lines\n" "q:\tquit"); } static void usage(void) { fprintf(stderr, "usage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1" " file2\n"); exit(2); } Index: user/alc/PQ_LAUNDRY/usr.bin/sed/compile.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/sed/compile.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/sed/compile.c (revision 303642) @@ -1,945 +1,945 @@ /*- * Copyright (c) 1992 Diomidis Spinellis. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. 
* * This code is derived from software contributed to Berkeley by * Diomidis Spinellis of Imperial College, University of London. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #ifndef lint static const char sccsid[] = "@(#)compile.c 8.1 (Berkeley) 6/6/93"; #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "defs.h" #include "extern.h" #define LHSZ 128 #define LHMASK (LHSZ - 1) static struct labhash { struct labhash *lh_next; u_int lh_hash; struct s_command *lh_cmd; int lh_ref; } *labels[LHSZ]; static const char *compile_addr(const char *, struct s_addr *); static char *compile_ccl(const char **, char *); static const char *compile_delimited(const char *, char *, int); static const char *compile_flags(const char *, struct s_subst *); static const regex_t *compile_re(const char *, int); static const char *compile_subst(const char *, struct s_subst *); static char *compile_text(size_t *); static const char *compile_tr(const char *, struct s_tr **); static struct s_command **compile_stream(struct s_command **); static char *duptoeol(const char *, const char *, size_t *); static void enterlabel(struct s_command *); static struct s_command *findlabel(const char *); static void fixuplabel(struct s_command *, const struct s_command *); static void uselabel(void); /* * Command specification. This is used to drive the command parser. */ struct s_format { char code; /* Command code */ int naddr; /* Number of address args */ enum e_args args; /* Argument type */ }; static struct s_format cmd_fmts[] = { {'{', 2, GROUP}, {'}', 0, ENDGROUP}, {'a', 1, TEXT}, {'b', 2, BRANCH}, {'c', 2, TEXT}, {'d', 2, EMPTY}, {'D', 2, EMPTY}, {'g', 2, EMPTY}, {'G', 2, EMPTY}, {'h', 2, EMPTY}, {'H', 2, EMPTY}, {'i', 1, TEXT}, {'l', 2, EMPTY}, {'n', 2, EMPTY}, {'N', 2, EMPTY}, {'p', 2, EMPTY}, {'P', 2, EMPTY}, {'q', 1, EMPTY}, {'r', 1, RFILE}, {'s', 2, SUBST}, {'t', 2, BRANCH}, {'w', 2, WFILE}, {'x', 2, EMPTY}, {'y', 2, TR}, {'!', 2, NONSEL}, {':', 0, LABEL}, {'#', 0, COMMENT}, {'=', 1, EMPTY}, {'\0', 0, COMMENT}, }; /* The compiled program. 
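 *
 * prog is filled in by compile_stream() below, driven by the cmd_fmts
 * table above.  For instance, the entry {'s', 2, SUBST} says an s
 * command accepts up to two addresses and carries a substitution
 * argument, so an illustrative script line such as "1,10s/foo/bar/g"
 * becomes one s_command whose a1 and a2 are line addresses 1 and 10.
 *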
*/ struct s_command *prog; /* * Compile the program into prog. * Initialise appends. */ void compile(void) { *compile_stream(&prog) = NULL; fixuplabel(prog, NULL); uselabel(); if (appendnum == 0) appends = NULL; else if ((appends = malloc(sizeof(struct s_appends) * appendnum)) == NULL) err(1, "malloc"); if ((match = malloc((maxnsub + 1) * sizeof(regmatch_t))) == NULL) err(1, "malloc"); } #define EATSPACE() do { \ while (*p && isspace((unsigned char)*p)) \ p++; \ } while (0) #define EATSPACEN() do { \ while (*p && *p != '\n' && isspace((unsigned char)*p)) \ p++; \ } while (0) static struct s_command ** compile_stream(struct s_command **link) { const char *p; struct s_command *cmd, *cmd2, *stack; struct s_format *fp; char re[_POSIX2_LINE_MAX + 1]; int naddr; /* Number of addresses */ stack = NULL; for (;;) { if ((p = cu_fgets(NULL)) == NULL) { if (stack != NULL) errx(1, "%lu: %s: unexpected EOF (pending }'s)", linenum, fname); return (link); } semicolon: EATSPACEN(); switch (*p) { case '#': case '\0': case '\n': continue; /* to next command-unit */ case ';': p++; goto semicolon; } if ((*link = cmd = malloc(sizeof(struct s_command))) == NULL) err(1, "malloc"); link = &cmd->next; cmd->startline = cmd->nonsel = 0; /* First parse the addresses */ naddr = 0; /* Valid characters to start an address */ #define addrchar(c) (strchr("0123456789/\\$", (c))) if (addrchar(*p)) { naddr++; if ((cmd->a1 = malloc(sizeof(struct s_addr))) == NULL) err(1, "malloc"); p = compile_addr(p, cmd->a1); EATSPACE(); /* EXTENSION */ if (*p == ',') { p++; EATSPACE(); /* EXTENSION */ naddr++; if ((cmd->a2 = malloc(sizeof(struct s_addr))) == NULL) err(1, "malloc"); p = compile_addr(p, cmd->a2); EATSPACE(); } else cmd->a2 = NULL; } else cmd->a1 = cmd->a2 = NULL; nonsel: /* Now parse the command */ if (*p == '\0' || *p == '\n') errx(1, "%lu: %s: command expected", linenum, fname); cmd->code = *p; for (fp = cmd_fmts; fp->code; fp++) if (fp->code == *p) break; if (!fp->code) errx(1, "%lu: %s: invalid command code %c (%s)", linenum, fname, *p, p); if (naddr > fp->naddr) errx(1, "%lu: %s: command %c expects up to %d address(es), found %d", linenum, fname, *p, fp->naddr, naddr); switch (fp->args) { case NONSEL: /* ! */ p++; EATSPACE(); cmd->nonsel = 1; goto nonsel; case GROUP: /* { */ p++; EATSPACEN(); cmd->next = stack; stack = cmd; link = &cmd->u.c; if (*p != '\0' && *p != '\n') goto semicolon; break; case ENDGROUP: /* * Short-circuit command processing, since end of * group is really just a noop. 
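 *
 * As an illustrative script (not something shipped here),
 *
 *	sed -n '/BEGIN/,/END/{ s/foo/bar/; p; }' file
 *
 * pushes the '{' command onto `stack' when it is parsed; when the
 * matching '}' arrives here, the '{' is popped and its next pointer is
 * made to point at this '}' node, which is itself marked nonsel and
 * otherwise does nothing.
 *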
*/ cmd->nonsel = 1; if (stack == NULL) errx(1, "%lu: %s: unexpected }", linenum, fname); cmd2 = stack; stack = cmd2->next; cmd2->next = cmd; /*FALLTHROUGH*/ case EMPTY: /* d D g G h H l n N p P q x = \0 */ p++; EATSPACEN(); if (*p == ';') { p++; link = &cmd->next; goto semicolon; } if (*p != '\0' && *p != '\n') errx(1, "%lu: %s: extra characters at the end of %c command", linenum, fname, cmd->code); break; case TEXT: /* a c i */ p++; EATSPACE(); if (*p != '\\') errx(1, "%lu: %s: command %c expects \\ followed by text", linenum, fname, cmd->code); p++; EATSPACEN(); if (*p != '\n') errx(1, "%lu: %s: extra characters (%c) after \\ at the end of %c command", linenum, fname, *p, cmd->code); cmd->t = compile_text(&cmd->tlen); break; case COMMENT: /* \0 # */ break; case WFILE: /* w */ p++; EATSPACE(); if (*p == '\0') errx(1, "%lu: %s: filename expected", linenum, fname); cmd->t = duptoeol(p, "w command", &cmd->tlen); if (aflag) cmd->u.fd = -1; else if ((cmd->u.fd = open(cmd->t, O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) err(1, "%s", p); break; case RFILE: /* r */ p++; EATSPACE(); if (*p == '\0') errx(1, "%lu: %s: filename expected", linenum, fname); else cmd->t = duptoeol(p, "read command", &cmd->tlen); break; case BRANCH: /* b t */ p++; EATSPACEN(); if (*p == '\0' || *p == '\n') cmd->t = NULL; else cmd->t = duptoeol(p, "branch", &cmd->tlen); break; case LABEL: /* : */ p++; EATSPACE(); cmd->t = duptoeol(p, "label", &cmd->tlen); if (cmd->t[0] == '\0') errx(1, "%lu: %s: empty label", linenum, fname); enterlabel(cmd); break; case SUBST: /* s */ p++; if (*p == '\0' || *p == '\\' || *p == '\n') errx(1, "%lu: %s: substitute pattern can not be delimited by newline or backslash", linenum, fname); if ((cmd->u.s = calloc(1, sizeof(struct s_subst))) == NULL) err(1, "malloc"); p = compile_delimited(p, re, 0); if (p == NULL) errx(1, "%lu: %s: unterminated substitute pattern", linenum, fname); --p; p = compile_subst(p, cmd->u.s); p = compile_flags(p, cmd->u.s); if (*re != '\0') cmd->u.s->re = compile_re(re, cmd->u.s->icase); EATSPACE(); if (*p == ';') { p++; link = &cmd->next; goto semicolon; } break; case TR: /* y */ p++; p = compile_tr(p, &cmd->u.y); EATSPACE(); if (*p == ';') { p++; link = &cmd->next; goto semicolon; } if (*p) errx(1, "%lu: %s: extra text at the end of a transform command", linenum, fname); break; } } } /* * Get a delimited string. P points to the delimiter of the string; d points * to a buffer area. Newline and delimiter escapes are processed; other * escapes are ignored. * * Returns a pointer to the first character after the final delimiter or NULL * in the case of a non-terminated string. The character array d is filled * with the processed string. 
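 *
 * For instance (hypothetical script fragments): in "s|/usr/local|/opt|g"
 * the delimiter is '|', so the slashes need no escaping; writing "\|"
 * embeds a literal delimiter character instead of ending the string,
 * and a "\n" sequence in the pattern is converted into a real newline
 * here.
 *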
*/ static const char * compile_delimited(const char *p, char *d, int is_tr) { char c; c = *p++; if (c == '\0') return (NULL); else if (c == '\\') errx(1, "%lu: %s: \\ can not be used as a string delimiter", linenum, fname); else if (c == '\n') errx(1, "%lu: %s: newline can not be used as a string delimiter", linenum, fname); while (*p) { if (*p == '[' && *p != c) { if ((d = compile_ccl(&p, d)) == NULL) errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname); continue; } else if (*p == '\\' && p[1] == '[') { *d++ = *p++; } else if (*p == '\\' && p[1] == c) p++; else if (*p == '\\' && p[1] == 'n') { *d++ = '\n'; p += 2; continue; } else if (*p == '\\' && p[1] == '\\') { if (is_tr) p++; else *d++ = *p++; } else if (*p == c) { *d = '\0'; return (p + 1); } *d++ = *p++; } return (NULL); } /* compile_ccl: expand a POSIX character class */ static char * compile_ccl(const char **sp, char *t) { int c, d; const char *s = *sp; *t++ = *s++; if (*s == '^') *t++ = *s++; if (*s == ']') *t++ = *s++; for (; *s && (*t = *s) != ']'; s++, t++) if (*s == '[' && ((d = *(s+1)) == '.' || d == ':' || d == '=')) { *++t = *++s, t++, s++; for (c = *s; (*t = *s) != ']' || c != d; s++, t++) if ((c = *s) == '\0') return NULL; } return (*s == ']') ? *sp = ++s, ++t : NULL; } /* * Compiles the regular expression in RE and returns a pointer to the compiled * regular expression. * Cflags are passed to regcomp. */ static const regex_t * compile_re(const char *re, int case_insensitive) { regex_t *rep; int eval, flags; flags = rflags; if (case_insensitive) flags |= REG_ICASE; if ((rep = malloc(sizeof(regex_t))) == NULL) err(1, "malloc"); if ((eval = regcomp(rep, re, flags)) != 0) errx(1, "%lu: %s: RE error: %s", linenum, fname, strregerror(eval, rep)); if (maxnsub < rep->re_nsub) maxnsub = rep->re_nsub; return (rep); } /* * Compile the substitution string of a regular expression and set res to * point to a saved copy of it. Nsub is the number of parenthesized regular * expressions. */ static const char * compile_subst(const char *p, struct s_subst *s) { int asize, size; u_char ref; char c, *text, *op, *sp; int more = 0, sawesc = 0; c = *p++; /* Terminator character */ if (c == '\0') return (NULL); s->maxbref = 0; s->linenum = linenum; asize = 2 * _POSIX2_LINE_MAX + 1; if ((text = malloc(asize)) == NULL) err(1, "malloc"); size = 0; do { op = sp = text + size; for (; *p != '\0' && *p != '\n'; p++) { if (*p == '\\' || sawesc) { /* * If this is a continuation from the last * buffer, we won't have a character to * skip over. */ if (sawesc) sawesc = 0; else p++; if (*p == '\0') { /* * This escaped character is continued * in the next part of the line. Note * this fact, then cause the loop to * exit w/ normal EOL case and reenter * above with the new buffer. 
*/ sawesc = 1; p--; break; } else if (*p == '\n') { *sp++ = '\n'; break; } else if (strchr("123456789", *p) != NULL) { *sp++ = '\\'; ref = *p - '0'; if (s->re != NULL && ref > s->re->re_nsub) errx(1, "%lu: %s: \\%c not defined in the RE", linenum, fname, *p); if (s->maxbref < ref) s->maxbref = ref; } else if (*p == '&' || *p == '\\') *sp++ = '\\'; } else if (*p == c) { if (*++p == '\0' && more) { const char *nextp; nextp = cu_fgets(&more); if (nextp != NULL) p = nextp; } *sp++ = '\0'; size += sp - op; if ((s->new = realloc(text, size)) == NULL) err(1, "realloc"); return (p); } else if (*p == '\n') { errx(1, "%lu: %s: unescaped newline inside substitute pattern", linenum, fname); /* NOTREACHED */ } *sp++ = *p; } size += sp - op; if (asize - size < _POSIX2_LINE_MAX + 1) { asize *= 2; if ((text = realloc(text, asize)) == NULL) err(1, "realloc"); } - } while ((p = cu_fgets(&more))); + } while ((p = cu_fgets(&more)) != NULL); errx(1, "%lu: %s: unterminated substitute in regular expression", linenum, fname); /* NOTREACHED */ } /* * Compile the flags of the s command */ static const char * compile_flags(const char *p, struct s_subst *s) { int gn; /* True if we have seen g or n */ unsigned long nval; char *q; s->n = 1; /* Default */ s->p = 0; s->wfile = NULL; s->wfd = -1; s->icase = 0; for (gn = 0;;) { EATSPACEN(); /* EXTENSION */ switch (*p) { case 'g': if (gn) errx(1, "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); gn = 1; s->n = 0; break; case '\0': case '\n': case ';': return (p); case 'p': s->p = 1; break; case 'i': case 'I': s->icase = 1; break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (gn) errx(1, "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); gn = 1; errno = 0; nval = strtol(p, &q, 10); if (errno == ERANGE || nval > INT_MAX) errx(1, "%lu: %s: overflow in the 'N' substitute flag", linenum, fname); s->n = nval; p = q; continue; case 'w': p++; #ifdef HISTORIC_PRACTICE if (*p != ' ') { warnx("%lu: %s: space missing before w wfile", linenum, fname); return (p); } #endif EATSPACE(); s->wfile = duptoeol(p, "w flag", NULL); if (!aflag && (s->wfd = open(s->wfile, O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) err(1, "%s", s->wfile); return (p); default: errx(1, "%lu: %s: bad flag in substitute command: '%c' (%.10s)", linenum, fname, *p, p); break; } p++; } } /* * Compile a translation set of strings into a lookup table. 
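 *
 * For example, a transform such as "y/abc/xyz/" (an illustrative
 * script, not anything from this file) maps every 'a' to 'x', 'b' to
 * 'y' and 'c' to 'z' through the bytetab array; the source and target
 * strings must contain the same number of characters, and in a
 * multibyte locale any character wider than one byte is diverted to
 * the auxiliary `multis' vector that do_tr() searches at run time.
 *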
*/ static const char * compile_tr(const char *p, struct s_tr **py) { struct s_tr *y; int i; const char *op, *np; char old[_POSIX2_LINE_MAX + 1]; char new[_POSIX2_LINE_MAX + 1]; size_t oclen, oldlen, nclen, newlen; mbstate_t mbs1, mbs2; if ((*py = y = malloc(sizeof(*y))) == NULL) err(1, "malloc"); y->multis = NULL; y->nmultis = 0; if (*p == '\0' || *p == '\\') errx(1, "%lu: %s: transform pattern can not be delimited by newline or backslash", linenum, fname); p = compile_delimited(p, old, 1); if (p == NULL) errx(1, "%lu: %s: unterminated transform source string", linenum, fname); p = compile_delimited(p - 1, new, 1); if (p == NULL) errx(1, "%lu: %s: unterminated transform target string", linenum, fname); EATSPACE(); op = old; oldlen = mbsrtowcs(NULL, &op, 0, NULL); if (oldlen == (size_t)-1) err(1, "mbsrtowcs"); np = new; newlen = mbsrtowcs(NULL, &np, 0, NULL); if (newlen == (size_t)-1) err(1, "mbsrtowcs"); if (newlen != oldlen) errx(1, "%lu: %s: transform strings are not the same length", linenum, fname); if (MB_CUR_MAX == 1) { /* * The single-byte encoding case is easy: generate a * lookup table. */ for (i = 0; i <= UCHAR_MAX; i++) y->bytetab[i] = (char)i; for (; *op; op++, np++) y->bytetab[(u_char)*op] = *np; } else { /* * Multi-byte encoding case: generate a lookup table as * above, but only for single-byte characters. The first * bytes of multi-byte characters have their lookup table * entries set to 0, which causes do_tr() to search through * an auxiliary vector of multi-byte mappings. */ memset(&mbs1, 0, sizeof(mbs1)); memset(&mbs2, 0, sizeof(mbs2)); for (i = 0; i <= UCHAR_MAX; i++) y->bytetab[i] = (btowc(i) != WEOF) ? i : 0; while (*op != '\0') { oclen = mbrlen(op, MB_LEN_MAX, &mbs1); if (oclen == (size_t)-1 || oclen == (size_t)-2) errc(1, EILSEQ, NULL); nclen = mbrlen(np, MB_LEN_MAX, &mbs2); if (nclen == (size_t)-1 || nclen == (size_t)-2) errc(1, EILSEQ, NULL); if (oclen == 1 && nclen == 1) y->bytetab[(u_char)*op] = *np; else { y->bytetab[(u_char)*op] = 0; y->multis = realloc(y->multis, (y->nmultis + 1) * sizeof(*y->multis)); if (y->multis == NULL) err(1, "realloc"); i = y->nmultis++; y->multis[i].fromlen = oclen; memcpy(y->multis[i].from, op, oclen); y->multis[i].tolen = nclen; memcpy(y->multis[i].to, np, nclen); } op += oclen; np += nclen; } } return (p); } /* * Compile the text following an a, c, or i command. */ static char * compile_text(size_t *ptlen) { int asize, esc_nl, size; char *text, *s; const char *p, *op; asize = 2 * _POSIX2_LINE_MAX + 1; if ((text = malloc(asize)) == NULL) err(1, "malloc"); size = 0; - while ((p = cu_fgets(NULL))) { + while ((p = cu_fgets(NULL)) != NULL) { op = s = text + size; for (esc_nl = 0; *p != '\0'; p++) { if (*p == '\\' && p[1] != '\0' && *++p == '\n') esc_nl = 1; *s++ = *p; if (*p == '\n') break; } size += s - op; if (!esc_nl) { *s = '\0'; break; } if (asize - size < _POSIX2_LINE_MAX + 1) { asize *= 2; if ((text = realloc(text, asize)) == NULL) err(1, "realloc"); } } text[size] = '\0'; if ((text = realloc(text, size + 1)) == NULL) err(1, "realloc"); *ptlen = size; return (text); } /* * Get an address and return a pointer to the first character after * it. Fill the structure pointed to according to the address. 
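 *
 * A few illustrative address forms (not taken from this file): '$'
 * selects the last line (AT_LAST), a bare number such as "42" is an
 * absolute line (AT_LINE), "+3" is a relative offset (AT_RELLINE),
 * typically used as the second address of a range as in "/start/,+3",
 * and "/error/I" is a context address whose trailing 'I' compiles the
 * regular expression with REG_ICASE (AT_RE).
 *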
*/ static const char * compile_addr(const char *p, struct s_addr *a) { char *end, re[_POSIX2_LINE_MAX + 1]; int icase; icase = 0; a->type = 0; switch (*p) { case '\\': /* Context address */ ++p; /* FALLTHROUGH */ case '/': /* Context address */ p = compile_delimited(p, re, 0); if (p == NULL) errx(1, "%lu: %s: unterminated regular expression", linenum, fname); /* Check for case insensitive regexp flag */ if (*p == 'I') { icase = 1; p++; } if (*re == '\0') a->u.r = NULL; else a->u.r = compile_re(re, icase); a->type = AT_RE; return (p); case '$': /* Last line */ a->type = AT_LAST; return (p + 1); case '+': /* Relative line number */ a->type = AT_RELLINE; p++; /* FALLTHROUGH */ /* Line number */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (a->type == 0) a->type = AT_LINE; a->u.l = strtol(p, &end, 10); return (end); default: errx(1, "%lu: %s: expected context address", linenum, fname); return (NULL); } } /* * duptoeol -- * Return a copy of all the characters up to \n or \0. */ static char * duptoeol(const char *s, const char *ctype, size_t *ptlen) { size_t len; int ws; char *p; const char *start; ws = 0; for (start = s; *s != '\0' && *s != '\n'; ++s) ws = isspace((unsigned char)*s); if (ws) warnx("%lu: %s: whitespace after %s", linenum, fname, ctype); len = s - start; if ((p = malloc(len + 1)) == NULL) err(1, "malloc"); memmove(p, start, len); p[len] = '\0'; if (ptlen != NULL) *ptlen = len; return p; } /* * Convert goto label names to addresses, and count a and r commands, in * the given subset of the script. Free the memory used by labels in b * and t commands (but not by :). * * TODO: Remove } nodes */ static void fixuplabel(struct s_command *cp, const struct s_command *end) { for (; cp != end; cp = cp->next) switch (cp->code) { case 'a': case 'r': appendnum++; break; case 'b': case 't': /* Resolve branch target. */ if (cp->t == NULL) { cp->u.c = NULL; break; } if ((cp->u.c = findlabel(cp->t)) == NULL) errx(1, "%lu: %s: %c: undefined label '%s'", linenum, fname, cp->code, cp->t); free(cp->t); break; case '{': /* Do interior commands. */ fixuplabel(cp->u.c, cp->next); break; } } /* * Associate the given command label for later lookup. */ static void enterlabel(struct s_command *cp) { struct labhash **lhp, *lh; u_char *p; u_int h, c; for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++) h = (h << 5) + h + c; lhp = &labels[h & LHMASK]; for (lh = *lhp; lh != NULL; lh = lh->lh_next) if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0) errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t); if ((lh = malloc(sizeof *lh)) == NULL) err(1, "malloc"); lh->lh_next = *lhp; lh->lh_hash = h; lh->lh_cmd = cp; lh->lh_ref = 0; *lhp = lh; } /* * Find the label contained in the command l in the command linked * list cp. L is excluded from the search. Return NULL if not found. */ static struct s_command * findlabel(const char *name) { struct labhash *lh; const u_char *p; u_int h, c; for (h = 0, p = (const u_char *)name; (c = *p) != 0; p++) h = (h << 5) + h + c; for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) { if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) { lh->lh_ref = 1; return (lh->lh_cmd); } } return (NULL); } /* * Warn about any unused labels. As a side effect, release the label hash * table space. 
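 *
 * As a sketch of how labels are consumed (an illustrative one-liner,
 * not shipped here),
 *
 *	sed -e :again -e 's/  / /' -e 't again' file
 *
 * has enterlabel() record "again" when the ':' command is parsed,
 * fixuplabel() later points the 't' command at it, and uselabel()
 * would only warn if the label were never referenced.
 *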
*/ static void uselabel(void) { struct labhash *lh, *next; int i; for (i = 0; i < LHSZ; i++) { for (lh = labels[i]; lh != NULL; lh = next) { next = lh->lh_next; if (!lh->lh_ref) warnx("%lu: %s: unused label '%s'", linenum, fname, lh->lh_cmd->t); free(lh); } } } Index: user/alc/PQ_LAUNDRY/usr.bin/sed/main.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/sed/main.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/sed/main.c (revision 303642) @@ -1,529 +1,532 @@ /*- * Copyright (c) 2013 Johann 'Myrkraverk' Oskarsson. * Copyright (c) 1992 Diomidis Spinellis. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Diomidis Spinellis of Imperial College, University of London. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1992, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif #ifndef lint static const char sccsid[] = "@(#)main.c 8.2 (Berkeley) 1/3/94"; #endif #include #include #include #include #include #include #include #include #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include "defs.h" #include "extern.h" /* * Linked list of units (strings and files) to be compiled */ struct s_compunit { struct s_compunit *next; enum e_cut {CU_FILE, CU_STRING} type; const char *s; /* Pointer to string or fname */ }; /* * Linked list pointer to compilation units and pointer to current * next pointer. */ static struct s_compunit *script, **cu_nextp = &script; /* * Linked list of files to be processed */ struct s_flist { const char *fname; struct s_flist *next; }; /* * Linked list pointer to files and pointer to current * next pointer. 
*/ static struct s_flist *files, **fl_nextp = &files; FILE *infile; /* Current input file */ FILE *outfile; /* Current output file */ int aflag, eflag, nflag; int rflags = 0; static int rval; /* Exit status */ static int ispan; /* Whether inplace editing spans across files */ /* * Current file and line number; line numbers restart across compilation * units, but span across input files. The latter is optional if editing * in place. */ const char *fname; /* File name. */ const char *outfname; /* Output file name */ static char oldfname[PATH_MAX]; /* Old file name (for in-place editing) */ static char tmpfname[PATH_MAX]; /* Temporary file name (for in-place editing) */ static const char *inplace; /* Inplace edit file extension. */ u_long linenum; static void add_compunit(enum e_cut, const char *); static void add_file(const char *); static void usage(void); int main(int argc, char *argv[]) { + char *temp_arg; int c, fflag; (void) setlocale(LC_ALL, ""); fflag = 0; inplace = NULL; while ((c = getopt(argc, argv, "EI:ae:f:i:lnru")) != -1) switch (c) { case 'r': /* Gnu sed compat */ case 'E': rflags = REG_EXTENDED; break; case 'I': inplace = optarg; ispan = 1; /* span across input files */ break; case 'a': aflag = 1; break; case 'e': eflag = 1; - add_compunit(CU_STRING, optarg); + asprintf(&temp_arg, "%s\n", optarg); + if (temp_arg == NULL) + errx(1, "Couldn't allocate temporary buffer"); + add_compunit(CU_STRING, temp_arg); break; case 'f': fflag = 1; add_compunit(CU_FILE, optarg); break; case 'i': inplace = optarg; ispan = 0; /* don't span across input files */ break; case 'l': if(setvbuf(stdout, NULL, _IOLBF, 0) != 0) warnx("setting line buffered output failed"); break; case 'n': nflag = 1; break; case 'u': if(setvbuf(stdout, NULL, _IONBF, 0) != 0) warnx("setting unbuffered output failed"); break; default: case '?': usage(); } argc -= optind; argv += optind; /* First usage case; script is the first arg */ if (!eflag && !fflag && *argv) { add_compunit(CU_STRING, *argv); argv++; } compile(); /* Continue with first and start second usage */ if (*argv) for (; *argv; argv++) add_file(*argv); else add_file(NULL); process(); cfclose(prog, NULL); if (fclose(stdout)) err(1, "stdout"); exit(rval); } static void usage(void) { (void)fprintf(stderr, "usage: %s script [-Ealnru] [-i extension] [file ...]\n" "\t%s [-Ealnu] [-i extension] [-e script] ... [-f script_file]" " ... [file ...]\n", getprogname(), getprogname()); exit(1); } /* * Like fgets, but go through the chain of compilation units chaining them * together. Empty strings and files are ignored. 
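 *
 * For instance, a hypothetical invocation
 *
 *	sed -e '1d' -f fixups.sed input.txt
 *
 * queues two compilation units: the -e string (which main() now gives
 * a trailing newline) and the file fixups.sed.  cu_fgets() hands
 * compile() the string's single line first and then every line of the
 * file, resetting linenum at each unit boundary so error messages
 * point into the right unit.
 *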
*/ const char * cu_fgets(int *more) { static enum {ST_EOF, ST_FILE, ST_STRING} state = ST_EOF; static FILE *f; /* Current open file */ static const char *s; /* Current pointer inside string */ static char string_ident[30], *lastresult; static size_t lastsize; char *p; const char *start; again: switch (state) { case ST_EOF: if (script == NULL) { if (more != NULL) *more = 0; return (NULL); } linenum = 0; switch (script->type) { case CU_FILE: if ((f = fopen(script->s, "r")) == NULL) err(1, "%s", script->s); fname = script->s; state = ST_FILE; goto again; case CU_STRING: if (((size_t)snprintf(string_ident, sizeof(string_ident), "\"%s\"", script->s)) >= sizeof(string_ident) - 1) (void)strcpy(string_ident + sizeof(string_ident) - 6, " ...\""); fname = string_ident; s = script->s; state = ST_STRING; goto again; } case ST_FILE: p = lastresult; if (getline(&p, &lastsize, f) != -1) { linenum++; if (linenum == 1 && p[0] == '#' && p[1] == 'n') nflag = 1; if (more != NULL) *more = !feof(f); return (lastresult = p); } else if (ferror(f)) err(1, "%s", script->s); script = script->next; (void)fclose(f); state = ST_EOF; goto again; case ST_STRING: if (linenum == 0 && s[0] == '#' && s[1] == 'n') nflag = 1; else if (s[0] == '\0') { state = ST_EOF; script = script->next; goto again; } start = s; for (;;) { switch (*s) { case '\0': state = ST_EOF; script = script->next; /* FALLTHROUGH */ case '\n': s++; linenum++; if (more != NULL) *more = 0; return (start); default: s++; } } } /* NOTREACHED */ return (NULL); } /* * Like fgets, but go through the list of files chaining them together. * Set len to the length of the line. */ int mf_fgets(SPACE *sp, enum e_spflag spflag) { struct stat sb; ssize_t len; char *dirbuf, *basebuf; static char *p = NULL; static size_t plen = 0; int c; static int firstfile; if (infile == NULL) { /* stdin? */ if (files->fname == NULL) { if (inplace != NULL) errx(1, "-I or -i may not be used with stdin"); infile = stdin; fname = "stdin"; outfile = stdout; outfname = "stdout"; } firstfile = 1; } for (;;) { if (infile != NULL && (c = getc(infile)) != EOF) { (void)ungetc(c, infile); break; } /* If we are here then either eof or no files are open yet */ if (infile == stdin) { sp->len = 0; return (0); } if (infile != NULL) { fclose(infile); if (*oldfname != '\0') { /* if there was a backup file, remove it */ unlink(oldfname); /* * Backup the original. Note that hard links * are not supported on all filesystems. */ if ((link(fname, oldfname) != 0) && (rename(fname, oldfname) != 0)) { warn("rename()"); if (*tmpfname) unlink(tmpfname); exit(1); } *oldfname = '\0'; } if (*tmpfname != '\0') { if (outfile != NULL && outfile != stdout) if (fclose(outfile) != 0) { warn("fclose()"); unlink(tmpfname); exit(1); } outfile = NULL; if (rename(tmpfname, fname) != 0) { /* this should not happen really! 
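 *
 * (The rename that can fail here is the final step of in-place
 * editing: output was written to a dot-file named ".!pid!name" in the
 * same directory and is now moved over the original.  From the user's
 * side, a hypothetical "sed -i .orig 's/foo/bar/g' config" edits
 * config in place and leaves the untouched copy as config.orig, while
 * an empty extension, -i '', skips the backup entirely.)
 *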
*/ warn("rename()"); unlink(tmpfname); exit(1); } *tmpfname = '\0'; } outfname = NULL; } if (firstfile == 0) files = files->next; else firstfile = 0; if (files == NULL) { sp->len = 0; return (0); } fname = files->fname; if (inplace != NULL) { if (lstat(fname, &sb) != 0) err(1, "%s", fname); if (!(sb.st_mode & S_IFREG)) errx(1, "%s: %s %s", fname, "in-place editing only", "works for regular files"); if (*inplace != '\0') { strlcpy(oldfname, fname, sizeof(oldfname)); len = strlcat(oldfname, inplace, sizeof(oldfname)); if ((size_t)len > sizeof(oldfname)) errx(1, "%s: name too long", fname); } if ((dirbuf = strdup(fname)) == NULL || (basebuf = strdup(fname)) == NULL) err(1, "strdup"); len = snprintf(tmpfname, sizeof(tmpfname), "%s/.!%ld!%s", dirname(dirbuf), (long)getpid(), basename(basebuf)); free(dirbuf); free(basebuf); if ((size_t)len >= sizeof(tmpfname)) errx(1, "%s: name too long", fname); unlink(tmpfname); if (outfile != NULL && outfile != stdout) fclose(outfile); if ((outfile = fopen(tmpfname, "w")) == NULL) err(1, "%s", fname); fchown(fileno(outfile), sb.st_uid, sb.st_gid); fchmod(fileno(outfile), sb.st_mode & ALLPERMS); outfname = tmpfname; if (!ispan) { linenum = 0; resetstate(); } } else { outfile = stdout; outfname = "stdout"; } if ((infile = fopen(fname, "r")) == NULL) { warn("%s", fname); rval = 1; continue; } } /* * We are here only when infile is open and we still have something * to read from it. * * Use getline() so that we can handle essentially infinite input * data. The p and plen are static so each invocation gives * getline() the same buffer which is expanded as needed. */ len = getline(&p, &plen, infile); if (len == -1) err(1, "%s", fname); if (len != 0 && p[len - 1] == '\n') { sp->append_newline = 1; len--; } else if (!lastline()) { sp->append_newline = 1; } else { sp->append_newline = 0; } cspace(sp, p, len, spflag); linenum++; return (1); } /* * Add a compilation unit to the linked list */ static void add_compunit(enum e_cut type, const char *s) { struct s_compunit *cu; if ((cu = malloc(sizeof(struct s_compunit))) == NULL) err(1, "malloc"); cu->type = type; cu->s = s; cu->next = NULL; *cu_nextp = cu; cu_nextp = &cu->next; } /* * Add a file to the linked list */ static void add_file(const char *s) { struct s_flist *fp; if ((fp = malloc(sizeof(struct s_flist))) == NULL) err(1, "malloc"); fp->next = NULL; *fl_nextp = fp; fp->fname = s; fl_nextp = &fp->next; } static int next_files_have_lines(void) { struct s_flist *file; FILE *file_fd; int ch; file = files; while ((file = file->next) != NULL) { if ((file_fd = fopen(file->fname, "r")) == NULL) continue; if ((ch = getc(file_fd)) != EOF) { /* * This next file has content, therefore current * file doesn't contains the last line. */ ungetc(ch, file_fd); fclose(file_fd); return (1); } fclose(file_fd); } return (0); } int lastline(void) { int ch; if (feof(infile)) { return !( (inplace == NULL || ispan) && next_files_have_lines()); } if ((ch = getc(infile)) == EOF) { return !( (inplace == NULL || ispan) && next_files_have_lines()); } ungetc(ch, infile); return (0); } Index: user/alc/PQ_LAUNDRY/usr.bin/soelim/soelim.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/soelim/soelim.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/soelim/soelim.c (revision 303642) @@ -1,178 +1,177 @@ /*- * Copyright (c) 2014 Baptiste Daroussin * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include #define C_OPTION 0x1 static StringList *includes; static void usage(void) { fprintf(stderr, "usage: soelim [-Crtv] [-I dir] [files]\n"); exit(EXIT_FAILURE); } static FILE * soelim_fopen(const char *name) { FILE *f; char path[PATH_MAX]; size_t i; if (strcmp(name, "-") == 0) return (stdin); if ((f = fopen(name, "r")) != NULL) return (f); if (*name == '/') { warn("can't open '%s'", name); return (NULL); } for (i = 0; i < includes->sl_cur; i++) { snprintf(path, sizeof(path), "%s/%s", includes->sl_str[i], name); if ((f = fopen(path, "r")) != NULL) return (f); } warn("can't open '%s'", name); return (f); } static int soelim_file(FILE *f, int flag) { char *line = NULL; char *walk, *cp; size_t linecap = 0; ssize_t linelen; if (f == NULL) return (1); while ((linelen = getline(&line, &linecap, f)) > 0) { if (strncmp(line, ".so", 3) != 0) { printf("%s", line); continue; } walk = line + 3; if (!isspace(*walk) && ((flag & C_OPTION) == 0)) { printf("%s", line); continue; } while (isspace(*walk)) walk++; cp = walk; while (*cp != '\0' && !isspace(*cp)) cp++; *cp = 0; if (cp < line + linelen) cp++; if (*walk == '\0') { printf("%s", line); continue; } if (soelim_file(soelim_fopen(walk), flag) == 1) { free(line); return (1); } if (*cp != '\0') printf("%s", cp); } free(line); fclose(f); return (0); } int main(int argc, char **argv) { int ch, i; int ret = 0; int flags = 0; includes = sl_init(); if (includes == NULL) err(EXIT_FAILURE, "sl_init()"); while ((ch = getopt(argc, argv, "CrtvI:")) != -1) { switch (ch) { case 'C': flags |= C_OPTION; break; case 'r': case 'v': case 't': /* stub compatibility with groff's soelim */ break; case 'I': sl_add(includes, optarg); break; default: sl_free(includes, 0); usage(); } } argc -= optind; argv += optind; if (argc == 0) ret = soelim_file(stdin, flags); for (i = 0; i < argc; i++) ret = soelim_file(soelim_fopen(argv[i]), flags); sl_free(includes, 0); return (ret); } Index: user/alc/PQ_LAUNDRY/usr.bin/uniq/uniq.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/uniq/uniq.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/uniq/uniq.c (revision 303642) 
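The -#define _WITH_GETLINE removal in the soelim.c hunk above, and the
identical removals in uniq.c, autofs/common.c and pkg.c further down, reflect
that getline(3) is a standard POSIX.1-2008 function and, on current FreeBSD,
is declared by <stdio.h> unconditionally, so the old compatibility macro no
longer does anything.  A minimal, self-contained sketch of plain getline(3)
usage (the program itself is illustrative and not part of the diff):

	#include <sys/types.h>
	#include <stdio.h>
	#include <stdlib.h>

	int
	main(void)
	{
		char *line = NULL;	/* getline() allocates and grows this buffer */
		size_t cap = 0;
		ssize_t len;

		/* Echo stdin line by line; -1 means EOF or a read error. */
		while ((len = getline(&line, &cap, stdin)) != -1)
			fwrite(line, 1, (size_t)len, stdout);
		free(line);
		return (ferror(stdin) ? 1 : 0);
	}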
@@ -1,357 +1,356 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Case Larsen. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1989, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95"; #endif static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include #include #include #include #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include #include #include static int cflag, dflag, uflag, iflag; static int numchars, numfields, repeats; static FILE *file(const char *, const char *); static wchar_t *convert(const char *); static int inlcmp(const char *, const char *); static void show(FILE *, const char *); static wchar_t *skip(wchar_t *); static void obsolete(char *[]); static void usage(void); static void strerror_init(void) { /* * Cache NLS data before entering capability mode. * XXXPJD: There should be strerror_init() and strsignal_init() in libc. */ (void)catopen("libc", NL_CAT_LOCALE); } int main (int argc, char *argv[]) { wchar_t *tprev, *tthis; FILE *ifp, *ofp; int ch, comp; size_t prevbuflen, thisbuflen, b1; char *prevline, *thisline, *p; const char *ifn; cap_rights_t rights; (void) setlocale(LC_ALL, ""); obsolete(argv); while ((ch = getopt(argc, argv, "cdif:s:u")) != -1) switch (ch) { case 'c': cflag = 1; break; case 'd': dflag = 1; break; case 'i': iflag = 1; break; case 'f': numfields = strtol(optarg, &p, 10); if (numfields < 0 || *p) errx(1, "illegal field skip value: %s", optarg); break; case 's': numchars = strtol(optarg, &p, 10); if (numchars < 0 || *p) errx(1, "illegal character skip value: %s", optarg); break; case 'u': uflag = 1; break; case '?': default: usage(); } argc -= optind; argv += optind; /* If no flags are set, default is -d -u. 
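 * That is, with no options every input line is written exactly once per
 * run of identical adjacent lines (the historic uniq behaviour); -d
 * restricts output to lines that were repeated, -u to lines that were
 * not, and -c prefixes each output line with its repeat count.  For the
 * adjacent input lines "a", "a", "b":
 *
 *	uniq     ->  a, b
 *	uniq -c  ->  "2 a", "1 b"
 *	uniq -d  ->  a
 *	uniq -u  ->  b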
*/ if (cflag) { if (dflag || uflag) usage(); } else if (!dflag && !uflag) dflag = uflag = 1; if (argc > 2) usage(); ifp = stdin; ifn = "stdin"; ofp = stdout; if (argc > 0 && strcmp(argv[0], "-") != 0) ifp = file(ifn = argv[0], "r"); cap_rights_init(&rights, CAP_FSTAT, CAP_READ); if (cap_rights_limit(fileno(ifp), &rights) < 0 && errno != ENOSYS) err(1, "unable to limit rights for %s", ifn); cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE); if (argc > 1) ofp = file(argv[1], "w"); else cap_rights_set(&rights, CAP_IOCTL); if (cap_rights_limit(fileno(ofp), &rights) < 0 && errno != ENOSYS) { err(1, "unable to limit rights for %s", argc > 1 ? argv[1] : "stdout"); } if (cap_rights_is_set(&rights, CAP_IOCTL)) { unsigned long cmd; cmd = TIOCGETA; /* required by isatty(3) in printf(3) */ if (cap_ioctls_limit(fileno(ofp), &cmd, 1) < 0 && errno != ENOSYS) { err(1, "unable to limit ioctls for %s", argc > 1 ? argv[1] : "stdout"); } } strerror_init(); if (cap_enter() < 0 && errno != ENOSYS) err(1, "unable to enter capability mode"); prevbuflen = thisbuflen = 0; prevline = thisline = NULL; if (getline(&prevline, &prevbuflen, ifp) < 0) { if (ferror(ifp)) err(1, "%s", ifn); exit(0); } tprev = convert(prevline); if (!cflag && uflag && dflag) show(ofp, prevline); tthis = NULL; while (getline(&thisline, &thisbuflen, ifp) >= 0) { if (tthis != NULL) free(tthis); tthis = convert(thisline); if (tthis == NULL && tprev == NULL) comp = inlcmp(thisline, prevline); else if (tthis == NULL || tprev == NULL) comp = 1; else comp = wcscoll(tthis, tprev); if (comp) { /* If different, print; set previous to new value. */ if (cflag || !dflag || !uflag) show(ofp, prevline); p = prevline; b1 = prevbuflen; prevline = thisline; prevbuflen = thisbuflen; if (tprev != NULL) free(tprev); tprev = tthis; if (!cflag && uflag && dflag) show(ofp, prevline); thisline = p; thisbuflen = b1; tthis = NULL; repeats = 0; } else ++repeats; } if (ferror(ifp)) err(1, "%s", ifn); if (cflag || !dflag || !uflag) show(ofp, prevline); exit(0); } static wchar_t * convert(const char *str) { size_t n; wchar_t *buf, *ret, *p; if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1) return (NULL); if (SIZE_MAX / sizeof(*buf) < n + 1) errx(1, "conversion buffer length overflow"); if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL) err(1, "malloc"); if (mbstowcs(buf, str, n + 1) != n) errx(1, "internal mbstowcs() error"); /* The last line may not end with \n. */ if (n > 0 && buf[n - 1] == L'\n') buf[n - 1] = L'\0'; /* If requested get the chosen fields + character offsets. */ if (numfields || numchars) { if ((ret = wcsdup(skip(buf))) == NULL) err(1, "wcsdup"); free(buf); } else ret = buf; if (iflag) { for (p = ret; *p != L'\0'; p++) *p = towlower(*p); } return (ret); } static int inlcmp(const char *s1, const char *s2) { int c1, c2; while (*s1 == *s2++) if (*s1++ == '\0') return (0); c1 = (unsigned char)*s1; c2 = (unsigned char)*(s2 - 1); /* The last line may not end with \n. */ if (c1 == '\n') c1 = '\0'; if (c2 == '\n') c2 = '\0'; return (c1 - c2); } /* * show -- * Output a line depending on the flags and number of repetitions * of the line. 
*/ static void show(FILE *ofp, const char *str) { if (cflag) (void)fprintf(ofp, "%4d %s", repeats + 1, str); if ((dflag && repeats) || (uflag && !repeats)) (void)fprintf(ofp, "%s", str); } static wchar_t * skip(wchar_t *str) { int nchars, nfields; for (nfields = 0; *str != L'\0' && nfields++ != numfields; ) { while (iswblank(*str)) str++; while (*str != L'\0' && !iswblank(*str)) str++; } for (nchars = numchars; nchars-- && *str != L'\0'; ++str) ; return(str); } static FILE * file(const char *name, const char *mode) { FILE *fp; if ((fp = fopen(name, mode)) == NULL) err(1, "%s", name); return(fp); } static void obsolete(char *argv[]) { int len; char *ap, *p, *start; while ((ap = *++argv)) { /* Return if "--" or not an option of any form. */ if (ap[0] != '-') { if (ap[0] != '+') return; } else if (ap[1] == '-') return; if (!isdigit((unsigned char)ap[1])) continue; /* * Digit signifies an old-style option. Malloc space for dash, * new option and argument. */ len = strlen(ap); if ((start = p = malloc(len + 3)) == NULL) err(1, "malloc"); *p++ = '-'; *p++ = ap[0] == '+' ? 's' : 'f'; (void)strcpy(p, ap + 1); *argv = start; } } static void usage(void) { (void)fprintf(stderr, "usage: uniq [-c | -d | -u] [-i] [-f fields] [-s chars] [input [output]]\n"); exit(1); } Index: user/alc/PQ_LAUNDRY/usr.sbin/autofs/common.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.sbin/autofs/common.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.sbin/autofs/common.c (revision 303642) @@ -1,1224 +1,1223 @@ /*- * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include "autofs_ioctl.h" #include "common.h" extern FILE *yyin; extern char *yytext; extern int yylex(void); static void parse_master_yyin(struct node *root, const char *master); static void parse_map_yyin(struct node *parent, const char *map, const char *executable_key); char * checked_strdup(const char *s) { char *c; assert(s != NULL); c = strdup(s); if (c == NULL) log_err(1, "strdup"); return (c); } /* * Concatenate two strings, inserting separator between them, unless not needed. */ char * concat(const char *s1, char separator, const char *s2) { char *result; char s1last, s2first; int ret; if (s1 == NULL) s1 = ""; if (s2 == NULL) s2 = ""; if (s1[0] == '\0') s1last = '\0'; else s1last = s1[strlen(s1) - 1]; s2first = s2[0]; if (s1last == separator && s2first == separator) { /* * If s1 ends with the separator and s2 begins with * it - skip the latter; otherwise concatenating "/" * and "/foo" would end up returning "//foo". */ ret = asprintf(&result, "%s%s", s1, s2 + 1); } else if (s1last == separator || s2first == separator || s1[0] == '\0' || s2[0] == '\0') { ret = asprintf(&result, "%s%s", s1, s2); } else { ret = asprintf(&result, "%s%c%s", s1, separator, s2); } if (ret < 0) log_err(1, "asprintf"); //log_debugx("%s: got %s and %s, returning %s", __func__, s1, s2, result); return (result); } void create_directory(const char *path) { char *component, *copy, *tofree, *partial, *tmp; int error; assert(path[0] == '/'); /* * +1 to skip the leading slash. */ copy = tofree = checked_strdup(path + 1); partial = checked_strdup(""); for (;;) { component = strsep(©, "/"); if (component == NULL) break; tmp = concat(partial, '/', component); free(partial); partial = tmp; //log_debugx("creating \"%s\"", partial); error = mkdir(partial, 0755); if (error != 0 && errno != EEXIST) { log_warn("cannot create %s", partial); return; } } free(tofree); } struct node * node_new_root(void) { struct node *n; n = calloc(1, sizeof(*n)); if (n == NULL) log_err(1, "calloc"); // XXX n->n_key = checked_strdup("/"); n->n_options = checked_strdup(""); TAILQ_INIT(&n->n_children); return (n); } struct node * node_new(struct node *parent, char *key, char *options, char *location, const char *config_file, int config_line) { struct node *n; n = calloc(1, sizeof(*n)); if (n == NULL) log_err(1, "calloc"); TAILQ_INIT(&n->n_children); assert(key != NULL); assert(key[0] != '\0'); n->n_key = key; if (options != NULL) n->n_options = options; else n->n_options = strdup(""); n->n_location = location; assert(config_file != NULL); n->n_config_file = config_file; assert(config_line >= 0); n->n_config_line = config_line; assert(parent != NULL); n->n_parent = parent; TAILQ_INSERT_TAIL(&parent->n_children, n, n_next); return (n); } struct node * node_new_map(struct node *parent, char *key, char *options, char *map, const char *config_file, int config_line) { struct node *n; n = calloc(1, sizeof(*n)); if (n == NULL) log_err(1, "calloc"); TAILQ_INIT(&n->n_children); assert(key != NULL); assert(key[0] != '\0'); n->n_key = key; if (options != NULL) n->n_options = options; else n->n_options = strdup(""); n->n_map = map; assert(config_file != NULL); n->n_config_file = config_file; assert(config_line >= 0); n->n_config_line = config_line; assert(parent != 
NULL); n->n_parent = parent; TAILQ_INSERT_TAIL(&parent->n_children, n, n_next); return (n); } static struct node * node_duplicate(const struct node *o, struct node *parent) { const struct node *child; struct node *n; if (parent == NULL) parent = o->n_parent; n = node_new(parent, o->n_key, o->n_options, o->n_location, o->n_config_file, o->n_config_line); TAILQ_FOREACH(child, &o->n_children, n_next) node_duplicate(child, n); return (n); } static void node_delete(struct node *n) { struct node *child, *tmp; assert (n != NULL); TAILQ_FOREACH_SAFE(child, &n->n_children, n_next, tmp) node_delete(child); if (n->n_parent != NULL) TAILQ_REMOVE(&n->n_parent->n_children, n, n_next); free(n); } /* * Move (reparent) node 'n' to make it sibling of 'previous', placed * just after it. */ static void node_move_after(struct node *n, struct node *previous) { TAILQ_REMOVE(&n->n_parent->n_children, n, n_next); n->n_parent = previous->n_parent; TAILQ_INSERT_AFTER(&previous->n_parent->n_children, previous, n, n_next); } static void node_expand_includes(struct node *root, bool is_master) { struct node *n, *n2, *tmp, *tmp2, *tmproot; int error; TAILQ_FOREACH_SAFE(n, &root->n_children, n_next, tmp) { if (n->n_key[0] != '+') continue; error = access(AUTO_INCLUDE_PATH, F_OK); if (error != 0) { log_errx(1, "directory services not configured; " "%s does not exist", AUTO_INCLUDE_PATH); } /* * "+1" to skip leading "+". */ yyin = auto_popen(AUTO_INCLUDE_PATH, n->n_key + 1, NULL); assert(yyin != NULL); tmproot = node_new_root(); if (is_master) parse_master_yyin(tmproot, n->n_key); else parse_map_yyin(tmproot, n->n_key, NULL); error = auto_pclose(yyin); yyin = NULL; if (error != 0) { log_errx(1, "failed to handle include \"%s\"", n->n_key); } /* * Entries to be included are now in tmproot. We need to merge * them with the rest, preserving their place and ordering. */ TAILQ_FOREACH_REVERSE_SAFE(n2, &tmproot->n_children, nodehead, n_next, tmp2) { node_move_after(n2, n); } node_delete(n); node_delete(tmproot); } } static char * expand_ampersand(char *string, const char *key) { char c, *expanded; int i, ret, before_len = 0; bool backslashed = false; assert(key[0] != '\0'); expanded = checked_strdup(string); for (i = 0; string[i] != '\0'; i++) { c = string[i]; if (c == '\\' && backslashed == false) { backslashed = true; continue; } if (backslashed) { backslashed = false; continue; } backslashed = false; if (c != '&') continue; /* * The 'before_len' variable contains the number * of characters before the '&'. */ before_len = i; //assert(i + 1 < (int)strlen(string)); ret = asprintf(&expanded, "%.*s%s%s", before_len, string, key, string + before_len + 1); if (ret < 0) log_err(1, "asprintf"); //log_debugx("\"%s\" expanded with key \"%s\" to \"%s\"", // string, key, expanded); /* * Figure out where to start searching for next variable. */ string = expanded; i = before_len + strlen(key); backslashed = false; //assert(i < (int)strlen(string)); } return (expanded); } /* * Expand "&" in n_location. If the key is NULL, try to use * key from map entries themselves. Keep in mind that maps * consist of tho levels of node structures, the key is one * level up. * * Variant with NULL key is for "automount -LL". 
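 *
 * For example, a hypothetical wildcard map entry whose location reads
 *
 *	server:/exports/&
 *
 * looked up with the key "music" expands to "server:/exports/music".
 * An ampersand preceded by a backslash is skipped and left unexpanded,
 * as handled in expand_ampersand() above.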
*/ void node_expand_ampersand(struct node *n, const char *key) { struct node *child; if (n->n_location != NULL) { if (key == NULL) { if (n->n_parent != NULL && strcmp(n->n_parent->n_key, "*") != 0) { n->n_location = expand_ampersand(n->n_location, n->n_parent->n_key); } } else { n->n_location = expand_ampersand(n->n_location, key); } } TAILQ_FOREACH(child, &n->n_children, n_next) node_expand_ampersand(child, key); } /* * Expand "*" in n_key. */ void node_expand_wildcard(struct node *n, const char *key) { struct node *child, *expanded; assert(key != NULL); if (strcmp(n->n_key, "*") == 0) { expanded = node_duplicate(n, NULL); expanded->n_key = checked_strdup(key); node_move_after(expanded, n); } TAILQ_FOREACH(child, &n->n_children, n_next) node_expand_wildcard(child, key); } int node_expand_defined(struct node *n) { struct node *child; int error, cumulated_error = 0; if (n->n_location != NULL) { n->n_location = defined_expand(n->n_location); if (n->n_location == NULL) { log_warnx("failed to expand location for %s", node_path(n)); return (EINVAL); } } TAILQ_FOREACH(child, &n->n_children, n_next) { error = node_expand_defined(child); if (error != 0 && cumulated_error == 0) cumulated_error = error; } return (cumulated_error); } static bool node_is_direct_key(const struct node *n) { if (n->n_parent != NULL && n->n_parent->n_parent == NULL && strcmp(n->n_key, "/-") == 0) { return (true); } return (false); } bool node_is_direct_map(const struct node *n) { for (;;) { assert(n->n_parent != NULL); if (n->n_parent->n_parent == NULL) break; n = n->n_parent; } return (node_is_direct_key(n)); } bool node_has_wildcards(const struct node *n) { const struct node *child; TAILQ_FOREACH(child, &n->n_children, n_next) { if (strcmp(child->n_key, "*") == 0) return (true); } return (false); } static void node_expand_maps(struct node *n, bool indirect) { struct node *child, *tmp; TAILQ_FOREACH_SAFE(child, &n->n_children, n_next, tmp) { if (node_is_direct_map(child)) { if (indirect) continue; } else { if (indirect == false) continue; } /* * This is the first-level map node; the one that contains * the key and subnodes with mountpoints and actual map names. */ if (child->n_map == NULL) continue; if (indirect) { log_debugx("map \"%s\" is an indirect map, parsing", child->n_map); } else { log_debugx("map \"%s\" is a direct map, parsing", child->n_map); } parse_map(child, child->n_map, NULL, NULL); } } static void node_expand_direct_maps(struct node *n) { node_expand_maps(n, false); } void node_expand_indirect_maps(struct node *n) { node_expand_maps(n, true); } static char * node_path_x(const struct node *n, char *x) { char *path; if (n->n_parent == NULL) return (x); /* * Return "/-" for direct maps only if we were asked for path * to the "/-" node itself, not to any of its subnodes. */ if (node_is_direct_key(n) && x[0] != '\0') return (x); assert(n->n_key[0] != '\0'); path = concat(n->n_key, '/', x); free(x); return (node_path_x(n->n_parent, path)); } /* * Return full path for node, consisting of concatenated * paths of node itself and all its parents, up to the root. */ char * node_path(const struct node *n) { char *path; size_t len; path = node_path_x(n, checked_strdup("")); /* * Strip trailing slash, unless the whole path is "/". 
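 * For a map entry keyed "music" under an auto_master mount point of
 * "/media", for example, the assembled path is "/media/music".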
*/ len = strlen(path); if (len > 1 && path[len - 1] == '/') path[len - 1] = '\0'; return (path); } static char * node_options_x(const struct node *n, char *x) { char *options; if (n == NULL) return (x); options = concat(x, ',', n->n_options); free(x); return (node_options_x(n->n_parent, options)); } /* * Return options for node, consisting of concatenated * options from the node itself and all its parents, * up to the root. */ char * node_options(const struct node *n) { return (node_options_x(n, checked_strdup(""))); } static void node_print_indent(const struct node *n, const char *cmdline_options, int indent) { const struct node *child, *first_child; char *path, *options, *tmp; path = node_path(n); tmp = node_options(n); options = concat(cmdline_options, ',', tmp); free(tmp); /* * Do not show both parent and child node if they have the same * mountpoint; only show the child node. This means the typical, * "key location", map entries are shown in a single line; * the "key mountpoint1 location2 mountpoint2 location2" entries * take multiple lines. */ first_child = TAILQ_FIRST(&n->n_children); if (first_child == NULL || TAILQ_NEXT(first_child, n_next) != NULL || strcmp(path, node_path(first_child)) != 0) { assert(n->n_location == NULL || n->n_map == NULL); printf("%*.s%-*s %s%-*s %-*s # %s map %s at %s:%d\n", indent, "", 25 - indent, path, options[0] != '\0' ? "-" : " ", 20, options[0] != '\0' ? options : "", 20, n->n_location != NULL ? n->n_location : n->n_map != NULL ? n->n_map : "", node_is_direct_map(n) ? "direct" : "indirect", indent == 0 ? "referenced" : "defined", n->n_config_file, n->n_config_line); } free(path); free(options); TAILQ_FOREACH(child, &n->n_children, n_next) node_print_indent(child, cmdline_options, indent + 2); } /* * Recursively print node with all its children. The cmdline_options * argument is used for additional options to be prepended to all the * others - usually those are the options passed by command line. */ void node_print(const struct node *n, const char *cmdline_options) { const struct node *child; TAILQ_FOREACH(child, &n->n_children, n_next) node_print_indent(child, cmdline_options, 0); } static struct node * node_find_x(struct node *node, const char *path) { struct node *child, *found; char *tmp; size_t tmplen; //log_debugx("looking up %s in %s", path, node_path(node)); if (!node_is_direct_key(node)) { tmp = node_path(node); tmplen = strlen(tmp); if (strncmp(tmp, path, tmplen) != 0) { free(tmp); return (NULL); } if (path[tmplen] != '/' && path[tmplen] != '\0') { /* * If we have two map entries like 'foo' and 'foobar', make * sure the search for 'foobar' won't match 'foo' instead. */ free(tmp); return (NULL); } free(tmp); } TAILQ_FOREACH(child, &node->n_children, n_next) { found = node_find_x(child, path); if (found != NULL) return (found); } if (node->n_parent == NULL || node_is_direct_key(node)) return (NULL); return (node); } struct node * node_find(struct node *root, const char *path) { struct node *node; assert(root->n_parent == NULL); node = node_find_x(root, path); if (node != NULL) assert(node != root); return (node); } /* * Canonical form of a map entry looks like this: * * key [-options] [ [/mountpoint] [-options2] location ... ] * * Entries for executable maps are slightly different, as they * lack the 'key' field and are always single-line; the key field * for those maps is taken from 'executable_key' argument. * * We parse it in such a way that a map always has two levels - first * for key, and the second, for the mountpoint. 
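 *
 * For example (host and paths purely illustrative), a single-location
 * entry
 *
 *	doc	-ro,nosuid	server:/exports/doc
 *
 * becomes a key node "doc" with one child mounted at "/", while a
 * multi-mountpoint entry such as
 *
 *	src	/head server:/exports/head /stable server:/exports/stable
 *
 * becomes a key node "src" with one child per mountpoint.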
*/ static void parse_map_yyin(struct node *parent, const char *map, const char *executable_key) { char *key = NULL, *options = NULL, *mountpoint = NULL, *options2 = NULL, *location = NULL; int ret; struct node *node; lineno = 1; if (executable_key != NULL) key = checked_strdup(executable_key); for (;;) { ret = yylex(); if (ret == 0 || ret == NEWLINE) { /* * In case of executable map, the key is always * non-NULL, even if the map is empty. So, make sure * we don't fail empty maps here. */ if ((key != NULL && executable_key == NULL) || options != NULL) { log_errx(1, "truncated entry at %s, line %d", map, lineno); } if (ret == 0 || executable_key != NULL) { /* * End of file. */ break; } else { key = options = NULL; continue; } } if (key == NULL) { key = checked_strdup(yytext); if (key[0] == '+') { node_new(parent, key, NULL, NULL, map, lineno); key = options = NULL; continue; } continue; } else if (yytext[0] == '-') { if (options != NULL) { log_errx(1, "duplicated options at %s, line %d", map, lineno); } /* * +1 to skip leading "-". */ options = checked_strdup(yytext + 1); continue; } /* * We cannot properly handle a situation where the map key * is "/". Ignore such entries. * * XXX: According to Piete Brooks, Linux automounter uses * "/" as a wildcard character in LDAP maps. Perhaps * we should work around this braindamage by substituting * "*" for "/"? */ if (strcmp(key, "/") == 0) { log_warnx("nonsensical map key \"/\" at %s, line %d; " "ignoring map entry ", map, lineno); /* * Skip the rest of the entry. */ do { ret = yylex(); } while (ret != 0 && ret != NEWLINE); key = options = NULL; continue; } //log_debugx("adding map node, %s", key); node = node_new(parent, key, options, NULL, map, lineno); key = options = NULL; for (;;) { if (yytext[0] == '/') { if (mountpoint != NULL) { log_errx(1, "duplicated mountpoint " "in %s, line %d", map, lineno); } if (options2 != NULL || location != NULL) { log_errx(1, "mountpoint out of order " "in %s, line %d", map, lineno); } mountpoint = checked_strdup(yytext); goto again; } if (yytext[0] == '-') { if (options2 != NULL) { log_errx(1, "duplicated options " "in %s, line %d", map, lineno); } if (location != NULL) { log_errx(1, "options out of order " "in %s, line %d", map, lineno); } options2 = checked_strdup(yytext + 1); goto again; } if (location != NULL) { log_errx(1, "too many arguments " "in %s, line %d", map, lineno); } /* * If location field starts with colon, e.g. ":/dev/cd0", * then strip it. */ if (yytext[0] == ':') { location = checked_strdup(yytext + 1); if (location[0] == '\0') { log_errx(1, "empty location in %s, " "line %d", map, lineno); } } else { location = checked_strdup(yytext); } if (mountpoint == NULL) mountpoint = checked_strdup("/"); if (options2 == NULL) options2 = checked_strdup(""); #if 0 log_debugx("adding map node, %s %s %s", mountpoint, options2, location); #endif node_new(node, mountpoint, options2, location, map, lineno); mountpoint = options2 = location = NULL; again: ret = yylex(); if (ret == 0 || ret == NEWLINE) { if (mountpoint != NULL || options2 != NULL || location != NULL) { log_errx(1, "truncated entry " "in %s, line %d", map, lineno); } break; } } } } /* * Parse output of a special map called without argument. It is a list * of keys, separated by newlines. They can contain whitespace, so use * getline(3) instead of lexer used for maps. 
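 * The -hosts special map is the typical case: invoked without a key,
 * the corresponding special_hosts script prints one key (host name) per
 * line, and each such line becomes a key node here; see
 * parse_special_map() below for how that output ends up in yyin.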
*/ static void parse_map_keys_yyin(struct node *parent, const char *map) { char *line = NULL, *key; size_t linecap = 0; ssize_t linelen; lineno = 1; for (;;) { linelen = getline(&line, &linecap, yyin); if (linelen < 0) { /* * End of file. */ break; } if (linelen <= 1) { /* * Empty line, consisting of just the newline. */ continue; } /* * "-1" to strip the trailing newline. */ key = strndup(line, linelen - 1); log_debugx("adding key \"%s\"", key); node_new(parent, key, NULL, NULL, map, lineno); lineno++; } free(line); } static bool file_is_executable(const char *path) { struct stat sb; int error; error = stat(path, &sb); if (error != 0) log_err(1, "cannot stat %s", path); if ((sb.st_mode & S_IXUSR) || (sb.st_mode & S_IXGRP) || (sb.st_mode & S_IXOTH)) return (true); return (false); } /* * Parse a special map, e.g. "-hosts". */ static void parse_special_map(struct node *parent, const char *map, const char *key) { char *path; int error, ret; assert(map[0] == '-'); /* * +1 to skip leading "-" in map name. */ ret = asprintf(&path, "%s/special_%s", AUTO_SPECIAL_PREFIX, map + 1); if (ret < 0) log_err(1, "asprintf"); yyin = auto_popen(path, key, NULL); assert(yyin != NULL); if (key == NULL) { parse_map_keys_yyin(parent, map); } else { parse_map_yyin(parent, map, key); } error = auto_pclose(yyin); yyin = NULL; if (error != 0) log_errx(1, "failed to handle special map \"%s\"", map); node_expand_includes(parent, false); node_expand_direct_maps(parent); free(path); } /* * Retrieve and parse map from directory services, e.g. LDAP. * Note that it is different from executable maps, in that * the include script outputs the whole map to standard output * (as opposed to executable maps that only output a single * entry, without the key), and it takes the map name as an * argument, instead of key. */ static void parse_included_map(struct node *parent, const char *map) { int error; assert(map[0] != '-'); assert(map[0] != '/'); error = access(AUTO_INCLUDE_PATH, F_OK); if (error != 0) { log_errx(1, "directory services not configured;" " %s does not exist", AUTO_INCLUDE_PATH); } yyin = auto_popen(AUTO_INCLUDE_PATH, map, NULL); assert(yyin != NULL); parse_map_yyin(parent, map, NULL); error = auto_pclose(yyin); yyin = NULL; if (error != 0) log_errx(1, "failed to handle remote map \"%s\"", map); node_expand_includes(parent, false); node_expand_direct_maps(parent); } void parse_map(struct node *parent, const char *map, const char *key, bool *wildcards) { char *path = NULL; int error, ret; bool executable; assert(map != NULL); assert(map[0] != '\0'); log_debugx("parsing map \"%s\"", map); if (wildcards != NULL) *wildcards = false; if (map[0] == '-') { if (wildcards != NULL) *wildcards = true; return (parse_special_map(parent, map, key)); } if (map[0] == '/') { path = checked_strdup(map); } else { ret = asprintf(&path, "%s/%s", AUTO_MAP_PREFIX, map); if (ret < 0) log_err(1, "asprintf"); log_debugx("map \"%s\" maps to \"%s\"", map, path); /* * See if the file exists. If not, try to obtain the map * from directory services. 
*/ error = access(path, F_OK); if (error != 0) { log_debugx("map file \"%s\" does not exist; falling " "back to directory services", path); return (parse_included_map(parent, map)); } } executable = file_is_executable(path); if (executable) { log_debugx("map \"%s\" is executable", map); if (wildcards != NULL) *wildcards = true; if (key != NULL) { yyin = auto_popen(path, key, NULL); } else { yyin = auto_popen(path, NULL); } assert(yyin != NULL); } else { yyin = fopen(path, "r"); if (yyin == NULL) log_err(1, "unable to open \"%s\"", path); } free(path); path = NULL; parse_map_yyin(parent, map, executable ? key : NULL); if (executable) { error = auto_pclose(yyin); yyin = NULL; if (error != 0) { log_errx(1, "failed to handle executable map \"%s\"", map); } } else { fclose(yyin); } yyin = NULL; log_debugx("done parsing map \"%s\"", map); node_expand_includes(parent, false); node_expand_direct_maps(parent); } static void parse_master_yyin(struct node *root, const char *master) { char *mountpoint = NULL, *map = NULL, *options = NULL; int ret; /* * XXX: 1 gives incorrect values; wtf? */ lineno = 0; for (;;) { ret = yylex(); if (ret == 0 || ret == NEWLINE) { if (mountpoint != NULL) { //log_debugx("adding map for %s", mountpoint); node_new_map(root, mountpoint, options, map, master, lineno); } if (ret == 0) { break; } else { mountpoint = map = options = NULL; continue; } } if (mountpoint == NULL) { mountpoint = checked_strdup(yytext); } else if (map == NULL) { map = checked_strdup(yytext); } else if (options == NULL) { /* * +1 to skip leading "-". */ options = checked_strdup(yytext + 1); } else { log_errx(1, "too many arguments at %s, line %d", master, lineno); } } } void parse_master(struct node *root, const char *master) { log_debugx("parsing auto_master file at \"%s\"", master); yyin = fopen(master, "r"); if (yyin == NULL) err(1, "unable to open %s", master); parse_master_yyin(root, master); fclose(yyin); yyin = NULL; log_debugx("done parsing \"%s\"", master); node_expand_includes(root, true); node_expand_direct_maps(root); } /* * Two things daemon(3) does, that we actually also want to do * when running in foreground, is closing the stdin and chdiring * to "/". This is what we do here. */ void lesser_daemon(void) { int error, fd; error = chdir("/"); if (error != 0) log_warn("chdir"); fd = open(_PATH_DEVNULL, O_RDWR, 0); if (fd < 0) { log_warn("cannot open %s", _PATH_DEVNULL); return; } error = dup2(fd, STDIN_FILENO); if (error != 0) log_warn("dup2"); error = close(fd); if (error != 0) { /* Bloody hell. */ log_warn("close"); } } int main(int argc, char **argv) { char *cmdname; if (argv[0] == NULL) log_errx(1, "NULL command name"); cmdname = basename(argv[0]); if (strcmp(cmdname, "automount") == 0) return (main_automount(argc, argv)); else if (strcmp(cmdname, "automountd") == 0) return (main_automountd(argc, argv)); else if (strcmp(cmdname, "autounmountd") == 0) return (main_autounmountd(argc, argv)); else log_errx(1, "binary name should be either \"automount\", " "\"automountd\", or \"autounmountd\""); } Index: user/alc/PQ_LAUNDRY/usr.sbin/bsdinstall/scripts/hardening =================================================================== --- user/alc/PQ_LAUNDRY/usr.sbin/bsdinstall/scripts/hardening (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.sbin/bsdinstall/scripts/hardening (revision 303642) @@ -1,79 +1,79 @@ #!/bin/sh #- # Copyright (c) 2016 Bartek Rutkowski # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ : ${DIALOG_OK=0} -echo -n > $BSDINSTALL_TMPETC/rc.conf.services +echo -n > $BSDINSTALL_TMPETC/rc.conf.hardening exec 3>&1 FEATURES=$( dialog --backtitle "FreeBSD Installer" \ --title "System Hardening" --nocancel --notags --separate-output \ --checklist "Choose system security hardening options:" \ 0 0 0 \ "hide_uids" "Hide processes running as other users" ${hide_uids:-off} \ "hide_gids" "Hide processes running as other groups" ${hide_gids:-off} \ "read_msgbuf" "Disable reading kernel message buffer for unprivileged users" ${read_msgbuf:-off} \ "proc_debug" "Disable process debugging facilities for unprivileged users" ${proc_debug:-off} \ "random_pid" "Randomize the PID of newly created processes" ${random_id:-off} \ "stack_guard" "Insert stack guard page ahead of the growable segments" ${stack_guard:-off} \ "clear_tmp" "Clean the /tmp filesystem on system startup" ${clear_tmp:-off} \ "disable_syslogd" "Disable opening Syslogd network socket (disables remote logging)" ${disable_syslogd:-off} \ "disable_sendmail" "Disable Sendmail service" ${disable_sendmail:-off} \ 2>&1 1>&3 ) exec 3>&- for feature in $FEATURES; do if [ "$feature" = "hide_uids" ]; then echo security.bsd.see_other_uids=0 >> $BSDINSTALL_TMPETC/sysctl.conf.hardening fi if [ "$feature" = "hide_gids" ]; then echo security.bsd.see_other_gids=0 >> $BSDINSTALL_TMPETC/sysctl.conf.hardening fi if [ "$feature" = "read_msgbuf" ]; then echo security.bsd.unprivileged_read_msgbuf=0 >> $BSDINSTALL_TMPETC/sysctl.conf.hardening fi if [ "$feature" = "proc_debug" ]; then echo security.bsd.unprivileged_proc_debug=0 >> $BSDINSTALL_TMPETC/sysctl.conf.hardening fi if [ "$feature" = "random_id" ]; then echo kern.randompid=$(jot -r 1 9999) >> $BSDINSTALL_TMPETC/sysctl.conf.hardening fi if [ "$feature" = "stack_guard" ]; then echo security.bsd.stack_guard_page=1 >> $BSDINSTALL_TMPETC/sysctl.conf.hardening fi if [ "$feature" = "clear_tmp" ]; then echo 'clear_tmp_enable="YES"' >> $BSDINSTALL_TMPETC/rc.conf.hardening fi if [ "$feature" = "disable_syslogd" ]; then echo 'syslogd_flags="-ss"' >> $BSDINSTALL_TMPETC/rc.conf.hardening fi if [ "$feature" = "disable_sendmail" ]; then echo 'sendmail_enable="NONE"' >> $BSDINSTALL_TMPETC/rc.conf.hardening fi done Index: user/alc/PQ_LAUNDRY/usr.sbin/pkg/pkg.c 
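In the bsdinstall hardening script above, the one-line change makes it
truncate $BSDINSTALL_TMPETC/rc.conf.hardening instead of rc.conf.services, so
the file emptied at startup is the same rc.conf fragment that the feature loop
appends clear_tmp_enable, syslogd_flags and sendmail_enable to, and anything
another installer step may have written to rc.conf.services is left untouched.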
=================================================================== --- user/alc/PQ_LAUNDRY/usr.sbin/pkg/pkg.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.sbin/pkg/pkg.c (revision 303642) @@ -1,1109 +1,1108 @@ /*- * Copyright (c) 2012-2014 Baptiste Daroussin * Copyright (c) 2013 Bryan Drewery * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "dns_utils.h" #include "config.h" struct sig_cert { char *name; unsigned char *sig; int siglen; unsigned char *cert; int certlen; bool trusted; }; struct pubkey { unsigned char *sig; int siglen; }; typedef enum { HASH_UNKNOWN, HASH_SHA256, } hash_t; struct fingerprint { hash_t type; char *name; char hash[BUFSIZ]; STAILQ_ENTRY(fingerprint) next; }; STAILQ_HEAD(fingerprint_list, fingerprint); static int extract_pkg_static(int fd, char *p, int sz) { struct archive *a; struct archive_entry *ae; char *end; int ret, r; ret = -1; a = archive_read_new(); if (a == NULL) { warn("archive_read_new"); return (ret); } archive_read_support_filter_all(a); archive_read_support_format_tar(a); if (lseek(fd, 0, 0) == -1) { warn("lseek"); goto cleanup; } if (archive_read_open_fd(a, fd, 4096) != ARCHIVE_OK) { warnx("archive_read_open_fd: %s", archive_error_string(a)); goto cleanup; } ae = NULL; while ((r = archive_read_next_header(a, &ae)) == ARCHIVE_OK) { end = strrchr(archive_entry_pathname(ae), '/'); if (end == NULL) continue; if (strcmp(end, "/pkg-static") == 0) { r = archive_read_extract(a, ae, ARCHIVE_EXTRACT_OWNER | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_TIME | ARCHIVE_EXTRACT_ACL | ARCHIVE_EXTRACT_FFLAGS | ARCHIVE_EXTRACT_XATTR); strlcpy(p, archive_entry_pathname(ae), sz); break; } } if (r == ARCHIVE_OK) ret = 0; else warnx("failed to extract pkg-static: %s", archive_error_string(a)); cleanup: archive_read_free(a); return (ret); } static int install_pkg_static(const char *path, const char *pkgpath, bool force) { int pstat; pid_t pid; switch ((pid = fork())) { case -1: return (-1); case 0: if (force) execl(path, "pkg-static", "add", "-f", pkgpath, (char *)NULL); else execl(path, "pkg-static", "add", pkgpath, 
(char *)NULL); _exit(1); default: break; } while (waitpid(pid, &pstat, 0) == -1) if (errno != EINTR) return (-1); if (WEXITSTATUS(pstat)) return (WEXITSTATUS(pstat)); else if (WIFSIGNALED(pstat)) return (128 & (WTERMSIG(pstat))); return (pstat); } static int fetch_to_fd(const char *url, char *path) { struct url *u; struct dns_srvinfo *mirrors, *current; struct url_stat st; FILE *remote; /* To store _https._tcp. + hostname + \0 */ int fd; int retry, max_retry; ssize_t r; char buf[10240]; char zone[MAXHOSTNAMELEN + 13]; static const char *mirror_type = NULL; max_retry = 3; current = mirrors = NULL; remote = NULL; if (mirror_type == NULL && config_string(MIRROR_TYPE, &mirror_type) != 0) { warnx("No MIRROR_TYPE defined"); return (-1); } if ((fd = mkstemp(path)) == -1) { warn("mkstemp()"); return (-1); } retry = max_retry; if ((u = fetchParseURL(url)) == NULL) { warn("fetchParseURL('%s')", url); return (-1); } while (remote == NULL) { if (retry == max_retry) { if (strcmp(u->scheme, "file") != 0 && strcasecmp(mirror_type, "srv") == 0) { snprintf(zone, sizeof(zone), "_%s._tcp.%s", u->scheme, u->host); mirrors = dns_getsrvinfo(zone); current = mirrors; } } if (mirrors != NULL) { strlcpy(u->host, current->host, sizeof(u->host)); u->port = current->port; } remote = fetchXGet(u, &st, ""); if (remote == NULL) { --retry; if (retry <= 0) goto fetchfail; if (mirrors == NULL) { sleep(1); } else { current = current->next; if (current == NULL) current = mirrors; } } } while ((r = fread(buf, 1, sizeof(buf), remote)) > 0) { if (write(fd, buf, r) != r) { warn("write()"); goto fetchfail; } } if (r != 0) { warn("An error occurred while fetching pkg(8)"); goto fetchfail; } if (ferror(remote)) goto fetchfail; goto cleanup; fetchfail: if (fd != -1) { close(fd); fd = -1; unlink(path); } cleanup: if (remote != NULL) fclose(remote); return fd; } static struct fingerprint * parse_fingerprint(ucl_object_t *obj) { const ucl_object_t *cur; ucl_object_iter_t it = NULL; const char *function, *fp, *key; struct fingerprint *f; hash_t fct = HASH_UNKNOWN; function = fp = NULL; while ((cur = ucl_iterate_object(obj, &it, true))) { key = ucl_object_key(cur); if (cur->type != UCL_STRING) continue; if (strcasecmp(key, "function") == 0) { function = ucl_object_tostring(cur); continue; } if (strcasecmp(key, "fingerprint") == 0) { fp = ucl_object_tostring(cur); continue; } } if (fp == NULL || function == NULL) return (NULL); if (strcasecmp(function, "sha256") == 0) fct = HASH_SHA256; if (fct == HASH_UNKNOWN) { warnx("Unsupported hashing function: %s", function); return (NULL); } f = calloc(1, sizeof(struct fingerprint)); f->type = fct; strlcpy(f->hash, fp, sizeof(f->hash)); return (f); } static void free_fingerprint_list(struct fingerprint_list* list) { struct fingerprint *fingerprint, *tmp; STAILQ_FOREACH_SAFE(fingerprint, list, next, tmp) { free(fingerprint->name); free(fingerprint); } free(list); } static struct fingerprint * load_fingerprint(const char *dir, const char *filename) { ucl_object_t *obj = NULL; struct ucl_parser *p = NULL; struct fingerprint *f; char path[MAXPATHLEN]; f = NULL; snprintf(path, MAXPATHLEN, "%s/%s", dir, filename); p = ucl_parser_new(0); if (!ucl_parser_add_file(p, path)) { warnx("%s: %s", path, ucl_parser_get_error(p)); ucl_parser_free(p); return (NULL); } obj = ucl_parser_get_object(p); if (obj->type == UCL_OBJECT) f = parse_fingerprint(obj); if (f != NULL) f->name = strdup(filename); ucl_object_unref(obj); ucl_parser_free(p); return (f); } static struct fingerprint_list * load_fingerprints(const char 
*path, int *count) { DIR *d; struct dirent *ent; struct fingerprint *finger; struct fingerprint_list *fingerprints; *count = 0; fingerprints = calloc(1, sizeof(struct fingerprint_list)); if (fingerprints == NULL) return (NULL); STAILQ_INIT(fingerprints); if ((d = opendir(path)) == NULL) { free(fingerprints); return (NULL); } while ((ent = readdir(d))) { if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue; finger = load_fingerprint(path, ent->d_name); if (finger != NULL) { STAILQ_INSERT_TAIL(fingerprints, finger, next); ++(*count); } } closedir(d); return (fingerprints); } static void sha256_hash(unsigned char hash[SHA256_DIGEST_LENGTH], char out[SHA256_DIGEST_LENGTH * 2 + 1]) { int i; for (i = 0; i < SHA256_DIGEST_LENGTH; i++) sprintf(out + (i * 2), "%02x", hash[i]); out[SHA256_DIGEST_LENGTH * 2] = '\0'; } static void sha256_buf(char *buf, size_t len, char out[SHA256_DIGEST_LENGTH * 2 + 1]) { unsigned char hash[SHA256_DIGEST_LENGTH]; SHA256_CTX sha256; out[0] = '\0'; SHA256_Init(&sha256); SHA256_Update(&sha256, buf, len); SHA256_Final(hash, &sha256); sha256_hash(hash, out); } static int sha256_fd(int fd, char out[SHA256_DIGEST_LENGTH * 2 + 1]) { int my_fd; FILE *fp; char buffer[BUFSIZ]; unsigned char hash[SHA256_DIGEST_LENGTH]; size_t r; int ret; SHA256_CTX sha256; my_fd = -1; fp = NULL; r = 0; ret = 1; out[0] = '\0'; /* Duplicate the fd so that fclose(3) does not close it. */ if ((my_fd = dup(fd)) == -1) { warnx("dup"); goto cleanup; } if ((fp = fdopen(my_fd, "rb")) == NULL) { warnx("fdopen"); goto cleanup; } SHA256_Init(&sha256); while ((r = fread(buffer, 1, BUFSIZ, fp)) > 0) SHA256_Update(&sha256, buffer, r); if (ferror(fp) != 0) { warnx("fread"); goto cleanup; } SHA256_Final(hash, &sha256); sha256_hash(hash, out); ret = 0; cleanup: if (fp != NULL) fclose(fp); else if (my_fd != -1) close(my_fd); (void)lseek(fd, 0, SEEK_SET); return (ret); } static EVP_PKEY * load_public_key_file(const char *file) { EVP_PKEY *pkey; BIO *bp; char errbuf[1024]; bp = BIO_new_file(file, "r"); if (!bp) errx(EXIT_FAILURE, "Unable to read %s", file); if ((pkey = PEM_read_bio_PUBKEY(bp, NULL, NULL, NULL)) == NULL) warnx("ici: %s", ERR_error_string(ERR_get_error(), errbuf)); BIO_free(bp); return (pkey); } static EVP_PKEY * load_public_key_buf(const unsigned char *cert, int certlen) { EVP_PKEY *pkey; BIO *bp; char errbuf[1024]; bp = BIO_new_mem_buf(__DECONST(void *, cert), certlen); if ((pkey = PEM_read_bio_PUBKEY(bp, NULL, NULL, NULL)) == NULL) warnx("%s", ERR_error_string(ERR_get_error(), errbuf)); BIO_free(bp); return (pkey); } static bool rsa_verify_cert(int fd, const char *sigfile, const unsigned char *key, int keylen, unsigned char *sig, int siglen) { EVP_MD_CTX *mdctx; EVP_PKEY *pkey; char sha256[(SHA256_DIGEST_LENGTH * 2) + 2]; char errbuf[1024]; bool ret; pkey = NULL; mdctx = NULL; ret = false; SSL_load_error_strings(); /* Compute SHA256 of the package. */ if (lseek(fd, 0, 0) == -1) { warn("lseek"); goto cleanup; } if ((sha256_fd(fd, sha256)) == -1) { warnx("Error creating SHA256 hash for package"); goto cleanup; } if (sigfile != NULL) { if ((pkey = load_public_key_file(sigfile)) == NULL) { warnx("Error reading public key"); goto cleanup; } } else { if ((pkey = load_public_key_buf(key, keylen)) == NULL) { warnx("Error reading public key"); goto cleanup; } } /* Verify signature of the SHA256(pkg) is valid. 
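 * Note that what is verified here is the ASCII hex digest computed by
 * sha256_fd() above: EVP_DigestVerifyUpdate() is fed that string, so
 * the RSA signature is checked against SHA-256 of the hex digest rather
 * than against the raw package bytes.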
*/ if ((mdctx = EVP_MD_CTX_create()) == NULL) { warnx("%s", ERR_error_string(ERR_get_error(), errbuf)); goto error; } if (EVP_DigestVerifyInit(mdctx, NULL, EVP_sha256(), NULL, pkey) != 1) { warnx("%s", ERR_error_string(ERR_get_error(), errbuf)); goto error; } if (EVP_DigestVerifyUpdate(mdctx, sha256, strlen(sha256)) != 1) { warnx("%s", ERR_error_string(ERR_get_error(), errbuf)); goto error; } if (EVP_DigestVerifyFinal(mdctx, sig, siglen) != 1) { warnx("%s", ERR_error_string(ERR_get_error(), errbuf)); goto error; } ret = true; printf("done\n"); goto cleanup; error: printf("failed\n"); cleanup: if (pkey) EVP_PKEY_free(pkey); if (mdctx) EVP_MD_CTX_destroy(mdctx); ERR_free_strings(); return (ret); } static struct pubkey * read_pubkey(int fd) { struct pubkey *pk; struct sbuf *sig; char buf[4096]; int r; if (lseek(fd, 0, 0) == -1) { warn("lseek"); return (NULL); } sig = sbuf_new_auto(); while ((r = read(fd, buf, sizeof(buf))) >0) { sbuf_bcat(sig, buf, r); } sbuf_finish(sig); pk = calloc(1, sizeof(struct pubkey)); pk->siglen = sbuf_len(sig); pk->sig = calloc(1, pk->siglen); memcpy(pk->sig, sbuf_data(sig), pk->siglen); sbuf_delete(sig); return (pk); } static struct sig_cert * parse_cert(int fd) { int my_fd; struct sig_cert *sc; FILE *fp; struct sbuf *buf, *sig, *cert; char *line; size_t linecap; ssize_t linelen; buf = NULL; my_fd = -1; sc = NULL; line = NULL; linecap = 0; if (lseek(fd, 0, 0) == -1) { warn("lseek"); return (NULL); } /* Duplicate the fd so that fclose(3) does not close it. */ if ((my_fd = dup(fd)) == -1) { warnx("dup"); return (NULL); } if ((fp = fdopen(my_fd, "rb")) == NULL) { warn("fdopen"); close(my_fd); return (NULL); } sig = sbuf_new_auto(); cert = sbuf_new_auto(); while ((linelen = getline(&line, &linecap, fp)) > 0) { if (strcmp(line, "SIGNATURE\n") == 0) { buf = sig; continue; } else if (strcmp(line, "CERT\n") == 0) { buf = cert; continue; } else if (strcmp(line, "END\n") == 0) { break; } if (buf != NULL) sbuf_bcat(buf, line, linelen); } fclose(fp); /* Trim out unrelated trailing newline */ sbuf_setpos(sig, sbuf_len(sig) - 1); sbuf_finish(sig); sbuf_finish(cert); sc = calloc(1, sizeof(struct sig_cert)); sc->siglen = sbuf_len(sig); sc->sig = calloc(1, sc->siglen); memcpy(sc->sig, sbuf_data(sig), sc->siglen); sc->certlen = sbuf_len(cert); sc->cert = strdup(sbuf_data(cert)); sbuf_delete(sig); sbuf_delete(cert); return (sc); } static bool verify_pubsignature(int fd_pkg, int fd_sig) { struct pubkey *pk; const char *pubkey; bool ret; pk = NULL; pubkey = NULL; ret = false; if (config_string(PUBKEY, &pubkey) != 0) { warnx("No CONFIG_PUBKEY defined"); goto cleanup; } if ((pk = read_pubkey(fd_sig)) == NULL) { warnx("Error reading signature"); goto cleanup; } /* Verify the signature. */ printf("Verifying signature with public key %s... ", pubkey); if (rsa_verify_cert(fd_pkg, pubkey, NULL, 0, pk->sig, pk->siglen) == false) { fprintf(stderr, "Signature is not valid\n"); goto cleanup; } ret = true; cleanup: if (pk) { free(pk->sig); free(pk); } return (ret); } static bool verify_signature(int fd_pkg, int fd_sig) { struct fingerprint_list *trusted, *revoked; struct fingerprint *fingerprint; struct sig_cert *sc; bool ret; int trusted_count, revoked_count; const char *fingerprints; char path[MAXPATHLEN]; char hash[SHA256_DIGEST_LENGTH * 2 + 1]; sc = NULL; trusted = revoked = NULL; ret = false; /* Read and parse fingerprints. 
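 * FINGERPRINTS names a directory with "trusted" and "revoked"
 * subdirectories; every file in them is a small UCL document whose
 * "function" (only "sha256" is accepted) and "fingerprint" keys are
 * read by load_fingerprint()/parse_fingerprint() above.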
*/ if (config_string(FINGERPRINTS, &fingerprints) != 0) { warnx("No CONFIG_FINGERPRINTS defined"); goto cleanup; } snprintf(path, MAXPATHLEN, "%s/trusted", fingerprints); if ((trusted = load_fingerprints(path, &trusted_count)) == NULL) { warnx("Error loading trusted certificates"); goto cleanup; } if (trusted_count == 0 || trusted == NULL) { fprintf(stderr, "No trusted certificates found.\n"); goto cleanup; } snprintf(path, MAXPATHLEN, "%s/revoked", fingerprints); if ((revoked = load_fingerprints(path, &revoked_count)) == NULL) { warnx("Error loading revoked certificates"); goto cleanup; } /* Read certificate and signature in. */ if ((sc = parse_cert(fd_sig)) == NULL) { warnx("Error parsing certificate"); goto cleanup; } /* Explicitly mark as non-trusted until proven otherwise. */ sc->trusted = false; /* Parse signature and pubkey out of the certificate */ sha256_buf(sc->cert, sc->certlen, hash); /* Check if this hash is revoked */ if (revoked != NULL) { STAILQ_FOREACH(fingerprint, revoked, next) { if (strcasecmp(fingerprint->hash, hash) == 0) { fprintf(stderr, "The package was signed with " "revoked certificate %s\n", fingerprint->name); goto cleanup; } } } STAILQ_FOREACH(fingerprint, trusted, next) { if (strcasecmp(fingerprint->hash, hash) == 0) { sc->trusted = true; sc->name = strdup(fingerprint->name); break; } } if (sc->trusted == false) { fprintf(stderr, "No trusted fingerprint found matching " "package's certificate\n"); goto cleanup; } /* Verify the signature. */ printf("Verifying signature with trusted certificate %s... ", sc->name); if (rsa_verify_cert(fd_pkg, NULL, sc->cert, sc->certlen, sc->sig, sc->siglen) == false) { fprintf(stderr, "Signature is not valid\n"); goto cleanup; } ret = true; cleanup: if (trusted) free_fingerprint_list(trusted); if (revoked) free_fingerprint_list(revoked); if (sc) { free(sc->cert); free(sc->sig); free(sc->name); free(sc); } return (ret); } static int bootstrap_pkg(bool force) { int fd_pkg, fd_sig; int ret; char url[MAXPATHLEN]; char tmppkg[MAXPATHLEN]; char tmpsig[MAXPATHLEN]; const char *packagesite; const char *signature_type; char pkgstatic[MAXPATHLEN]; fd_sig = -1; ret = -1; if (config_string(PACKAGESITE, &packagesite) != 0) { warnx("No PACKAGESITE defined"); return (-1); } if (config_string(SIGNATURE_TYPE, &signature_type) != 0) { warnx("Error looking up SIGNATURE_TYPE"); return (-1); } printf("Bootstrapping pkg from %s, please wait...\n", packagesite); /* Support pkg+http:// for PACKAGESITE which is the new format in 1.2 to avoid confusion on why http://pkg.FreeBSD.org has no A record. */ if (strncmp(URL_SCHEME_PREFIX, packagesite, strlen(URL_SCHEME_PREFIX)) == 0) packagesite += strlen(URL_SCHEME_PREFIX); snprintf(url, MAXPATHLEN, "%s/Latest/pkg.txz", packagesite); snprintf(tmppkg, MAXPATHLEN, "%s/pkg.txz.XXXXXX", getenv("TMPDIR") ? getenv("TMPDIR") : _PATH_TMP); if ((fd_pkg = fetch_to_fd(url, tmppkg)) == -1) goto fetchfail; if (signature_type != NULL && strcasecmp(signature_type, "NONE") != 0) { if (strcasecmp(signature_type, "FINGERPRINTS") == 0) { snprintf(tmpsig, MAXPATHLEN, "%s/pkg.txz.sig.XXXXXX", getenv("TMPDIR") ? getenv("TMPDIR") : _PATH_TMP); snprintf(url, MAXPATHLEN, "%s/Latest/pkg.txz.sig", packagesite); if ((fd_sig = fetch_to_fd(url, tmpsig)) == -1) { fprintf(stderr, "Signature for pkg not " "available.\n"); goto fetchfail; } if (verify_signature(fd_pkg, fd_sig) == false) goto cleanup; } else if (strcasecmp(signature_type, "PUBKEY") == 0) { snprintf(tmpsig, MAXPATHLEN, "%s/pkg.txz.pubkeysig.XXXXXX", getenv("TMPDIR") ? 
getenv("TMPDIR") : _PATH_TMP); snprintf(url, MAXPATHLEN, "%s/Latest/pkg.txz.pubkeysig", packagesite); if ((fd_sig = fetch_to_fd(url, tmpsig)) == -1) { fprintf(stderr, "Signature for pkg not " "available.\n"); goto fetchfail; } if (verify_pubsignature(fd_pkg, fd_sig) == false) goto cleanup; } else { warnx("Signature type %s is not supported for " "bootstrapping.", signature_type); goto cleanup; } } if ((ret = extract_pkg_static(fd_pkg, pkgstatic, MAXPATHLEN)) == 0) ret = install_pkg_static(pkgstatic, tmppkg, force); goto cleanup; fetchfail: warnx("Error fetching %s: %s", url, fetchLastErrString); fprintf(stderr, "A pre-built version of pkg could not be found for " "your system.\n"); fprintf(stderr, "Consider changing PACKAGESITE or installing it from " "ports: 'ports-mgmt/pkg'.\n"); cleanup: if (fd_sig != -1) { close(fd_sig); unlink(tmpsig); } if (fd_pkg != -1) { close(fd_pkg); unlink(tmppkg); } return (ret); } static const char confirmation_message[] = "The package management tool is not yet installed on your system.\n" "Do you want to fetch and install it now? [y/N]: "; static const char non_interactive_message[] = "The package management tool is not yet installed on your system.\n" "Please set ASSUME_ALWAYS_YES=yes environment variable to be able to bootstrap " "in non-interactive (stdin not being a tty)\n"; static int pkg_query_yes_no(void) { int ret, c; c = getchar(); if (c == 'y' || c == 'Y') ret = 1; else ret = 0; while (c != '\n' && c != EOF) c = getchar(); return (ret); } static int bootstrap_pkg_local(const char *pkgpath, bool force) { char path[MAXPATHLEN]; char pkgstatic[MAXPATHLEN]; const char *signature_type; int fd_pkg, fd_sig, ret; fd_sig = -1; ret = -1; fd_pkg = open(pkgpath, O_RDONLY); if (fd_pkg == -1) err(EXIT_FAILURE, "Unable to open %s", pkgpath); if (config_string(SIGNATURE_TYPE, &signature_type) != 0) { warnx("Error looking up SIGNATURE_TYPE"); goto cleanup; } if (signature_type != NULL && strcasecmp(signature_type, "NONE") != 0) { if (strcasecmp(signature_type, "FINGERPRINTS") == 0) { snprintf(path, sizeof(path), "%s.sig", pkgpath); if ((fd_sig = open(path, O_RDONLY)) == -1) { fprintf(stderr, "Signature for pkg not " "available.\n"); goto cleanup; } if (verify_signature(fd_pkg, fd_sig) == false) goto cleanup; } else if (strcasecmp(signature_type, "PUBKEY") == 0) { snprintf(path, sizeof(path), "%s.pubkeysig", pkgpath); if ((fd_sig = open(path, O_RDONLY)) == -1) { fprintf(stderr, "Signature for pkg not " "available.\n"); goto cleanup; } if (verify_pubsignature(fd_pkg, fd_sig) == false) goto cleanup; } else { warnx("Signature type %s is not supported for " "bootstrapping.", signature_type); goto cleanup; } } if ((ret = extract_pkg_static(fd_pkg, pkgstatic, MAXPATHLEN)) == 0) ret = install_pkg_static(pkgstatic, pkgpath, force); cleanup: close(fd_pkg); if (fd_sig != -1) close(fd_sig); return (ret); } int main(int argc, char *argv[]) { char pkgpath[MAXPATHLEN]; const char *pkgarg; bool bootstrap_only, force, yes; bootstrap_only = false; force = false; pkgarg = NULL; yes = false; snprintf(pkgpath, MAXPATHLEN, "%s/sbin/pkg", getenv("LOCALBASE") ? getenv("LOCALBASE") : _LOCALBASE); if (argc > 1 && strcmp(argv[1], "bootstrap") == 0) { bootstrap_only = true; if (argc == 3 && strcmp(argv[2], "-f") == 0) force = true; } if ((bootstrap_only && force) || access(pkgpath, X_OK) == -1) { /* * To allow 'pkg -N' to be used as a reliable test for whether * a system is configured to use pkg, don't bootstrap pkg * when that argument is given as argv[1]. 
*/ if (argv[1] != NULL && strcmp(argv[1], "-N") == 0) errx(EXIT_FAILURE, "pkg is not installed"); config_init(); if (argc > 1 && strcmp(argv[1], "add") == 0) { if (argc > 2 && strcmp(argv[2], "-f") == 0) { force = true; pkgarg = argv[3]; } else pkgarg = argv[2]; if (pkgarg == NULL) { fprintf(stderr, "Path to pkg.txz required\n"); exit(EXIT_FAILURE); } if (access(pkgarg, R_OK) == -1) { fprintf(stderr, "No such file: %s\n", pkgarg); exit(EXIT_FAILURE); } if (bootstrap_pkg_local(pkgarg, force) != 0) exit(EXIT_FAILURE); exit(EXIT_SUCCESS); } /* * Do not ask for confirmation if either of stdin or stdout is * not tty. Check the environment to see if user has answer * tucked in there already. */ config_bool(ASSUME_ALWAYS_YES, &yes); if (!yes) { if (!isatty(fileno(stdin))) { fprintf(stderr, non_interactive_message); exit(EXIT_FAILURE); } printf("%s", confirmation_message); if (pkg_query_yes_no() == 0) exit(EXIT_FAILURE); } if (bootstrap_pkg(force) != 0) exit(EXIT_FAILURE); config_finish(); if (bootstrap_only) exit(EXIT_SUCCESS); } else if (bootstrap_only) { printf("pkg already bootstrapped at %s\n", pkgpath); exit(EXIT_SUCCESS); } execv(pkgpath, argv); /* NOT REACHED */ return (EXIT_FAILURE); } Index: user/alc/PQ_LAUNDRY/usr.sbin/pw/pw.h =================================================================== --- user/alc/PQ_LAUNDRY/usr.sbin/pw/pw.h (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.sbin/pw/pw.h (revision 303642) @@ -1,107 +1,106 @@ /*- * Copyright (C) 1996 * David L. Nugent. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY DAVID L. NUGENT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL DAVID L. NUGENT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include -#define _WITH_GETLINE #include #include #include #include "pwupd.h" enum _mode { M_ADD, M_DELETE, M_UPDATE, M_PRINT, M_NEXT, M_LOCK, M_UNLOCK, M_NUM }; enum _which { W_USER, W_GROUP, W_NUM }; #define _DEF_DIRMODE (S_IRWXU | S_IRWXG | S_IRWXO) #define _PATH_PW_CONF "/etc/pw.conf" #define _UC_MAXLINE 1024 #define _UC_MAXSHELLS 32 struct userconf *get_userconfig(const char *cfg); struct userconf *read_userconfig(char const * file); int write_userconfig(struct userconf *cnf, char const * file); int pw_group_add(int argc, char **argv, char *name); int pw_group_del(int argc, char **argv, char *name); int pw_group_mod(int argc, char **argv, char *name); int pw_group_next(int argc, char **argv, char *name); int pw_group_show(int argc, char **argv, char *name); int pw_user_add(int argc, char **argv, char *name); int pw_user_del(int argc, char **argv, char *name); int pw_user_lock(int argc, char **argv, char *name); int pw_user_mod(int argc, char **argv, char *name); int pw_user_next(int argc, char **argv, char *name); int pw_user_show(int argc, char **argv, char *name); int pw_user_unlock(int argc, char **argv, char *name); int pw_groupnext(struct userconf *cnf, bool quiet); char *pw_checkname(char *name, int gecos); uintmax_t pw_checkid(char *nptr, uintmax_t maxval); int pw_checkfd(char *nptr); int addnispwent(const char *path, struct passwd *pwd); int delnispwent(const char *path, const char *login); int chgnispwent(const char *path, const char *login, struct passwd *pwd); int groupadd(struct userconf *, char *name, gid_t id, char *members, int fd, bool dryrun, bool pretty, bool precrypted); int nis_update(void); int boolean_val(char const * str, int dflt); int passwd_val(char const * str, int dflt); char const *boolean_str(int val); char *newstr(char const * p); void pw_log(struct userconf * cnf, int mode, int which, char const * fmt,...) __printflike(4, 5); char *pw_pwcrypt(char *password); extern const char *Modes[]; extern const char *Which[]; uintmax_t strtounum(const char * __restrict, uintmax_t, uintmax_t, const char ** __restrict); Index: user/alc/PQ_LAUNDRY/usr.sbin/pw/pw_vpw.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.sbin/pw/pw_vpw.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.sbin/pw/pw_vpw.c (revision 303642) @@ -1,204 +1,200 @@ /*- * Copyright (C) 1996 * David L. Nugent. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY DAVID L. NUGENT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL DAVID L.
NUGENT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #ifndef lint static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include #include #include -#define _WITH_GETLINE #include #include #include #include #include "pwupd.h" static FILE * pwd_fp = NULL; void vendpwent(void) { if (pwd_fp != NULL) { fclose(pwd_fp); pwd_fp = NULL; } } void vsetpwent(void) { vendpwent(); } static struct passwd * vnextpwent(char const *nam, uid_t uid, int doclose) { struct passwd *pw; char *line; size_t linecap; ssize_t linelen; pw = NULL; line = NULL; linecap = 0; if (pwd_fp != NULL || (pwd_fp = fopen(getpwpath(_MASTERPASSWD), "r")) != NULL) { while ((linelen = getline(&line, &linecap, pwd_fp)) > 0) { /* Skip comments and empty lines */ if (*line == '\n' || *line == '#') continue; /* trim latest \n */ if (line[linelen - 1 ] == '\n') line[linelen - 1] = '\0'; pw = pw_scan(line, PWSCAN_MASTER); if (pw == NULL) errx(EXIT_FAILURE, "Invalid user entry in '%s':" " '%s'", getpwpath(_MASTERPASSWD), line); if (uid != (uid_t)-1) { if (uid == pw->pw_uid) break; } else if (nam != NULL) { if (strcmp(nam, pw->pw_name) == 0) break; } else break; free(pw); pw = NULL; } if (doclose) vendpwent(); } free(line); return (pw); } struct passwd * vgetpwent(void) { return vnextpwent(NULL, -1, 0); } struct passwd * vgetpwuid(uid_t uid) { return vnextpwent(NULL, uid, 1); } struct passwd * vgetpwnam(const char * nam) { return vnextpwent(nam, -1, 1); } static FILE * grp_fp = NULL; void vendgrent(void) { if (grp_fp != NULL) { fclose(grp_fp); grp_fp = NULL; } } -RET_SETGRENT +void vsetgrent(void) { vendgrent(); -#if defined(__FreeBSD__) - return 0; -#endif } static struct group * vnextgrent(char const *nam, gid_t gid, int doclose) { struct group *gr; char *line; size_t linecap; ssize_t linelen; gr = NULL; line = NULL; linecap = 0; if (grp_fp != NULL || (grp_fp = fopen(getgrpath(_GROUP), "r")) != NULL) { while ((linelen = getline(&line, &linecap, grp_fp)) > 0) { /* Skip comments and empty lines */ if (*line == '\n' || *line == '#') continue; /* trim latest \n */ if (line[linelen - 1 ] == '\n') line[linelen - 1] = '\0'; gr = gr_scan(line); if (gr == NULL) errx(EXIT_FAILURE, "Invalid group entry in '%s':" " '%s'", getgrpath(_GROUP), line); if (gid != (gid_t)-1) { if (gid == gr->gr_gid) break; } else if (nam != NULL) { if (strcmp(nam, gr->gr_name) == 0) break; } else break; free(gr); gr = NULL; } if (doclose) vendgrent(); } free(line); return (gr); } struct group * vgetgrent(void) { return vnextgrent(NULL, -1, 0); } struct group * vgetgrgid(gid_t gid) { return vnextgrent(NULL, gid, 1); } struct group * vgetgrnam(const char * nam) { return vnextgrent(nam, -1, 1); } Index: user/alc/PQ_LAUNDRY/usr.sbin/pw/pwupd.h =================================================================== --- user/alc/PQ_LAUNDRY/usr.sbin/pw/pwupd.h (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.sbin/pw/pwupd.h (revision 303642) @@ -1,152 +1,146 @@ /*- * Copyright (C) 1996 * David L. Nugent. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY DAVID L. NUGENT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL DAVID L. NUGENT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _PWUPD_H_ #define _PWUPD_H_ #include #include #include #include #include #include #include -#if defined(__FreeBSD__) -#define RET_SETGRENT int -#else -#define RET_SETGRENT void -#endif - struct pwf { int _altdir; void (*_setpwent)(void); void (*_endpwent)(void); struct passwd * (*_getpwent)(void); struct passwd * (*_getpwuid)(uid_t uid); struct passwd * (*_getpwnam)(const char * nam); - RET_SETGRENT (*_setgrent)(void); + void (*_setgrent)(void); void (*_endgrent)(void); struct group * (*_getgrent)(void); struct group * (*_getgrgid)(gid_t gid); struct group * (*_getgrnam)(const char * nam); }; struct userconf { int default_password; /* Default password for new users? */ int reuse_uids; /* Reuse uids? */ int reuse_gids; /* Reuse gids? 
*/ char *nispasswd; /* Path to NIS version of the passwd file */ char *dotdir; /* Where to obtain skeleton files */ char *newmail; /* Mail to send to new accounts */ char *logfile; /* Where to log changes */ char *home; /* Where to create home directory */ mode_t homemode; /* Home directory permissions */ char *shelldir; /* Where shells are located */ char **shells; /* List of shells */ char *shell_default; /* Default shell */ char *default_group; /* Default group number */ StringList *groups; /* Default (additional) groups */ char *default_class; /* Default user class */ uid_t min_uid, max_uid; /* Allowed range of uids */ gid_t min_gid, max_gid; /* Allowed range of gids */ time_t expire_days; /* Days to expiry */ time_t password_days; /* Days to password expiry */ }; struct pwconf { char rootdir[MAXPATHLEN]; char etcpath[MAXPATHLEN]; int fd; int rootfd; bool checkduplicate; }; extern struct pwf PWF; extern struct pwf VPWF; extern struct pwconf conf; #define SETPWENT() PWF._setpwent() #define ENDPWENT() PWF._endpwent() #define GETPWENT() PWF._getpwent() #define GETPWUID(uid) PWF._getpwuid(uid) #define GETPWNAM(nam) PWF._getpwnam(nam) #define SETGRENT() PWF._setgrent() #define ENDGRENT() PWF._endgrent() #define GETGRENT() PWF._getgrent() #define GETGRGID(gid) PWF._getgrgid(gid) #define GETGRNAM(nam) PWF._getgrnam(nam) #define PWF_REGULAR 0 #define PWF_ALT 1 #define PWF_ROOTDIR 2 #define PWALTDIR() PWF._altdir #ifndef _PATH_PWD #define _PATH_PWD "/etc" #endif #ifndef _GROUP #define _GROUP "group" #endif #ifndef _MASTERPASSWD #define _MASTERPASSWD "master.passwd" #endif __BEGIN_DECLS int addpwent(struct passwd * pwd); int delpwent(struct passwd * pwd); int chgpwent(char const * login, struct passwd * pwd); char * getpwpath(char const * file); int addgrent(struct group * grp); int delgrent(struct group * grp); int chggrent(char const * name, struct group * grp); char * getgrpath(const char *file); void vsetpwent(void); void vendpwent(void); struct passwd * vgetpwent(void); struct passwd * vgetpwuid(uid_t uid); struct passwd * vgetpwnam(const char * nam); struct group * vgetgrent(void); struct group * vgetgrgid(gid_t gid); struct group * vgetgrnam(const char * nam); -RET_SETGRENT vsetgrent(void); +void vsetgrent(void); void vendgrent(void); void copymkdir(int rootfd, char const * dir, int skelfd, mode_t mode, uid_t uid, gid_t gid, int flags); void rm_r(int rootfd, char const * dir, uid_t uid); __END_DECLS #endif /* !_PWUPD_H */ Index: user/alc/PQ_LAUNDRY/usr.sbin/services_mkdb/services_mkdb.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.sbin/services_mkdb/services_mkdb.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.sbin/services_mkdb/services_mkdb.c (revision 303642) @@ -1,463 +1,462 @@ /* $NetBSD: services_mkdb.c,v 1.14 2008/04/28 20:24:17 martin Exp $ */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Luke Mewburn and Christos Zoulas. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include #include #include #include #include "extern.h" static char tname[MAXPATHLEN]; #define PMASK 0xffff #define PROTOMAX 5 static void add(DB *, StringList *, size_t, const char *, size_t *, int); static StringList ***parseservices(const char *, StringList *); static void cleanup(void); static void store(DB *, DBT *, DBT *, int); static void killproto(DBT *); static char *getstring(const char *, size_t, char **, const char *); static size_t getprotoindex(StringList *, const char *); static const char *getprotostr(StringList *, size_t); static const char *mkaliases(StringList *, char *, size_t); static void usage(void); HASHINFO hinfo = { .bsize = 256, .ffactor = 4, .nelem = 32768, .cachesize = 1024, .hash = NULL, .lorder = 0 }; int main(int argc, char *argv[]) { DB *db; int ch; const char *fname = _PATH_SERVICES; const char *dbname = _PATH_SERVICES_DB; int warndup = 1; int unique = 0; int otherflag = 0; int byteorder = 0; size_t cnt = 0; StringList *sl, ***svc; size_t port, proto; char *dbname_dir, *dbname_dirbuf; int dbname_dir_fd = -1; setprogname(argv[0]); while ((ch = getopt(argc, argv, "blo:qu")) != -1) switch (ch) { case 'b': case 'l': if (byteorder != 0) usage(); byteorder = ch == 'b' ? 4321 : 1234; break; case 'q': otherflag = 1; warndup = 0; break; case 'o': otherflag = 1; dbname = optarg; break; case 'u': unique++; break; case '?': default: usage(); } argc -= optind; argv += optind; if (argc > 1 || (unique && otherflag)) usage(); if (argc == 1) fname = argv[0]; /* Set byte order. */ hinfo.lorder = byteorder; if (unique) uniq(fname); svc = parseservices(fname, sl = sl_init()); if (atexit(cleanup)) err(1, "Cannot install exit handler"); (void)snprintf(tname, sizeof(tname), "%s.tmp", dbname); db = dbopen(tname, O_RDWR | O_CREAT | O_EXCL, (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH), DB_HASH, &hinfo); if (!db) err(1, "Error opening temporary database `%s'", tname); for (port = 0; port < PMASK + 1; port++) { if (svc[port] == NULL) continue; for (proto = 0; proto < PROTOMAX; proto++) { StringList *s; if ((s = svc[port][proto]) == NULL) continue; add(db, s, port, getprotostr(sl, proto), &cnt, warndup); } free(svc[port]); } free(svc); sl_free(sl, 1); if ((db->close)(db)) err(1, "Error closing temporary database `%s'", tname); /* * Make sure file is safe on disk. 
To improve performance we will call * fsync() to the directory where file lies */ if (rename(tname, dbname) == -1 || (dbname_dirbuf = strdup(dbname)) == NULL || (dbname_dir = dirname(dbname_dirbuf)) == NULL || (dbname_dir_fd = open(dbname_dir, O_RDONLY|O_DIRECTORY)) == -1 || fsync(dbname_dir_fd) != 0) { if (dbname_dir_fd != -1) close(dbname_dir_fd); err(1, "Cannot rename `%s' to `%s'", tname, dbname); } if (dbname_dir_fd != -1) close(dbname_dir_fd); return 0; } static void add(DB *db, StringList *sl, size_t port, const char *proto, size_t *cnt, int warndup) { size_t i; char keyb[BUFSIZ], datab[BUFSIZ], abuf[BUFSIZ]; DBT data, key; key.data = keyb; data.data = datab; #ifdef DEBUG (void)printf("add %s %zu %s [ ", sl->sl_str[0], port, proto); for (i = 1; i < sl->sl_cur; i++) (void)printf("%s ", sl->sl_str[i]); (void)printf("]\n"); #endif /* key `indirect key', data `full line' */ data.size = snprintf(datab, sizeof(datab), "%zu", (*cnt)++) + 1; key.size = snprintf(keyb, sizeof(keyb), "%s %zu/%s %s", sl->sl_str[0], port, proto, mkaliases(sl, abuf, sizeof(abuf))) + 1; store(db, &data, &key, warndup); /* key `\377port/proto', data = `indirect key' */ key.size = snprintf(keyb, sizeof(keyb), "\377%zu/%s", port, proto) + 1; store(db, &key, &data, warndup); /* key `\377port', data = `indirect key' */ killproto(&key); store(db, &key, &data, warndup); /* add references for service and all aliases */ for (i = 0; i < sl->sl_cur; i++) { /* key `\376service/proto', data = `indirect key' */ key.size = snprintf(keyb, sizeof(keyb), "\376%s/%s", sl->sl_str[i], proto) + 1; store(db, &key, &data, warndup); /* key `\376service', data = `indirect key' */ killproto(&key); store(db, &key, &data, warndup); } sl_free(sl, 1); } static StringList *** parseservices(const char *fname, StringList *sl) { ssize_t len; size_t linecap, line, pindex; FILE *fp; StringList ***svc, *s; char *p, *ep; if ((fp = fopen(fname, "r")) == NULL) err(1, "Cannot open `%s'", fname); line = linecap = 0; if ((svc = calloc(PMASK + 1, sizeof(StringList **))) == NULL) err(1, "Cannot allocate %zu bytes", (size_t)(PMASK + 1)); p = NULL; while ((len = getline(&p, &linecap, fp)) != -1) { char *name, *port, *proto, *aliases, *cp, *alias; unsigned long pnum; line++; if (len == 0) continue; if (p[len - 1] == '\n') p[len - 1] = '\0'; for (cp = p; *cp && isspace((unsigned char)*cp); cp++) continue; if (*cp == '\0' || *cp == '#') continue; if ((name = getstring(fname, line, &cp, "name")) == NULL) continue; if ((port = getstring(fname, line, &cp, "port")) == NULL) continue; if (cp) { for (aliases = cp; *cp && *cp != '#'; cp++) continue; if (*cp) *cp = '\0'; } else aliases = NULL; proto = strchr(port, '/'); if (proto == NULL || proto[1] == '\0') { warnx("%s, %zu: no protocol found", fname, line); continue; } *proto++ = '\0'; errno = 0; pnum = strtoul(port, &ep, 0); if (*port == '\0' || *ep != '\0') { warnx("%s, %zu: invalid port `%s'", fname, line, port); continue; } if ((errno == ERANGE && pnum == ULONG_MAX) || pnum > PMASK) { warnx("%s, %zu: port too big `%s'", fname, line, port); continue; } if (svc[pnum] == NULL) { svc[pnum] = calloc(PROTOMAX, sizeof(StringList *)); if (svc[pnum] == NULL) err(1, "Cannot allocate %zu bytes", (size_t)PROTOMAX); } pindex = getprotoindex(sl, proto); if (svc[pnum][pindex] == NULL) s = svc[pnum][pindex] = sl_init(); else s = svc[pnum][pindex]; /* build list of aliases */ if (sl_find(s, name) == NULL) { char *p2; if ((p2 = strdup(name)) == NULL) err(1, "Cannot copy string"); (void)sl_add(s, p2); } if (aliases) { while ((alias = 
strsep(&aliases, " \t")) != NULL) { if (alias[0] == '\0') continue; if (sl_find(s, alias) == NULL) { char *p2; if ((p2 = strdup(alias)) == NULL) err(1, "Cannot copy string"); (void)sl_add(s, p2); } } } } (void)fclose(fp); return svc; } /* * cleanup(): Remove temporary files upon exit */ static void cleanup(void) { if (tname[0]) (void)unlink(tname); } static char * getstring(const char *fname, size_t line, char **cp, const char *tag) { char *str; while ((str = strsep(cp, " \t")) != NULL && *str == '\0') continue; if (str == NULL) warnx("%s, %zu: no %s found", fname, line, tag); return str; } static void killproto(DBT *key) { char *p, *d = key->data; if ((p = strchr(d, '/')) == NULL) abort(); *p++ = '\0'; key->size = p - d; } static void store(DB *db, DBT *key, DBT *data, int warndup) { #ifdef DEBUG int k = key->size - 1; int d = data->size - 1; (void)printf("store [%*.*s] [%*.*s]\n", k, k, (char *)key->data + 1, d, d, (char *)data->data + 1); #endif switch ((db->put)(db, key, data, R_NOOVERWRITE)) { case 0: break; case 1: if (warndup) warnx("duplicate service `%s'", &((char *)key->data)[1]); break; case -1: err(1, "put"); break; default: abort(); break; } } static size_t getprotoindex(StringList *sl, const char *str) { size_t i; char *p; for (i= 0; i < sl->sl_cur; i++) if (strcmp(sl->sl_str[i], str) == 0) return i; if (i == PROTOMAX) errx(1, "Ran out of protocols adding `%s';" " recompile with larger PROTOMAX", str); if ((p = strdup(str)) == NULL) err(1, "Cannot copy string"); (void)sl_add(sl, p); return i; } static const char * getprotostr(StringList *sl, size_t i) { assert(i < sl->sl_cur); return sl->sl_str[i]; } static const char * mkaliases(StringList *sl, char *buf, size_t len) { size_t nc, i, pos; buf[0] = 0; for (i = 1, pos = 0; i < sl->sl_cur; i++) { nc = strlcpy(buf + pos, sl->sl_str[i], len); if (nc >= len) goto out; pos += nc; len -= nc; nc = strlcpy(buf + pos, " ", len); if (nc >= len) goto out; pos += nc; len -= nc; } return buf; out: warn("aliases for `%s' truncated", sl->sl_str[0]); return buf; } static void usage(void) { (void)fprintf(stderr, "Usage:\t%s [-b | -l] [-q] [-o ] []\n" "\t%s -u []\n", getprogname(), getprogname()); exit(1); } Index: user/alc/PQ_LAUNDRY/usr.sbin/uathload/uathload.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.sbin/uathload/uathload.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.sbin/uathload/uathload.c (revision 303642) @@ -1,241 +1,241 @@ /*- * Copyright (c) 2006 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. * * $FreeBSD$ */ /* * Atheros AR5523 USB Station Firmware downloader. * * uathload -d ugen-device [firmware-file] * * Intended to be called from devd on device discovery. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* all fields are big endian */ struct uath_fwmsg { uint32_t flags; #define UATH_WRITE_BLOCK (1 << 4) uint32_t len; #define UATH_MAX_FWBLOCK_SIZE 2048 uint32_t total; uint32_t remain; uint32_t rxtotal; uint32_t pad[123]; } __packed; #define UATH_DATA_TIMEOUT 10000 #define UATH_CMD_TIMEOUT 1000 #define VERBOSE(_fmt, ...) do { \ if (verbose) { \ printf(_fmt, __VA_ARGS__); \ fflush(stdout); \ } \ } while (0) extern uint8_t _binary_ar5523_bin_start; extern uint8_t _binary_ar5523_bin_end; static int -getdevname(const char *devname, char *msgdev, char *datadev) +getdevname(const char *udevname, char *msgdev, char *datadev) { char *bn, *bnbuf, *dn, *dnbuf; - dnbuf = strdup(devname); + dnbuf = strdup(udevname); if (dnbuf == NULL) return (-1); dn = dirname(dnbuf); - bnbuf = strdup(devname); + bnbuf = strdup(udevname); if (bnbuf == NULL) { free(dnbuf); return (-1); } bn = basename(bnbuf); if (strncmp(bn, "ugen", 4) != 0) { free(dnbuf); free(bnbuf); return (-1); } bn += 4; /* NB: pipes are hardcoded */ snprintf(msgdev, 256, "%s/usb/%s.1", dn, bn); snprintf(datadev, 256, "%s/usb/%s.2", dn, bn); free(dnbuf); free(bnbuf); return (0); } static void usage(void) { errx(-1, "usage: uathload [-v] -d devname [firmware]"); } int main(int argc, char *argv[]) { - const char *fwname, *devname; + const char *fwname, *udevname; char msgdev[256], datadev[256]; struct uath_fwmsg txmsg, rxmsg; char *txdata; struct stat sb; int msg, data, fw, timeout, b, c; int bufsize = 512, verbose = 0; ssize_t len; - devname = NULL; + udevname = NULL; while ((c = getopt(argc, argv, "d:v")) != -1) { switch (c) { case 'd': - devname = optarg; + udevname = optarg; break; case 'v': verbose = 1; break; default: usage(); /*NOTREACHED*/ } } argc -= optind; argv += optind; - if (devname == NULL) + if (udevname == NULL) errx(-1, "No device name; use -d to specify the ugen device"); if (argc > 1) usage(); if (argc == 1) fwname = argv[0]; else fwname = _PATH_FIRMWARE "/ar5523.bin"; fw = open(fwname, O_RDONLY, 0); if (fw < 0) err(-1, "open(%s)", fwname); if (fstat(fw, &sb) < 0) err(-1, "fstat(%s)", fwname); txdata = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fw, 0); if (txdata == MAP_FAILED) err(-1, "mmap(%s)", fwname); len = sb.st_size; /* XXX verify device is an AR5005 part */ - if (getdevname(devname, msgdev, datadev)) + if (getdevname(udevname, msgdev, datadev)) err(-1, "getdevname error"); msg = open(msgdev, O_RDWR, 0); if (msg < 0) err(-1, "open(%s)", msgdev); timeout = UATH_DATA_TIMEOUT; if (ioctl(msg, USB_SET_RX_TIMEOUT, &timeout) < 0) err(-1, "%s: USB_SET_RX_TIMEOUT(%u)", msgdev, UATH_DATA_TIMEOUT); if (ioctl(msg, USB_SET_RX_BUFFER_SIZE, &bufsize) < 0) err(-1, "%s: USB_SET_RX_BUFFER_SIZE(%u)", msgdev, bufsize); data = open(datadev, 
O_WRONLY, 0); if (data < 0) err(-1, "open(%s)", datadev); timeout = UATH_DATA_TIMEOUT; if (ioctl(data, USB_SET_TX_TIMEOUT, &timeout) < 0) err(-1, "%s: USB_SET_TX_TIMEOUT(%u)", datadev, UATH_DATA_TIMEOUT); - VERBOSE("Load firmware %s to %s\n", fwname, devname); + VERBOSE("Load firmware %s to %s\n", fwname, udevname); bzero(&txmsg, sizeof (struct uath_fwmsg)); txmsg.flags = htobe32(UATH_WRITE_BLOCK); txmsg.total = htobe32(len); b = 0; while (len > 0) { int mlen; mlen = len; if (mlen > UATH_MAX_FWBLOCK_SIZE) mlen = UATH_MAX_FWBLOCK_SIZE; txmsg.remain = htobe32(len - mlen); txmsg.len = htobe32(mlen); /* send firmware block meta-data */ VERBOSE("send block %2u: %zd bytes remaining", b, len - mlen); if (write(msg, &txmsg, sizeof(txmsg)) != sizeof(txmsg)) { VERBOSE("%s", "\n"); err(-1, "error sending msg (%s)", msgdev); break; } /* send firmware block data */ VERBOSE("%s", "\n : data..."); if (write(data, txdata, mlen) != mlen) { VERBOSE("%s", "\n"); err(-1, "error sending data (%s)", datadev); break; } /* wait for ack from firmware */ VERBOSE("%s", "\n : wait for ack..."); bzero(&rxmsg, sizeof(rxmsg)); if (read(msg, &rxmsg, sizeof(rxmsg)) != sizeof(rxmsg)) { VERBOSE("%s", "\n"); err(-1, "error reading msg (%s)", msgdev); break; } VERBOSE("flags=0x%x total=%d\n", be32toh(rxmsg.flags), be32toh(rxmsg.rxtotal)); len -= mlen; txdata += mlen; b++; } sleep(1); close(fw); close(msg); close(data); return 0; } Index: user/alc/PQ_LAUNDRY =================================================================== --- user/alc/PQ_LAUNDRY (revision 303641) +++ user/alc/PQ_LAUNDRY (revision 303642) Property changes on: user/alc/PQ_LAUNDRY ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r303517-303641