Index: projects/release-pkg/Makefile.inc1 =================================================================== --- projects/release-pkg/Makefile.inc1 (revision 293224) +++ projects/release-pkg/Makefile.inc1 (revision 293225) @@ -1,2525 +1,2541 @@ # # $FreeBSD$ # # Make command line options: # -DNO_CLEANDIR run ${MAKE} clean, instead of ${MAKE} cleandir # -DNO_CLEAN do not clean at all # -DDB_FROM_SRC use the user/group databases in src/etc instead of # the system database when installing. # -DNO_SHARE do not go into share subdir # -DKERNFAST define NO_KERNEL{CONFIG,CLEAN,DEPEND,OBJ} # -DNO_KERNELCONFIG do not run config in ${MAKE} buildkernel # -DNO_KERNELCLEAN do not run ${MAKE} clean in ${MAKE} buildkernel # -DNO_KERNELDEPEND do not run ${MAKE} depend in ${MAKE} buildkernel # -DNO_KERNELOBJ do not run ${MAKE} obj in ${MAKE} buildkernel # -DNO_PORTSUPDATE do not update ports in ${MAKE} update # -DNO_ROOT install without using root privilege # -DNO_DOCUPDATE do not update doc in ${MAKE} update # -DWITHOUT_CTF do not run the DTrace CTF conversion tools on built objects # LOCAL_DIRS="list of dirs" to add additional dirs to the SUBDIR list # LOCAL_ITOOLS="list of tools" to add additional tools to the ITOOLS list # LOCAL_LIB_DIRS="list of dirs" to add additional dirs to libraries target # LOCAL_MTREE="list of mtree files" to process to allow local directories # to be created before files are installed # LOCAL_TOOL_DIRS="list of dirs" to add additional dirs to the build-tools # list # METALOG="path to metadata log" to write permission and ownership # when NO_ROOT is set. (default: ${DESTDIR}/METALOG) # TARGET="machine" to crossbuild world for a different machine type # TARGET_ARCH= may be required when a TARGET supports multiple endians # BUILDENV_SHELL= shell to launch for the buildenv target (def:${SHELL}) # WORLD_FLAGS= additional flags to pass to make(1) during buildworld # KERNEL_FLAGS= additional flags to pass to make(1) during buildkernel # SUBDIR_OVERRIDE="list of dirs" to build rather than everything. # All libraries and includes, and some build tools will still build. # # The intended user-driven targets are: # buildworld - rebuild *everything*, including glue to help do upgrades # installworld- install everything built by "buildworld" # doxygen - build API documentation of the kernel # update - convenient way to update your source tree (eg: svn/svnup) # # Standard targets (not defined here) are documented in the makefiles in # /usr/share/mk. These include: # obj depend all install clean cleandepend cleanobj .if !defined(TARGET) || !defined(TARGET_ARCH) .error "Both TARGET and TARGET_ARCH must be defined." .endif LOCALBASE?= /usr/local # Cross toolchain changes must be in effect before bsd.compiler.mk # so that gets the right CC, and pass CROSS_TOOLCHAIN to submakes. .if defined(CROSS_TOOLCHAIN) .include "${LOCALBASE}/share/toolchains/${CROSS_TOOLCHAIN}.mk" CROSSENV+=CROSS_TOOLCHAIN="${CROSS_TOOLCHAIN}" .endif .include # don't depend on src.opts.mk doing it .include "share/mk/src.opts.mk" # We must do lib/ and libexec/ before bin/ in case of a mid-install error to # keep the users system reasonably usable. For static->dynamic root upgrades, # we don't want to install a dynamic binary without rtld and the needed # libraries. More commonly, for dynamic root, we don't want to install a # binary that requires a newer library version that hasn't been installed yet. # This ordering is not a guarantee though. 
The only guarantee of a working # system here would require fine-grained ordering of all components based # on their dependencies. SRCDIR?= ${.CURDIR} .if !empty(SUBDIR_OVERRIDE) SUBDIR= ${SUBDIR_OVERRIDE} .else SUBDIR= lib libexec .if make(install*) # Ensure libraries are installed before progressing. SUBDIR+=.WAIT .endif SUBDIR+=bin .if ${MK_CDDL} != "no" SUBDIR+=cddl .endif SUBDIR+=gnu include .if ${MK_KERBEROS} != "no" SUBDIR+=kerberos5 .endif .if ${MK_RESCUE} != "no" SUBDIR+=rescue .endif SUBDIR+=sbin .if ${MK_CRYPT} != "no" SUBDIR+=secure .endif .if !defined(NO_SHARE) SUBDIR+=share .endif SUBDIR+=sys usr.bin usr.sbin .if ${MK_TESTS} != "no" SUBDIR+= tests .endif .if ${MK_OFED} != "no" SUBDIR+=contrib/ofed .endif # Local directories are last, since it is nice to at least get the base # system rebuilt before you do them. .for _DIR in ${LOCAL_DIRS} .if exists(${.CURDIR}/${_DIR}/Makefile) SUBDIR+= ${_DIR} .endif .endfor # Add LOCAL_LIB_DIRS, but only if they will not be picked up as a SUBDIR # of a LOCAL_DIRS directory. This allows LOCAL_DIRS=foo and # LOCAL_LIB_DIRS=foo/lib to behave as expected. .for _DIR in ${LOCAL_DIRS:M*/} ${LOCAL_DIRS:N*/:S|$|/|} _REDUNDENT_LIB_DIRS+= ${LOCAL_LIB_DIRS:M${_DIR}*} .endfor .for _DIR in ${LOCAL_LIB_DIRS} .if empty(_REDUNDENT_LIB_DIRS:M${_DIR}) && exists(${.CURDIR}/${_DIR}/Makefile) SUBDIR+= ${_DIR} .else .warning ${_DIR} not added to SUBDIR list. See UPDATING 20141121. .endif .endfor # We must do etc/ last as it hooks into building the man whatis file # by calling 'makedb' in share/man. This is only relevant for # install/distribute so they build the whatis file after every manpage is # installed. .if make(install*) SUBDIR+=.WAIT .endif SUBDIR+=etc .endif # !empty(SUBDIR_OVERRIDE) .if defined(NOCLEAN) .warning NOCLEAN option is deprecated. Use NO_CLEAN instead. NO_CLEAN= ${NOCLEAN} .endif .if defined(NO_CLEANDIR) CLEANDIR= clean cleandepend .else CLEANDIR= cleandir .endif LOCAL_TOOL_DIRS?= PACKAGEDIR?= ${DESTDIR}/${DISTDIR} .if empty(SHELL:M*csh*) BUILDENV_SHELL?=${SHELL} .else BUILDENV_SHELL?=/bin/sh .endif SVN?= /usr/local/bin/svn SVNFLAGS?= -r HEAD MAKEOBJDIRPREFIX?= /usr/obj .if !defined(OSRELDATE) .if exists(/usr/include/osreldate.h) OSRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ /usr/include/osreldate.h .else OSRELDATE= 0 .endif .export OSRELDATE .endif # Set VERSION for CTFMERGE to use via the default CTFFLAGS=-L VERSION. 
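# For illustration only (hypothetical values): with REVISION=11.0, BRANCH=CURRENT,
# TARGET_ARCH=amd64 and SRCRELDATE=1100093, the assignments below would yield
#   VERSION="FreeBSD 11.0-CURRENT amd64 1100093"
#   PKG_VERSION="11.0.s20160106120000"  (STABLE/CURRENT append .s<UTC timestamp>)
# while a BRANCH of RELEASE-p2 would instead give PKG_VERSION="11.0_2".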
.if !defined(VERSION) && !make(showconfig) REVISION!= ${MAKE} -C ${SRCDIR}/release -V REVISION BRANCH!= ${MAKE} -C ${SRCDIR}/release -V BRANCH SRCRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ ${SRCDIR}/sys/sys/param.h VERSION= FreeBSD ${REVISION}-${BRANCH:C/-p[0-9]+$//} ${TARGET_ARCH} ${SRCRELDATE} .export VERSION .endif .if !defined(PKG_VERSION) REVISION!= ${MAKE} -C ${SRCDIR}/release -V REVISION BRANCH!= ${MAKE} -C ${SRCDIR}/release -V BRANCH SRCRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ ${SRCDIR}/sys/sys/param.h .if ${BRANCH:MSTABLE*} || ${BRANCH:MCURRENT*} TIMENOW= %Y%m%d%H%M%S EXTRA_REVISION= .s${TIMENOW:gmtime} .endif .if ${BRANCH:M*-p*} EXTRA_REVISION= _${BRANCH:C/.*-p([0-9]+$)/\1/} .endif PKG_VERSION= ${REVISION}${EXTRA_REVISION} .endif KNOWN_ARCHES?= aarch64/arm64 \ amd64 \ arm \ armeb/arm \ armv6/arm \ armv6hf/arm \ i386 \ i386/pc98 \ mips \ mipsel/mips \ mips64el/mips \ mips64/mips \ mipsn32el/mips \ mipsn32/mips \ powerpc \ powerpc64/powerpc \ riscv64/riscv \ sparc64 .if ${TARGET} == ${TARGET_ARCH} _t= ${TARGET} .else _t= ${TARGET_ARCH}/${TARGET} .endif .for _t in ${_t} .if empty(KNOWN_ARCHES:M${_t}) .error Unknown target ${TARGET_ARCH}:${TARGET}. .endif .endfor .if ${TARGET} == ${MACHINE} TARGET_CPUTYPE?=${CPUTYPE} .else TARGET_CPUTYPE?= .endif .if !empty(TARGET_CPUTYPE) _TARGET_CPUTYPE=${TARGET_CPUTYPE} .else _TARGET_CPUTYPE=dummy .endif # Skip for showconfig as it is just wasted time and may invoke auto.obj.mk. .if !make(showconfig) _CPUTYPE!= MAKEFLAGS= CPUTYPE=${_TARGET_CPUTYPE} ${MAKE} \ -f /dev/null -m ${.CURDIR}/share/mk -V CPUTYPE .if ${_CPUTYPE} != ${_TARGET_CPUTYPE} .error CPUTYPE global should be set with ?=. .endif .endif .if make(buildworld) BUILD_ARCH!= uname -p .if ${MACHINE_ARCH} != ${BUILD_ARCH} .error To cross-build, set TARGET_ARCH. .endif .endif .if ${MACHINE} == ${TARGET} && ${MACHINE_ARCH} == ${TARGET_ARCH} && !defined(CROSS_BUILD_TESTING) OBJTREE= ${MAKEOBJDIRPREFIX} .else OBJTREE= ${MAKEOBJDIRPREFIX}/${TARGET}.${TARGET_ARCH} .endif WORLDTMP= ${OBJTREE}${.CURDIR}/tmp BPATH= ${WORLDTMP}/legacy/usr/sbin:${WORLDTMP}/legacy/usr/bin:${WORLDTMP}/legacy/bin XPATH= ${WORLDTMP}/usr/sbin:${WORLDTMP}/usr/bin STRICTTMPPATH= ${BPATH}:${XPATH} TMPPATH= ${STRICTTMPPATH}:${PATH} # # Avoid running mktemp(1) unless actually needed. # It may not be functional, e.g., due to new ABI # when in the middle of installing over this system. # .if make(distributeworld) || make(installworld) || make(stageworld) INSTALLTMP!= /usr/bin/mktemp -d -u -t install .endif # # Building a world goes through the following stages # # 1. legacy stage [BMAKE] # This stage is responsible for creating compatibility # shims that are needed by the bootstrap-tools, # build-tools and cross-tools stages. These are generally # APIs that tools from one of those three stages need to # build that aren't present on the host. # 1. bootstrap-tools stage [BMAKE] # This stage is responsible for creating programs that # are needed for backward compatibility reasons. They # are not built as cross-tools. # 2. build-tools stage [TMAKE] # This stage is responsible for creating the object # tree and building any tools that are needed during # the build process. Some programs are listed during # this phase because they build binaries to generate # files needed to build these programs. This stage also # builds the 'build-tools' target rather than 'all'. # 3. 
cross-tools stage [XMAKE] # This stage is responsible for creating any tools that # are needed for building the system. A cross-compiler is one # of them. This differs from build tools in two ways: # 1. the 'all' target is built rather than 'build-tools' # 2. these tools are installed into TMPPATH for stage 4. # 4. world stage [WMAKE] # This stage actually builds the world. # 5. install stage (optional) [IMAKE] # This stage installs a previously built world. # BOOTSTRAPPING?= 0 # Common environment for world related stages CROSSENV+= MAKEOBJDIRPREFIX=${OBJTREE} \ MACHINE_ARCH=${TARGET_ARCH} \ MACHINE=${TARGET} \ CPUTYPE=${TARGET_CPUTYPE} .if ${MK_GROFF} != "no" CROSSENV+= GROFF_BIN_PATH=${WORLDTMP}/legacy/usr/bin \ GROFF_FONT_PATH=${WORLDTMP}/legacy/usr/share/groff_font \ GROFF_TMAC_PATH=${WORLDTMP}/legacy/usr/share/tmac .endif .if defined(TARGET_CFLAGS) CROSSENV+= ${TARGET_CFLAGS} .endif # bootstrap-tools stage BMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${BPATH}:${PATH} \ WORLDTMP=${WORLDTMP} \ MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}" # need to keep this in sync with targets/pseudo/bootstrap-tools/Makefile BSARGS= DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ MK_HTML=no NO_LINT=yes MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_TESTS=no \ MK_INCLUDES=yes BMAKE= MAKEOBJDIRPREFIX=${WORLDTMP} \ ${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ ${BSARGS} # build-tools stage TMAKE= MAKEOBJDIRPREFIX=${OBJTREE} \ ${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ -DNO_LINT \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_TESTS=no # cross-tools stage XMAKE= TOOLS_PREFIX=${WORLDTMP} ${BMAKE} \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ MK_GDB=no MK_TESTS=no # kernel-tools stage KTMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${BPATH}:${PATH} \ WORLDTMP=${WORLDTMP} KTMAKE= TOOLS_PREFIX=${WORLDTMP} MAKEOBJDIRPREFIX=${WORLDTMP} \ ${KTMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ MK_HTML=no -DNO_LINT MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no # world stage WMAKEENV= ${CROSSENV} \ _LDSCRIPTROOT= \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${TMPPATH} # make hierarchy HMAKE= PATH=${TMPPATH} ${MAKE} LOCAL_MTREE=${LOCAL_MTREE:Q} .if defined(NO_ROOT) HMAKE+= PATH=${TMPPATH} METALOG=${METALOG} -DNO_ROOT .endif .if defined(CROSS_TOOLCHAIN_PREFIX) CROSS_COMPILER_PREFIX?=${CROSS_TOOLCHAIN_PREFIX} CROSS_BINUTILS_PREFIX?=${CROSS_TOOLCHAIN_PREFIX} .endif # If we do not have a bootstrap binutils (because the in-tree one does not # support the target architecture), provide a default cross-binutils prefix. # This allows aarch64 builds, for example, to automatically use the # aarch64-binutils port or package. .if !make(showconfig) .if !empty(BROKEN_OPTIONS:MBINUTILS_BOOTSTRAP) && \ !defined(CROSS_BINUTILS_PREFIX) CROSS_BINUTILS_PREFIX=/usr/local/${TARGET_ARCH}-freebsd/bin/ .if !exists(${CROSS_BINUTILS_PREFIX}) .error In-tree binutils does not support the ${TARGET_ARCH} architecture. Install the ${TARGET_ARCH}-binutils port or package or set CROSS_BINUTILS_PREFIX. 
.endif .endif .endif XCOMPILERS= CC CXX CPP .for COMPILER in ${XCOMPILERS} .if defined(CROSS_COMPILER_PREFIX) X${COMPILER}?= ${CROSS_COMPILER_PREFIX}${${COMPILER}} .else X${COMPILER}?= ${${COMPILER}} .endif .endfor XBINUTILS= AS AR LD NM OBJCOPY OBJDUMP RANLIB SIZE STRINGS .for BINUTIL in ${XBINUTILS} .if defined(CROSS_BINUTILS_PREFIX) && \ exists(${CROSS_BINUTILS_PREFIX}${${BINUTIL}}) X${BINUTIL}?= ${CROSS_BINUTILS_PREFIX}${${BINUTIL}} .else X${BINUTIL}?= ${${BINUTIL}} .endif .endfor CROSSENV+= CC="${XCC} ${XCFLAGS}" CXX="${XCXX} ${XCFLAGS} ${XCXXFLAGS}" \ DEPFLAGS="${DEPFLAGS}" \ CPP="${XCPP} ${XCFLAGS}" \ AS="${XAS}" AR="${XAR}" LD="${XLD}" NM=${XNM} \ OBJDUMP=${XOBJDUMP} OBJCOPY="${XOBJCOPY}" \ RANLIB=${XRANLIB} STRINGS=${XSTRINGS} \ SIZE="${XSIZE}" .if ${XCC:N${CCACHE_BIN}:M/*} .if defined(CROSS_BINUTILS_PREFIX) # In the case of xdev-build tools, CROSS_BINUTILS_PREFIX won't be a # directory, but the compiler will look in the right place for it's # tools so we don't need to tell it where to look. .if exists(${CROSS_BINUTILS_PREFIX}) BFLAGS+= -B${CROSS_BINUTILS_PREFIX} .endif .else BFLAGS+= -B${WORLDTMP}/usr/bin .endif .if ${TARGET} == "arm" .if ${TARGET_ARCH:M*hf*} != "" TARGET_ABI= gnueabihf .else TARGET_ABI= gnueabi .endif .endif .if defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc XCFLAGS+= -isystem ${WORLDTMP}/usr/include -L${WORLDTMP}/usr/lib XCXXFLAGS+= -I${WORLDTMP}/usr/include/c++/v1 -std=gnu++11 -L${WORLDTMP}/../lib/libc++ # XXX: DEPFLAGS is a workaround for not properly passing CXXFLAGS to sub-makes # due to CXX="${XCXX} ${XCXXFLAGS}". bsd.dep.mk does use CXXFLAGS when # building C++ files so this can come out if passing CXXFLAGS down is fixed. DEPFLAGS+= -I${WORLDTMP}/usr/include/c++/v1 .else TARGET_ABI?= unknown TARGET_TRIPLE?= ${TARGET_ARCH:C/amd64/x86_64/}-${TARGET_ABI}-freebsd11.0 XCFLAGS+= -target ${TARGET_TRIPLE} .endif XCFLAGS+= --sysroot=${WORLDTMP} ${BFLAGS} XCXXFLAGS+= --sysroot=${WORLDTMP} ${BFLAGS} .else .if defined(CROSS_BINUTILS_PREFIX) && exists(${CROSS_BINUTILS_PREFIX}) BFLAGS+= -B${CROSS_BINUTILS_PREFIX} XCFLAGS+= ${BFLAGS} XCXXFLAGS+= ${BFLAGS} .endif .endif # ${XCC:M/*} WMAKE= ${WMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 DESTDIR=${WORLDTMP} .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64" # 32 bit world LIB32_OBJTREE= ${OBJTREE}${.CURDIR}/world32 LIB32TMP= ${OBJTREE}${.CURDIR}/lib32 .if ${TARGET_ARCH} == "amd64" .if empty(TARGET_CPUTYPE) LIB32CPUFLAGS= -march=i686 -mmmx -msse -msse2 .else LIB32CPUFLAGS= -march=${TARGET_CPUTYPE} .endif LIB32WMAKEENV= MACHINE=i386 MACHINE_ARCH=i386 \ MACHINE_CPU="i686 mmx sse sse2" LIB32WMAKEFLAGS= \ AS="${XAS} --32" \ LD="${XLD} -m elf_i386_fbsd -Y P,${LIB32TMP}/usr/lib32" \ OBJCOPY="${XOBJCOPY}" .elif ${TARGET_ARCH} == "powerpc64" .if empty(TARGET_CPUTYPE) LIB32CPUFLAGS= -mcpu=powerpc .else LIB32CPUFLAGS= -mcpu=${TARGET_CPUTYPE} .endif LIB32WMAKEENV= MACHINE=powerpc MACHINE_ARCH=powerpc LIB32WMAKEFLAGS= \ LD="${XLD} -m elf32ppc_fbsd" \ OBJCOPY="${XOBJCOPY}" .endif LIB32FLAGS= -m32 ${LIB32CPUFLAGS} -DCOMPAT_32BIT \ -isystem ${LIB32TMP}/usr/include/ \ -L${LIB32TMP}/usr/lib32 \ -B${LIB32TMP}/usr/lib32 .if ${XCC:N${CCACHE_BIN}:M/*} LIB32FLAGS+= --sysroot=${WORLDTMP} .endif # Yes, the flags are redundant. 
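# For illustration, on amd64 with TARGET_CPUTYPE unset, LIB32FLAGS above expands to
#   -m32 -march=i686 -mmmx -msse -msse2 -DCOMPAT_32BIT
#   -isystem ${LIB32TMP}/usr/include/ -L${LIB32TMP}/usr/lib32 -B${LIB32TMP}/usr/lib32
# with --sysroot=${WORLDTMP} appended only when XCC is an absolute path.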
LIB32WMAKEENV+= MAKEOBJDIRPREFIX=${LIB32_OBJTREE} \ _LDSCRIPTROOT=${LIB32TMP} \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${TMPPATH} \ LIBDIR=/usr/lib32 \ SHLIBDIR=/usr/lib32 \ DTRACE="${DTRACE} -32" LIB32WMAKEFLAGS+= CC="${XCC} ${LIB32FLAGS}" \ CXX="${XCXX} ${LIB32FLAGS}" \ DESTDIR=${LIB32TMP} \ -DCOMPAT_32BIT \ -DLIBRARIES_ONLY \ -DNO_CPU_CFLAGS \ MK_CTF=no \ -DNO_LINT \ MK_TESTS=no LIB32WMAKE= ${LIB32WMAKEENV} ${MAKE} ${LIB32WMAKEFLAGS} \ MK_MAN=no MK_HTML=no LIB32IMAKE= ${LIB32WMAKE:NINSTALL=*:NDESTDIR=*:N_LDSCRIPTROOT=*} \ MK_TOOLCHAIN=no ${IMAKE_INSTALL} .endif IMAKEENV= ${CROSSENV:N_LDSCRIPTROOT=*} IMAKE= ${IMAKEENV} ${MAKE} -f Makefile.inc1 \ ${IMAKE_INSTALL} ${IMAKE_MTREE} .if empty(.MAKEFLAGS:M-n) IMAKEENV+= PATH=${STRICTTMPPATH}:${INSTALLTMP} \ LD_LIBRARY_PATH=${INSTALLTMP} \ PATH_LOCALE=${INSTALLTMP}/locale IMAKE+= __MAKE_SHELL=${INSTALLTMP}/sh .else IMAKEENV+= PATH=${TMPPATH}:${INSTALLTMP} .endif .if defined(DB_FROM_SRC) INSTALLFLAGS+= -N ${.CURDIR}/etc MTREEFLAGS+= -N ${.CURDIR}/etc .endif _INSTALL_DDIR= ${DESTDIR}/${DISTDIR} INSTALL_DDIR= ${_INSTALL_DDIR:S://:/:g:C:/$::} .if defined(NO_ROOT) METALOG?= ${DESTDIR}/${DISTDIR}/METALOG IMAKE+= -DNO_ROOT METALOG=${METALOG} INSTALLFLAGS+= -U -M ${METALOG} -D ${INSTALL_DDIR} MTREEFLAGS+= -W .endif .if defined(BUILD_PKGS) INSTALLFLAGS+= -h sha256 .endif .if defined(DB_FROM_SRC) || defined(NO_ROOT) IMAKE_INSTALL= INSTALL="install ${INSTALLFLAGS}" IMAKE_MTREE= MTREE_CMD="mtree ${MTREEFLAGS}" .endif # kernel stage KMAKEENV= ${WMAKEENV} KMAKE= ${KMAKEENV} ${MAKE} ${.MAKEFLAGS} ${KERNEL_FLAGS} KERNEL=${INSTKERNNAME} # # buildworld # # Attempt to rebuild the entire system, with reasonable chance of # success, regardless of how old your existing system is. # _worldtmp: .PHONY .if ${.CURDIR:C/[^,]//g} != "" # The m4 build of sendmail files doesn't like it if ',' is used # anywhere in the path of it's files. @echo @echo "*** Error: path to source tree contains a comma ','" @echo false .endif @echo @echo "--------------------------------------------------------------" @echo ">>> Rebuilding the temporary build tree" @echo "--------------------------------------------------------------" .if !defined(NO_CLEAN) rm -rf ${WORLDTMP} .if defined(LIB32TMP) rm -rf ${LIB32TMP} .endif .else rm -rf ${WORLDTMP}/legacy/usr/include # XXX - These three can depend on any header file. 
rm -f ${OBJTREE}${.CURDIR}/usr.bin/kdump/ioctl.c rm -f ${OBJTREE}${.CURDIR}/usr.bin/kdump/kdump_subr.c rm -f ${OBJTREE}${.CURDIR}/usr.bin/truss/ioctl.c .endif .for _dir in \ lib usr legacy/bin legacy/usr mkdir -p ${WORLDTMP}/${_dir} .endfor mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/legacy/usr >/dev/null .if ${MK_GROFF} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.groff.dist \ -p ${WORLDTMP}/legacy/usr >/dev/null .endif mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${WORLDTMP}/usr/include >/dev/null ln -sf ${.CURDIR}/sys ${WORLDTMP} .if ${MK_DEBUG_FILES} != "no" # We could instead disable debug files for these build stages mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${WORLDTMP}/legacy/usr/lib >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${WORLDTMP}/usr/lib >/dev/null .endif .if ${MK_LIB32} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \ -p ${WORLDTMP}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \ -p ${WORLDTMP}/legacy/usr/lib/debug/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \ -p ${WORLDTMP}/usr/lib/debug/usr >/dev/null .endif .endif .if ${MK_TESTS} != "no" mkdir -p ${WORLDTMP}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${WORLDTMP}${TESTSBASE} >/dev/null .if ${MK_DEBUG_FILES} != "no" mkdir -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE} >/dev/null .endif .endif .for _mtree in ${LOCAL_MTREE} mtree -deU -f ${.CURDIR}/${_mtree} -p ${WORLDTMP} > /dev/null .endfor _legacy: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.1: legacy release compatibility shims" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} legacy _bootstrap-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.2: bootstrap tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} bootstrap-tools _cleanobj: .if !defined(NO_CLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} ${CLEANDIR} .if defined(LIB32TMP) ${_+_}cd ${.CURDIR}; ${LIB32WMAKE} -f Makefile.inc1 ${CLEANDIR} .endif .endif _obj: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} obj _build-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${TMAKE} build-tools _cross-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3: cross tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${XMAKE} cross-tools ${_+_}cd ${.CURDIR}; ${XMAKE} kernel-tools _includes: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.1: building includes" @echo 
"--------------------------------------------------------------" # Special handling for SUBDIR_OVERRIDE in buildworld as they most likely need # headers from default SUBDIR. Do SUBDIR_OVERRIDE includes last. ${_+_}cd ${.CURDIR}; ${WMAKE} SUBDIR_OVERRIDE= SHARED=symlinks \ includes .if !empty(SUBDIR_OVERRIDE) && make(buildworld) ${_+_}cd ${.CURDIR}; ${WMAKE} SHARED=symlinks includes .endif _libraries: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.2: building libraries" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; \ ${WMAKE} -DNO_FSCHG MK_HTML=no -DNO_LINT MK_MAN=no \ MK_PROFILE=no MK_TESTS=no MK_TESTS_SUPPORT=${MK_TESTS} libraries _depend: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.3: make dependencies" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} depend everything: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.4: building everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; _PARALLEL_SUBDIR_OK=1 ${WMAKE} all .if defined(LIB32TMP) build32: .PHONY @echo @echo "--------------------------------------------------------------" @echo ">>> stage 5.1: building 32 bit shim libraries" @echo "--------------------------------------------------------------" mkdir -p ${LIB32TMP}/usr/include mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${LIB32TMP}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${LIB32TMP}/usr/include >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \ -p ${LIB32TMP}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${LIB32TMP}/usr/lib >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \ -p ${LIB32TMP}/usr/lib/debug/usr >/dev/null .endif mkdir -p ${WORLDTMP} ln -sf ${.CURDIR}/sys ${WORLDTMP} .for _t in obj includes ${_+_}cd ${.CURDIR}/include; ${LIB32WMAKE} DIRPRFX=include/ ${_t} ${_+_}cd ${.CURDIR}/lib; ${LIB32WMAKE} DIRPRFX=lib/ ${_t} .if ${MK_CDDL} != "no" ${_+_}cd ${.CURDIR}/cddl/lib; ${LIB32WMAKE} DIRPRFX=cddl/lib/ ${_t} .endif ${_+_}cd ${.CURDIR}/gnu/lib; ${LIB32WMAKE} DIRPRFX=gnu/lib/ ${_t} .if ${MK_CRYPT} != "no" ${_+_}cd ${.CURDIR}/secure/lib; ${LIB32WMAKE} DIRPRFX=secure/lib/ ${_t} .endif .if ${MK_KERBEROS} != "no" ${_+_}cd ${.CURDIR}/kerberos5/lib; ${LIB32WMAKE} DIRPRFX=kerberos5/lib ${_t} .endif .endfor .for _dir in usr.bin/lex/lib ${_+_}cd ${.CURDIR}/${_dir}; ${LIB32WMAKE} DIRPRFX=${_dir}/ obj .endfor .for _dir in lib/ncurses/ncurses lib/ncurses/ncursesw lib/libmagic ${_+_}cd ${.CURDIR}/${_dir}; \ WORLDTMP=${WORLDTMP} \ MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}" \ MAKEOBJDIRPREFIX=${LIB32_OBJTREE} ${MAKE} SSP_CFLAGS= DESTDIR= \ DIRPRFX=${_dir}/ -DNO_LINT -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ build-tools .endfor ${_+_}cd ${.CURDIR}; \ ${LIB32WMAKE} -f Makefile.inc1 -DNO_FSCHG libraries .for _t in obj depend all ${_+_}cd ${.CURDIR}/libexec/rtld-elf; PROG=ld-elf32.so.1 ${LIB32WMAKE} \ -DNO_FSCHG DIRPRFX=libexec/rtld-elf/ ${_t} ${_+_}cd ${.CURDIR}/usr.bin/ldd; PROG=ldd32 ${LIB32WMAKE} \ DIRPRFX=usr.bin/ldd ${_t} .endfor distribute32 install32: .MAKE .PHONY ${_+_}cd ${.CURDIR}/lib; ${LIB32IMAKE} ${.TARGET:S/32$//} .if ${MK_CDDL} != "no" ${_+_}cd ${.CURDIR}/cddl/lib; ${LIB32IMAKE} ${.TARGET:S/32$//} .endif ${_+_}cd ${.CURDIR}/gnu/lib; ${LIB32IMAKE} 
${.TARGET:S/32$//} .if ${MK_CRYPT} != "no" ${_+_}cd ${.CURDIR}/secure/lib; ${LIB32IMAKE} ${.TARGET:S/32$//} .endif .if ${MK_KERBEROS} != "no" ${_+_}cd ${.CURDIR}/kerberos5/lib; ${LIB32IMAKE} ${.TARGET:S/32$//} .endif ${_+_}cd ${.CURDIR}/libexec/rtld-elf; \ PROG=ld-elf32.so.1 ${LIB32IMAKE} ${.TARGET:S/32$//} ${_+_}cd ${.CURDIR}/usr.bin/ldd; PROG=ldd32 ${LIB32IMAKE} \ ${.TARGET:S/32$//} .endif WMAKE_TGTS= WMAKE_TGTS+= _worldtmp _legacy .if empty(SUBDIR_OVERRIDE) WMAKE_TGTS+= _bootstrap-tools .endif WMAKE_TGTS+= _cleanobj _obj _build-tools _cross-tools WMAKE_TGTS+= _includes _libraries _depend everything .if defined(LIB32TMP) && ${MK_LIB32} != "no" && empty(SUBDIR_OVERRIDE) WMAKE_TGTS+= build32 .endif buildworld: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue .ORDER: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue buildworld_prologue: @echo "--------------------------------------------------------------" @echo ">>> World build started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" buildworld_epilogue: @echo @echo "--------------------------------------------------------------" @echo ">>> World build completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" # # We need to have this as a target because the indirection between Makefile # and Makefile.inc1 causes the correct PATH to be used, rather than a # modification of the current environment's PATH. In addition, we need # to quote multiword values. # buildenvvars: .PHONY @echo ${WMAKEENV:Q} ${.MAKE.EXPORTED:@v@$v=\"${$v}\"@} .if ${.TARGETS:Mbuildenv} .if ${.MAKEFLAGS:M-j} .error The buildenv target is incompatible with -j .endif .endif BUILDENV_DIR?= ${.CURDIR} buildenv: .PHONY @echo Entering world for ${TARGET_ARCH}:${TARGET} .if ${BUILDENV_SHELL:M*zsh*} @echo For ZSH you must run: export CPUTYPE=${TARGET_CPUTYPE} .endif @cd ${BUILDENV_DIR} && env ${WMAKEENV} BUILDENV=1 ${BUILDENV_SHELL} \ || true TOOLCHAIN_TGTS= ${WMAKE_TGTS:N_depend:Neverything:Nbuild32} toolchain: ${TOOLCHAIN_TGTS} kernel-toolchain: ${TOOLCHAIN_TGTS:N_includes:N_libraries} # # installcheck # # Checks to be sure system is ready for installworld/installkernel. # installcheck: _installcheck_world _installcheck_kernel _installcheck_world: _installcheck_kernel: # # Require DESTDIR to be set if installing for a different architecture or # using the user/group database in the source tree. # .if ${TARGET_ARCH} != ${MACHINE_ARCH} || ${TARGET} != ${MACHINE} || \ defined(DB_FROM_SRC) .if !make(distributeworld) _installcheck_world: __installcheck_DESTDIR _installcheck_kernel: __installcheck_DESTDIR __installcheck_DESTDIR: .if !defined(DESTDIR) || empty(DESTDIR) @echo "ERROR: Please set DESTDIR!"; \ false .endif .endif .endif .if !defined(DB_FROM_SRC) # # Check for missing UIDs/GIDs. # CHECK_UIDS= auditdistd CHECK_GIDS= audit .if ${MK_SENDMAIL} != "no" CHECK_UIDS+= smmsp CHECK_GIDS+= smmsp .endif .if ${MK_PF} != "no" CHECK_UIDS+= proxy CHECK_GIDS+= proxy authpf .endif .if ${MK_UNBOUND} != "no" CHECK_UIDS+= unbound CHECK_GIDS+= unbound .endif _installcheck_world: __installcheck_UGID __installcheck_UGID: .for uid in ${CHECK_UIDS} @if ! `id -u ${uid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${uid} user is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .for gid in ${CHECK_GIDS} @if ! 
`find / -prune -group ${gid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${gid} group is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .endif # # Required install tools to be saved in a scratch dir for safety. # .if ${MK_ZONEINFO} != "no" _zoneinfo= zic tzsetup .endif ITOOLS= [ awk cap_mkdb cat chflags chmod chown cmp cp \ date echo egrep find grep id install ${_install-info} \ ln make mkdir mtree mv pwd_mkdb \ rm sed services_mkdb sh strip sysctl test true uname wc ${_zoneinfo} \ ${LOCAL_ITOOLS} # Needed for share/man .if ${MK_MAN} != "no" ITOOLS+=makewhatis .endif # # distributeworld # # Distributes everything compiled by a `buildworld'. # # installworld # # Installs everything compiled by a 'buildworld'. # # Non-base distributions produced by the base system EXTRA_DISTRIBUTIONS= doc .if defined(LIB32TMP) && ${MK_LIB32} != "no" EXTRA_DISTRIBUTIONS+= lib32 .endif .if ${MK_TESTS} != "no" EXTRA_DISTRIBUTIONS+= tests .endif DEBUG_DISTRIBUTIONS= .if ${MK_DEBUG_FILES} != "no" DEBUG_DISTRIBUTIONS+= base ${EXTRA_DISTRIBUTIONS:S,doc,,:S,tests,,} .endif MTREE_MAGIC?= mtree 2.0 distributeworld installworld stageworld: _installcheck_world mkdir -p ${INSTALLTMP} progs=$$(for prog in ${ITOOLS}; do \ if progpath=`which $$prog`; then \ echo $$progpath; \ else \ echo "Required tool $$prog not found in PATH." >&2; \ exit 1; \ fi; \ done); \ libs=$$(ldd -f "%o %p\n" -f "%o %p\n" $$progs 2>/dev/null | sort -u | \ while read line; do \ set -- $$line; \ if [ "$$2 $$3" != "not found" ]; then \ echo $$2; \ else \ echo "Required library $$1 not found." >&2; \ exit 1; \ fi; \ done); \ cp $$libs $$progs ${INSTALLTMP} cp -R $${PATH_LOCALE:-"/usr/share/locale"} ${INSTALLTMP}/locale .if defined(NO_ROOT) echo "#${MTREE_MAGIC}" > ${METALOG} .endif .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} -mkdir ${DESTDIR}/${DISTDIR}/${dist} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${DESTDIR}/${DISTDIR}/${dist} >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/include >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib >/dev/null .endif .if ${MK_LIB32} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/usr >/dev/null .endif .endif .if ${MK_TESTS} != "no" && ${dist} == "tests" -mkdir -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/${TESTSBASE} >/dev/null .endif .endif .if defined(NO_ROOT) ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.root.dist | \ sed -e 's#^\./#./${dist}/#' >> ${METALOG} ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.usr.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.include.dist | \ sed -e 's#^\./#./${dist}/usr/include/#' >> ${METALOG} .if ${MK_LIB32} != "no" ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.lib32.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} .endif .endif .endfor -mkdir ${DESTDIR}/${DISTDIR}/base ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} 
PATH=${TMPPATH} ${MAKE} \ METALOG=${METALOG} ${IMAKE_INSTALL} ${IMAKE_MTREE} \ DISTBASE=/base DESTDIR=${DESTDIR}/${DISTDIR}/base \ LOCAL_MTREE=${LOCAL_MTREE:Q} distrib-dirs .endif ${_+_}cd ${.CURDIR}; ${IMAKE} re${.TARGET:S/world$//}; \ ${IMAKEENV} rm -rf ${INSTALLTMP} .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} find ${DESTDIR}/${DISTDIR}/${dist} -mindepth 1 -empty -delete .endfor .if defined(NO_ROOT) .for dist in base ${EXTRA_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediatly before @# the relevant mtree line. cd ${DESTDIR}/${DISTDIR}; \ find ./${dist} | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.meta .endfor .for dist in ${DEBUG_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediatly before @# the relevant mtree line. cd ${DESTDIR}/${DISTDIR}; \ find ./${dist}/usr/lib/debug | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.debug.meta .endfor .endif .endif packageworld: .for dist in base ${EXTRA_DISTRIBUTIONS} .if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug \ @${DESTDIR}/${DISTDIR}/${dist}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug . | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .endif .endfor .for dist in ${DEBUG_DISTRIBUTIONS} . if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - @${DESTDIR}/${DISTDIR}/${dist}.debug.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvLf - usr/lib/debug | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . endif .endfor # # reinstall # # If you have a build server, you can NFS mount the source and obj directories # and do a 'make reinstall' on the *client* to install new binaries from the # most recent server build. 
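# An illustrative client-side sequence (hypothetical host name, default paths),
# assuming the build server exports /usr/src and /usr/obj over NFS:
#   mount buildhost:/usr/src /usr/src
#   mount buildhost:/usr/obj /usr/obj
#   cd /usr/src && make reinstall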
# reinstall: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Making hierarchy" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} hierarchy @echo @echo "--------------------------------------------------------------" @echo ">>> Installing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install .if defined(LIB32TMP) && ${MK_LIB32} != "no" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install32 .endif restage: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Making hierarchy" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} hierarchy distribution @echo @echo "--------------------------------------------------------------" @echo ">>> Installing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install .if defined(LIB32TMP) && ${MK_LIB32} != "no" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install32 .endif redistribute: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Distributing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute .if defined(LIB32TMP) && ${MK_LIB32} != "no" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute32 \ DISTRIBUTION=lib32 .endif distrib-dirs: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ ${IMAKE_INSTALL} ${IMAKE_MTREE} METALOG=${METALOG} ${.TARGET} distribution: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ ${IMAKE_INSTALL} ${IMAKE_MTREE} METALOG=${METALOG} ${.TARGET} ${_+_}cd ${.CURDIR}; ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} -f Makefile.inc1 ${IMAKE_INSTALL} \ METALOG=${METALOG} installconfig # # buildkernel and installkernel # # Which kernels to build and/or install is specified by setting # KERNCONF. If not defined a GENERIC kernel is built/installed. # Only the existing (depending TARGET) config files are used # for building kernels and only the first of these is designated # as the one being installed. # # Note that we have to use TARGET instead of TARGET_ARCH when # we're in kernel-land. Since only TARGET_ARCH is (expected) to # be set to cross-build, we have to make sure TARGET is set # properly. .if defined(KERNFAST) NO_KERNELCLEAN= t NO_KERNELCONFIG= t NO_KERNELDEPEND= t NO_KERNELOBJ= t # Shortcut for KERNCONF=Blah -DKERNFAST is now KERNFAST=Blah .if !defined(KERNCONF) && ${KERNFAST} != "1" KERNCONF=${KERNFAST} .endif .endif .if ${TARGET_ARCH} == "powerpc64" KERNCONF?= GENERIC64 .else KERNCONF?= GENERIC .endif INSTKERNNAME?= kernel KERNSRCDIR?= ${.CURDIR}/sys KRNLCONFDIR= ${KERNSRCDIR}/${TARGET}/conf KRNLOBJDIR= ${OBJTREE}${KERNSRCDIR} KERNCONFDIR?= ${KRNLCONFDIR} BUILDKERNELS= INSTALLKERNEL= .if defined(NO_INSTALLKERNEL) # All of the BUILDKERNELS loops start at index 1. 
BUILDKERNELS+= dummy .endif .for _kernel in ${KERNCONF} .if exists(${KERNCONFDIR}/${_kernel}) BUILDKERNELS+= ${_kernel} .if empty(INSTALLKERNEL) && !defined(NO_INSTALLKERNEL) INSTALLKERNEL= ${_kernel} .endif .endif .endfor ${WMAKE_TGTS:N_worldtmp:Nbuild32} ${.ALLTARGETS:M_*:N_worldtmp}: .MAKE .PHONY # # buildkernel # # Builds all kernels defined by BUILDKERNELS. # buildkernel: .MAKE .PHONY .if empty(BUILDKERNELS:Ndummy) @echo "ERROR: Missing kernel configuration file(s) (${KERNCONF})."; \ false .endif @echo .for _kernel in ${BUILDKERNELS:Ndummy} @echo "--------------------------------------------------------------" @echo ">>> Kernel build for ${_kernel} started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" @echo "===> ${_kernel}" mkdir -p ${KRNLOBJDIR} .if !defined(NO_KERNELCONFIG) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1: configuring the kernel" @echo "--------------------------------------------------------------" cd ${KRNLCONFDIR}; \ PATH=${TMPPATH} \ config ${CONFIGARGS} -d ${KRNLOBJDIR}/${_kernel} \ -I '${KERNCONFDIR}' '${KERNCONFDIR}/${_kernel}' .endif .if !defined(NO_CLEAN) && !defined(NO_KERNELCLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} ${CLEANDIR} .endif .if !defined(NO_KERNELOBJ) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} obj .endif @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${KTMAKE} kernel-tools .if !defined(NO_KERNELDEPEND) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3.1: making dependencies" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} depend -DNO_MODULES_OBJ .endif @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3.2: building everything" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} all -DNO_MODULES_OBJ @echo "--------------------------------------------------------------" @echo ">>> Kernel build for ${_kernel} completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" .endfor # # installkernel, etc. 
# # Install the kernel defined by INSTALLKERNEL # installkernel installkernel.debug \ reinstallkernel reinstallkernel.debug: _installcheck_kernel .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${INSTALLKERNEL}" @echo "--------------------------------------------------------------" cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME} ${.TARGET:S/kernel//} .endif .if ${BUILDKERNELS:[#]} > 1 .for _kernel in ${BUILDKERNELS:[2..-1]} @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${_kernel}" @echo "--------------------------------------------------------------" cd ${KRNLOBJDIR}/${_kernel}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME}.${_kernel} ${.TARGET:S/kernel//} .endfor .endif distributekernel distributekernel.debug: .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif mkdir -p ${DESTDIR}/${DISTDIR} .if defined(NO_ROOT) echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.premeta .endif cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} KERNEL=${INSTKERNNAME} \ DESTDIR=${INSTALL_DDIR}/kernel \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) sed -e 's|^./kernel|.|' ${DESTDIR}/${DISTDIR}/kernel.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.meta .endif .endif .if ${BUILDKERNELS:[#]} > 1 .for _kernel in ${BUILDKERNELS:[2..-1]} .if defined(NO_ROOT) echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta .endif cd ${KRNLOBJDIR}/${_kernel}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.${_kernel}.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} \ KERNEL=${INSTKERNNAME}.${_kernel} \ DESTDIR=${INSTALL_DDIR}/kernel.${_kernel} \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) sed -e 's|^./kernel|.|' \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta .endif .endfor .endif packagekernel: .if defined(NO_ROOT) .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ - tar cvf - @${DESTDIR}/${DISTDIR}/kernel.meta | \ + tar cvf - --exclude '*.debug' \ + @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif + cd ${DESTDIR}/${DISTDIR}/kernel; \ + tar cvf - --include '*/*/*.debug' \ + @${DESTDIR}/${DISTDIR}/kernel.meta | \ + ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .if ${BUILDKERNELS:[#]} > 1 .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ - tar cvf - @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ + tar cvf - --exclude '*.debug' \ + @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz + cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ + tar cvf - --include '*/*/*.debug' \ + @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ + ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endfor .endif .else .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ - tar cvf - . | \ + tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif + cd ${DESTDIR}/${DISTDIR}/kernel; \ + tar cvf - --include '*/*/*.debug' $$(eval find .) 
| \ + ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .if ${BUILDKERNELS:[#]} > 1 .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ - tar cvf - . | \ + tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz + cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ + tar cvf - --include '*/*/*.debug' $$(eval find .) | \ + ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endfor .endif .endif create-world-packages: @rm -f ${DESTDIR}/*.plist 2>/dev/null || : @cd ${DESTDIR} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ ${DESTDIR}/METALOG @for plist in ${DESTDIR}/*.plist; do \ plist=$${plist##*/} ; \ test -f ${SRCDIR}/release/packages/$${plist%.plist}.ucl || \ ( echo "Unkown package FreeBSD-$${plist%.plist}" ; false ) ; \ done @cap_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VCAP_MKDB_ENDIAN` ; \ pwd_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VPWD_MKDB_ENDIAN` ; \ for plist in ${DESTDIR}/*.plist; do \ plist=$${plist##*/} ; \ pkgname=$${plist%.plist} ; \ sed -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/$${pkgname}/" \ -e "s/%COMMENT%/Generic comment for $${pkgname}/" \ -e "s/%DESC%/Generic description for $${pkgname}/" \ -e "s/%CAP_MKDB_ENDIAN%/$${cap_arg}/g" \ -e "s/%PWD_MKDB_ENDIAN%/$${pwd_arg}/g" \ ${SRCDIR}/release/packages/$${pkgname}.ucl \ > ${DESTDIR}/$${pkgname}.ucl ; \ awk -F\" ' \ /^name/ { printf("===> Creating %s-", $$2); next } \ /^version/ { print $$2; next } \ ' ${DESTDIR}/$${pkgname}.ucl ; \ pkg -o ABI_FILE=${DESTDIR}/bin/sh \ create -M ${DESTDIR}/$${pkgname}.ucl \ -p ${DESTDIR}/$${pkgname}.plist \ -r ${DESTDIR} -o ${DESTDIR} ; \ done STAGEDIR= ${MAKEOBJDIRPREFIX}${.CURDIR}/stage packages: @mkdir -p ${MAKEOBJDIRPREFIX}${.CURDIR}/stage ${_+_}@cd ${.CURDIR}; \ ${MAKE} DESTDIR=${DESTDIR:U${STAGEDIR}} -DNO_ROOT -B stageworld ; \ ${MAKE} DESTDIR=${DESTDIR:U${STAGEDIR}} create-world-packages create-kernel-packages: @cd ${DESTDIR}/${DISTDIR} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ ${DESTDIR}/${DISTDIR}/kernel.meta .for flavor in release debug .if exists(${DESTDIR}/${DISTDIR}/${flavor}.plist) @rm -rf ${DESTDIR}/${DISTDIR}/${flavor}-manifestdir @cp -r ${SRCDIR}/release/packages/kernel \ ${DESTDIR}/${DISTDIR}/${flavor}-manifestdir @cd ${DESTDIR}/${DISTDIR} ; \ sed -i '' -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/kernel-${flavor}/" \ -e "s/%COMMENT%/FreeBSD ${KERNCONF} kernel ${flavor}/" \ -e "s/%DESC%/FreeBSD ${KERNCONF} kernel ${flavor}/" \ ${DESTDIR}/${DISTDIR}/${flavor}-manifestdir/+MANIFEST @awk -F\" '/name/ { printf("===> Creating %s-", $$2) } /version/ {print $$2 }' \ ${DESTDIR}/${DISTDIR}/${flavor}-manifestdir/+MANIFEST @pkg create -m ${DESTDIR}/${DISTDIR}/${flavor}-manifestdir \ -p ${DESTDIR}/${DISTDIR}/${flavor}.plist \ -r ${DESTDIR}/${DISTDIR}/kernel \ -o ${DESTDIR} .endif .endfor .for _kernel in ${BUILDKERNELS:S/${INSTALLKERNEL}//} @cd ${DESTDIR}/${DISTDIR} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ ${DESTDIR}/${DISTDIR}/kernel${_kernel}.meta .for flavor in release debug .if exists(${DESTDIR}/${DISTDIR}/${flavor}.plist) @rm -rf ${DESTDIR}/${DISTDIR}/${flavor}-manifestdir @cp -r ${SRCDIR}/release/packages/kernel \ ${DESTDIR}/${DISTDIR}/${flavor}-manifestdir @cd ${DESTDIR}/${DISTDIR} ; \ sed -i '' -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/kernel-${flavor}/" \ -e "s/%COMMENT%/FreeBSD ${KERNCONF} kernel ${flavor}/" \ -e "s/%DESC%/FreeBSD ${KERNCONF} kernel ${flavor}/" \ ${DESTDIR}/${DISTDIR}/${flavor}-manifestdir/+MANIFEST @awk -F\" '/name/ { printf("===> Creating %s-", 
$$2) } /version/ {print $$2 }' \ ${DESTDIR}/${DISTDIR}/${flavor}-manifestdir/+MANIFEST @pkg create -m ${DESTDIR}/${DISTDIR}/${flavor}-manifestdir \ -p ${DESTDIR}/${DISTDIR}/${flavor}.plist \ -r ${DESTDIR}/${DISTDIR}/kernel.${_kernel} \ -o ${DESTDIR} .endif .endfor .endfor # # doxygen # # Build the API documentation with doxygen # doxygen: .PHONY @if [ ! -x ${LOCALBASE}/bin/doxygen ]; then \ echo "You need doxygen (devel/doxygen) to generate the API documentation of the kernel." | /usr/bin/fmt; \ exit 1; \ fi ${_+_}cd ${.CURDIR}/tools/kerneldoc/subsys; ${MAKE} obj all # # update # # Update the source tree(s), by running svn/svnup to update to the # latest copy. # update: .if (defined(CVS_UPDATE) || defined(SUP_UPDATE)) && !defined(SVN_UPDATE) @echo "--------------------------------------------------------------" @echo "CVS_UPDATE and SUP_UPDATE are no longer supported." @echo "Please see: https://wiki.freebsd.org/CvsIsDeprecated" @echo "--------------------------------------------------------------" @exit 1 .endif .if defined(SVN_UPDATE) @echo "--------------------------------------------------------------" @echo ">>> Updating ${.CURDIR} using Subversion" @echo "--------------------------------------------------------------" @(cd ${.CURDIR}; ${SVN} update ${SVNFLAGS}) .endif # # ------------------------------------------------------------------------ # # From here onwards are utility targets used by the 'make world' and # related targets. If your 'world' breaks, you may like to try to fix # the problem and manually run the following targets to attempt to # complete the build. Beware, this is *not* guaranteed to work, you # need to have a pretty good grip on the current state of the system # to attempt to manually finish it. If in doubt, 'make world' again. # # # legacy: Build compatibility shims for the next three targets. This is a # minimal set of tools and shims necessary to compensate for older systems # which don't have the APIs required by the targets built in bootstrap-tools, # build-tools or cross-tools. # # ELF Tool Chain libraries are needed for ELF tools and dtrace tools. .if ${BOOTSTRAPPING} < 1100006 _elftoolchain_libs= lib/libelf lib/libdwarf .endif legacy: .if ${BOOTSTRAPPING} < 800107 && ${BOOTSTRAPPING} != 0 @echo "ERROR: Source upgrades from versions prior to 8.0 are not supported."; \ false .endif .for _tool in tools/build ${_elftoolchain_libs} ${_+_}@${ECHODIR} "===> ${_tool} (obj,includes,depend,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy includes; \ ${MAKE} DIRPRFX=${_tool}/ depend; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install .endfor # # bootstrap-tools: Build tools needed for compatibility. These are binaries that # are built to build other binaries in the system. However, the focus of these # binaries is usually quite narrow. Bootstrap tools use the host's compiler and # libraries, augmented by -legacy. 
# _bt= _bootstrap-tools .if ${MK_GAMES} != "no" _strfile= usr.bin/fortune/strfile .endif .if ${MK_GCC} != "no" && ${MK_CXX} != "no" _gperf= gnu/usr.bin/gperf .endif .if ${MK_GROFF} != "no" _groff= gnu/usr.bin/groff \ usr.bin/soelim .endif .if ${MK_VT} != "no" _vtfontcvt= usr.bin/vtfontcvt .endif .if ${BOOTSTRAPPING} < 900002 _sed= usr.bin/sed .endif .if ${BOOTSTRAPPING} < 1000002 _libopenbsd= lib/libopenbsd _m4= usr.bin/m4 ${_bt}-usr.bin/m4: ${_bt}-lib/libopenbsd .endif .if ${BOOTSTRAPPING} < 1000026 _nmtree= lib/libnetbsd \ usr.sbin/nmtree ${_bt}-usr.sbin/nmtree: ${_bt}-lib/libnetbsd .endif .if ${BOOTSTRAPPING} < 1000027 _cat= bin/cat .endif .if ${BOOTSTRAPPING} < 1000033 _lex= usr.bin/lex ${_bt}-usr.bin/lex: ${_bt}-usr.bin/m4 .endif # r277259 crunchide: Correct 64-bit section header offset # r281674 crunchide: always include both 32- and 64-bit ELF support # r285986 crunchen: use STRIPBIN rather than STRIP .if ${BOOTSTRAPPING} < 1100078 _crunch= usr.sbin/crunch .endif .if ${BOOTSTRAPPING} >= 900040 && ${BOOTSTRAPPING} < 900041 _awk= usr.bin/awk .endif _yacc= lib/liby \ usr.bin/yacc ${_bt}-usr.bin/yacc: ${_bt}-lib/liby .if ${MK_BSNMP} != "no" _gensnmptree= usr.sbin/bsnmpd/gensnmptree .endif # We need to build tblgen when we're building clang either as # the bootstrap compiler, or as the part of the normal build. .if ${MK_CLANG_BOOTSTRAP} != "no" || ${MK_CLANG} != "no" _clang_tblgen= \ lib/clang/libllvmsupport \ lib/clang/libllvmtablegen \ usr.bin/clang/tblgen \ usr.bin/clang/clang-tblgen ${_bt}-usr.bin/clang/clang-tblgen: ${_bt}-lib/clang/libllvmtablegen ${_bt}-lib/clang/libllvmsupport ${_bt}-usr.bin/clang/tblgen: ${_bt}-lib/clang/libllvmtablegen ${_bt}-lib/clang/libllvmsupport .endif # Default to building the GPL DTC, but build the BSDL one if users explicitly # request it. _dtc= usr.bin/dtc .if ${MK_GPL_DTC} != "no" _dtc= gnu/usr.bin/dtc .endif .if ${MK_KERBEROS} != "no" _kerberos5_bootstrap_tools= \ kerberos5/tools/make-roken \ kerberos5/lib/libroken \ kerberos5/lib/libvers \ kerberos5/tools/asn1_compile \ kerberos5/tools/slc \ usr.bin/compile_et .ORDER: ${_kerberos5_bootstrap_tools:C/^/${_bt}-/g} .endif .if ${MK_MANDOCDB} != "no" _libopenbsd?= lib/libopenbsd _makewhatis= lib/libsqlite3 \ usr.bin/mandoc ${_bt}-usr.bin/mandoc: ${_bt}-lib/libopenbsd ${_bt}-lib/libsqlite3 .else _makewhatis=usr.bin/makewhatis .endif bootstrap-tools: .PHONY # Please document (add comment) why something is in 'bootstrap-tools'. # Try to bound the building of the bootstrap-tool to just the # FreeBSD versions that need the tool built at this stage of the build. 
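# As an illustration only (hypothetical tool and version number), a tool that is
# needed solely when bootstrapping from hosts older than a given __FreeBSD_version
# would follow the pattern of the entries above:
#   .if ${BOOTSTRAPPING} < 1100000
#   _mytool=	usr.bin/mytool
#   .endif
# with ${_mytool} then added to the .for list below.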
.for _tool in \ ${_clang_tblgen} \ ${_kerberos5_bootstrap_tools} \ ${_strfile} \ ${_gperf} \ ${_groff} \ ${_dtc} \ ${_awk} \ ${_cat} \ usr.bin/lorder \ ${_libopenbsd} \ ${_makewhatis} \ usr.bin/rpcgen \ ${_sed} \ ${_yacc} \ ${_m4} \ ${_lex} \ usr.bin/xinstall \ ${_gensnmptree} \ usr.sbin/config \ ${_crunch} \ ${_nmtree} \ ${_vtfontcvt} \ usr.bin/localedef ${_bt}-${_tool}: .PHONY .MAKE ${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ depend; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install bootstrap-tools: ${_bt}-${_tool} .endfor # # build-tools: Build special purpose build tools # .if !defined(NO_SHARE) _share= share/syscons/scrnmaps .endif .if ${MK_GCC} != "no" _gcc_tools= gnu/usr.bin/cc/cc_tools .endif .if ${MK_RESCUE} != "no" # rescue includes programs that have build-tools targets _rescue=rescue/rescue .endif .for _tool in \ bin/csh \ bin/sh \ ${LOCAL_TOOL_DIRS} \ lib/ncurses/ncurses \ lib/ncurses/ncursesw \ ${_rescue} \ ${_share} \ usr.bin/awk \ lib/libmagic \ usr.bin/mkesdb_static \ usr.bin/mkcsmapper_static \ usr.bin/vi/catalog build-tools_${_tool}: .PHONY ${_+_}@${ECHODIR} "===> ${_tool} (obj,build-tools)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ build-tools build-tools: build-tools_${_tool} .endfor .for _tool in \ ${_gcc_tools} build-tools_${_tool}: .PHONY ${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ depend; \ ${MAKE} DIRPRFX=${_tool}/ all build-tools: build-tools_${_tool} .endfor # # kernel-tools: Build kernel-building tools # kernel-tools: mkdir -p ${MAKEOBJDIRPREFIX}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${MAKEOBJDIRPREFIX}/usr >/dev/null # # cross-tools: All the tools needed to build the rest of the system after # we get done with the earlier stages. It is the last set of tools needed # to begin building the target binaries. # .if ${TARGET_ARCH} != ${MACHINE_ARCH} .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386" _btxld= usr.sbin/btxld .endif .endif # Rebuild ctfconvert and ctfmerge to avoid difficult-to-diagnose failures # resulting from missing bug fixes or ELF Toolchain updates. .if ${MK_CDDL} != "no" _dtrace_tools= cddl/lib/libctf cddl/usr.bin/ctfconvert \ cddl/usr.bin/ctfmerge .endif # If we're given an XAS, don't build binutils. .if ${XAS:M/*} == "" .if ${MK_BINUTILS_BOOTSTRAP} != "no" _binutils= gnu/usr.bin/binutils .endif .if ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" _elftctools= lib/libelftc \ usr.bin/elfcopy \ usr.bin/nm \ usr.bin/size \ usr.bin/strings # These are not required by the build, but can be useful for developers who # cross-build on a FreeBSD 10 host: _elftctools+= usr.bin/addr2line .endif .elif ${TARGET_ARCH} != ${MACHINE_ARCH} && ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" # If cross-building with an external binutils we still need to build strip for # the target (for at least crunchide). _elftctools= lib/libelftc \ usr.bin/elfcopy .endif # If an full path to an external cross compiler is given, don't build # a cross compiler. 
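# For example (illustrative, hypothetical names), supplying an absolute compiler
# path such as
#   make buildworld TARGET=arm TARGET_ARCH=armv6 XCC=/usr/local/bin/clang38
# (or a CROSS_TOOLCHAIN file that sets CROSS_COMPILER_PREFIX) makes the test
# below false, so the in-tree _clang/_cc bootstrap compilers are not built.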
.if ${XCC:N${CCACHE_BIN}:M/*} == "" && ${MK_CROSS_COMPILER} != "no" .if ${MK_CLANG_BOOTSTRAP} != "no" _clang= usr.bin/clang _clang_libs= lib/clang .endif .if ${MK_GCC_BOOTSTRAP} != "no" _cc= gnu/usr.bin/cc .endif .endif .if ${MK_USB} != "no" _usb_tools= sys/boot/usb/tools .endif cross-tools: .MAKE .PHONY .for _tool in \ ${_clang_libs} \ ${_clang} \ ${_binutils} \ ${_elftctools} \ ${_dtrace_tools} \ ${_cc} \ ${_btxld} \ ${_crunchide} \ ${_usb_tools} ${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ depend; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX} install .endfor NXBDESTDIR= ${OBJTREE}/nxb-bin NXBENV= MAKEOBJDIRPREFIX=${OBJTREE}/nxb \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${PATH}:${OBJTREE}/gperf_for_gcc/usr/bin NXBMAKE= ${NXBENV} ${MAKE} \ TBLGEN=${NXBDESTDIR}/usr/bin/tblgen \ CLANG_TBLGEN=${NXBDESTDIR}/usr/bin/clang-tblgen \ MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} \ MK_GDB=no MK_TESTS=no \ SSP_CFLAGS= \ MK_HTML=no NO_LINT=yes MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_DEBUG_FILES=no # native-xtools is the current target for qemu-user cross builds of ports # via poudriere and the imgact_binmisc kernel module. # For non-clang enabled targets that are still using the in tree gcc # we must build a gperf binary for one instance of its Makefiles. On # clang-enabled systems, the gperf binary is obsolete. native-xtools: .PHONY .if ${MK_GCC_BOOTSTRAP} != "no" mkdir -p ${OBJTREE}/gperf_for_gcc/usr/bin ${_+_}@${ECHODIR} "===> ${_gperf} (obj,depend,all,install)"; \ cd ${.CURDIR}/${_gperf}; \ ${NXBMAKE} DIRPRFX=${_gperf}/ obj; \ ${NXBMAKE} DIRPRFX=${_gperf}/ depend; \ ${NXBMAKE} DIRPRFX=${_gperf}/ all; \ ${NXBMAKE} DIRPRFX=${_gperf}/ DESTDIR=${OBJTREE}/gperf_for_gcc install .endif mkdir -p ${NXBDESTDIR}/bin ${NXBDESTDIR}/sbin ${NXBDESTDIR}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${NXBDESTDIR}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${NXBDESTDIR}/usr/include >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${NXBDESTDIR}/usr/lib >/dev/null .endif .for _tool in \ bin/cat \ bin/chmod \ bin/cp \ bin/csh \ bin/echo \ bin/expr \ bin/hostname \ bin/ln \ bin/ls \ bin/mkdir \ bin/mv \ bin/ps \ bin/realpath \ bin/rm \ bin/rmdir \ bin/sh \ bin/sleep \ ${_clang_tblgen} \ usr.bin/ar \ ${_binutils} \ ${_elftctools} \ ${_cc} \ ${_gcc_tools} \ ${_clang_libs} \ ${_clang} \ sbin/md5 \ sbin/sysctl \ gnu/usr.bin/diff \ usr.bin/awk \ usr.bin/basename \ usr.bin/bmake \ usr.bin/bzip2 \ usr.bin/cmp \ usr.bin/dirname \ usr.bin/env \ usr.bin/fetch \ usr.bin/find \ usr.bin/grep \ usr.bin/gzip \ usr.bin/id \ usr.bin/lex \ usr.bin/lorder \ usr.bin/mktemp \ usr.bin/mt \ usr.bin/patch \ usr.bin/sed \ usr.bin/sort \ usr.bin/tar \ usr.bin/touch \ usr.bin/tr \ usr.bin/true \ usr.bin/uniq \ usr.bin/unzip \ usr.bin/xargs \ usr.bin/xinstall \ usr.bin/xz \ usr.bin/yacc \ usr.sbin/chown ${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${NXBMAKE} DIRPRFX=${_tool}/ obj; \ ${NXBMAKE} DIRPRFX=${_tool}/ depend; \ ${NXBMAKE} DIRPRFX=${_tool}/ all; \ ${NXBMAKE} DIRPRFX=${_tool}/ DESTDIR=${NXBDESTDIR} install .endfor # # hierarchy - ensure that all the needed directories are present # hierarchy hier: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${HMAKE} distrib-dirs # # 
libraries - build all libraries, and install them under ${DESTDIR}. # # The list of libraries with dependents (${_prebuild_libs}) and their # interdependencies (__L) are built automatically by the # ${.CURDIR}/tools/make_libdeps.sh script. # libraries: .MAKE .PHONY ${_+_}cd ${.CURDIR}; \ ${MAKE} -f Makefile.inc1 _prereq_libs; \ ${MAKE} -f Makefile.inc1 _startup_libs; \ ${MAKE} -f Makefile.inc1 _prebuild_libs; \ ${MAKE} -f Makefile.inc1 _generic_libs # # static libgcc.a prerequisite for shared libc # _prereq_libs= gnu/lib/libssp/libssp_nonshared gnu/lib/libgcc lib/libcompiler_rt # These dependencies are not automatically generated: # # gnu/lib/csu, gnu/lib/libgcc, lib/csu and lib/libc must be built before # all shared libraries for ELF. # _startup_libs= gnu/lib/csu _startup_libs+= lib/csu _startup_libs+= gnu/lib/libgcc _startup_libs+= lib/libcompiler_rt _startup_libs+= lib/libc _startup_libs+= lib/libc_nonshared .if ${MK_LIBCPLUSPLUS} != "no" _startup_libs+= lib/libcxxrt .endif gnu/lib/libgcc__L: lib/libc__L gnu/lib/libgcc__L: lib/libc_nonshared__L .if ${MK_LIBCPLUSPLUS} != "no" lib/libcxxrt__L: gnu/lib/libgcc__L .endif _prebuild_libs= ${_kerberos5_lib_libasn1} \ ${_kerberos5_lib_libhdb} \ ${_kerberos5_lib_libheimbase} \ ${_kerberos5_lib_libheimntlm} \ ${_libsqlite3} \ ${_kerberos5_lib_libheimipcc} \ ${_kerberos5_lib_libhx509} ${_kerberos5_lib_libkrb5} \ ${_kerberos5_lib_libroken} \ ${_kerberos5_lib_libwind} \ lib/libbz2 ${_libcom_err} lib/libcrypt \ lib/libelf lib/libexpat \ lib/libfigpar \ ${_lib_libgssapi} \ lib/libkiconv lib/libkvm lib/liblzma lib/libmd lib/libnv \ ${_lib_libcapsicum} \ lib/ncurses/ncurses lib/ncurses/ncursesw \ lib/libopie lib/libpam ${_lib_libthr} \ ${_lib_libradius} lib/libsbuf lib/libtacplus \ lib/libgeom \ ${_cddl_lib_libumem} ${_cddl_lib_libnvpair} \ ${_cddl_lib_libuutil} \ ${_cddl_lib_libavl} \ ${_cddl_lib_libzfs_core} \ ${_cddl_lib_libctf} \ lib/libutil lib/libpjdlog ${_lib_libypclnt} lib/libz lib/msun \ ${_secure_lib_libcrypto} ${_lib_libldns} \ ${_secure_lib_libssh} ${_secure_lib_libssl} \ gnu/lib/libdialog .if ${MK_GNUCXX} != "no" _prebuild_libs+= gnu/lib/libstdc++ gnu/lib/libsupc++ gnu/lib/libstdc++__L: lib/msun__L gnu/lib/libsupc++__L: gnu/lib/libstdc++__L .endif .if ${MK_LIBCPLUSPLUS} != "no" _prebuild_libs+= lib/libc++ .endif lib/libgeom__L: lib/libexpat__L lib/libkvm__L: lib/libelf__L .if ${MK_LIBTHR} != "no" _lib_libthr= lib/libthr .endif .if ${MK_RADIUS_SUPPORT} != "no" _lib_libradius= lib/libradius .endif .if ${MK_OFED} != "no" _ofed_lib= contrib/ofed/usr.lib/ .endif .if ${MK_CASPER} != "no" _lib_libcapsicum=lib/libcapsicum .endif lib/libcapsicum__L: lib/libnv__L lib/libpjdlog__L: lib/libutil__L lib/liblzma__L: lib/libthr__L _generic_libs= ${_cddl_lib} gnu/lib ${_kerberos5_lib} lib ${_secure_lib} usr.bin/lex/lib ${_ofed_lib} .for _DIR in ${LOCAL_LIB_DIRS} .if exists(${.CURDIR}/${_DIR}/Makefile) _generic_libs+= ${_DIR} .endif .endfor lib/libopie__L lib/libtacplus__L: lib/libmd__L .if ${MK_CDDL} != "no" _cddl_lib_libumem= cddl/lib/libumem _cddl_lib_libnvpair= cddl/lib/libnvpair _cddl_lib_libavl= cddl/lib/libavl _cddl_lib_libuutil= cddl/lib/libuutil _cddl_lib_libzfs_core= cddl/lib/libzfs_core _cddl_lib_libctf= cddl/lib/libctf _cddl_lib= cddl/lib cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L cddl/lib/libzfs__L: lib/libgeom__L cddl/lib/libctf__L: lib/libz__L .endif # cddl/lib/libdtrace requires lib/libproc and lib/librtld_db; it's only built # on select architectures though (see cddl/lib/Makefile) .if ${MACHINE_CPUARCH} != "sparc64" _prebuild_libs+= 
lib/libproc lib/librtld_db .endif .if ${MK_CRYPT} != "no" .if ${MK_OPENSSL} != "no" _secure_lib_libcrypto= secure/lib/libcrypto _secure_lib_libssl= secure/lib/libssl lib/libradius__L secure/lib/libssl__L: secure/lib/libcrypto__L .if ${MK_LDNS} != "no" _lib_libldns= lib/libldns lib/libldns__L: secure/lib/libcrypto__L .endif .if ${MK_OPENSSH} != "no" _secure_lib_libssh= secure/lib/libssh secure/lib/libssh__L: lib/libz__L secure/lib/libcrypto__L lib/libcrypt__L .if ${MK_LDNS} != "no" secure/lib/libssh__L: lib/libldns__L .endif .if ${MK_KERBEROS_SUPPORT} != "no" secure/lib/libssh__L: lib/libgssapi__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libhx509__L kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libmd__L kerberos5/lib/libroken__L .endif .endif .endif _secure_lib= secure/lib .endif .if ${MK_KERBEROS} != "no" kerberos5/lib/libasn1__L: lib/libcom_err__L kerberos5/lib/libroken__L kerberos5/lib/libhdb__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ kerberos5/lib/libkrb5__L kerberos5/lib/libroken__L \ kerberos5/lib/libwind__L lib/libsqlite3__L kerberos5/lib/libheimntlm__L: secure/lib/libcrypto__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libhx509__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ secure/lib/libcrypto__L kerberos5/lib/libroken__L kerberos5/lib/libwind__L kerberos5/lib/libkrb5__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libcrypt__L secure/lib/libcrypto__L kerberos5/lib/libhx509__L \ kerberos5/lib/libroken__L kerberos5/lib/libwind__L \ kerberos5/lib/libheimbase__L kerberos5/lib/libheimipcc__L kerberos5/lib/libroken__L: lib/libcrypt__L kerberos5/lib/libwind__L: kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libheimbase__L: lib/libthr__L kerberos5/lib/libheimipcc__L: kerberos5/lib/libroken__L kerberos5/lib/libheimbase__L lib/libthr__L .endif lib/libsqlite3__L: lib/libthr__L .if ${MK_GSSAPI} != "no" _lib_libgssapi= lib/libgssapi .endif .if ${MK_KERBEROS} != "no" _kerberos5_lib= kerberos5/lib _kerberos5_lib_libasn1= kerberos5/lib/libasn1 _kerberos5_lib_libhdb= kerberos5/lib/libhdb _kerberos5_lib_libheimbase= kerberos5/lib/libheimbase _kerberos5_lib_libkrb5= kerberos5/lib/libkrb5 _kerberos5_lib_libhx509= kerberos5/lib/libhx509 _kerberos5_lib_libroken= kerberos5/lib/libroken _kerberos5_lib_libheimntlm= kerberos5/lib/libheimntlm _libsqlite3= lib/libsqlite3 _kerberos5_lib_libheimipcc= kerberos5/lib/libheimipcc _kerberos5_lib_libwind= kerberos5/lib/libwind _libcom_err= lib/libcom_err .endif .if ${MK_NIS} != "no" _lib_libypclnt= lib/libypclnt .endif .if ${MK_OPENSSL} == "no" lib/libradius__L: lib/libmd__L .endif lib/libproc__L: \ ${_cddl_lib_libctf:D${_cddl_lib_libctf}__L} lib/libelf__L lib/librtld_db__L lib/libutil__L .if ${MK_CXX} != "no" .if ${MK_LIBCPLUSPLUS} != "no" lib/libproc__L: lib/libcxxrt__L .else # This implies MK_GNUCXX != "no"; see lib/libproc lib/libproc__L: gnu/lib/libsupc++__L .endif .endif gnu/lib/libdialog__L: lib/msun__L lib/ncurses/ncursesw__L .for _lib in ${_prereq_libs} ${_lib}__PL: .PHONY .MAKE .if exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,depend,all,install)"; \ cd ${.CURDIR}/${_lib}; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ depend; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ install .endif .endfor .for _lib in ${_startup_libs} ${_prebuild_libs:Nlib/libpam} ${_generic_libs} ${_lib}__L: .PHONY .MAKE .if exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> 
${_lib} (obj,depend,all,install)"; \ cd ${.CURDIR}/${_lib}; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ depend; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ install .endif .endfor # libpam is special: we need to build static PAM modules before # static PAM library, and dynamic PAM library before dynamic PAM # modules. lib/libpam__L: .PHONY .MAKE ${_+_}@${ECHODIR} "===> lib/libpam (obj,depend,all,install)"; \ cd ${.CURDIR}/lib/libpam; \ ${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ obj; \ ${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ depend; \ ${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ \ -D_NO_LIBPAM_SO_YET all; \ ${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ \ -D_NO_LIBPAM_SO_YET install _prereq_libs: ${_prereq_libs:S/$/__PL/} _startup_libs: ${_startup_libs:S/$/__L/} _prebuild_libs: ${_prebuild_libs:S/$/__L/} _generic_libs: ${_generic_libs:S/$/__L/} # Enable SUBDIR_PARALLEL when not calling 'make all', unless called from # 'everything' with _PARALLEL_SUBDIR_OK set. This is because it is unlikely # that running 'make all' from the top-level, especially with a SUBDIR_OVERRIDE # or LOCAL_DIRS set, will have a reliable build if SUBDIRs are built in # parallel. This is safe for the world stage of buildworld though since it has # already built libraries in a proper order and installed includes into # WORLDTMP. Special handling is done for SUBDIR ordering for 'install*' to # avoid trashing a system if it crashes mid-install. .if !make(all) || defined(_PARALLEL_SUBDIR_OK) SUBDIR_PARALLEL= .endif .include .if make(check-old) || make(check-old-dirs) || \ make(check-old-files) || make(check-old-libs) || \ make(delete-old) || make(delete-old-dirs) || \ make(delete-old-files) || make(delete-old-libs) # # check for / delete old files section # .include "ObsoleteFiles.inc" OLD_LIBS_MESSAGE="Please be sure no application still uses those libraries, \ else you can not start such an application. Consult UPDATING for more \ information regarding how to cope with the removal/revision bump of a \ specific library." .if !defined(BATCH_DELETE_OLD_FILES) RM_I=-i .else RM_I=-v .endif delete-old-files: @echo ">>> Removing old files (only deletes safe to delete libs)" # Ask for every old file if the user really wants to remove it. # It's annoying, but better safe than sorry. # NB: We cannot pass the list of OLD_FILES as a parameter because the # argument list will get too long. Using .for/.endfor make "loops" will make # the Makefile parser segfault. @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done # Remove catpages without corresponding manpages. @exec 3<&0; \ find ${DESTDIR}/usr/share/man/cat* ! -type d | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! 
-e "$${manpage}" ]; then \ rm ${RM_I} $${catpage} <&3; \ fi; \ done @echo ">>> Old files removed" check-old-files: @echo ">>> Checking for old files" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done # Check for catpages without corresponding manpages. @find ${DESTDIR}/usr/share/man/cat* ! -type d | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! -e "$${manpage}" ]; then \ echo $${catpage}; \ fi; \ done delete-old-libs: @echo ">>> Removing old libraries" @echo "${OLD_LIBS_MESSAGE}" | fmt @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done @echo ">>> Old libraries removed" check-old-libs: @echo ">>> Checking for old libraries" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done delete-old-dirs: @echo ">>> Removing old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | sort -r | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ rmdir -v "${DESTDIR}/$${dir}" || true; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done @echo ">>> Old directories removed" check-old-dirs: @echo ">>> Checking for old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir}"; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done delete-old: delete-old-files delete-old-dirs @echo "To remove old libraries run '${MAKE} delete-old-libs'." check-old: check-old-files check-old-libs check-old-dirs @echo "To remove old files and directories run '${MAKE} delete-old'." @echo "To remove old libraries run '${MAKE} delete-old-libs'." .endif # # showconfig - show build configuration. 
# showconfig: @(${MAKE} -n -f ${.CURDIR}/sys/conf/kern.opts.mk -V dummy -dg1; \ ${MAKE} -n -f ${.CURDIR}/share/mk/src.opts.mk -V dummy -dg1) 2>&1 | grep ^MK_ | sort -u .if !empty(KRNLOBJDIR) && !empty(KERNCONF) DTBOUTPUTPATH= ${KRNLOBJDIR}/${KERNCONF}/ .if !defined(FDT_DTS_FILE) || empty(FDT_DTS_FILE) .if exists(${KERNCONFDIR}/${KERNCONF}) FDT_DTS_FILE!= awk 'BEGIN {FS="="} /^makeoptions[[:space:]]+FDT_DTS_FILE/ {print $$2}' \ '${KERNCONFDIR}/${KERNCONF}' ; echo .endif .endif .endif .if !defined(DTBOUTPUTPATH) || !exists(${DTBOUTPUTPATH}) DTBOUTPUTPATH= ${.CURDIR} .endif # # Build 'standalone' Device Tree Blob # builddtb: @PATH=${TMPPATH} MACHINE=${TARGET} \ ${.CURDIR}/sys/tools/fdt/make_dtb.sh ${.CURDIR}/sys \ "${FDT_DTS_FILE}" ${DTBOUTPUTPATH} ############### # cleanworld # In the following, the first 'rm' in a series will usually remove all # files and directories. If it does not, then there are probably some # files with file flags set, so this unsets them and tries the 'rm' a # second time. There are situations where this target will be cleaning # some directories via more than one method, but that duplication is # needed to correctly handle all the possible situations. Removing all # files without file flags set in the first 'rm' instance saves time, # because 'chflags' will need to operate on fewer files afterwards. # # It is expected that BW_CANONICALOBJDIR == the CANONICALOBJDIR as would be # created by bsd.obj.mk, except that we don't want to .include that file # in this makefile. # BW_CANONICALOBJDIR:=${OBJTREE}${.CURDIR} cleanworld: .PHONY .if exists(${BW_CANONICALOBJDIR}/) -rm -rf ${BW_CANONICALOBJDIR}/* -chflags -R 0 ${BW_CANONICALOBJDIR} rm -rf ${BW_CANONICALOBJDIR}/* .endif .if ${.CURDIR} == ${.OBJDIR} || ${.CURDIR}/obj == ${.OBJDIR} # To be safe in this case, fall back to a 'make cleandir' ${_+_}@cd ${.CURDIR}; ${MAKE} cleandir .endif .if defined(TARGET) && defined(TARGET_ARCH) .if ${TARGET} == ${MACHINE} && ${TARGET_ARCH} == ${MACHINE_ARCH} XDEV_CPUTYPE?=${CPUTYPE} .else XDEV_CPUTYPE?=${TARGET_CPUTYPE} .endif NOFUN=-DNO_FSCHG MK_HTML=no -DNO_LINT \ MK_MAN=no MK_NLS=no MK_PROFILE=no \ MK_KERBEROS=no MK_RESCUE=no MK_TESTS=no MK_WARNS=no \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ CPUTYPE=${XDEV_CPUTYPE} XDDIR=${TARGET_ARCH}-freebsd XDTP?=/usr/${XDDIR} .if ${XDTP:N/*} .error XDTP variable should be an absolute path .endif CDBENV=MAKEOBJDIRPREFIX=${MAKEOBJDIRPREFIX}/${XDDIR} \ INSTALL="sh ${.CURDIR}/tools/install.sh" CDENV= ${CDBENV} \ TOOLS_PREFIX=${XDTP} CD2CFLAGS=-isystem ${XDDESTDIR}/usr/include -L${XDDESTDIR}/usr/lib \ --sysroot=${XDDESTDIR}/ -B${XDDESTDIR}/usr/libexec \ -B${XDDESTDIR}/usr/bin -B${XDDESTDIR}/usr/lib CD2ENV=${CDENV} CC="${CC} ${CD2CFLAGS}" CXX="${CXX} ${CD2CFLAGS}" \ CPP="${CPP} ${CD2CFLAGS}" \ MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} CDTMP= ${MAKEOBJDIRPREFIX}/${XDDIR}/${.CURDIR}/tmp CDMAKE=${CDENV} PATH=${CDTMP}/usr/bin:${PATH} ${MAKE} ${NOFUN} CD2MAKE=${CD2ENV} PATH=${CDTMP}/usr/bin:${XDDESTDIR}/usr/bin:${PATH} ${MAKE} ${NOFUN} XDDESTDIR=${DESTDIR}/${XDTP} .if !defined(OSREL) OSREL!= uname -r | sed -e 's/[-(].*//' .endif .ORDER: xdev-build xdev-install xdev-links xdev: xdev-build xdev-install .ORDER: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools xdev-build: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools _xb-worldtmp: .PHONY mkdir -p ${CDTMP}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${CDTMP}/usr >/dev/null _xb-bootstrap-tools: .PHONY .for _tool in \ ${_clang_tblgen} \ ${_gperf} 
${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ obj; \ ${CDMAKE} DIRPRFX=${_tool}/ depend; \ ${CDMAKE} DIRPRFX=${_tool}/ all; \ ${CDMAKE} DIRPRFX=${_tool}/ DESTDIR=${CDTMP} install .endfor _xb-build-tools: .PHONY ${_+_}@cd ${.CURDIR}; \ ${CDBENV} ${MAKE} -f Makefile.inc1 ${NOFUN} build-tools _xb-cross-tools: .PHONY .for _tool in \ ${_binutils} \ ${_elftctools} \ usr.bin/ar \ ${_clang_libs} \ ${_clang} \ ${_cc} ${_+_}@${ECHODIR} "===> xdev ${_tool} (obj,depend,all)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ obj; \ ${CDMAKE} DIRPRFX=${_tool}/ depend; \ ${CDMAKE} DIRPRFX=${_tool}/ all .endfor _xi-mtree: .PHONY ${_+_}@${ECHODIR} "mtree populating ${XDDESTDIR}" mkdir -p ${XDDESTDIR} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${XDDESTDIR} >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${XDDESTDIR}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${XDDESTDIR}/usr/include >/dev/null .if ${MK_LIB32} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \ -p ${XDDESTDIR}/usr >/dev/null .endif .if ${MK_TESTS} != "no" mkdir -p ${XDDESTDIR}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${XDDESTDIR}${TESTSBASE} >/dev/null .endif .ORDER: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries xdev-install: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries _xi-cross-tools: .PHONY @echo "_xi-cross-tools" .for _tool in \ ${_binutils} \ ${_elftctools} \ usr.bin/ar \ ${_clang_libs} \ ${_clang} \ ${_cc} ${_+_}@${ECHODIR} "===> xdev ${_tool} (install)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ install DESTDIR=${XDDESTDIR} .endfor _xi-includes: .PHONY ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 includes \ DESTDIR=${XDDESTDIR} _xi-libraries: .PHONY ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 libraries \ DESTDIR=${XDDESTDIR} xdev-links: .PHONY ${_+_}cd ${XDDESTDIR}/usr/bin; \ mkdir -p ../../../../usr/bin; \ for i in *; do \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}-$$i; \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}${OSREL}-$$i; \ done .else xdev xdev-build xdev-install xdev-links: @echo "*** Error: Both TARGET and TARGET_ARCH must be defined for \"${.TARGET}\" target" .endif Index: projects/release-pkg/include/paths.h =================================================================== --- projects/release-pkg/include/paths.h (revision 293224) +++ projects/release-pkg/include/paths.h (revision 293225) @@ -1,144 +1,144 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)paths.h 8.1 (Berkeley) 6/2/93 * $FreeBSD$ */ #ifndef _PATHS_H_ #define _PATHS_H_ #include /* Default search path. */ -#define _PATH_DEFPATH "/usr/bin:/bin" +#define _PATH_DEFPATH "/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin" /* All standard utilities path. */ #define _PATH_STDPATH "/usr/bin:/bin:/usr/sbin:/sbin" /* Locate system binaries. */ #define _PATH_SYSPATH "/sbin:/usr/sbin" #define _PATH_BSHELL "/bin/sh" #define _PATH_CAPABILITY "/etc/capability" #define _PATH_CAPABILITY_DB "/etc/capability.db" #define _PATH_CONSOLE "/dev/console" #define _PATH_CP "/bin/cp" #define _PATH_CSHELL "/bin/csh" #define _PATH_CSMAPPER "/usr/share/i18n/csmapper" #define _PATH_DEFTAPE "/dev/sa0" #define _PATH_DEVGPIOC "/dev/gpioc" #define _PATH_DEVNULL "/dev/null" #define _PATH_DEVZERO "/dev/zero" #define _PATH_DRUM "/dev/drum" #define _PATH_ESDB "/usr/share/i18n/esdb" #define _PATH_ETC "/etc" #define _PATH_FTPUSERS "/etc/ftpusers" #define _PATH_FWMEM "/dev/fwmem" #define _PATH_GBDE "/sbin/gbde" #define _PATH_GELI "/sbin/geli" #define _PATH_HALT "/sbin/halt" #ifdef COMPAT_32BIT #define _PATH_I18NMODULE "/usr/lib32/i18n" #else #define _PATH_I18NMODULE "/usr/lib/i18n" #endif #define _PATH_IFCONFIG "/sbin/ifconfig" #define _PATH_KMEM "/dev/kmem" #define _PATH_LIBMAP_CONF "/etc/libmap.conf" #define _PATH_LOCALE "/usr/share/locale" #define _PATH_LOGIN "/usr/bin/login" #define _PATH_MAILDIR "/var/mail" #define _PATH_MAN "/usr/share/man" #define _PATH_MDCONFIG "/sbin/mdconfig" #define _PATH_MEM "/dev/mem" #define _PATH_MKSNAP_FFS "/sbin/mksnap_ffs" #define _PATH_MOUNT "/sbin/mount" #define _PATH_NEWFS "/sbin/newfs" #define _PATH_NOLOGIN "/var/run/nologin" #define _PATH_RCP "/bin/rcp" #define _PATH_REBOOT "/sbin/reboot" #define _PATH_RLOGIN "/usr/bin/rlogin" #define _PATH_RM "/bin/rm" #define _PATH_RSH "/usr/bin/rsh" #define _PATH_SENDMAIL "/usr/sbin/sendmail" #define _PATH_SHELLS "/etc/shells" #define _PATH_TTY "/dev/tty" #define _PATH_UNIX "don't use _PATH_UNIX" #define _PATH_UFSSUSPEND "/dev/ufssuspend" #define _PATH_VI "/usr/bin/vi" #define _PATH_WALL "/usr/bin/wall" /* Provide trailing slash, since mostly used for building pathnames. */ #define _PATH_DEV "/dev/" #define _PATH_TMP "/tmp/" #define _PATH_VARDB "/var/db/" #define _PATH_VARRUN "/var/run/" #define _PATH_VARTMP "/var/tmp/" #define _PATH_YP "/var/yp/" #define _PATH_UUCPLOCK "/var/spool/lock/" /* How to get the correct name of the kernel. 
*/ __BEGIN_DECLS const char *getbootfile(void); __END_DECLS #ifdef RESCUE #undef _PATH_DEFPATH -#define _PATH_DEFPATH "/rescue:/usr/bin:/bin" +#define _PATH_DEFPATH "/rescue:/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin" #undef _PATH_STDPATH #define _PATH_STDPATH "/rescue:/usr/bin:/bin:/usr/sbin:/sbin" #undef _PATH_SYSPATH #define _PATH_SYSPATH "/rescue:/sbin:/usr/sbin" #undef _PATH_BSHELL #define _PATH_BSHELL "/rescue/sh" #undef _PATH_CP #define _PATH_CP "/rescue/cp" #undef _PATH_CSHELL #define _PATH_CSHELL "/rescue/csh" #undef _PATH_HALT #define _PATH_HALT "/rescue/halt" #undef _PATH_IFCONFIG #define _PATH_IFCONFIG "/rescue/ifconfig" #undef _PATH_MDCONFIG #define _PATH_MDCONFIG "/rescue/mdconfig" #undef _PATH_MOUNT #define _PATH_MOUNT "/rescue/mount" #undef _PATH_NEWFS #define _PATH_NEWFS "/rescue/newfs" #undef _PATH_RCP #define _PATH_RCP "/rescue/rcp" #undef _PATH_REBOOT #define _PATH_REBOOT "/rescue/reboot" #undef _PATH_RM #define _PATH_RM "/rescue/rm" #undef _PATH_VI #define _PATH_VI "/rescue/vi" #undef _PATH_WALL #define _PATH_WALL "/rescue/wall" #endif /* RESCUE */ #endif /* !_PATHS_H_ */ Index: projects/release-pkg/include =================================================================== --- projects/release-pkg/include (revision 293224) +++ projects/release-pkg/include (revision 293225) Property changes on: projects/release-pkg/include ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/include:r289091-289158,289371-289384,293171-293224 Index: projects/release-pkg/lib/libc/gen/exec.3 =================================================================== --- projects/release-pkg/lib/libc/gen/exec.3 (revision 293224) +++ projects/release-pkg/lib/libc/gen/exec.3 (revision 293225) @@ -1,321 +1,321 @@ .\" Copyright (c) 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 4. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" @(#)exec.3 8.3 (Berkeley) 1/24/94 .\" $FreeBSD$ .\" -.Dd December 12, 2015 +.Dd January 5, 2016 .Dt EXEC 3 .Os .Sh NAME .Nm execl , .Nm execlp , .Nm execle , .Nm exect , .Nm execv , .Nm execvp , .Nm execvP .Nd execute a file .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In unistd.h .Vt extern char **environ ; .Ft int .Fn execl "const char *path" "const char *arg" ... /* "(char *)0" */ .Ft int .Fn execlp "const char *file" "const char *arg" ... /* "(char *)0" */ .Ft int .Fo execle .Fa "const char *path" "const char *arg" ... .Fa /* .Bk -words .Fa "(char *)0" "char *const envp[]" */ .Ek .Fc .Ft int .Fn exect "const char *path" "char *const argv[]" "char *const envp[]" .Ft int .Fn execv "const char *path" "char *const argv[]" .Ft int .Fn execvp "const char *file" "char *const argv[]" .Ft int .Fn execvP "const char *file" "const char *search_path" "char *const argv[]" .Sh DESCRIPTION The .Nm exec family of functions replaces the current process image with a new process image. The functions described in this manual page are front-ends for the function .Xr execve 2 . (See the manual page for .Xr execve 2 for detailed information about the replacement of the current process.) .Pp The initial argument for these functions is the pathname of a file which is to be executed. .Pp The .Fa "const char *arg" and subsequent ellipses in the .Fn execl , .Fn execlp , and .Fn execle functions can be thought of as .Em arg0 , .Em arg1 , \&..., .Em argn . Together they describe a list of one or more pointers to null-terminated strings that represent the argument list available to the executed program. The first argument, by convention, should point to the file name associated with the file being executed. The list of arguments .Em must be terminated by a .Dv NULL pointer. .Pp The .Fn exect , .Fn execv , .Fn execvp , and .Fn execvP functions provide an array of pointers to null-terminated strings that represent the argument list available to the new program. The first argument, by convention, should point to the file name associated with the file being executed. The array of pointers .Sy must be terminated by a .Dv NULL pointer. .Pp The .Fn execle and .Fn exect functions also specify the environment of the executed process by following the .Dv NULL pointer that terminates the list of arguments in the argument list or the pointer to the argv array with an additional argument. This additional argument is an array of pointers to null-terminated strings and .Em must be terminated by a .Dv NULL pointer. The other functions take the environment for the new process image from the external variable .Va environ in the current process. .Pp Some of these functions have special semantics. .Pp The functions .Fn execlp , .Fn execvp , and .Fn execvP will duplicate the actions of the shell in searching for an executable file if the specified file name does not contain a slash .Dq Li / character. For .Fn execlp and .Fn execvp , search path is the path specified in the environment by .Dq Ev PATH variable. If this variable is not specified, the default path is set according to the .Dv _PATH_DEFPATH definition in .In paths.h , which is set to -.Dq Ev /usr/bin:/bin . +.Dq Ev /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin . For .Fn execvP , the search path is specified as an argument to the function. In addition, certain errors are treated specially. 
.Pp If an error is ambiguous (for simplicity, we shall consider all errors except .Er ENOEXEC as being ambiguous here, although only the critical error .Er EACCES is really ambiguous), then these functions will act as if they stat the file to determine whether the file exists and has suitable execute permissions. If it does, they will return immediately with the global variable .Va errno restored to the value set by .Fn execve . Otherwise, the search will be continued. If the search completes without performing a successful .Fn execve or terminating due to an error, these functions will return with the global variable .Va errno set to .Er EACCES or .Er ENOENT according to whether at least one file with suitable execute permissions was found. .Pp If the header of a file is not recognized (the attempted .Fn execve returned .Er ENOEXEC ) , these functions will execute the shell with the path of the file as its first argument. (If this attempt fails, no further searching is done.) .Pp The function .Fn exect executes a file with the program tracing facilities enabled (see .Xr ptrace 2 ) . .Sh RETURN VALUES If any of the .Fn exec functions returns, an error will have occurred. The return value is \-1, and the global variable .Va errno will be set to indicate the error. .Sh FILES .Bl -tag -width /bin/sh -compact .It Pa /bin/sh The shell. .El .Sh COMPATIBILITY Historically, the default path for the .Fn execlp and .Fn execvp functions was .Dq Pa :/bin:/usr/bin . This was changed to remove the current directory to enhance system security. .Pp The behavior of .Fn execlp and .Fn execvp when errors occur while attempting to execute the file is not quite historic practice, and has not traditionally been documented and is not specified by the .Tn POSIX standard. .Pp Traditionally, the functions .Fn execlp and .Fn execvp ignored all errors except for the ones described above and .Er ETXTBSY , upon which they retried after sleeping for several seconds, and .Er ENOMEM and .Er E2BIG , upon which they returned. They now return for .Er ETXTBSY , and determine existence and executability more carefully. In particular, .Er EACCES for inaccessible directories in the path prefix is no longer confused with .Er EACCES for files with unsuitable execute permissions. In .Bx 4.4 , they returned upon all errors except .Er EACCES , .Er ENOENT , .Er ENOEXEC and .Er ETXTBSY . This was inferior to the traditional error handling, since it breaks the ignoring of errors for path prefixes and only improves the handling of the unusual ambiguous error .Er EFAULT and the unusual error .Er EIO . The behaviour was changed to match the behaviour of .Xr sh 1 . .Sh ERRORS The .Fn execl , .Fn execle , .Fn execlp , .Fn execvp and .Fn execvP functions may fail and set .Va errno for any of the errors specified for the library functions .Xr execve 2 and .Xr malloc 3 . .Pp The .Fn exect and .Fn execv functions may fail and set .Va errno for any of the errors specified for the library function .Xr execve 2 . .Sh SEE ALSO .Xr sh 1 , .Xr execve 2 , .Xr fork 2 , .Xr ktrace 2 , .Xr ptrace 2 , .Xr environ 7 .Sh STANDARDS The .Fn execl , .Fn execv , .Fn execle , .Fn execlp and .Fn execvp functions conform to .St -p1003.1-88 . The .Fn execvP function first appeared in .Fx 5.2 . 
Index: projects/release-pkg/lib/libc/gen/posix_spawn.3 =================================================================== --- projects/release-pkg/lib/libc/gen/posix_spawn.3 (revision 293224) +++ projects/release-pkg/lib/libc/gen/posix_spawn.3 (revision 293225) @@ -1,460 +1,460 @@ .\" Copyright (c) 2008 Ed Schouten .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" Portions of this text are reprinted and reproduced in electronic form .\" from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- .\" Portable Operating System Interface (POSIX), The Open Group Base .\" Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of .\" Electrical and Electronics Engineers, Inc and The Open Group. In the .\" event of any discrepancy between this version and the original IEEE and .\" The Open Group Standard, the original IEEE and The Open Group Standard is .\" the referee document. The original Standard can be obtained online at .\" http://www.opengroup.org/unix/online.html. .\" .\" $FreeBSD$ .\" -.Dd June 17, 2011 +.Dd January 5, 2016 .Dt POSIX_SPAWN 3 .Os .Sh NAME .Nm posix_spawn , .Nm posix_spawnp .Nd "spawn a process" .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In spawn.h .Ft int .Fn posix_spawn "pid_t *restrict pid" "const char *restrict path" "const posix_spawn_file_actions_t *file_actions" "const posix_spawnattr_t *restrict attrp" "char *const argv[restrict]" "char *const envp[restrict]" .Ft int .Fn posix_spawnp "pid_t *restrict pid" "const char *restrict file" "const posix_spawn_file_actions_t *file_actions" "const posix_spawnattr_t *restrict attrp" "char *const argv[restrict]" "char *const envp[restrict]" .Sh DESCRIPTION The .Fn posix_spawn and .Fn posix_spawnp functions create a new process (child process) from the specified process image. The new process image is constructed from a regular executable file called the new process image file. .Pp When a C program is executed as the result of this call, it is entered as a C-language function call as follows: .Bd -literal -offset indent int main(int argc, char *argv[]); .Ed .Pp where .Fa argc is the argument count and .Fa argv is an array of character pointers to the arguments themselves. 
In addition, the variable: .Bd -literal -offset indent extern char **environ; .Ed .Pp points to an array of character pointers to the environment strings. .Pp The argument .Fa argv is an array of character pointers to null-terminated strings. The last member of this array is a null pointer and is not counted in .Fa argc . These strings constitute the argument list available to the new process image. The value in .Fa argv Ns [0] should point to a filename that is associated with the process image being started by the .Fn posix_spawn or .Fn posix_spawnp function. .Pp The argument .Fa envp is an array of character pointers to null-terminated strings. These strings constitute the environment for the new process image. The environment array is terminated by a null pointer. .Pp The .Fa path argument to .Fn posix_spawn is a pathname that identifies the new process image file to execute. .Pp The .Fa file parameter to .Fn posix_spawnp is used to construct a pathname that identifies the new process image file. If the file parameter contains a slash character, the file parameter is used as the pathname for the new process image file. Otherwise, the path prefix for this file is obtained by a search of the directories passed as the environment variable .Dq Ev PATH . If this variable is not specified, the default path is set according to the .Dv _PATH_DEFPATH definition in .In paths.h , which is set to -.Dq Ev /usr/bin:/bin . +.Dq Ev /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin . .Pp If .Fa file_actions is a null pointer, then file descriptors open in the calling process remain open in the child process, except for those whose close-on-exec flag .Dv FD_CLOEXEC is set (see .Fn fcntl ) . For those file descriptors that remain open, all attributes of the corresponding open file descriptions, including file locks (see .Fn fcntl ) , remain unchanged. .Pp If .Fa file_actions is not NULL, then the file descriptors open in the child process are those open in the calling process as modified by the spawn file actions object pointed to by .Fa file_actions and the .Dv FD_CLOEXEC flag of each remaining open file descriptor after the spawn file actions have been processed. The effective order of processing the spawn file actions are: .Bl -enum .It The set of open file descriptors for the child process initially are the same set as is open for the calling process. All attributes of the corresponding open file descriptions, including file locks (see .Fn fcntl ) , remain unchanged. .It The signal mask, signal default actions, and the effective user and group IDs for the child process are changed as specified in the attributes object referenced by .Fa attrp . .It The file actions specified by the spawn file actions object are performed in the order in which they were added to the spawn file actions object. .It Any file descriptor that has its .Dv FD_CLOEXEC flag set (see .Fn fcntl ) is closed. .El .Pp The .Vt posix_spawnattr_t spawn attributes object type is defined in .In spawn.h . It contains the attributes defined below. .Pp If the .Dv POSIX_SPAWN_SETPGROUP flag is set in the spawn-flags attribute of the object referenced by .Fa attrp , and the spawn-pgroup attribute of the same object is non-zero, then the child's process group is as specified in the spawn-pgroup attribute of the object referenced by .Fa attrp . 
.Pp As a special case, if the .Dv POSIX_SPAWN_SETPGROUP flag is set in the spawn-flags attribute of the object referenced by .Fa attrp , and the spawn-pgroup attribute of the same object is set to zero, then the child is in a new process group with a process group ID equal to its process ID. .Pp If the .Dv POSIX_SPAWN_SETPGROUP flag is not set in the spawn-flags attribute of the object referenced by .Fa attrp , the new child process inherits the parent's process group. .Pp If the .Dv POSIX_SPAWN_SETSCHEDPARAM flag is set in the spawn-flags attribute of the object referenced by .Fa attrp , but .Dv POSIX_SPAWN_SETSCHEDULER is not set, the new process image initially has the scheduling policy of the calling process with the scheduling parameters specified in the spawn-schedparam attribute of the object referenced by .Fa attrp . .Pp If the .Dv POSIX_SPAWN_SETSCHEDULER flag is set in the spawn-flags attribute of the object referenced by .Fa attrp (regardless of the setting of the .Dv POSIX_SPAWN_SETSCHEDPARAM flag), the new process image initially has the scheduling policy specified in the spawn-schedpolicy attribute of the object referenced by .Fa attrp and the scheduling parameters specified in the spawn-schedparam attribute of the same object. .Pp The .Dv POSIX_SPAWN_RESETIDS flag in the spawn-flags attribute of the object referenced by .Fa attrp governs the effective user ID of the child process. If this flag is not set, the child process inherits the parent process' effective user ID. If this flag is set, the child process' effective user ID is reset to the parent's real user ID. In either case, if the set-user-ID mode bit of the new process image file is set, the effective user ID of the child process becomes that file's owner ID before the new process image begins execution. .Pp The .Dv POSIX_SPAWN_RESETIDS flag in the spawn-flags attribute of the object referenced by .Fa attrp also governs the effective group ID of the child process. If this flag is not set, the child process inherits the parent process' effective group ID. If this flag is set, the child process' effective group ID is reset to the parent's real group ID. In either case, if the set-group-ID mode bit of the new process image file is set, the effective group ID of the child process becomes that file's group ID before the new process image begins execution. .Pp If the .Dv POSIX_SPAWN_SETSIGMASK flag is set in the spawn-flags attribute of the object referenced by .Fa attrp , the child process initially has the signal mask specified in the spawn-sigmask attribute of the object referenced by .Fa attrp . .Pp If the .Dv POSIX_SPAWN_SETSIGDEF flag is set in the spawn-flags attribute of the object referenced by .Fa attrp , the signals specified in the spawn-sigdefault attribute of the same object are set to their default actions in the child process. Signals set to the default action in the parent process are set to the default action in the child process. .Pp Signals set to be caught by the calling process are set to the default action in the child process. .Pp Signals set to be ignored by the calling process image are set to be ignored by the child process, unless otherwise specified by the .Dv POSIX_SPAWN_SETSIGDEF flag being set in the spawn-flags attribute of the object referenced by .Fa attrp and the signals being indicated in the spawn-sigdefault attribute of the object referenced by .Fa attrp . .Pp If the value of the .Fa attrp pointer is NULL, then the default values are used.
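.Pp
For illustration, a minimal sketch (with error handling kept to the bare
minimum) that spawns
.Dq Li ls
with default attributes, passing null
.Fa file_actions
and
.Fa attrp
pointers, might look like:
.Bd -literal -offset indent
#include <spawn.h>
#include <stdio.h>
#include <string.h>
#include <sys/wait.h>

extern char **environ;

int
main(void)
{
	char *args[] = { "ls", "-l", NULL };
	pid_t pid;
	int error, status;

	/* Null file_actions and attrp request the default behaviour. */
	error = posix_spawnp(&pid, "ls", NULL, NULL, args, environ);
	if (error != 0) {
		fprintf(stderr, "posix_spawnp: %s\n", strerror(error));
		return (1);
	}
	if (waitpid(pid, &status, 0) < 0) {
		perror("waitpid");
		return (1);
	}
	return (0);
}
.Ed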
.Pp All process attributes, other than those influenced by the attributes set in the object referenced by .Fa attrp as specified above or by the file descriptor manipulations specified in .Fa file_actions , appear in the new process image as though .Fn vfork had been called to create a child process and then .Fn execve had been called by the child process to execute the new process image. .Pp The implementation uses vfork(), thus the fork handlers are not run when .Fn posix_spawn or .Fn posix_spawnp is called. .Sh RETURN VALUES Upon successful completion, .Fn posix_spawn and .Fn posix_spawnp return the process ID of the child process to the parent process, in the variable pointed to by a non-NULL .Fa pid argument, and return zero as the function return value. Otherwise, no child process is created, no value is stored into the variable pointed to by .Fa pid , and an error number is returned as the function return value to indicate the error. If the .Fa pid argument is a null pointer, the process ID of the child is not returned to the caller. .Sh ERRORS .Bl -enum .It If .Fn posix_spawn and .Fn posix_spawnp fail for any of the reasons that would cause .Fn vfork or one of the .Nm exec to fail, an error value is returned as described by .Fn vfork and .Nm exec , respectively (or, if the error occurs after the calling process successfully returns, the child process exits with exit status 127). .It If .Nm POSIX_SPAWN_SETPGROUP is set in the spawn-flags attribute of the object referenced by attrp, and .Fn posix_spawn or .Fn posix_spawnp fails while changing the child's process group, an error value is returned as described by .Fn setpgid (or, if the error occurs after the calling process successfully returns, the child process exits with exit status 127). .It If .Nm POSIX_SPAWN_SETSCHEDPARAM is set and .Nm POSIX_SPAWN_SETSCHEDULER is not set in the spawn-flags attribute of the object referenced by attrp, then if .Fn posix_spawn or .Fn posix_spawnp fails for any of the reasons that would cause .Fn sched_setparam to fail, an error value is returned as described by .Fn sched_setparam (or, if the error occurs after the calling process successfully returns, the child process exits with exit status 127). .It If .Nm POSIX_SPAWN_SETSCHEDULER is set in the spawn-flags attribute of the object referenced by attrp, and if .Fn posix_spawn or .Fn posix_spawnp fails for any of the reasons that would cause .Fn sched_setscheduler to fail, an error value is returned as described by .Fn sched_setscheduler (or, if the error occurs after the calling process successfully returns, the child process exits with exit status 127). .It If the .Fa file_actions argument is not NULL, and specifies any dup2 or open actions to be performed, and if .Fn posix_spawn or .Fn posix_spawnp fails for any of the reasons that would cause .Fn dup2 or .Fn open to fail, an error value is returned as described by .Fn dup2 and .Fn open , respectively (or, if the error occurs after the calling process successfully returns, the child process exits with exit status 127). An open file action may, by itself, result in any of the errors described by .Fn dup2 , in addition to those described by .Fn open . This implementation ignores any errors from .Fn close , including trying to close a descriptor that is not open. 
.El .Sh SEE ALSO .Xr close 2 , .Xr dup2 2 , .Xr execve 2 , .Xr fcntl 2 , .Xr open 2 , .Xr sched_setparam 2 , .Xr sched_setscheduler 2 , .Xr setpgid 2 , .Xr vfork 2 , .Xr posix_spawn_file_actions_addclose 3 , .Xr posix_spawn_file_actions_adddup2 3 , .Xr posix_spawn_file_actions_addopen 3 , .Xr posix_spawn_file_actions_destroy 3 , .Xr posix_spawn_file_actions_init 3 , .Xr posix_spawnattr_destroy 3 , .Xr posix_spawnattr_getflags 3 , .Xr posix_spawnattr_getpgroup 3 , .Xr posix_spawnattr_getschedparam 3 , .Xr posix_spawnattr_getschedpolicy 3 , .Xr posix_spawnattr_getsigdefault 3 , .Xr posix_spawnattr_getsigmask 3 , .Xr posix_spawnattr_init 3 , .Xr posix_spawnattr_setflags 3 , .Xr posix_spawnattr_setpgroup 3 , .Xr posix_spawnattr_setschedparam 3 , .Xr posix_spawnattr_setschedpolicy 3 , .Xr posix_spawnattr_setsigdefault 3 , .Xr posix_spawnattr_setsigmask 3 .Sh STANDARDS The .Fn posix_spawn and .Fn posix_spawnp functions conform to .St -p1003.1-2001 , except that they ignore all errors from .Fn close . A future update of the Standard is expected to require that these functions not fail because a file descriptor to be closed (via .Fn posix_spawn_file_actions_addclose ) is not open. .Sh HISTORY The .Fn posix_spawn and .Fn posix_spawnp functions first appeared in .Fx 8.0 . .Sh AUTHORS .An \&Ed Schouten Aq Mt ed@FreeBSD.org Index: projects/release-pkg/lib/libc =================================================================== --- projects/release-pkg/lib/libc (revision 293224) +++ projects/release-pkg/lib/libc (revision 293225) Property changes on: projects/release-pkg/lib/libc ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/lib/libc:r289119-289158,289371-289384,293171-293224 Index: projects/release-pkg/libexec/rtld-elf/rtld.c =================================================================== --- projects/release-pkg/libexec/rtld-elf/rtld.c (revision 293224) +++ projects/release-pkg/libexec/rtld-elf/rtld.c (revision 293225) @@ -1,5117 +1,5119 @@ /*- * Copyright 1996, 1997, 1998, 1999, 2000 John D. Polstra. * Copyright 2003 Alexander Kabaev . * Copyright 2009-2012 Konstantin Belousov . * Copyright 2012 John Marino . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * Dynamic linker for ELF. * * John Polstra . 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "debug.h" #include "rtld.h" #include "libmap.h" #include "paths.h" #include "rtld_tls.h" #include "rtld_printf.h" #include "notes.h" /* Types. */ typedef void (*func_ptr_type)(); typedef void * (*path_enum_proc) (const char *path, size_t len, void *arg); /* * Function declarations. */ static const char *basename(const char *); static void digest_dynamic1(Obj_Entry *, int, const Elf_Dyn **, const Elf_Dyn **, const Elf_Dyn **); static void digest_dynamic2(Obj_Entry *, const Elf_Dyn *, const Elf_Dyn *, const Elf_Dyn *); static void digest_dynamic(Obj_Entry *, int); static Obj_Entry *digest_phdr(const Elf_Phdr *, int, caddr_t, const char *); static Obj_Entry *dlcheck(void *); static Obj_Entry *dlopen_object(const char *name, int fd, Obj_Entry *refobj, int lo_flags, int mode, RtldLockState *lockstate); static Obj_Entry *do_load_object(int, const char *, char *, struct stat *, int); static int do_search_info(const Obj_Entry *obj, int, struct dl_serinfo *); static bool donelist_check(DoneList *, const Obj_Entry *); static void errmsg_restore(char *); static char *errmsg_save(void); static void *fill_search_info(const char *, size_t, void *); static char *find_library(const char *, const Obj_Entry *, int *); static const char *gethints(bool); static void init_dag(Obj_Entry *); static void init_pagesizes(Elf_Auxinfo **aux_info); static void init_rtld(caddr_t, Elf_Auxinfo **); static void initlist_add_neededs(Needed_Entry *, Objlist *); static void initlist_add_objects(Obj_Entry *, Obj_Entry **, Objlist *); static void linkmap_add(Obj_Entry *); static void linkmap_delete(Obj_Entry *); static void load_filtees(Obj_Entry *, int flags, RtldLockState *); static void unload_filtees(Obj_Entry *); static int load_needed_objects(Obj_Entry *, int); static int load_preload_objects(void); static Obj_Entry *load_object(const char *, int fd, const Obj_Entry *, int); static void map_stacks_exec(RtldLockState *); static Obj_Entry *obj_from_addr(const void *); static void objlist_call_fini(Objlist *, Obj_Entry *, RtldLockState *); static void objlist_call_init(Objlist *, RtldLockState *); static void objlist_clear(Objlist *); static Objlist_Entry *objlist_find(Objlist *, const Obj_Entry *); static void objlist_init(Objlist *); static void objlist_push_head(Objlist *, Obj_Entry *); static void objlist_push_tail(Objlist *, Obj_Entry *); static void objlist_put_after(Objlist *, Obj_Entry *, Obj_Entry *); static void objlist_remove(Objlist *, Obj_Entry *); static int parse_libdir(const char *); static void *path_enumerate(const char *, path_enum_proc, void *); static int relocate_object_dag(Obj_Entry *root, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate); static int relocate_object(Obj_Entry *obj, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate); static int relocate_objects(Obj_Entry *, bool, Obj_Entry *, int, RtldLockState *); static int resolve_objects_ifunc(Obj_Entry *first, bool bind_now, int flags, RtldLockState *lockstate); static int rtld_dirname(const char *, char *); static int rtld_dirname_abs(const char *, char *); static void *rtld_dlopen(const char *name, int fd, int mode); static void rtld_exit(void); static char *search_library_path(const char *, const char *); static char *search_library_pathfds(const char *, const char *, int *); static const void **get_program_var_addr(const char *, 
RtldLockState *); static void set_program_var(const char *, const void *); static int symlook_default(SymLook *, const Obj_Entry *refobj); static int symlook_global(SymLook *, DoneList *); static void symlook_init_from_req(SymLook *, const SymLook *); static int symlook_list(SymLook *, const Objlist *, DoneList *); static int symlook_needed(SymLook *, const Needed_Entry *, DoneList *); static int symlook_obj1_sysv(SymLook *, const Obj_Entry *); static int symlook_obj1_gnu(SymLook *, const Obj_Entry *); static void trace_loaded_objects(Obj_Entry *); static void unlink_object(Obj_Entry *); static void unload_object(Obj_Entry *); static void unref_dag(Obj_Entry *); static void ref_dag(Obj_Entry *); static char *origin_subst_one(Obj_Entry *, char *, const char *, const char *, bool); static char *origin_subst(Obj_Entry *, char *); static bool obj_resolve_origin(Obj_Entry *obj); static void preinit_main(void); static int rtld_verify_versions(const Objlist *); static int rtld_verify_object_versions(Obj_Entry *); static void object_add_name(Obj_Entry *, const char *); static int object_match_name(const Obj_Entry *, const char *); static void ld_utrace_log(int, void *, void *, size_t, int, const char *); static void rtld_fill_dl_phdr_info(const Obj_Entry *obj, struct dl_phdr_info *phdr_info); static uint32_t gnu_hash(const char *); static bool matched_symbol(SymLook *, const Obj_Entry *, Sym_Match_Result *, const unsigned long); void r_debug_state(struct r_debug *, struct link_map *) __noinline __exported; void _r_debug_postinit(struct link_map *) __noinline __exported; int __sys_openat(int, const char *, int, ...); /* * Data declarations. */ static char *error_message; /* Message for dlerror(), or NULL */ struct r_debug r_debug __exported; /* for GDB; */ static bool libmap_disable; /* Disable libmap */ static bool ld_loadfltr; /* Immediate filters processing */ static char *libmap_override; /* Maps to use in addition to libmap.conf */ static bool trust; /* False for setuid and setgid programs */ static bool dangerous_ld_env; /* True if environment variables have been used to affect the libraries loaded */ static char *ld_bind_now; /* Environment variable for immediate binding */ static char *ld_debug; /* Environment variable for debugging */ static char *ld_library_path; /* Environment variable for search path */ static char *ld_library_dirs; /* Environment variable for library descriptors */ static char *ld_preload; /* Environment variable for libraries to load first */ static char *ld_elf_hints_path; /* Environment variable for alternative hints path */ static char *ld_tracing; /* Called from ldd to print libs */ static char *ld_utrace; /* Use utrace() to log events. */ static Obj_Entry *obj_list; /* Head of linked list of shared objects */ static Obj_Entry **obj_tail; /* Link field of last object in list */ static Obj_Entry *obj_main; /* The main program shared object */ static Obj_Entry obj_rtld; /* The dynamic linker shared object */ static unsigned int obj_count; /* Number of objects in obj_list */ static unsigned int obj_loads; /* Number of objects in obj_list */ static Objlist list_global = /* Objects dlopened with RTLD_GLOBAL */ STAILQ_HEAD_INITIALIZER(list_global); static Objlist list_main = /* Objects loaded at program startup */ STAILQ_HEAD_INITIALIZER(list_main); static Objlist list_fini = /* Objects needing fini() calls */ STAILQ_HEAD_INITIALIZER(list_fini); Elf_Sym sym_zero; /* For resolving undefined weak refs. 
*/ #define GDB_STATE(s,m) r_debug.r_state = s; r_debug_state(&r_debug,m); extern Elf_Dyn _DYNAMIC; #pragma weak _DYNAMIC #ifndef RTLD_IS_DYNAMIC #define RTLD_IS_DYNAMIC() (&_DYNAMIC != NULL) #endif int dlclose(void *) __exported; char *dlerror(void) __exported; void *dlopen(const char *, int) __exported; void *fdlopen(int, int) __exported; void *dlsym(void *, const char *) __exported; dlfunc_t dlfunc(void *, const char *) __exported; void *dlvsym(void *, const char *, const char *) __exported; int dladdr(const void *, Dl_info *) __exported; void dllockinit(void *, void *(*)(void *), void (*)(void *), void (*)(void *), void (*)(void *), void (*)(void *), void (*)(void *)) __exported; int dlinfo(void *, int , void *) __exported; int dl_iterate_phdr(__dl_iterate_hdr_callback, void *) __exported; int _rtld_addr_phdr(const void *, struct dl_phdr_info *) __exported; int _rtld_get_stack_prot(void) __exported; int _rtld_is_dlopened(void *) __exported; void _rtld_error(const char *, ...) __exported; int npagesizes, osreldate; size_t *pagesizes; long __stack_chk_guard[8] = {0, 0, 0, 0, 0, 0, 0, 0}; static int stack_prot = PROT_READ | PROT_WRITE | RTLD_DEFAULT_STACK_EXEC; static int max_stack_flags; /* * Global declarations normally provided by crt1. The dynamic linker is * not built with crt1, so we have to provide them ourselves. */ char *__progname; char **environ; /* * Used to pass argc, argv to init functions. */ int main_argc; char **main_argv; /* * Globals to control TLS allocation. */ size_t tls_last_offset; /* Static TLS offset of last module */ size_t tls_last_size; /* Static TLS size of last module */ size_t tls_static_space; /* Static TLS space allocated */ size_t tls_static_max_align; int tls_dtv_generation = 1; /* Used to detect when dtv size changes */ int tls_max_index = 1; /* Largest module index allocated */ bool ld_library_path_rpath = false; /* * Globals for path names, and such */ char *ld_elf_hints_default = _PATH_ELF_HINTS; char *ld_path_libmap_conf = _PATH_LIBMAP_CONF; char *ld_path_rtld = _PATH_RTLD; char *ld_standard_library_path = STANDARD_LIBRARY_PATH; char *ld_env_prefix = LD_; /* * Fill in a DoneList with an allocation large enough to hold all of * the currently-loaded objects. Keep this as a macro since it calls * alloca and we want that to occur within the scope of the caller. 
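 *
 * A typical use, as in init_dag() below, looks like:
 *
 *	DoneList donelist;
 *
 *	donelist_init(&donelist);
 *	...
 *	if (donelist_check(&donelist, obj))
 *		continue;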
*/ #define donelist_init(dlp) \ ((dlp)->objs = alloca(obj_count * sizeof (dlp)->objs[0]), \ assert((dlp)->objs != NULL), \ (dlp)->num_alloc = obj_count, \ (dlp)->num_used = 0) #define UTRACE_DLOPEN_START 1 #define UTRACE_DLOPEN_STOP 2 #define UTRACE_DLCLOSE_START 3 #define UTRACE_DLCLOSE_STOP 4 #define UTRACE_LOAD_OBJECT 5 #define UTRACE_UNLOAD_OBJECT 6 #define UTRACE_ADD_RUNDEP 7 #define UTRACE_PRELOAD_FINISHED 8 #define UTRACE_INIT_CALL 9 #define UTRACE_FINI_CALL 10 #define UTRACE_DLSYM_START 11 #define UTRACE_DLSYM_STOP 12 struct utrace_rtld { char sig[4]; /* 'RTLD' */ int event; void *handle; void *mapbase; /* Used for 'parent' and 'init/fini' */ size_t mapsize; int refcnt; /* Used for 'mode' */ char name[MAXPATHLEN]; }; #define LD_UTRACE(e, h, mb, ms, r, n) do { \ if (ld_utrace != NULL) \ ld_utrace_log(e, h, mb, ms, r, n); \ } while (0) static void ld_utrace_log(int event, void *handle, void *mapbase, size_t mapsize, int refcnt, const char *name) { struct utrace_rtld ut; ut.sig[0] = 'R'; ut.sig[1] = 'T'; ut.sig[2] = 'L'; ut.sig[3] = 'D'; ut.event = event; ut.handle = handle; ut.mapbase = mapbase; ut.mapsize = mapsize; ut.refcnt = refcnt; bzero(ut.name, sizeof(ut.name)); if (name) strlcpy(ut.name, name, sizeof(ut.name)); utrace(&ut, sizeof(ut)); } #ifdef RTLD_VARIANT_ENV_NAMES /* * construct the env variable based on the type of binary that's * running. */ static inline const char * _LD(const char *var) { static char buffer[128]; strlcpy(buffer, ld_env_prefix, sizeof(buffer)); strlcat(buffer, var, sizeof(buffer)); return (buffer); } #else #define _LD(x) LD_ x #endif /* * Main entry point for dynamic linking. The first argument is the * stack pointer. The stack is expected to be laid out as described * in the SVR4 ABI specification, Intel 386 Processor Supplement. * Specifically, the stack pointer points to a word containing * ARGC. Following that in the stack is a null-terminated sequence * of pointers to argument strings. Then comes a null-terminated * sequence of pointers to environment strings. Finally, there is a * sequence of "auxiliary vector" entries. * * The second argument points to a place to store the dynamic linker's * exit procedure pointer and the third to a place to store the main * program's object. * * The return value is the main program's entry point. */ func_ptr_type _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp) { Elf_Auxinfo *aux_info[AT_COUNT]; int i; int argc; char **argv; char **env; Elf_Auxinfo *aux; Elf_Auxinfo *auxp; const char *argv0; Objlist_Entry *entry; Obj_Entry *obj; Obj_Entry **preload_tail; Obj_Entry *last_interposer; Objlist initlist; RtldLockState lockstate; char *library_path_rpath; int mib[2]; size_t len; /* * On entry, the dynamic linker itself has not been relocated yet. * Be very careful not to reference any global data until after * init_rtld has returned. It is OK to reference file-scope statics * and string constants, and to call static and global functions. */ /* Find the auxiliary vector on the stack. */ argc = *sp++; argv = (char **) sp; sp += argc + 1; /* Skip over arguments and NULL terminator */ env = (char **) sp; while (*sp++ != 0) /* Skip over environment, and NULL terminator */ ; aux = (Elf_Auxinfo *) sp; /* Digest the auxiliary vector. */ for (i = 0; i < AT_COUNT; i++) aux_info[i] = NULL; for (auxp = aux; auxp->a_type != AT_NULL; auxp++) { if (auxp->a_type < AT_COUNT) aux_info[auxp->a_type] = auxp; } /* Initialize and relocate ourselves. 
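 * AT_BASE is the address at which the kernel mapped the dynamic
 * linker itself; init_rtld() uses it as the relocation base for our
 * own object.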
*/ assert(aux_info[AT_BASE] != NULL); init_rtld((caddr_t) aux_info[AT_BASE]->a_un.a_ptr, aux_info); __progname = obj_rtld.path; argv0 = argv[0] != NULL ? argv[0] : "(null)"; environ = env; main_argc = argc; main_argv = argv; if (aux_info[AT_CANARY] != NULL && aux_info[AT_CANARY]->a_un.a_ptr != NULL) { i = aux_info[AT_CANARYLEN]->a_un.a_val; if (i > sizeof(__stack_chk_guard)) i = sizeof(__stack_chk_guard); memcpy(__stack_chk_guard, aux_info[AT_CANARY]->a_un.a_ptr, i); } else { mib[0] = CTL_KERN; mib[1] = KERN_ARND; len = sizeof(__stack_chk_guard); if (sysctl(mib, 2, __stack_chk_guard, &len, NULL, 0) == -1 || len != sizeof(__stack_chk_guard)) { /* If sysctl was unsuccessful, use the "terminator canary". */ ((unsigned char *)(void *)__stack_chk_guard)[0] = 0; ((unsigned char *)(void *)__stack_chk_guard)[1] = 0; ((unsigned char *)(void *)__stack_chk_guard)[2] = '\n'; ((unsigned char *)(void *)__stack_chk_guard)[3] = 255; } } trust = !issetugid(); md_abi_variant_hook(aux_info); ld_bind_now = getenv(_LD("BIND_NOW")); /* * If the process is tainted, then we un-set the dangerous environment * variables. The process will be marked as tainted until setuid(2) * is called. If any child process calls setuid(2) we do not want any * future processes to honor the potentially un-safe variables. */ if (!trust) { if (unsetenv(_LD("PRELOAD")) || unsetenv(_LD("LIBMAP")) || unsetenv(_LD("LIBRARY_PATH")) || unsetenv(_LD("LIBRARY_PATH_FDS")) || unsetenv(_LD("LIBMAP_DISABLE")) || unsetenv(_LD("DEBUG")) || unsetenv(_LD("ELF_HINTS_PATH")) || unsetenv(_LD("LOADFLTR")) || unsetenv(_LD("LIBRARY_PATH_RPATH"))) { _rtld_error("environment corrupt; aborting"); rtld_die(); } } ld_debug = getenv(_LD("DEBUG")); libmap_disable = getenv(_LD("LIBMAP_DISABLE")) != NULL; libmap_override = getenv(_LD("LIBMAP")); ld_library_path = getenv(_LD("LIBRARY_PATH")); ld_library_dirs = getenv(_LD("LIBRARY_PATH_FDS")); ld_preload = getenv(_LD("PRELOAD")); ld_elf_hints_path = getenv(_LD("ELF_HINTS_PATH")); ld_loadfltr = getenv(_LD("LOADFLTR")) != NULL; library_path_rpath = getenv(_LD("LIBRARY_PATH_RPATH")); if (library_path_rpath != NULL) { if (library_path_rpath[0] == 'y' || library_path_rpath[0] == 'Y' || library_path_rpath[0] == '1') ld_library_path_rpath = true; else ld_library_path_rpath = false; } dangerous_ld_env = libmap_disable || (libmap_override != NULL) || (ld_library_path != NULL) || (ld_preload != NULL) || (ld_elf_hints_path != NULL) || ld_loadfltr; ld_tracing = getenv(_LD("TRACE_LOADED_OBJECTS")); ld_utrace = getenv(_LD("UTRACE")); if ((ld_elf_hints_path == NULL) || strlen(ld_elf_hints_path) == 0) ld_elf_hints_path = ld_elf_hints_default; if (ld_debug != NULL && *ld_debug != '\0') debug = 1; dbg("%s is initialized, base address = %p", __progname, (caddr_t) aux_info[AT_BASE]->a_un.a_ptr); dbg("RTLD dynamic = %p", obj_rtld.dynamic); dbg("RTLD pltgot = %p", obj_rtld.pltgot); dbg("initializing thread locks"); lockdflt_init(); /* * Load the main program, or process its program header if it is * already loaded. */ if (aux_info[AT_EXECFD] != NULL) { /* Load the main program. */ int fd = aux_info[AT_EXECFD]->a_un.a_val; dbg("loading main program"); obj_main = map_object(fd, argv0, NULL); close(fd); if (obj_main == NULL) rtld_die(); max_stack_flags = obj->stack_flags; } else { /* Main program already loaded. 
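 * This is the common case: the kernel mapped the executable and
 * describes it to us via AT_PHDR/AT_PHNUM/AT_PHENT and AT_ENTRY.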
*/ const Elf_Phdr *phdr; int phnum; caddr_t entry; dbg("processing main program's program header"); assert(aux_info[AT_PHDR] != NULL); phdr = (const Elf_Phdr *) aux_info[AT_PHDR]->a_un.a_ptr; assert(aux_info[AT_PHNUM] != NULL); phnum = aux_info[AT_PHNUM]->a_un.a_val; assert(aux_info[AT_PHENT] != NULL); assert(aux_info[AT_PHENT]->a_un.a_val == sizeof(Elf_Phdr)); assert(aux_info[AT_ENTRY] != NULL); entry = (caddr_t) aux_info[AT_ENTRY]->a_un.a_ptr; if ((obj_main = digest_phdr(phdr, phnum, entry, argv0)) == NULL) rtld_die(); } if (aux_info[AT_EXECPATH] != 0) { char *kexecpath; char buf[MAXPATHLEN]; kexecpath = aux_info[AT_EXECPATH]->a_un.a_ptr; dbg("AT_EXECPATH %p %s", kexecpath, kexecpath); if (kexecpath[0] == '/') obj_main->path = kexecpath; else if (getcwd(buf, sizeof(buf)) == NULL || strlcat(buf, "/", sizeof(buf)) >= sizeof(buf) || strlcat(buf, kexecpath, sizeof(buf)) >= sizeof(buf)) obj_main->path = xstrdup(argv0); else obj_main->path = xstrdup(buf); } else { dbg("No AT_EXECPATH"); obj_main->path = xstrdup(argv0); } dbg("obj_main path %s", obj_main->path); obj_main->mainprog = true; if (aux_info[AT_STACKPROT] != NULL && aux_info[AT_STACKPROT]->a_un.a_val != 0) stack_prot = aux_info[AT_STACKPROT]->a_un.a_val; #ifndef COMPAT_32BIT /* * Get the actual dynamic linker pathname from the executable if * possible. (It should always be possible.) That ensures that * gdb will find the right dynamic linker even if a non-standard * one is being used. */ if (obj_main->interp != NULL && strcmp(obj_main->interp, obj_rtld.path) != 0) { free(obj_rtld.path); obj_rtld.path = xstrdup(obj_main->interp); __progname = obj_rtld.path; } #endif digest_dynamic(obj_main, 0); dbg("%s valid_hash_sysv %d valid_hash_gnu %d dynsymcount %d", obj_main->path, obj_main->valid_hash_sysv, obj_main->valid_hash_gnu, obj_main->dynsymcount); linkmap_add(obj_main); linkmap_add(&obj_rtld); /* Link the main program into the list of objects. */ *obj_tail = obj_main; obj_tail = &obj_main->next; obj_count++; obj_loads++; /* Initialize a fake symbol for resolving undefined weak references. */ sym_zero.st_info = ELF_ST_INFO(STB_GLOBAL, STT_NOTYPE); sym_zero.st_shndx = SHN_UNDEF; sym_zero.st_value = -(uintptr_t)obj_main->relocbase; if (!libmap_disable) libmap_disable = (bool)lm_init(libmap_override); dbg("loading LD_PRELOAD libraries"); if (load_preload_objects() == -1) rtld_die(); preload_tail = obj_tail; dbg("loading needed objects"); if (load_needed_objects(obj_main, 0) == -1) rtld_die(); /* Make a list of all objects loaded at startup. */ last_interposer = obj_main; for (obj = obj_list; obj != NULL; obj = obj->next) { if (obj->z_interpose && obj != obj_main) { objlist_put_after(&list_main, last_interposer, obj); last_interposer = obj; } else { objlist_push_tail(&list_main, obj); } obj->refcount++; } dbg("checking for required versions"); if (rtld_verify_versions(&list_main) == -1 && !ld_tracing) rtld_die(); if (ld_tracing) { /* We're done */ trace_loaded_objects(obj_main); exit(0); } if (getenv(_LD("DUMP_REL_PRE")) != NULL) { dump_relocations(obj_main); exit (0); } /* * Processing tls relocations requires having the tls offsets * initialized. Prepare offsets before starting initial * relocation processing. */ dbg("initializing initial thread local storage offsets"); STAILQ_FOREACH(entry, &list_main, link) { /* * Allocate all the initial objects out of the static TLS * block even if they didn't ask for it. 
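 * This keeps the static (initial-exec/local-exec) TLS models, which
 * assume a fixed offset from the thread pointer, working for every
 * object present at startup.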
*/ allocate_tls_offset(entry->obj); } if (relocate_objects(obj_main, ld_bind_now != NULL && *ld_bind_now != '\0', &obj_rtld, SYMLOOK_EARLY, NULL) == -1) rtld_die(); dbg("doing copy relocations"); if (do_copy_relocations(obj_main) == -1) rtld_die(); if (getenv(_LD("DUMP_REL_POST")) != NULL) { dump_relocations(obj_main); exit (0); } /* * Setup TLS for main thread. This must be done after the * relocations are processed, since tls initialization section * might be the subject for relocations. */ dbg("initializing initial thread local storage"); allocate_initial_tls(obj_list); dbg("initializing key program variables"); set_program_var("__progname", argv[0] != NULL ? basename(argv[0]) : ""); set_program_var("environ", env); set_program_var("__elf_aux_vector", aux); /* Make a list of init functions to call. */ objlist_init(&initlist); initlist_add_objects(obj_list, preload_tail, &initlist); r_debug_state(NULL, &obj_main->linkmap); /* say hello to gdb! */ map_stacks_exec(NULL); dbg("resolving ifuncs"); if (resolve_objects_ifunc(obj_main, ld_bind_now != NULL && *ld_bind_now != '\0', SYMLOOK_EARLY, NULL) == -1) rtld_die(); if (!obj_main->crt_no_init) { /* * Make sure we don't call the main program's init and fini * functions for binaries linked with old crt1 which calls * _init itself. */ obj_main->init = obj_main->fini = (Elf_Addr)NULL; obj_main->preinit_array = obj_main->init_array = obj_main->fini_array = (Elf_Addr)NULL; } wlock_acquire(rtld_bind_lock, &lockstate); if (obj_main->crt_no_init) preinit_main(); objlist_call_init(&initlist, &lockstate); _r_debug_postinit(&obj_main->linkmap); objlist_clear(&initlist); dbg("loading filtees"); for (obj = obj_list->next; obj != NULL; obj = obj->next) { if (ld_loadfltr || obj->z_loadfltr) load_filtees(obj, 0, &lockstate); } lock_release(rtld_bind_lock, &lockstate); dbg("transferring control to program entry point = %p", obj_main->entry); /* Return the exit procedure and the program entry point. */ *exit_proc = rtld_exit; *objp = obj_main; return (func_ptr_type) obj_main->entry; } void * rtld_resolve_ifunc(const Obj_Entry *obj, const Elf_Sym *def) { void *ptr; Elf_Addr target; ptr = (void *)make_function_pointer(def, obj); target = ((Elf_Addr (*)(void))ptr)(); return ((void *)target); } Elf_Addr _rtld_bind(Obj_Entry *obj, Elf_Size reloff) { const Elf_Rel *rel; const Elf_Sym *def; const Obj_Entry *defobj; Elf_Addr *where; Elf_Addr target; RtldLockState lockstate; rlock_acquire(rtld_bind_lock, &lockstate); if (sigsetjmp(lockstate.env, 0) != 0) lock_upgrade(rtld_bind_lock, &lockstate); if (obj->pltrel) rel = (const Elf_Rel *) ((caddr_t) obj->pltrel + reloff); else rel = (const Elf_Rel *) ((caddr_t) obj->pltrela + reloff); where = (Elf_Addr *) (obj->relocbase + rel->r_offset); def = find_symdef(ELF_R_SYM(rel->r_info), obj, &defobj, true, NULL, &lockstate); if (def == NULL) rtld_die(); if (ELF_ST_TYPE(def->st_info) == STT_GNU_IFUNC) target = (Elf_Addr)rtld_resolve_ifunc(defobj, def); else target = (Elf_Addr)(defobj->relocbase + def->st_value); dbg("\"%s\" in \"%s\" ==> %p in \"%s\"", defobj->strtab + def->st_name, basename(obj->path), (void *)target, basename(defobj->path)); /* * Write the new contents for the jmpslot. Note that depending on * architecture, the value which we need to return back to the * lazy binding trampoline may or may not be the target * address. The value returned from reloc_jmpslot() is the value * that the trampoline needs. 
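 * For example, on ABIs that use function descriptors or call stubs,
 * the trampoline wants the descriptor or stub value rather than the
 * raw function address.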
*/ target = reloc_jmpslot(where, target, defobj, obj, rel); lock_release(rtld_bind_lock, &lockstate); return target; } /* * Error reporting function. Use it like printf. If formats the message * into a buffer, and sets things up so that the next call to dlerror() * will return the message. */ void _rtld_error(const char *fmt, ...) { static char buf[512]; va_list ap; va_start(ap, fmt); rtld_vsnprintf(buf, sizeof buf, fmt, ap); error_message = buf; va_end(ap); } /* * Return a dynamically-allocated copy of the current error message, if any. */ static char * errmsg_save(void) { return error_message == NULL ? NULL : xstrdup(error_message); } /* * Restore the current error message from a copy which was previously saved * by errmsg_save(). The copy is freed. */ static void errmsg_restore(char *saved_msg) { if (saved_msg == NULL) error_message = NULL; else { _rtld_error("%s", saved_msg); free(saved_msg); } } static const char * basename(const char *name) { const char *p = strrchr(name, '/'); return p != NULL ? p + 1 : name; } static struct utsname uts; static char * origin_subst_one(Obj_Entry *obj, char *real, const char *kw, const char *subst, bool may_free) { char *p, *p1, *res, *resp; int subst_len, kw_len, subst_count, old_len, new_len; kw_len = strlen(kw); /* * First, count the number of the keyword occurences, to * preallocate the final string. */ for (p = real, subst_count = 0;; p = p1 + kw_len, subst_count++) { p1 = strstr(p, kw); if (p1 == NULL) break; } /* * If the keyword is not found, just return. * * Return non-substituted string if resolution failed. We * cannot do anything more reasonable, the failure mode of the * caller is unresolved library anyway. */ if (subst_count == 0 || (obj != NULL && !obj_resolve_origin(obj))) return (may_free ? real : xstrdup(real)); if (obj != NULL) subst = obj->origin_path; /* * There is indeed something to substitute. Calculate the * length of the resulting string, and allocate it. */ subst_len = strlen(subst); old_len = strlen(real); new_len = old_len + (subst_len - kw_len) * subst_count; res = xmalloc(new_len + 1); /* * Now, execute the substitution loop. */ for (p = real, resp = res, *resp = '\0';;) { p1 = strstr(p, kw); if (p1 != NULL) { /* Copy the prefix before keyword. */ memcpy(resp, p, p1 - p); resp += p1 - p; /* Keyword replacement. */ memcpy(resp, subst, subst_len); resp += subst_len; *resp = '\0'; p = p1 + kw_len; } else break; } /* Copy to the end of string and finish. */ strcat(resp, p); if (may_free) free(real); return (res); } static char * origin_subst(Obj_Entry *obj, char *real) { char *res1, *res2, *res3, *res4; if (obj == NULL || !trust) return (xstrdup(real)); if (uts.sysname[0] == '\0') { if (uname(&uts) != 0) { _rtld_error("utsname failed: %d", errno); return (NULL); } } res1 = origin_subst_one(obj, real, "$ORIGIN", NULL, false); res2 = origin_subst_one(NULL, res1, "$OSNAME", uts.sysname, true); res3 = origin_subst_one(NULL, res2, "$OSREL", uts.release, true); res4 = origin_subst_one(NULL, res3, "$PLATFORM", uts.machine, true); return (res4); } void rtld_die(void) { const char *msg = dlerror(); if (msg == NULL) msg = "Fatal error"; rtld_fdputstr(STDERR_FILENO, msg); rtld_fdputchar(STDERR_FILENO, '\n'); _exit(1); } /* * Process a shared object's DYNAMIC section, and save the important * information in its Obj_Entry structure. 
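 *
 * The section is an array of Elf_Dyn entries terminated by DT_NULL;
 * an illustrative (hypothetical) layout is:
 *
 *	{ DT_NEEDED, <.dynstr offset of "libc.so.7"> }
 *	{ DT_SYMTAB, <address of .dynsym> }
 *	{ DT_STRTAB, <address of .dynstr> }
 *	{ DT_NULL,   0 }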
*/ static void digest_dynamic1(Obj_Entry *obj, int early, const Elf_Dyn **dyn_rpath, const Elf_Dyn **dyn_soname, const Elf_Dyn **dyn_runpath) { const Elf_Dyn *dynp; Needed_Entry **needed_tail = &obj->needed; Needed_Entry **needed_filtees_tail = &obj->needed_filtees; Needed_Entry **needed_aux_filtees_tail = &obj->needed_aux_filtees; const Elf_Hashelt *hashtab; const Elf32_Word *hashval; Elf32_Word bkt, nmaskwords; int bloom_size32; int plttype = DT_REL; *dyn_rpath = NULL; *dyn_soname = NULL; *dyn_runpath = NULL; obj->bind_now = false; for (dynp = obj->dynamic; dynp->d_tag != DT_NULL; dynp++) { switch (dynp->d_tag) { case DT_REL: obj->rel = (const Elf_Rel *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_RELSZ: obj->relsize = dynp->d_un.d_val; break; case DT_RELENT: assert(dynp->d_un.d_val == sizeof(Elf_Rel)); break; case DT_JMPREL: obj->pltrel = (const Elf_Rel *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_PLTRELSZ: obj->pltrelsize = dynp->d_un.d_val; break; case DT_RELA: obj->rela = (const Elf_Rela *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_RELASZ: obj->relasize = dynp->d_un.d_val; break; case DT_RELAENT: assert(dynp->d_un.d_val == sizeof(Elf_Rela)); break; case DT_PLTREL: plttype = dynp->d_un.d_val; assert(dynp->d_un.d_val == DT_REL || plttype == DT_RELA); break; case DT_SYMTAB: obj->symtab = (const Elf_Sym *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_SYMENT: assert(dynp->d_un.d_val == sizeof(Elf_Sym)); break; case DT_STRTAB: obj->strtab = (const char *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_STRSZ: obj->strsize = dynp->d_un.d_val; break; case DT_VERNEED: obj->verneed = (const Elf_Verneed *) (obj->relocbase + dynp->d_un.d_val); break; case DT_VERNEEDNUM: obj->verneednum = dynp->d_un.d_val; break; case DT_VERDEF: obj->verdef = (const Elf_Verdef *) (obj->relocbase + dynp->d_un.d_val); break; case DT_VERDEFNUM: obj->verdefnum = dynp->d_un.d_val; break; case DT_VERSYM: obj->versyms = (const Elf_Versym *)(obj->relocbase + dynp->d_un.d_val); break; case DT_HASH: { hashtab = (const Elf_Hashelt *)(obj->relocbase + dynp->d_un.d_ptr); obj->nbuckets = hashtab[0]; obj->nchains = hashtab[1]; obj->buckets = hashtab + 2; obj->chains = obj->buckets + obj->nbuckets; obj->valid_hash_sysv = obj->nbuckets > 0 && obj->nchains > 0 && obj->buckets != NULL; } break; case DT_GNU_HASH: { hashtab = (const Elf_Hashelt *)(obj->relocbase + dynp->d_un.d_ptr); obj->nbuckets_gnu = hashtab[0]; obj->symndx_gnu = hashtab[1]; nmaskwords = hashtab[2]; bloom_size32 = (__ELF_WORD_SIZE / 32) * nmaskwords; obj->maskwords_bm_gnu = nmaskwords - 1; obj->shift2_gnu = hashtab[3]; obj->bloom_gnu = (Elf_Addr *) (hashtab + 4); obj->buckets_gnu = hashtab + 4 + bloom_size32; obj->chain_zero_gnu = obj->buckets_gnu + obj->nbuckets_gnu - obj->symndx_gnu; /* Number of bitmask words is required to be power of 2 */ obj->valid_hash_gnu = powerof2(nmaskwords) && obj->nbuckets_gnu > 0 && obj->buckets_gnu != NULL; } break; case DT_NEEDED: if (!obj->rtld) { Needed_Entry *nep = NEW(Needed_Entry); nep->name = dynp->d_un.d_val; nep->obj = NULL; nep->next = NULL; *needed_tail = nep; needed_tail = &nep->next; } break; case DT_FILTER: if (!obj->rtld) { Needed_Entry *nep = NEW(Needed_Entry); nep->name = dynp->d_un.d_val; nep->obj = NULL; nep->next = NULL; *needed_filtees_tail = nep; needed_filtees_tail = &nep->next; } break; case DT_AUXILIARY: if (!obj->rtld) { Needed_Entry *nep = NEW(Needed_Entry); nep->name = dynp->d_un.d_val; nep->obj = NULL; nep->next = NULL; *needed_aux_filtees_tail = nep; 
needed_aux_filtees_tail = &nep->next; } break; case DT_PLTGOT: obj->pltgot = (Elf_Addr *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_TEXTREL: obj->textrel = true; break; case DT_SYMBOLIC: obj->symbolic = true; break; case DT_RPATH: /* * We have to wait until later to process this, because we * might not have gotten the address of the string table yet. */ *dyn_rpath = dynp; break; case DT_SONAME: *dyn_soname = dynp; break; case DT_RUNPATH: *dyn_runpath = dynp; break; case DT_INIT: obj->init = (Elf_Addr) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_PREINIT_ARRAY: obj->preinit_array = (Elf_Addr)(obj->relocbase + dynp->d_un.d_ptr); break; case DT_PREINIT_ARRAYSZ: obj->preinit_array_num = dynp->d_un.d_val / sizeof(Elf_Addr); break; case DT_INIT_ARRAY: obj->init_array = (Elf_Addr)(obj->relocbase + dynp->d_un.d_ptr); break; case DT_INIT_ARRAYSZ: obj->init_array_num = dynp->d_un.d_val / sizeof(Elf_Addr); break; case DT_FINI: obj->fini = (Elf_Addr) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_FINI_ARRAY: obj->fini_array = (Elf_Addr)(obj->relocbase + dynp->d_un.d_ptr); break; case DT_FINI_ARRAYSZ: obj->fini_array_num = dynp->d_un.d_val / sizeof(Elf_Addr); break; /* * Don't process DT_DEBUG on MIPS as the dynamic section * is mapped read-only. DT_MIPS_RLD_MAP is used instead. */ -#ifndef __mips__ case DT_DEBUG: + if (!obj->writable_dynamic) + break; if (!early) dbg("Filling in DT_DEBUG entry"); ((Elf_Dyn*)dynp)->d_un.d_ptr = (Elf_Addr) &r_debug; break; -#endif case DT_FLAGS: if (dynp->d_un.d_val & DF_ORIGIN) obj->z_origin = true; if (dynp->d_un.d_val & DF_SYMBOLIC) obj->symbolic = true; if (dynp->d_un.d_val & DF_TEXTREL) obj->textrel = true; if (dynp->d_un.d_val & DF_BIND_NOW) obj->bind_now = true; /*if (dynp->d_un.d_val & DF_STATIC_TLS) ;*/ break; #ifdef __mips__ case DT_MIPS_LOCAL_GOTNO: obj->local_gotno = dynp->d_un.d_val; break; case DT_MIPS_SYMTABNO: obj->symtabno = dynp->d_un.d_val; break; case DT_MIPS_GOTSYM: obj->gotsym = dynp->d_un.d_val; break; case DT_MIPS_RLD_MAP: *((Elf_Addr *)(dynp->d_un.d_ptr)) = (Elf_Addr) &r_debug; break; #endif #ifdef __powerpc64__ case DT_PPC64_GLINK: obj->glink = (Elf_Addr) (obj->relocbase + dynp->d_un.d_ptr); break; #endif case DT_FLAGS_1: if (dynp->d_un.d_val & DF_1_NOOPEN) obj->z_noopen = true; if (dynp->d_un.d_val & DF_1_ORIGIN) obj->z_origin = true; if (dynp->d_un.d_val & DF_1_GLOBAL) obj->z_global = true; if (dynp->d_un.d_val & DF_1_BIND_NOW) obj->bind_now = true; if (dynp->d_un.d_val & DF_1_NODELETE) obj->z_nodelete = true; if (dynp->d_un.d_val & DF_1_LOADFLTR) obj->z_loadfltr = true; if (dynp->d_un.d_val & DF_1_INTERPOSE) obj->z_interpose = true; if (dynp->d_un.d_val & DF_1_NODEFLIB) obj->z_nodeflib = true; break; default: if (!early) { dbg("Ignoring d_tag %ld = %#lx", (long)dynp->d_tag, (long)dynp->d_tag); } break; } } obj->traced = false; if (plttype == DT_RELA) { obj->pltrela = (const Elf_Rela *) obj->pltrel; obj->pltrel = NULL; obj->pltrelasize = obj->pltrelsize; obj->pltrelsize = 0; } /* Determine size of dynsym table (equal to nchains of sysv hash) */ if (obj->valid_hash_sysv) obj->dynsymcount = obj->nchains; else if (obj->valid_hash_gnu) { obj->dynsymcount = 0; for (bkt = 0; bkt < obj->nbuckets_gnu; bkt++) { if (obj->buckets_gnu[bkt] == 0) continue; hashval = &obj->chain_zero_gnu[obj->buckets_gnu[bkt]]; do obj->dynsymcount++; while ((*hashval++ & 1u) == 0); } obj->dynsymcount += obj->symndx_gnu; } } static bool obj_resolve_origin(Obj_Entry *obj) { if (obj->origin_path != NULL) return (true); obj->origin_path = 
xmalloc(PATH_MAX); return (rtld_dirname_abs(obj->path, obj->origin_path) != -1); } static void digest_dynamic2(Obj_Entry *obj, const Elf_Dyn *dyn_rpath, const Elf_Dyn *dyn_soname, const Elf_Dyn *dyn_runpath) { if (obj->z_origin && !obj_resolve_origin(obj)) rtld_die(); if (dyn_runpath != NULL) { obj->runpath = (char *)obj->strtab + dyn_runpath->d_un.d_val; obj->runpath = origin_subst(obj, obj->runpath); } else if (dyn_rpath != NULL) { obj->rpath = (char *)obj->strtab + dyn_rpath->d_un.d_val; obj->rpath = origin_subst(obj, obj->rpath); } if (dyn_soname != NULL) object_add_name(obj, obj->strtab + dyn_soname->d_un.d_val); } static void digest_dynamic(Obj_Entry *obj, int early) { const Elf_Dyn *dyn_rpath; const Elf_Dyn *dyn_soname; const Elf_Dyn *dyn_runpath; digest_dynamic1(obj, early, &dyn_rpath, &dyn_soname, &dyn_runpath); digest_dynamic2(obj, dyn_rpath, dyn_soname, dyn_runpath); } /* * Process a shared object's program header. This is used only for the * main program, when the kernel has already loaded the main program * into memory before calling the dynamic linker. It creates and * returns an Obj_Entry structure. */ static Obj_Entry * digest_phdr(const Elf_Phdr *phdr, int phnum, caddr_t entry, const char *path) { Obj_Entry *obj; const Elf_Phdr *phlimit = phdr + phnum; const Elf_Phdr *ph; Elf_Addr note_start, note_end; int nsegs = 0; obj = obj_new(); for (ph = phdr; ph < phlimit; ph++) { if (ph->p_type != PT_PHDR) continue; obj->phdr = phdr; obj->phsize = ph->p_memsz; obj->relocbase = (caddr_t)phdr - ph->p_vaddr; break; } obj->stack_flags = PF_X | PF_R | PF_W; for (ph = phdr; ph < phlimit; ph++) { switch (ph->p_type) { case PT_INTERP: obj->interp = (const char *)(ph->p_vaddr + obj->relocbase); break; case PT_LOAD: if (nsegs == 0) { /* First load segment */ obj->vaddrbase = trunc_page(ph->p_vaddr); obj->mapbase = obj->vaddrbase + obj->relocbase; obj->textsize = round_page(ph->p_vaddr + ph->p_memsz) - obj->vaddrbase; } else { /* Last load segment */ obj->mapsize = round_page(ph->p_vaddr + ph->p_memsz) - obj->vaddrbase; } nsegs++; break; case PT_DYNAMIC: + if (ph->p_flags & PROT_WRITE) + obj->writable_dynamic = true; obj->dynamic = (const Elf_Dyn *)(ph->p_vaddr + obj->relocbase); break; case PT_TLS: obj->tlsindex = 1; obj->tlssize = ph->p_memsz; obj->tlsalign = ph->p_align; obj->tlsinitsize = ph->p_filesz; obj->tlsinit = (void*)(ph->p_vaddr + obj->relocbase); break; case PT_GNU_STACK: obj->stack_flags = ph->p_flags; break; case PT_GNU_RELRO: obj->relro_page = obj->relocbase + trunc_page(ph->p_vaddr); obj->relro_size = round_page(ph->p_memsz); break; case PT_NOTE: note_start = (Elf_Addr)obj->relocbase + ph->p_vaddr; note_end = note_start + ph->p_filesz; digest_notes(obj, note_start, note_end); break; } } if (nsegs < 1) { _rtld_error("%s: too few PT_LOAD segments", path); return NULL; } obj->entry = entry; return obj; } void digest_notes(Obj_Entry *obj, Elf_Addr note_start, Elf_Addr note_end) { const Elf_Note *note; const char *note_name; uintptr_t p; for (note = (const Elf_Note *)note_start; (Elf_Addr)note < note_end; note = (const Elf_Note *)((const char *)(note + 1) + roundup2(note->n_namesz, sizeof(Elf32_Addr)) + roundup2(note->n_descsz, sizeof(Elf32_Addr)))) { if (note->n_namesz != sizeof(NOTE_FREEBSD_VENDOR) || note->n_descsz != sizeof(int32_t)) continue; if (note->n_type != NT_FREEBSD_ABI_TAG && note->n_type != NT_FREEBSD_NOINIT_TAG) continue; note_name = (const char *)(note + 1); if (strncmp(NOTE_FREEBSD_VENDOR, note_name, sizeof(NOTE_FREEBSD_VENDOR)) != 0) continue; switch 
(note->n_type) { case NT_FREEBSD_ABI_TAG: /* FreeBSD osrel note */ p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); obj->osrel = *(const int32_t *)(p); dbg("note osrel %d", obj->osrel); break; case NT_FREEBSD_NOINIT_TAG: /* FreeBSD 'crt does not call init' note */ obj->crt_no_init = true; dbg("note crt_no_init"); break; } } } static Obj_Entry * dlcheck(void *handle) { Obj_Entry *obj; for (obj = obj_list; obj != NULL; obj = obj->next) if (obj == (Obj_Entry *) handle) break; if (obj == NULL || obj->refcount == 0 || obj->dl_refcount == 0) { _rtld_error("Invalid shared object handle %p", handle); return NULL; } return obj; } /* * If the given object is already in the donelist, return true. Otherwise * add the object to the list and return false. */ static bool donelist_check(DoneList *dlp, const Obj_Entry *obj) { unsigned int i; for (i = 0; i < dlp->num_used; i++) if (dlp->objs[i] == obj) return true; /* * Our donelist allocation should always be sufficient. But if * our threads locking isn't working properly, more shared objects * could have been loaded since we allocated the list. That should * never happen, but we'll handle it properly just in case it does. */ if (dlp->num_used < dlp->num_alloc) dlp->objs[dlp->num_used++] = obj; return false; } /* * Hash function for symbol table lookup. Don't even think about changing * this. It is specified by the System V ABI. */ unsigned long elf_hash(const char *name) { const unsigned char *p = (const unsigned char *) name; unsigned long h = 0; unsigned long g; while (*p != '\0') { h = (h << 4) + *p++; if ((g = h & 0xf0000000) != 0) h ^= g >> 24; h &= ~g; } return h; } /* * The GNU hash function is the Daniel J. Bernstein hash clipped to 32 bits * unsigned in case it's implemented with a wider type. */ static uint32_t gnu_hash(const char *s) { uint32_t h; unsigned char c; h = 5381; for (c = *s; c != '\0'; c = *++s) h = h * 33 + c; return (h & 0xffffffff); } /* * Find the library with the given name, and return its full pathname. * The returned string is dynamically allocated. Generates an error * message and returns NULL if the library cannot be found. * * If the second argument is non-NULL, then it refers to an already- * loaded shared object, whose library search path will be searched. * * If a library is successfully located via LD_LIBRARY_PATH_FDS, its * descriptor (which is close-on-exec) will be passed out via the third * argument. * * The search order is: * DT_RPATH in the referencing file _unless_ DT_RUNPATH is present (1) * DT_RPATH of the main object if DSO without defined DT_RUNPATH (1) * LD_LIBRARY_PATH * DT_RUNPATH in the referencing file * ldconfig hints (if -z nodefaultlib, filter out default library directories * from list) * /lib:/usr/lib _unless_ the referencing file is linked with -z nodefaultlib * * (1) Handled in digest_dynamic2 - rpath left NULL if runpath defined. */ static char * find_library(const char *xname, const Obj_Entry *refobj, int *fdp) { char *pathname; char *name; bool nodeflib, objgiven; objgiven = refobj != NULL; if (strchr(xname, '/') != NULL) { /* Hard coded pathname */ if (xname[0] != '/' && !trust) { _rtld_error("Absolute pathname required for shared object \"%s\"", xname); return NULL; } return (origin_subst(__DECONST(Obj_Entry *, refobj), __DECONST(char *, xname))); } if (libmap_disable || !objgiven || (name = lm_find(refobj->path, xname)) == NULL) name = (char *)xname; dbg(" Searching for \"%s\"", name); /* * If refobj->rpath != NULL, then refobj->runpath is NULL. 
Fall * back to pre-conforming behaviour if user requested so with * LD_LIBRARY_PATH_RPATH environment variable and ignore -z * nodeflib. */ if (objgiven && refobj->rpath != NULL && ld_library_path_rpath) { if ((pathname = search_library_path(name, ld_library_path)) != NULL || (refobj != NULL && (pathname = search_library_path(name, refobj->rpath)) != NULL) || (pathname = search_library_pathfds(name, ld_library_dirs, fdp)) != NULL || (pathname = search_library_path(name, gethints(false))) != NULL || (pathname = search_library_path(name, ld_standard_library_path)) != NULL) return (pathname); } else { nodeflib = objgiven ? refobj->z_nodeflib : false; if ((objgiven && (pathname = search_library_path(name, refobj->rpath)) != NULL) || (objgiven && refobj->runpath == NULL && refobj != obj_main && (pathname = search_library_path(name, obj_main->rpath)) != NULL) || (pathname = search_library_path(name, ld_library_path)) != NULL || (objgiven && (pathname = search_library_path(name, refobj->runpath)) != NULL) || (pathname = search_library_pathfds(name, ld_library_dirs, fdp)) != NULL || (pathname = search_library_path(name, gethints(nodeflib))) != NULL || (objgiven && !nodeflib && (pathname = search_library_path(name, ld_standard_library_path)) != NULL)) return (pathname); } if (objgiven && refobj->path != NULL) { _rtld_error("Shared object \"%s\" not found, required by \"%s\"", name, basename(refobj->path)); } else { _rtld_error("Shared object \"%s\" not found", name); } return NULL; } /* * Given a symbol number in a referencing object, find the corresponding * definition of the symbol. Returns a pointer to the symbol, or NULL if * no definition was found. Returns a pointer to the Obj_Entry of the * defining object via the reference parameter DEFOBJ_OUT. */ const Elf_Sym * find_symdef(unsigned long symnum, const Obj_Entry *refobj, const Obj_Entry **defobj_out, int flags, SymCache *cache, RtldLockState *lockstate) { const Elf_Sym *ref; const Elf_Sym *def; const Obj_Entry *defobj; SymLook req; const char *name; int res; /* * If we have already found this symbol, get the information from * the cache. */ if (symnum >= refobj->dynsymcount) return NULL; /* Bad object */ if (cache != NULL && cache[symnum].sym != NULL) { *defobj_out = cache[symnum].obj; return cache[symnum].sym; } ref = refobj->symtab + symnum; name = refobj->strtab + ref->st_name; def = NULL; defobj = NULL; /* * We don't have to do a full scale lookup if the symbol is local. * We know it will bind to the instance in this load module; to * which we already have a pointer (ie ref). By not doing a lookup, * we not only improve performance, but it also avoids unresolvable * symbols when local symbols are not in the hash table. This has * been seen with the ia64 toolchain. */ if (ELF_ST_BIND(ref->st_info) != STB_LOCAL) { if (ELF_ST_TYPE(ref->st_info) == STT_SECTION) { _rtld_error("%s: Bogus symbol table entry %lu", refobj->path, symnum); } symlook_init(&req, name); req.flags = flags; req.ventry = fetch_ventry(refobj, symnum); req.lockstate = lockstate; res = symlook_default(&req, refobj); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } else { def = ref; defobj = refobj; } /* * If we found no definition and the reference is weak, treat the * symbol as having the value zero. */ if (def == NULL && ELF_ST_BIND(ref->st_info) == STB_WEAK) { def = &sym_zero; defobj = obj_main; } if (def != NULL) { *defobj_out = defobj; /* Record the information in the cache to avoid subsequent lookups. 
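 * The cache, when supplied by the caller, is indexed by the
 * referencing object's symbol number (symnum).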
*/ if (cache != NULL) { cache[symnum].sym = def; cache[symnum].obj = defobj; } } else { if (refobj != &obj_rtld) _rtld_error("%s: Undefined symbol \"%s\"", refobj->path, name); } return def; } /* * Return the search path from the ldconfig hints file, reading it if * necessary. If nostdlib is true, then the default search paths are * not added to result. * * Returns NULL if there are problems with the hints file, * or if the search path there is empty. */ static const char * gethints(bool nostdlib) { static char *hints, *filtered_path; struct elfhints_hdr hdr; struct fill_search_info_args sargs, hargs; struct dl_serinfo smeta, hmeta, *SLPinfo, *hintinfo; struct dl_serpath *SLPpath, *hintpath; char *p; unsigned int SLPndx, hintndx, fndx, fcount; int fd; size_t flen; bool skip; /* First call, read the hints file */ if (hints == NULL) { /* Keep from trying again in case the hints file is bad. */ hints = ""; if ((fd = open(ld_elf_hints_path, O_RDONLY | O_CLOEXEC)) == -1) return (NULL); if (read(fd, &hdr, sizeof hdr) != sizeof hdr || hdr.magic != ELFHINTS_MAGIC || hdr.version != 1) { close(fd); return (NULL); } p = xmalloc(hdr.dirlistlen + 1); if (lseek(fd, hdr.strtab + hdr.dirlist, SEEK_SET) == -1 || read(fd, p, hdr.dirlistlen + 1) != (ssize_t)hdr.dirlistlen + 1) { free(p); close(fd); return (NULL); } hints = p; close(fd); } /* * If caller agreed to receive list which includes the default * paths, we are done. Otherwise, if we still did not * calculated filtered result, do it now. */ if (!nostdlib) return (hints[0] != '\0' ? hints : NULL); if (filtered_path != NULL) goto filt_ret; /* * Obtain the list of all configured search paths, and the * list of the default paths. * * First estimate the size of the results. */ smeta.dls_size = __offsetof(struct dl_serinfo, dls_serpath); smeta.dls_cnt = 0; hmeta.dls_size = __offsetof(struct dl_serinfo, dls_serpath); hmeta.dls_cnt = 0; sargs.request = RTLD_DI_SERINFOSIZE; sargs.serinfo = &smeta; hargs.request = RTLD_DI_SERINFOSIZE; hargs.serinfo = &hmeta; path_enumerate(ld_standard_library_path, fill_search_info, &sargs); path_enumerate(p, fill_search_info, &hargs); SLPinfo = xmalloc(smeta.dls_size); hintinfo = xmalloc(hmeta.dls_size); /* * Next fetch both sets of paths. */ sargs.request = RTLD_DI_SERINFO; sargs.serinfo = SLPinfo; sargs.serpath = &SLPinfo->dls_serpath[0]; sargs.strspace = (char *)&SLPinfo->dls_serpath[smeta.dls_cnt]; hargs.request = RTLD_DI_SERINFO; hargs.serinfo = hintinfo; hargs.serpath = &hintinfo->dls_serpath[0]; hargs.strspace = (char *)&hintinfo->dls_serpath[hmeta.dls_cnt]; path_enumerate(ld_standard_library_path, fill_search_info, &sargs); path_enumerate(p, fill_search_info, &hargs); /* * Now calculate the difference between two sets, by excluding * standard paths from the full set. */ fndx = 0; fcount = 0; filtered_path = xmalloc(hdr.dirlistlen + 1); hintpath = &hintinfo->dls_serpath[0]; for (hintndx = 0; hintndx < hmeta.dls_cnt; hintndx++, hintpath++) { skip = false; SLPpath = &SLPinfo->dls_serpath[0]; /* * Check each standard path against current. */ for (SLPndx = 0; SLPndx < smeta.dls_cnt; SLPndx++, SLPpath++) { /* matched, skip the path */ if (!strcmp(hintpath->dls_name, SLPpath->dls_name)) { skip = true; break; } } if (skip) continue; /* * Not matched against any standard path, add the path * to result. Separate consequtive paths with ':'. 
*/ if (fcount > 0) { filtered_path[fndx] = ':'; fndx++; } fcount++; flen = strlen(hintpath->dls_name); strncpy((filtered_path + fndx), hintpath->dls_name, flen); fndx += flen; } filtered_path[fndx] = '\0'; free(SLPinfo); free(hintinfo); filt_ret: return (filtered_path[0] != '\0' ? filtered_path : NULL); } static void init_dag(Obj_Entry *root) { const Needed_Entry *needed; const Objlist_Entry *elm; DoneList donelist; if (root->dag_inited) return; donelist_init(&donelist); /* Root object belongs to own DAG. */ objlist_push_tail(&root->dldags, root); objlist_push_tail(&root->dagmembers, root); donelist_check(&donelist, root); /* * Add dependencies of root object to DAG in breadth order * by exploiting the fact that each new object get added * to the tail of the dagmembers list. */ STAILQ_FOREACH(elm, &root->dagmembers, link) { for (needed = elm->obj->needed; needed != NULL; needed = needed->next) { if (needed->obj == NULL || donelist_check(&donelist, needed->obj)) continue; objlist_push_tail(&needed->obj->dldags, root); objlist_push_tail(&root->dagmembers, needed->obj); } } root->dag_inited = true; } static void process_z(Obj_Entry *root) { const Objlist_Entry *elm; Obj_Entry *obj; /* * Walk over object DAG and process every dependent object * that is marked as DF_1_NODELETE or DF_1_GLOBAL. They need * to grow their own DAG. * * For DF_1_GLOBAL, DAG is required for symbol lookups in * symlook_global() to work. * * For DF_1_NODELETE, the DAG should have its reference upped. */ STAILQ_FOREACH(elm, &root->dagmembers, link) { obj = elm->obj; if (obj == NULL) continue; if (obj->z_nodelete && !obj->ref_nodel) { dbg("obj %s -z nodelete", obj->path); init_dag(obj); ref_dag(obj); obj->ref_nodel = true; } if (obj->z_global && objlist_find(&list_global, obj) == NULL) { dbg("obj %s -z global", obj->path); objlist_push_tail(&list_global, obj); init_dag(obj); } } } /* * Initialize the dynamic linker. The argument is the address at which * the dynamic linker has been mapped into memory. The primary task of * this function is to relocate the dynamic linker. */ static void init_rtld(caddr_t mapbase, Elf_Auxinfo **aux_info) { Obj_Entry objtmp; /* Temporary rtld object */ const Elf_Dyn *dyn_rpath; const Elf_Dyn *dyn_soname; const Elf_Dyn *dyn_runpath; #ifdef RTLD_INIT_PAGESIZES_EARLY /* The page size is required by the dynamic memory allocator. */ init_pagesizes(aux_info); #endif /* * Conjure up an Obj_Entry structure for the dynamic linker. * * The "path" member can't be initialized yet because string constants * cannot yet be accessed. Below we will set it correctly. */ memset(&objtmp, 0, sizeof(objtmp)); objtmp.path = NULL; objtmp.rtld = true; objtmp.mapbase = mapbase; #ifdef PIC objtmp.relocbase = mapbase; #endif if (RTLD_IS_DYNAMIC()) { objtmp.dynamic = rtld_dynamic(&objtmp); digest_dynamic1(&objtmp, 1, &dyn_rpath, &dyn_soname, &dyn_runpath); assert(objtmp.needed == NULL); #if !defined(__mips__) /* MIPS has a bogus DT_TEXTREL. */ assert(!objtmp.textrel); #endif /* * Temporarily put the dynamic linker entry into the object list, so * that symbols can be found. */ relocate_objects(&objtmp, true, &objtmp, 0, NULL); } /* Initialize the object list. */ obj_tail = &obj_list; /* Now that non-local variables can be accesses, copy out obj_rtld. */ memcpy(&obj_rtld, &objtmp, sizeof(obj_rtld)); #ifndef RTLD_INIT_PAGESIZES_EARLY /* The page size is required by the dynamic memory allocator. 
*/ init_pagesizes(aux_info); #endif if (aux_info[AT_OSRELDATE] != NULL) osreldate = aux_info[AT_OSRELDATE]->a_un.a_val; digest_dynamic2(&obj_rtld, dyn_rpath, dyn_soname, dyn_runpath); /* Replace the path with a dynamically allocated copy. */ obj_rtld.path = xstrdup(ld_path_rtld); r_debug.r_brk = r_debug_state; r_debug.r_state = RT_CONSISTENT; } /* * Retrieve the array of supported page sizes. The kernel provides the page * sizes in increasing order. */ static void init_pagesizes(Elf_Auxinfo **aux_info) { static size_t psa[MAXPAGESIZES]; int mib[2]; size_t len, size; if (aux_info[AT_PAGESIZES] != NULL && aux_info[AT_PAGESIZESLEN] != NULL) { size = aux_info[AT_PAGESIZESLEN]->a_un.a_val; pagesizes = aux_info[AT_PAGESIZES]->a_un.a_ptr; } else { len = 2; if (sysctlnametomib("hw.pagesizes", mib, &len) == 0) size = sizeof(psa); else { /* As a fallback, retrieve the base page size. */ size = sizeof(psa[0]); if (aux_info[AT_PAGESZ] != NULL) { psa[0] = aux_info[AT_PAGESZ]->a_un.a_val; goto psa_filled; } else { mib[0] = CTL_HW; mib[1] = HW_PAGESIZE; len = 2; } } if (sysctl(mib, len, psa, &size, NULL, 0) == -1) { _rtld_error("sysctl for hw.pagesize(s) failed"); rtld_die(); } psa_filled: pagesizes = psa; } npagesizes = size / sizeof(pagesizes[0]); /* Discard any invalid entries at the end of the array. */ while (npagesizes > 0 && pagesizes[npagesizes - 1] == 0) npagesizes--; } /* * Add the init functions from a needed object list (and its recursive * needed objects) to "list". This is not used directly; it is a helper * function for initlist_add_objects(). The write lock must be held * when this function is called. */ static void initlist_add_neededs(Needed_Entry *needed, Objlist *list) { /* Recursively process the successor needed objects. */ if (needed->next != NULL) initlist_add_neededs(needed->next, list); /* Process the current needed object. */ if (needed->obj != NULL) initlist_add_objects(needed->obj, &needed->obj->next, list); } /* * Scan all of the DAGs rooted in the range of objects from "obj" to * "tail" and add their init functions to "list". This recurses over * the DAGs and ensure the proper init ordering such that each object's * needed libraries are initialized before the object itself. At the * same time, this function adds the objects to the global finalization * list "list_fini" in the opposite order. The write lock must be * held when this function is called. */ static void initlist_add_objects(Obj_Entry *obj, Obj_Entry **tail, Objlist *list) { if (obj->init_scanned || obj->init_done) return; obj->init_scanned = true; /* Recursively process the successor objects. */ if (&obj->next != tail) initlist_add_objects(obj->next, tail, list); /* Recursively process the needed objects. */ if (obj->needed != NULL) initlist_add_neededs(obj->needed, list); if (obj->needed_filtees != NULL) initlist_add_neededs(obj->needed_filtees, list); if (obj->needed_aux_filtees != NULL) initlist_add_neededs(obj->needed_aux_filtees, list); /* Add the object to the init list. */ if (obj->preinit_array != (Elf_Addr)NULL || obj->init != (Elf_Addr)NULL || obj->init_array != (Elf_Addr)NULL) objlist_push_tail(list, obj); /* Add the object to the global fini list in the reverse order. 
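 * Pushing at the head here means objects are finalized in the
 * opposite order from their initialization.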
*/ if ((obj->fini != (Elf_Addr)NULL || obj->fini_array != (Elf_Addr)NULL) && !obj->on_fini_list) { objlist_push_head(&list_fini, obj); obj->on_fini_list = true; } } #ifndef FPTR_TARGET #define FPTR_TARGET(f) ((Elf_Addr) (f)) #endif static void free_needed_filtees(Needed_Entry *n) { Needed_Entry *needed, *needed1; for (needed = n; needed != NULL; needed = needed->next) { if (needed->obj != NULL) { dlclose(needed->obj); needed->obj = NULL; } } for (needed = n; needed != NULL; needed = needed1) { needed1 = needed->next; free(needed); } } static void unload_filtees(Obj_Entry *obj) { free_needed_filtees(obj->needed_filtees); obj->needed_filtees = NULL; free_needed_filtees(obj->needed_aux_filtees); obj->needed_aux_filtees = NULL; obj->filtees_loaded = false; } static void load_filtee1(Obj_Entry *obj, Needed_Entry *needed, int flags, RtldLockState *lockstate) { for (; needed != NULL; needed = needed->next) { needed->obj = dlopen_object(obj->strtab + needed->name, -1, obj, flags, ((ld_loadfltr || obj->z_loadfltr) ? RTLD_NOW : RTLD_LAZY) | RTLD_LOCAL, lockstate); } } static void load_filtees(Obj_Entry *obj, int flags, RtldLockState *lockstate) { lock_restart_for_upgrade(lockstate); if (!obj->filtees_loaded) { load_filtee1(obj, obj->needed_filtees, flags, lockstate); load_filtee1(obj, obj->needed_aux_filtees, flags, lockstate); obj->filtees_loaded = true; } } static int process_needed(Obj_Entry *obj, Needed_Entry *needed, int flags) { Obj_Entry *obj1; for (; needed != NULL; needed = needed->next) { obj1 = needed->obj = load_object(obj->strtab + needed->name, -1, obj, flags & ~RTLD_LO_NOLOAD); if (obj1 == NULL && !ld_tracing && (flags & RTLD_LO_FILTEES) == 0) return (-1); } return (0); } /* * Given a shared object, traverse its list of needed objects, and load * each of them. Returns 0 on success. Generates an error message and * returns -1 on failure. */ static int load_needed_objects(Obj_Entry *first, int flags) { Obj_Entry *obj; for (obj = first; obj != NULL; obj = obj->next) { if (process_needed(obj, obj->needed, flags) == -1) return (-1); } return (0); } static int load_preload_objects(void) { char *p = ld_preload; Obj_Entry *obj; static const char delim[] = " \t:;"; if (p == NULL) return 0; p += strspn(p, delim); while (*p != '\0') { size_t len = strcspn(p, delim); char savech; savech = p[len]; p[len] = '\0'; obj = load_object(p, -1, NULL, 0); if (obj == NULL) return -1; /* XXX - cleanup */ obj->z_interpose = true; p[len] = savech; p += len; p += strspn(p, delim); } LD_UTRACE(UTRACE_PRELOAD_FINISHED, NULL, NULL, 0, 0, NULL); return 0; } static const char * printable_path(const char *path) { return (path == NULL ? "" : path); } /* * Load a shared object into memory, if it is not already loaded. The * object may be specified by name or by user-supplied file descriptor * fd_u. In the later case, the fd_u descriptor is not closed, but its * duplicate is. * * Returns a pointer to the Obj_Entry for the object. Returns NULL * on failure. */ static Obj_Entry * load_object(const char *name, int fd_u, const Obj_Entry *refobj, int flags) { Obj_Entry *obj; int fd; struct stat sb; char *path; fd = -1; if (name != NULL) { for (obj = obj_list->next; obj != NULL; obj = obj->next) { if (object_match_name(obj, name)) return (obj); } path = find_library(name, refobj, &fd); if (path == NULL) return (NULL); } else path = NULL; if (fd >= 0) { /* * search_library_pathfds() opens a fresh file descriptor for the * library, so there is no need to dup(). 
*/ } else if (fd_u == -1) { /* * If we didn't find a match by pathname, or the name is not * supplied, open the file and check again by device and inode. * This avoids false mismatches caused by multiple links or ".." * in pathnames. * * To avoid a race, we open the file and use fstat() rather than * using stat(). */ if ((fd = open(path, O_RDONLY | O_CLOEXEC | O_VERIFY)) == -1) { _rtld_error("Cannot open \"%s\"", path); free(path); return (NULL); } } else { fd = fcntl(fd_u, F_DUPFD_CLOEXEC, 0); if (fd == -1) { _rtld_error("Cannot dup fd"); free(path); return (NULL); } } if (fstat(fd, &sb) == -1) { _rtld_error("Cannot fstat \"%s\"", printable_path(path)); close(fd); free(path); return NULL; } for (obj = obj_list->next; obj != NULL; obj = obj->next) if (obj->ino == sb.st_ino && obj->dev == sb.st_dev) break; if (obj != NULL && name != NULL) { object_add_name(obj, name); free(path); close(fd); return obj; } if (flags & RTLD_LO_NOLOAD) { free(path); close(fd); return (NULL); } /* First use of this object, so we must map it in */ obj = do_load_object(fd, name, path, &sb, flags); if (obj == NULL) free(path); close(fd); return obj; } static Obj_Entry * do_load_object(int fd, const char *name, char *path, struct stat *sbp, int flags) { Obj_Entry *obj; struct statfs fs; /* * but first, make sure that environment variables haven't been * used to circumvent the noexec flag on a filesystem. */ if (dangerous_ld_env) { if (fstatfs(fd, &fs) != 0) { _rtld_error("Cannot fstatfs \"%s\"", printable_path(path)); return NULL; } if (fs.f_flags & MNT_NOEXEC) { _rtld_error("Cannot execute objects on %s\n", fs.f_mntonname); return NULL; } } dbg("loading \"%s\"", printable_path(path)); obj = map_object(fd, printable_path(path), sbp); if (obj == NULL) return NULL; /* * If DT_SONAME is present in the object, digest_dynamic2 already * added it to the object names. */ if (name != NULL) object_add_name(obj, name); obj->path = path; digest_dynamic(obj, 0); dbg("%s valid_hash_sysv %d valid_hash_gnu %d dynsymcount %d", obj->path, obj->valid_hash_sysv, obj->valid_hash_gnu, obj->dynsymcount); if (obj->z_noopen && (flags & (RTLD_LO_DLOPEN | RTLD_LO_TRACE)) == RTLD_LO_DLOPEN) { dbg("refusing to load non-loadable \"%s\"", obj->path); _rtld_error("Cannot dlopen non-loadable %s", obj->path); munmap(obj->mapbase, obj->mapsize); obj_free(obj); return (NULL); } obj->dlopened = (flags & RTLD_LO_DLOPEN) != 0; *obj_tail = obj; obj_tail = &obj->next; obj_count++; obj_loads++; linkmap_add(obj); /* for GDB & dlinfo() */ max_stack_flags |= obj->stack_flags; dbg(" %p .. 
%p: %s", obj->mapbase, obj->mapbase + obj->mapsize - 1, obj->path); if (obj->textrel) dbg(" WARNING: %s has impure text", obj->path); LD_UTRACE(UTRACE_LOAD_OBJECT, obj, obj->mapbase, obj->mapsize, 0, obj->path); return obj; } static Obj_Entry * obj_from_addr(const void *addr) { Obj_Entry *obj; for (obj = obj_list; obj != NULL; obj = obj->next) { if (addr < (void *) obj->mapbase) continue; if (addr < (void *) (obj->mapbase + obj->mapsize)) return obj; } return NULL; } static void preinit_main(void) { Elf_Addr *preinit_addr; int index; preinit_addr = (Elf_Addr *)obj_main->preinit_array; if (preinit_addr == NULL) return; for (index = 0; index < obj_main->preinit_array_num; index++) { if (preinit_addr[index] != 0 && preinit_addr[index] != 1) { dbg("calling preinit function for %s at %p", obj_main->path, (void *)preinit_addr[index]); LD_UTRACE(UTRACE_INIT_CALL, obj_main, (void *)preinit_addr[index], 0, 0, obj_main->path); call_init_pointer(obj_main, preinit_addr[index]); } } } /* * Call the finalization functions for each of the objects in "list" * belonging to the DAG of "root" and referenced once. If NULL "root" * is specified, every finalization function will be called regardless * of the reference count and the list elements won't be freed. All of * the objects are expected to have non-NULL fini functions. */ static void objlist_call_fini(Objlist *list, Obj_Entry *root, RtldLockState *lockstate) { Objlist_Entry *elm; char *saved_msg; Elf_Addr *fini_addr; int index; assert(root == NULL || root->refcount == 1); /* * Preserve the current error message since a fini function might * call into the dynamic linker and overwrite it. */ saved_msg = errmsg_save(); do { STAILQ_FOREACH(elm, list, link) { if (root != NULL && (elm->obj->refcount != 1 || objlist_find(&root->dagmembers, elm->obj) == NULL)) continue; /* Remove object from fini list to prevent recursive invocation. */ STAILQ_REMOVE(list, elm, Struct_Objlist_Entry, link); /* * XXX: If a dlopen() call references an object while the * fini function is in progress, we might end up trying to * unload the referenced object in dlclose() or the object * won't be unloaded although its fini function has been * called. */ lock_release(rtld_bind_lock, lockstate); /* * It is legal to have both DT_FINI and DT_FINI_ARRAY defined. * When this happens, DT_FINI_ARRAY is processed first. */ fini_addr = (Elf_Addr *)elm->obj->fini_array; if (fini_addr != NULL && elm->obj->fini_array_num > 0) { for (index = elm->obj->fini_array_num - 1; index >= 0; index--) { if (fini_addr[index] != 0 && fini_addr[index] != 1) { dbg("calling fini function for %s at %p", elm->obj->path, (void *)fini_addr[index]); LD_UTRACE(UTRACE_FINI_CALL, elm->obj, (void *)fini_addr[index], 0, 0, elm->obj->path); call_initfini_pointer(elm->obj, fini_addr[index]); } } } if (elm->obj->fini != (Elf_Addr)NULL) { dbg("calling fini function for %s at %p", elm->obj->path, (void *)elm->obj->fini); LD_UTRACE(UTRACE_FINI_CALL, elm->obj, (void *)elm->obj->fini, 0, 0, elm->obj->path); call_initfini_pointer(elm->obj, elm->obj->fini); } wlock_acquire(rtld_bind_lock, lockstate); /* No need to free anything if process is going down. */ if (root != NULL) free(elm); /* * We must restart the list traversal after every fini call * because a dlclose() call from the fini function or from * another thread might have modified the reference counts. */ break; } } while (elm != NULL); errmsg_restore(saved_msg); } /* * Call the initialization functions for each of the objects in * "list". 
All of the objects are expected to have non-NULL init * functions. */ static void objlist_call_init(Objlist *list, RtldLockState *lockstate) { Objlist_Entry *elm; Obj_Entry *obj; char *saved_msg; Elf_Addr *init_addr; int index; /* * Clean init_scanned flag so that objects can be rechecked and * possibly initialized earlier if any of vectors called below * cause the change by using dlopen. */ for (obj = obj_list; obj != NULL; obj = obj->next) obj->init_scanned = false; /* * Preserve the current error message since an init function might * call into the dynamic linker and overwrite it. */ saved_msg = errmsg_save(); STAILQ_FOREACH(elm, list, link) { if (elm->obj->init_done) /* Initialized early. */ continue; /* * Race: other thread might try to use this object before current * one completes the initilization. Not much can be done here * without better locking. */ elm->obj->init_done = true; lock_release(rtld_bind_lock, lockstate); /* * It is legal to have both DT_INIT and DT_INIT_ARRAY defined. * When this happens, DT_INIT is processed first. */ if (elm->obj->init != (Elf_Addr)NULL) { dbg("calling init function for %s at %p", elm->obj->path, (void *)elm->obj->init); LD_UTRACE(UTRACE_INIT_CALL, elm->obj, (void *)elm->obj->init, 0, 0, elm->obj->path); call_initfini_pointer(elm->obj, elm->obj->init); } init_addr = (Elf_Addr *)elm->obj->init_array; if (init_addr != NULL) { for (index = 0; index < elm->obj->init_array_num; index++) { if (init_addr[index] != 0 && init_addr[index] != 1) { dbg("calling init function for %s at %p", elm->obj->path, (void *)init_addr[index]); LD_UTRACE(UTRACE_INIT_CALL, elm->obj, (void *)init_addr[index], 0, 0, elm->obj->path); call_init_pointer(elm->obj, init_addr[index]); } } } wlock_acquire(rtld_bind_lock, lockstate); } errmsg_restore(saved_msg); } static void objlist_clear(Objlist *list) { Objlist_Entry *elm; while (!STAILQ_EMPTY(list)) { elm = STAILQ_FIRST(list); STAILQ_REMOVE_HEAD(list, link); free(elm); } } static Objlist_Entry * objlist_find(Objlist *list, const Obj_Entry *obj) { Objlist_Entry *elm; STAILQ_FOREACH(elm, list, link) if (elm->obj == obj) return elm; return NULL; } static void objlist_init(Objlist *list) { STAILQ_INIT(list); } static void objlist_push_head(Objlist *list, Obj_Entry *obj) { Objlist_Entry *elm; elm = NEW(Objlist_Entry); elm->obj = obj; STAILQ_INSERT_HEAD(list, elm, link); } static void objlist_push_tail(Objlist *list, Obj_Entry *obj) { Objlist_Entry *elm; elm = NEW(Objlist_Entry); elm->obj = obj; STAILQ_INSERT_TAIL(list, elm, link); } static void objlist_put_after(Objlist *list, Obj_Entry *listobj, Obj_Entry *obj) { Objlist_Entry *elm, *listelm; STAILQ_FOREACH(listelm, list, link) { if (listelm->obj == listobj) break; } elm = NEW(Objlist_Entry); elm->obj = obj; if (listelm != NULL) STAILQ_INSERT_AFTER(list, listelm, elm, link); else STAILQ_INSERT_TAIL(list, elm, link); } static void objlist_remove(Objlist *list, Obj_Entry *obj) { Objlist_Entry *elm; if ((elm = objlist_find(list, obj)) != NULL) { STAILQ_REMOVE(list, elm, Struct_Objlist_Entry, link); free(elm); } } /* * Relocate dag rooted in the specified object. * Returns 0 on success, or -1 on failure. */ static int relocate_object_dag(Obj_Entry *root, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate) { Objlist_Entry *elm; int error; error = 0; STAILQ_FOREACH(elm, &root->dagmembers, link) { error = relocate_object(elm->obj, bind_now, rtldobj, flags, lockstate); if (error == -1) break; } return (error); } /* * Relocate single object. 
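 * The passes below are: non-PLT (non-IFUNC) relocations, PLT/GOT
 * setup, PLT relocations, immediate jmpslot binding when requested,
 * a second pass for non-PLT IFUNC relocations, and finally
 * PT_GNU_RELRO protection; DT_TEXTREL objects have their text
 * segment temporarily write-enabled around the first pass.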
* Returns 0 on success, or -1 on failure. */ static int relocate_object(Obj_Entry *obj, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate) { if (obj->relocated) return (0); obj->relocated = true; if (obj != rtldobj) dbg("relocating \"%s\"", obj->path); if (obj->symtab == NULL || obj->strtab == NULL || !(obj->valid_hash_sysv || obj->valid_hash_gnu)) { _rtld_error("%s: Shared object has no run-time symbol table", obj->path); return (-1); } if (obj->textrel) { /* There are relocations to the write-protected text segment. */ if (mprotect(obj->mapbase, obj->textsize, PROT_READ|PROT_WRITE|PROT_EXEC) == -1) { _rtld_error("%s: Cannot write-enable text segment: %s", obj->path, rtld_strerror(errno)); return (-1); } } /* Process the non-PLT non-IFUNC relocations. */ if (reloc_non_plt(obj, rtldobj, flags, lockstate)) return (-1); if (obj->textrel) { /* Re-protected the text segment. */ if (mprotect(obj->mapbase, obj->textsize, PROT_READ|PROT_EXEC) == -1) { _rtld_error("%s: Cannot write-protect text segment: %s", obj->path, rtld_strerror(errno)); return (-1); } } /* Set the special PLT or GOT entries. */ init_pltgot(obj); /* Process the PLT relocations. */ if (reloc_plt(obj) == -1) return (-1); /* Relocate the jump slots if we are doing immediate binding. */ if (obj->bind_now || bind_now) if (reloc_jmpslots(obj, flags, lockstate) == -1) return (-1); /* * Process the non-PLT IFUNC relocations. The relocations are * processed in two phases, because IFUNC resolvers may * reference other symbols, which must be readily processed * before resolvers are called. */ if (obj->non_plt_gnu_ifunc && reloc_non_plt(obj, rtldobj, flags | SYMLOOK_IFUNC, lockstate)) return (-1); if (obj->relro_size > 0) { if (mprotect(obj->relro_page, obj->relro_size, PROT_READ) == -1) { _rtld_error("%s: Cannot enforce relro protection: %s", obj->path, rtld_strerror(errno)); return (-1); } } /* * Set up the magic number and version in the Obj_Entry. These * were checked in the crt1.o from the original ElfKit, so we * set them for backward compatibility. */ obj->magic = RTLD_MAGIC; obj->version = RTLD_VERSION; return (0); } /* * Relocate newly-loaded shared objects. The argument is a pointer to * the Obj_Entry for the first such object. All objects from the first * to the end of the list of objects are relocated. Returns 0 on success, * or -1 on failure. */ static int relocate_objects(Obj_Entry *first, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate) { Obj_Entry *obj; int error; for (error = 0, obj = first; obj != NULL; obj = obj->next) { error = relocate_object(obj, bind_now, rtldobj, flags, lockstate); if (error == -1) break; } return (error); } /* * The handling of R_MACHINE_IRELATIVE relocations and jumpslots * referencing STT_GNU_IFUNC symbols is postponed till the other * relocations are done. The indirect functions specified as * ifunc are allowed to call other symbols, so we need to have * objects relocated before asking for resolution from indirects. * * The R_MACHINE_IRELATIVE slots are resolved in greedy fashion, * instead of the usual lazy handling of PLT slots. It is * consistent with how GNU does it. 
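 *
 * As an illustration only (not part of rtld, all names hypothetical), the
 * kind of object this path serves is an STT_GNU_IFUNC symbol whose resolver
 * picks an implementation at load time:
 *
 *	#include <stddef.h>
 *
 *	static size_t
 *	my_strlen_generic(const char *s)
 *	{
 *		size_t n = 0;
 *
 *		while (s[n] != '\0')
 *			n++;
 *		return (n);
 *	}
 *
 *	// The resolver runs while the object is only partially set up, which
 *	// is why IRELATIVE slots are processed after the other relocations.
 *	static size_t (*resolve_my_strlen(void))(const char *)
 *	{
 *		return (my_strlen_generic);
 *	}
 *
 *	size_t my_strlen(const char *)
 *	    __attribute__((ifunc("resolve_my_strlen")));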
*/ static int resolve_object_ifunc(Obj_Entry *obj, bool bind_now, int flags, RtldLockState *lockstate) { if (obj->irelative && reloc_iresolve(obj, lockstate) == -1) return (-1); if ((obj->bind_now || bind_now) && obj->gnu_ifunc && reloc_gnu_ifunc(obj, flags, lockstate) == -1) return (-1); return (0); } static int resolve_objects_ifunc(Obj_Entry *first, bool bind_now, int flags, RtldLockState *lockstate) { Obj_Entry *obj; for (obj = first; obj != NULL; obj = obj->next) { if (resolve_object_ifunc(obj, bind_now, flags, lockstate) == -1) return (-1); } return (0); } static int initlist_objects_ifunc(Objlist *list, bool bind_now, int flags, RtldLockState *lockstate) { Objlist_Entry *elm; STAILQ_FOREACH(elm, list, link) { if (resolve_object_ifunc(elm->obj, bind_now, flags, lockstate) == -1) return (-1); } return (0); } /* * Cleanup procedure. It will be called (by the atexit mechanism) just * before the process exits. */ static void rtld_exit(void) { RtldLockState lockstate; wlock_acquire(rtld_bind_lock, &lockstate); dbg("rtld_exit()"); objlist_call_fini(&list_fini, NULL, &lockstate); /* No need to remove the items from the list, since we are exiting. */ if (!libmap_disable) lm_fini(); lock_release(rtld_bind_lock, &lockstate); } /* * Iterate over a search path, translate each element, and invoke the * callback on the result. */ static void * path_enumerate(const char *path, path_enum_proc callback, void *arg) { const char *trans; if (path == NULL) return (NULL); path += strspn(path, ":;"); while (*path != '\0') { size_t len; char *res; len = strcspn(path, ":;"); trans = lm_findn(NULL, path, len); if (trans) res = callback(trans, strlen(trans), arg); else res = callback(path, len, arg); if (res != NULL) return (res); path += len; path += strspn(path, ":;"); } return (NULL); } struct try_library_args { const char *name; size_t namelen; char *buffer; size_t buflen; }; static void * try_library_path(const char *dir, size_t dirlen, void *param) { struct try_library_args *arg; arg = param; if (*dir == '/' || trust) { char *pathname; if (dirlen + 1 + arg->namelen + 1 > arg->buflen) return (NULL); pathname = arg->buffer; strncpy(pathname, dir, dirlen); pathname[dirlen] = '/'; strcpy(pathname + dirlen + 1, arg->name); dbg(" Trying \"%s\"", pathname); if (access(pathname, F_OK) == 0) { /* We found it */ pathname = xmalloc(dirlen + 1 + arg->namelen + 1); strcpy(pathname, arg->buffer); return (pathname); } } return (NULL); } static char * search_library_path(const char *name, const char *path) { char *p; struct try_library_args arg; if (path == NULL) return NULL; arg.name = name; arg.namelen = strlen(name); arg.buffer = xmalloc(PATH_MAX); arg.buflen = PATH_MAX; p = path_enumerate(path, try_library_path, &arg); free(arg.buffer); return (p); } /* * Finds the library with the given name using the directory descriptors * listed in the LD_LIBRARY_PATH_FDS environment variable. * * Returns a freshly-opened close-on-exec file descriptor for the library, * or -1 if the library cannot be found. */ static char * search_library_pathfds(const char *name, const char *path, int *fdp) { char *envcopy, *fdstr, *found, *last_token; size_t len; int dirfd, fd; dbg("%s('%s', '%s', fdp)", __func__, name, path); /* Don't load from user-specified libdirs into setuid binaries. */ if (!trust) return (NULL); /* We can't do anything if LD_LIBRARY_PATH_FDS isn't set. */ if (path == NULL) return (NULL); /* LD_LIBRARY_PATH_FDS only works with relative paths. 
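 *
 * Illustrative caller-side setup (hypothetical code, not part of rtld): the
 * variable carries colon-separated directory descriptors rather than paths,
 * so it stays usable for processes that can no longer open absolute paths
 * themselves:
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *
 *	static int
 *	setup_library_fds(void)
 *	{
 *		char buf[16];
 *		int fd;
 *
 *		fd = open("/usr/local/lib/myapp", O_RDONLY | O_DIRECTORY);
 *		if (fd == -1)
 *			return (-1);
 *		(void)snprintf(buf, sizeof(buf), "%d", fd);
 *		// rtld will openat(2) relative library names under this fd.
 *		return (setenv("LD_LIBRARY_PATH_FDS", buf, 1));
 *	}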
*/ if (name[0] == '/') { dbg("Absolute path (%s) passed to %s", name, __func__); return (NULL); } /* * Use strtok_r() to walk the FD:FD:FD list. This requires a local * copy of the path, as strtok_r rewrites separator tokens * with '\0'. */ found = NULL; envcopy = xstrdup(path); for (fdstr = strtok_r(envcopy, ":", &last_token); fdstr != NULL; fdstr = strtok_r(NULL, ":", &last_token)) { dirfd = parse_libdir(fdstr); if (dirfd < 0) break; fd = __sys_openat(dirfd, name, O_RDONLY | O_CLOEXEC | O_VERIFY); if (fd >= 0) { *fdp = fd; len = strlen(fdstr) + strlen(name) + 3; found = xmalloc(len); if (rtld_snprintf(found, len, "#%d/%s", dirfd, name) < 0) { _rtld_error("error generating '%d/%s'", dirfd, name); rtld_die(); } dbg("open('%s') => %d", found, fd); break; } } free(envcopy); return (found); } int dlclose(void *handle) { Obj_Entry *root; RtldLockState lockstate; wlock_acquire(rtld_bind_lock, &lockstate); root = dlcheck(handle); if (root == NULL) { lock_release(rtld_bind_lock, &lockstate); return -1; } LD_UTRACE(UTRACE_DLCLOSE_START, handle, NULL, 0, root->dl_refcount, root->path); /* Unreference the object and its dependencies. */ root->dl_refcount--; if (root->refcount == 1) { /* * The object will be no longer referenced, so we must unload it. * First, call the fini functions. */ objlist_call_fini(&list_fini, root, &lockstate); unref_dag(root); /* Finish cleaning up the newly-unreferenced objects. */ GDB_STATE(RT_DELETE,&root->linkmap); unload_object(root); GDB_STATE(RT_CONSISTENT,NULL); } else unref_dag(root); LD_UTRACE(UTRACE_DLCLOSE_STOP, handle, NULL, 0, 0, NULL); lock_release(rtld_bind_lock, &lockstate); return 0; } char * dlerror(void) { char *msg = error_message; error_message = NULL; return msg; } /* * This function is deprecated and has no effect. */ void dllockinit(void *context, void *(*lock_create)(void *context), void (*rlock_acquire)(void *lock), void (*wlock_acquire)(void *lock), void (*lock_release)(void *lock), void (*lock_destroy)(void *lock), void (*context_destroy)(void *context)) { static void *cur_context; static void (*cur_context_destroy)(void *); /* Just destroy the context from the previous call, if necessary. */ if (cur_context_destroy != NULL) cur_context_destroy(cur_context); cur_context = context; cur_context_destroy = context_destroy; } void * dlopen(const char *name, int mode) { return (rtld_dlopen(name, -1, mode)); } void * fdlopen(int fd, int mode) { return (rtld_dlopen(NULL, fd, mode)); } static void * rtld_dlopen(const char *name, int fd, int mode) { RtldLockState lockstate; int lo_flags; LD_UTRACE(UTRACE_DLOPEN_START, NULL, NULL, 0, mode, name); ld_tracing = (mode & RTLD_TRACE) == 0 ? 
NULL : "1"; if (ld_tracing != NULL) { rlock_acquire(rtld_bind_lock, &lockstate); if (sigsetjmp(lockstate.env, 0) != 0) lock_upgrade(rtld_bind_lock, &lockstate); environ = (char **)*get_program_var_addr("environ", &lockstate); lock_release(rtld_bind_lock, &lockstate); } lo_flags = RTLD_LO_DLOPEN; if (mode & RTLD_NODELETE) lo_flags |= RTLD_LO_NODELETE; if (mode & RTLD_NOLOAD) lo_flags |= RTLD_LO_NOLOAD; if (ld_tracing != NULL) lo_flags |= RTLD_LO_TRACE; return (dlopen_object(name, fd, obj_main, lo_flags, mode & (RTLD_MODEMASK | RTLD_GLOBAL), NULL)); } static void dlopen_cleanup(Obj_Entry *obj) { obj->dl_refcount--; unref_dag(obj); if (obj->refcount == 0) unload_object(obj); } static Obj_Entry * dlopen_object(const char *name, int fd, Obj_Entry *refobj, int lo_flags, int mode, RtldLockState *lockstate) { Obj_Entry **old_obj_tail; Obj_Entry *obj; Objlist initlist; RtldLockState mlockstate; int result; objlist_init(&initlist); if (lockstate == NULL && !(lo_flags & RTLD_LO_EARLY)) { wlock_acquire(rtld_bind_lock, &mlockstate); lockstate = &mlockstate; } GDB_STATE(RT_ADD,NULL); old_obj_tail = obj_tail; obj = NULL; if (name == NULL && fd == -1) { obj = obj_main; obj->refcount++; } else { obj = load_object(name, fd, refobj, lo_flags); } if (obj) { obj->dl_refcount++; if (mode & RTLD_GLOBAL && objlist_find(&list_global, obj) == NULL) objlist_push_tail(&list_global, obj); if (*old_obj_tail != NULL) { /* We loaded something new. */ assert(*old_obj_tail == obj); result = load_needed_objects(obj, lo_flags & (RTLD_LO_DLOPEN | RTLD_LO_EARLY)); init_dag(obj); ref_dag(obj); if (result != -1) result = rtld_verify_versions(&obj->dagmembers); if (result != -1 && ld_tracing) goto trace; if (result == -1 || relocate_object_dag(obj, (mode & RTLD_MODEMASK) == RTLD_NOW, &obj_rtld, (lo_flags & RTLD_LO_EARLY) ? SYMLOOK_EARLY : 0, lockstate) == -1) { dlopen_cleanup(obj); obj = NULL; } else if (lo_flags & RTLD_LO_EARLY) { /* * Do not call the init functions for early loaded * filtees. The image is still not initialized enough * for them to work. * * Our object is found by the global object list and * will be ordered among all init calls done right * before transferring control to main. */ } else { /* Make list of init functions to call. */ initlist_add_objects(obj, &obj->next, &initlist); } /* * Process all no_delete or global objects here, given * them own DAGs to prevent their dependencies from being * unloaded. This has to be done after we have loaded all * of the dependencies, so that we do not miss any. */ if (obj != NULL) process_z(obj); } else { /* * Bump the reference counts for objects on this DAG. If * this is the first dlopen() call for the object that was * already loaded as a dependency, initialize the dag * starting at it. */ init_dag(obj); ref_dag(obj); if ((lo_flags & RTLD_LO_TRACE) != 0) goto trace; } if (obj != NULL && ((lo_flags & RTLD_LO_NODELETE) != 0 || obj->z_nodelete) && !obj->ref_nodel) { dbg("obj %s nodelete", obj->path); ref_dag(obj); obj->z_nodelete = obj->ref_nodel = true; } } LD_UTRACE(UTRACE_DLOPEN_STOP, obj, NULL, 0, obj ? obj->dl_refcount : 0, name); GDB_STATE(RT_CONSISTENT,obj ? &obj->linkmap : NULL); if (!(lo_flags & RTLD_LO_EARLY)) { map_stacks_exec(lockstate); } if (initlist_objects_ifunc(&initlist, (mode & RTLD_MODEMASK) == RTLD_NOW, (lo_flags & RTLD_LO_EARLY) ? 
SYMLOOK_EARLY : 0, lockstate) == -1) { objlist_clear(&initlist); dlopen_cleanup(obj); if (lockstate == &mlockstate) lock_release(rtld_bind_lock, lockstate); return (NULL); } if (!(lo_flags & RTLD_LO_EARLY)) { /* Call the init functions. */ objlist_call_init(&initlist, lockstate); } objlist_clear(&initlist); if (lockstate == &mlockstate) lock_release(rtld_bind_lock, lockstate); return obj; trace: trace_loaded_objects(obj); if (lockstate == &mlockstate) lock_release(rtld_bind_lock, lockstate); exit(0); } static void * do_dlsym(void *handle, const char *name, void *retaddr, const Ver_Entry *ve, int flags) { DoneList donelist; const Obj_Entry *obj, *defobj; const Elf_Sym *def; SymLook req; RtldLockState lockstate; tls_index ti; void *sym; int res; def = NULL; defobj = NULL; symlook_init(&req, name); req.ventry = ve; req.flags = flags | SYMLOOK_IN_PLT; req.lockstate = &lockstate; LD_UTRACE(UTRACE_DLSYM_START, handle, NULL, 0, 0, name); rlock_acquire(rtld_bind_lock, &lockstate); if (sigsetjmp(lockstate.env, 0) != 0) lock_upgrade(rtld_bind_lock, &lockstate); if (handle == NULL || handle == RTLD_NEXT || handle == RTLD_DEFAULT || handle == RTLD_SELF) { if ((obj = obj_from_addr(retaddr)) == NULL) { _rtld_error("Cannot determine caller's shared object"); lock_release(rtld_bind_lock, &lockstate); LD_UTRACE(UTRACE_DLSYM_STOP, handle, NULL, 0, 0, name); return NULL; } if (handle == NULL) { /* Just the caller's shared object. */ res = symlook_obj(&req, obj); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } else if (handle == RTLD_NEXT || /* Objects after caller's */ handle == RTLD_SELF) { /* ... caller included */ if (handle == RTLD_NEXT) obj = obj->next; for (; obj != NULL; obj = obj->next) { res = symlook_obj(&req, obj); if (res == 0) { if (def == NULL || ELF_ST_BIND(req.sym_out->st_info) != STB_WEAK) { def = req.sym_out; defobj = req.defobj_out; if (ELF_ST_BIND(def->st_info) != STB_WEAK) break; } } } /* * Search the dynamic linker itself, and possibly resolve the * symbol from there. This is how the application links to * dynamic linker services such as dlopen. */ if (def == NULL || ELF_ST_BIND(def->st_info) == STB_WEAK) { res = symlook_obj(&req, &obj_rtld); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } } else { assert(handle == RTLD_DEFAULT); res = symlook_default(&req, obj); if (res == 0) { defobj = req.defobj_out; def = req.sym_out; } } } else { if ((obj = dlcheck(handle)) == NULL) { lock_release(rtld_bind_lock, &lockstate); LD_UTRACE(UTRACE_DLSYM_STOP, handle, NULL, 0, 0, name); return NULL; } donelist_init(&donelist); if (obj->mainprog) { /* Handle obtained by dlopen(NULL, ...) implies global scope. */ res = symlook_global(&req, &donelist); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } /* * Search the dynamic linker itself, and possibly resolve the * symbol from there. This is how the application links to * dynamic linker services such as dlopen. */ if (def == NULL || ELF_ST_BIND(def->st_info) == STB_WEAK) { res = symlook_obj(&req, &obj_rtld); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } } else { /* Search the whole DAG rooted at the given object. */ res = symlook_list(&req, &obj->dagmembers, &donelist); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } } if (def != NULL) { lock_release(rtld_bind_lock, &lockstate); /* * The value required by the caller is derived from the value * of the symbol. this is simply the relocated value of the * symbol. 
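 *
 * The RTLD_NEXT handling above is what makes classic interposition work; a
 * hypothetical malloc wrapper (illustration only, not part of rtld):
 *
 *	#include <dlfcn.h>
 *	#include <stddef.h>
 *
 *	void *
 *	malloc(size_t len)
 *	{
 *		static void *(*next_malloc)(size_t);
 *
 *		if (next_malloc == NULL)
 *			next_malloc = (void *(*)(size_t))dlsym(RTLD_NEXT,
 *			    "malloc");
 *		// A real interposer would record or check the request here.
 *		return (next_malloc(len));
 *	}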
*/ if (ELF_ST_TYPE(def->st_info) == STT_FUNC) sym = make_function_pointer(def, defobj); else if (ELF_ST_TYPE(def->st_info) == STT_GNU_IFUNC) sym = rtld_resolve_ifunc(defobj, def); else if (ELF_ST_TYPE(def->st_info) == STT_TLS) { ti.ti_module = defobj->tlsindex; ti.ti_offset = def->st_value; sym = __tls_get_addr(&ti); } else sym = defobj->relocbase + def->st_value; LD_UTRACE(UTRACE_DLSYM_STOP, handle, sym, 0, 0, name); return (sym); } _rtld_error("Undefined symbol \"%s\"", name); lock_release(rtld_bind_lock, &lockstate); LD_UTRACE(UTRACE_DLSYM_STOP, handle, NULL, 0, 0, name); return NULL; } void * dlsym(void *handle, const char *name) { return do_dlsym(handle, name, __builtin_return_address(0), NULL, SYMLOOK_DLSYM); } dlfunc_t dlfunc(void *handle, const char *name) { union { void *d; dlfunc_t f; } rv; rv.d = do_dlsym(handle, name, __builtin_return_address(0), NULL, SYMLOOK_DLSYM); return (rv.f); } void * dlvsym(void *handle, const char *name, const char *version) { Ver_Entry ventry; ventry.name = version; ventry.file = NULL; ventry.hash = elf_hash(version); ventry.flags= 0; return do_dlsym(handle, name, __builtin_return_address(0), &ventry, SYMLOOK_DLSYM); } int _rtld_addr_phdr(const void *addr, struct dl_phdr_info *phdr_info) { const Obj_Entry *obj; RtldLockState lockstate; rlock_acquire(rtld_bind_lock, &lockstate); obj = obj_from_addr(addr); if (obj == NULL) { _rtld_error("No shared object contains address"); lock_release(rtld_bind_lock, &lockstate); return (0); } rtld_fill_dl_phdr_info(obj, phdr_info); lock_release(rtld_bind_lock, &lockstate); return (1); } int dladdr(const void *addr, Dl_info *info) { const Obj_Entry *obj; const Elf_Sym *def; void *symbol_addr; unsigned long symoffset; RtldLockState lockstate; rlock_acquire(rtld_bind_lock, &lockstate); obj = obj_from_addr(addr); if (obj == NULL) { _rtld_error("No shared object contains address"); lock_release(rtld_bind_lock, &lockstate); return 0; } info->dli_fname = obj->path; info->dli_fbase = obj->mapbase; info->dli_saddr = (void *)0; info->dli_sname = NULL; /* * Walk the symbol list looking for the symbol whose address is * closest to the address sent in. */ for (symoffset = 0; symoffset < obj->dynsymcount; symoffset++) { def = obj->symtab + symoffset; /* * For skip the symbol if st_shndx is either SHN_UNDEF or * SHN_COMMON. */ if (def->st_shndx == SHN_UNDEF || def->st_shndx == SHN_COMMON) continue; /* * If the symbol is greater than the specified address, or if it * is further away from addr than the current nearest symbol, * then reject it. */ symbol_addr = obj->relocbase + def->st_value; if (symbol_addr > addr || symbol_addr < info->dli_saddr) continue; /* Update our idea of the nearest symbol. */ info->dli_sname = obj->strtab + def->st_name; info->dli_saddr = symbol_addr; /* Exact match? 
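 * (If so, the loop below stops at this symbol.)
 *
 * Typical consumption of the fields filled in above, from a hypothetical
 * caller (illustration only, not part of rtld):
 *
 *	#include <dlfcn.h>
 *	#include <stdio.h>
 *
 *	static void
 *	whereis(void *addr)
 *	{
 *		Dl_info info;
 *
 *		if (dladdr(addr, &info) != 0)
 *			printf("%p is near %s in %s\n", addr,
 *			    info.dli_sname != NULL ? info.dli_sname : "?",
 *			    info.dli_fname);
 *	}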
*/ if (info->dli_saddr == addr) break; } lock_release(rtld_bind_lock, &lockstate); return 1; } int dlinfo(void *handle, int request, void *p) { const Obj_Entry *obj; RtldLockState lockstate; int error; rlock_acquire(rtld_bind_lock, &lockstate); if (handle == NULL || handle == RTLD_SELF) { void *retaddr; retaddr = __builtin_return_address(0); /* __GNUC__ only */ if ((obj = obj_from_addr(retaddr)) == NULL) _rtld_error("Cannot determine caller's shared object"); } else obj = dlcheck(handle); if (obj == NULL) { lock_release(rtld_bind_lock, &lockstate); return (-1); } error = 0; switch (request) { case RTLD_DI_LINKMAP: *((struct link_map const **)p) = &obj->linkmap; break; case RTLD_DI_ORIGIN: error = rtld_dirname(obj->path, p); break; case RTLD_DI_SERINFOSIZE: case RTLD_DI_SERINFO: error = do_search_info(obj, request, (struct dl_serinfo *)p); break; default: _rtld_error("Invalid request %d passed to dlinfo()", request); error = -1; } lock_release(rtld_bind_lock, &lockstate); return (error); } static void rtld_fill_dl_phdr_info(const Obj_Entry *obj, struct dl_phdr_info *phdr_info) { phdr_info->dlpi_addr = (Elf_Addr)obj->relocbase; phdr_info->dlpi_name = obj->path; phdr_info->dlpi_phdr = obj->phdr; phdr_info->dlpi_phnum = obj->phsize / sizeof(obj->phdr[0]); phdr_info->dlpi_tls_modid = obj->tlsindex; phdr_info->dlpi_tls_data = obj->tlsinit; phdr_info->dlpi_adds = obj_loads; phdr_info->dlpi_subs = obj_loads - obj_count; } int dl_iterate_phdr(__dl_iterate_hdr_callback callback, void *param) { struct dl_phdr_info phdr_info; const Obj_Entry *obj; RtldLockState bind_lockstate, phdr_lockstate; int error; wlock_acquire(rtld_phdr_lock, &phdr_lockstate); rlock_acquire(rtld_bind_lock, &bind_lockstate); error = 0; for (obj = obj_list; obj != NULL; obj = obj->next) { rtld_fill_dl_phdr_info(obj, &phdr_info); if ((error = callback(&phdr_info, sizeof phdr_info, param)) != 0) break; } if (error == 0) { rtld_fill_dl_phdr_info(&obj_rtld, &phdr_info); error = callback(&phdr_info, sizeof(phdr_info), param); } lock_release(rtld_bind_lock, &bind_lockstate); lock_release(rtld_phdr_lock, &phdr_lockstate); return (error); } static void * fill_search_info(const char *dir, size_t dirlen, void *param) { struct fill_search_info_args *arg; arg = param; if (arg->request == RTLD_DI_SERINFOSIZE) { arg->serinfo->dls_cnt ++; arg->serinfo->dls_size += sizeof(struct dl_serpath) + dirlen + 1; } else { struct dl_serpath *s_entry; s_entry = arg->serpath; s_entry->dls_name = arg->strspace; s_entry->dls_flags = arg->flags; strncpy(arg->strspace, dir, dirlen); arg->strspace[dirlen] = '\0'; arg->strspace += dirlen + 1; arg->serpath++; } return (NULL); } static int do_search_info(const Obj_Entry *obj, int request, struct dl_serinfo *info) { struct dl_serinfo _info; struct fill_search_info_args args; args.request = RTLD_DI_SERINFOSIZE; args.serinfo = &_info; _info.dls_size = __offsetof(struct dl_serinfo, dls_serpath); _info.dls_cnt = 0; path_enumerate(obj->rpath, fill_search_info, &args); path_enumerate(ld_library_path, fill_search_info, &args); path_enumerate(obj->runpath, fill_search_info, &args); path_enumerate(gethints(obj->z_nodeflib), fill_search_info, &args); if (!obj->z_nodeflib) path_enumerate(ld_standard_library_path, fill_search_info, &args); if (request == RTLD_DI_SERINFOSIZE) { info->dls_size = _info.dls_size; info->dls_cnt = _info.dls_cnt; return (0); } if (info->dls_cnt != _info.dls_cnt || info->dls_size != _info.dls_size) { _rtld_error("Uninitialized Dl_serinfo struct passed to dlinfo()"); return (-1); } args.request = 
RTLD_DI_SERINFO; args.serinfo = info; args.serpath = &info->dls_serpath[0]; args.strspace = (char *)&info->dls_serpath[_info.dls_cnt]; args.flags = LA_SER_RUNPATH; if (path_enumerate(obj->rpath, fill_search_info, &args) != NULL) return (-1); args.flags = LA_SER_LIBPATH; if (path_enumerate(ld_library_path, fill_search_info, &args) != NULL) return (-1); args.flags = LA_SER_RUNPATH; if (path_enumerate(obj->runpath, fill_search_info, &args) != NULL) return (-1); args.flags = LA_SER_CONFIG; if (path_enumerate(gethints(obj->z_nodeflib), fill_search_info, &args) != NULL) return (-1); args.flags = LA_SER_DEFAULT; if (!obj->z_nodeflib && path_enumerate(ld_standard_library_path, fill_search_info, &args) != NULL) return (-1); return (0); } static int rtld_dirname(const char *path, char *bname) { const char *endp; /* Empty or NULL string gets treated as "." */ if (path == NULL || *path == '\0') { bname[0] = '.'; bname[1] = '\0'; return (0); } /* Strip trailing slashes */ endp = path + strlen(path) - 1; while (endp > path && *endp == '/') endp--; /* Find the start of the dir */ while (endp > path && *endp != '/') endp--; /* Either the dir is "/" or there are no slashes */ if (endp == path) { bname[0] = *endp == '/' ? '/' : '.'; bname[1] = '\0'; return (0); } else { do { endp--; } while (endp > path && *endp == '/'); } if (endp - path + 2 > PATH_MAX) { _rtld_error("Filename is too long: %s", path); return(-1); } strncpy(bname, path, endp - path + 1); bname[endp - path + 1] = '\0'; return (0); } static int rtld_dirname_abs(const char *path, char *base) { char *last; if (realpath(path, base) == NULL) return (-1); dbg("%s -> %s", path, base); last = strrchr(base, '/'); if (last == NULL) return (-1); if (last != base) *last = '\0'; return (0); } static void linkmap_add(Obj_Entry *obj) { struct link_map *l = &obj->linkmap; struct link_map *prev; obj->linkmap.l_name = obj->path; obj->linkmap.l_addr = obj->mapbase; obj->linkmap.l_ld = obj->dynamic; #ifdef __mips__ /* GDB needs load offset on MIPS to use the symbols */ obj->linkmap.l_offs = obj->relocbase; #endif if (r_debug.r_map == NULL) { r_debug.r_map = l; return; } /* * Scan to the end of the list, but not past the entry for the * dynamic linker, which we want to keep at the very end. */ for (prev = r_debug.r_map; prev->l_next != NULL && prev->l_next != &obj_rtld.linkmap; prev = prev->l_next) ; /* Link in the new entry. */ l->l_prev = prev; l->l_next = prev->l_next; if (l->l_next != NULL) l->l_next->l_prev = l; prev->l_next = l; } static void linkmap_delete(Obj_Entry *obj) { struct link_map *l = &obj->linkmap; if (l->l_prev == NULL) { if ((r_debug.r_map = l->l_next) != NULL) l->l_next->l_prev = NULL; return; } if ((l->l_prev->l_next = l->l_next) != NULL) l->l_next->l_prev = l->l_prev; } /* * Function for the debugger to set a breakpoint on to gain control. * * The two parameters allow the debugger to easily find and determine * what the runtime loader is doing and to whom it is doing it. * * When the loadhook trap is hit (r_debug_state, set at program * initialization), the arguments can be found on the stack: * * +8 struct link_map *m * +4 struct r_debug *rd * +0 RetAddr */ void r_debug_state(struct r_debug* rd, struct link_map *m) { /* * The following is a hack to force the compiler to emit calls to * this function, even when optimizing. If the function is empty, * the compiler is not obliged to emit any code for calls to it, * even when marked __noinline. However, gdb depends on those * calls being made. 
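 *
 * A debugger-side sketch (hypothetical, not part of rtld) of what happens
 * once the breakpoint fires: the link map reachable from "rd" is walked to
 * refresh the list of loaded objects.
 *
 *	#include <link.h>
 *	#include <stdio.h>
 *
 *	static void
 *	dump_link_map(struct r_debug *rd)
 *	{
 *		struct link_map *lm;
 *
 *		for (lm = rd->r_map; lm != NULL; lm = lm->l_next)
 *			printf("%p %s\n", (void *)lm->l_addr, lm->l_name);
 *	}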
*/ __compiler_membar(); } /* * A function called after init routines have completed. This can be used to * break before a program's entry routine is called, and can be used when * main is not available in the symbol table. */ void _r_debug_postinit(struct link_map *m) { /* See r_debug_state(). */ __compiler_membar(); } /* * Get address of the pointer variable in the main program. * Prefer non-weak symbol over the weak one. */ static const void ** get_program_var_addr(const char *name, RtldLockState *lockstate) { SymLook req; DoneList donelist; symlook_init(&req, name); req.lockstate = lockstate; donelist_init(&donelist); if (symlook_global(&req, &donelist) != 0) return (NULL); if (ELF_ST_TYPE(req.sym_out->st_info) == STT_FUNC) return ((const void **)make_function_pointer(req.sym_out, req.defobj_out)); else if (ELF_ST_TYPE(req.sym_out->st_info) == STT_GNU_IFUNC) return ((const void **)rtld_resolve_ifunc(req.defobj_out, req.sym_out)); else return ((const void **)(req.defobj_out->relocbase + req.sym_out->st_value)); } /* * Set a pointer variable in the main program to the given value. This * is used to set key variables such as "environ" before any of the * init functions are called. */ static void set_program_var(const char *name, const void *value) { const void **addr; if ((addr = get_program_var_addr(name, NULL)) != NULL) { dbg("\"%s\": *%p <-- %p", name, addr, value); *addr = value; } } /* * Search the global objects, including dependencies and main object, * for the given symbol. */ static int symlook_global(SymLook *req, DoneList *donelist) { SymLook req1; const Objlist_Entry *elm; int res; symlook_init_from_req(&req1, req); /* Search all objects loaded at program start up. */ if (req->defobj_out == NULL || ELF_ST_BIND(req->sym_out->st_info) == STB_WEAK) { res = symlook_list(&req1, &list_main, donelist); if (res == 0 && (req->defobj_out == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK)) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } /* Search all DAGs whose roots are RTLD_GLOBAL objects. */ STAILQ_FOREACH(elm, &list_global, link) { if (req->defobj_out != NULL && ELF_ST_BIND(req->sym_out->st_info) != STB_WEAK) break; res = symlook_list(&req1, &elm->obj->dagmembers, donelist); if (res == 0 && (req->defobj_out == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK)) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } return (req->sym_out != NULL ? 0 : ESRCH); } /* * Given a symbol name in a referencing object, find the corresponding * definition of the symbol. Returns a pointer to the symbol, or NULL if * no definition was found. Returns a pointer to the Obj_Entry of the * defining object via the reference parameter DEFOBJ_OUT. */ static int symlook_default(SymLook *req, const Obj_Entry *refobj) { DoneList donelist; const Objlist_Entry *elm; SymLook req1; int res; donelist_init(&donelist); symlook_init_from_req(&req1, req); /* Look first in the referencing object if linked symbolically. */ if (refobj->symbolic && !donelist_check(&donelist, refobj)) { res = symlook_obj(&req1, refobj); if (res == 0) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } symlook_global(req, &donelist); /* Search all dlopened DAGs containing the referencing object. 
*/ STAILQ_FOREACH(elm, &refobj->dldags, link) { if (req->sym_out != NULL && ELF_ST_BIND(req->sym_out->st_info) != STB_WEAK) break; res = symlook_list(&req1, &elm->obj->dagmembers, &donelist); if (res == 0 && (req->sym_out == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK)) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } /* * Search the dynamic linker itself, and possibly resolve the * symbol from there. This is how the application links to * dynamic linker services such as dlopen. */ if (req->sym_out == NULL || ELF_ST_BIND(req->sym_out->st_info) == STB_WEAK) { res = symlook_obj(&req1, &obj_rtld); if (res == 0) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } return (req->sym_out != NULL ? 0 : ESRCH); } static int symlook_list(SymLook *req, const Objlist *objlist, DoneList *dlp) { const Elf_Sym *def; const Obj_Entry *defobj; const Objlist_Entry *elm; SymLook req1; int res; def = NULL; defobj = NULL; STAILQ_FOREACH(elm, objlist, link) { if (donelist_check(dlp, elm->obj)) continue; symlook_init_from_req(&req1, req); if ((res = symlook_obj(&req1, elm->obj)) == 0) { if (def == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK) { def = req1.sym_out; defobj = req1.defobj_out; if (ELF_ST_BIND(def->st_info) != STB_WEAK) break; } } } if (def != NULL) { req->sym_out = def; req->defobj_out = defobj; return (0); } return (ESRCH); } /* * Search the chain of DAGS cointed to by the given Needed_Entry * for a symbol of the given name. Each DAG is scanned completely * before advancing to the next one. Returns a pointer to the symbol, * or NULL if no definition was found. */ static int symlook_needed(SymLook *req, const Needed_Entry *needed, DoneList *dlp) { const Elf_Sym *def; const Needed_Entry *n; const Obj_Entry *defobj; SymLook req1; int res; def = NULL; defobj = NULL; symlook_init_from_req(&req1, req); for (n = needed; n != NULL; n = n->next) { if (n->obj == NULL || (res = symlook_list(&req1, &n->obj->dagmembers, dlp)) != 0) continue; if (def == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK) { def = req1.sym_out; defobj = req1.defobj_out; if (ELF_ST_BIND(def->st_info) != STB_WEAK) break; } } if (def != NULL) { req->sym_out = def; req->defobj_out = defobj; return (0); } return (ESRCH); } /* * Search the symbol table of a single shared object for a symbol of * the given name and version, if requested. Returns a pointer to the * symbol, or NULL if no definition was found. If the object is * filter, return filtered symbol from filtee. * * The symbol's hash value is passed in for efficiency reasons; that * eliminates many recomputations of the hash value. */ int symlook_obj(SymLook *req, const Obj_Entry *obj) { DoneList donelist; SymLook req1; int flags, res, mres; /* * If there is at least one valid hash at this point, we prefer to * use the faster GNU version if available. */ if (obj->valid_hash_gnu) mres = symlook_obj1_gnu(req, obj); else if (obj->valid_hash_sysv) mres = symlook_obj1_sysv(req, obj); else return (EINVAL); if (mres == 0) { if (obj->needed_filtees != NULL) { flags = (req->flags & SYMLOOK_EARLY) ? 
RTLD_LO_EARLY : 0; load_filtees(__DECONST(Obj_Entry *, obj), flags, req->lockstate); donelist_init(&donelist); symlook_init_from_req(&req1, req); res = symlook_needed(&req1, obj->needed_filtees, &donelist); if (res == 0) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; } return (res); } if (obj->needed_aux_filtees != NULL) { flags = (req->flags & SYMLOOK_EARLY) ? RTLD_LO_EARLY : 0; load_filtees(__DECONST(Obj_Entry *, obj), flags, req->lockstate); donelist_init(&donelist); symlook_init_from_req(&req1, req); res = symlook_needed(&req1, obj->needed_aux_filtees, &donelist); if (res == 0) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; return (res); } } } return (mres); } /* Symbol match routine common to both hash functions */ static bool matched_symbol(SymLook *req, const Obj_Entry *obj, Sym_Match_Result *result, const unsigned long symnum) { Elf_Versym verndx; const Elf_Sym *symp; const char *strp; symp = obj->symtab + symnum; strp = obj->strtab + symp->st_name; switch (ELF_ST_TYPE(symp->st_info)) { case STT_FUNC: case STT_NOTYPE: case STT_OBJECT: case STT_COMMON: case STT_GNU_IFUNC: if (symp->st_value == 0) return (false); /* fallthrough */ case STT_TLS: if (symp->st_shndx != SHN_UNDEF) break; #ifndef __mips__ else if (((req->flags & SYMLOOK_IN_PLT) == 0) && (ELF_ST_TYPE(symp->st_info) == STT_FUNC)) break; /* fallthrough */ #endif default: return (false); } if (req->name[0] != strp[0] || strcmp(req->name, strp) != 0) return (false); if (req->ventry == NULL) { if (obj->versyms != NULL) { verndx = VER_NDX(obj->versyms[symnum]); if (verndx > obj->vernum) { _rtld_error( "%s: symbol %s references wrong version %d", obj->path, obj->strtab + symnum, verndx); return (false); } /* * If we are not called from dlsym (i.e. this * is a normal relocation from unversioned * binary), accept the symbol immediately if * it happens to have first version after this * shared object became versioned. Otherwise, * if symbol is versioned and not hidden, * remember it. If it is the only symbol with * this name exported by the shared object, it * will be returned as a match by the calling * function. If symbol is global (verndx < 2) * accept it unconditionally. */ if ((req->flags & SYMLOOK_DLSYM) == 0 && verndx == VER_NDX_GIVEN) { result->sym_out = symp; return (true); } else if (verndx >= VER_NDX_GIVEN) { if ((obj->versyms[symnum] & VER_NDX_HIDDEN) == 0) { if (result->vsymp == NULL) result->vsymp = symp; result->vcount++; } return (false); } } result->sym_out = symp; return (true); } if (obj->versyms == NULL) { if (object_match_name(obj, req->ventry->name)) { _rtld_error("%s: object %s should provide version %s " "for symbol %s", obj_rtld.path, obj->path, req->ventry->name, obj->strtab + symnum); return (false); } } else { verndx = VER_NDX(obj->versyms[symnum]); if (verndx > obj->vernum) { _rtld_error("%s: symbol %s references wrong version %d", obj->path, obj->strtab + symnum, verndx); return (false); } if (obj->vertab[verndx].hash != req->ventry->hash || strcmp(obj->vertab[verndx].name, req->ventry->name)) { /* * Version does not match. Look if this is a * global symbol and if it is not hidden. If * global symbol (verndx < 2) is available, * use it. Do not return symbol if we are * called by dlvsym, because dlvsym looks for * a specific version and default one is not * what dlvsym wants. 
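 *
 * The caller-visible difference (illustration only, library and version
 * names hypothetical): dlvsym(3) insists on the exact version node, while
 * dlsym(3) is satisfied by the object's default version.
 *
 *	#include <dlfcn.h>
 *	#include <stddef.h>
 *
 *	static void *
 *	lookup_foo_init(void)
 *	{
 *		void *h, *sym;
 *
 *		h = dlopen("libfoo.so.1", RTLD_NOW | RTLD_LOCAL);
 *		if (h == NULL)
 *			return (NULL);
 *		sym = dlvsym(h, "foo_init", "FOO_1.1");
 *		if (sym == NULL)
 *			sym = dlsym(h, "foo_init");	// default version instead
 *		return (sym);
 *	}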
*/ if ((req->flags & SYMLOOK_DLSYM) || (verndx >= VER_NDX_GIVEN) || (obj->versyms[symnum] & VER_NDX_HIDDEN)) return (false); } } result->sym_out = symp; return (true); } /* * Search for symbol using SysV hash function. * obj->buckets is known not to be NULL at this point; the test for this was * performed with the obj->valid_hash_sysv assignment. */ static int symlook_obj1_sysv(SymLook *req, const Obj_Entry *obj) { unsigned long symnum; Sym_Match_Result matchres; matchres.sym_out = NULL; matchres.vsymp = NULL; matchres.vcount = 0; for (symnum = obj->buckets[req->hash % obj->nbuckets]; symnum != STN_UNDEF; symnum = obj->chains[symnum]) { if (symnum >= obj->nchains) return (ESRCH); /* Bad object */ if (matched_symbol(req, obj, &matchres, symnum)) { req->sym_out = matchres.sym_out; req->defobj_out = obj; return (0); } } if (matchres.vcount == 1) { req->sym_out = matchres.vsymp; req->defobj_out = obj; return (0); } return (ESRCH); } /* Search for symbol using GNU hash function */ static int symlook_obj1_gnu(SymLook *req, const Obj_Entry *obj) { Elf_Addr bloom_word; const Elf32_Word *hashval; Elf32_Word bucket; Sym_Match_Result matchres; unsigned int h1, h2; unsigned long symnum; matchres.sym_out = NULL; matchres.vsymp = NULL; matchres.vcount = 0; /* Pick right bitmask word from Bloom filter array */ bloom_word = obj->bloom_gnu[(req->hash_gnu / __ELF_WORD_SIZE) & obj->maskwords_bm_gnu]; /* Calculate modulus word size of gnu hash and its derivative */ h1 = req->hash_gnu & (__ELF_WORD_SIZE - 1); h2 = ((req->hash_gnu >> obj->shift2_gnu) & (__ELF_WORD_SIZE - 1)); /* Filter out the "definitely not in set" queries */ if (((bloom_word >> h1) & (bloom_word >> h2) & 1) == 0) return (ESRCH); /* Locate hash chain and corresponding value element*/ bucket = obj->buckets_gnu[req->hash_gnu % obj->nbuckets_gnu]; if (bucket == 0) return (ESRCH); hashval = &obj->chain_zero_gnu[bucket]; do { if (((*hashval ^ req->hash_gnu) >> 1) == 0) { symnum = hashval - obj->chain_zero_gnu; if (matched_symbol(req, obj, &matchres, symnum)) { req->sym_out = matchres.sym_out; req->defobj_out = obj; return (0); } } } while ((*hashval++ & 1) == 0); if (matchres.vcount == 1) { req->sym_out = matchres.vsymp; req->defobj_out = obj; return (0); } return (ESRCH); } static void trace_loaded_objects(Obj_Entry *obj) { char *fmt1, *fmt2, *fmt, *main_local, *list_containers; int c; if ((main_local = getenv(_LD("TRACE_LOADED_OBJECTS_PROGNAME"))) == NULL) main_local = ""; if ((fmt1 = getenv(_LD("TRACE_LOADED_OBJECTS_FMT1"))) == NULL) fmt1 = "\t%o => %p (%x)\n"; if ((fmt2 = getenv(_LD("TRACE_LOADED_OBJECTS_FMT2"))) == NULL) fmt2 = "\t%o (%x)\n"; list_containers = getenv(_LD("TRACE_LOADED_OBJECTS_ALL")); for (; obj; obj = obj->next) { Needed_Entry *needed; char *name, *path; bool is_lib; if (list_containers && obj->needed != NULL) rtld_printf("%s:\n", obj->path); for (needed = obj->needed; needed; needed = needed->next) { if (needed->obj != NULL) { if (needed->obj->traced && !list_containers) continue; needed->obj->traced = true; path = needed->obj->path; } else path = "not found"; name = (char *)obj->strtab + needed->name; is_lib = strncmp(name, "lib", 3) == 0; /* XXX - bogus */ fmt = is_lib ? 
fmt1 : fmt2; while ((c = *fmt++) != '\0') { switch (c) { default: rtld_putchar(c); continue; case '\\': switch (c = *fmt) { case '\0': continue; case 'n': rtld_putchar('\n'); break; case 't': rtld_putchar('\t'); break; } break; case '%': switch (c = *fmt) { case '\0': continue; case '%': default: rtld_putchar(c); break; case 'A': rtld_putstr(main_local); break; case 'a': rtld_putstr(obj_main->path); break; case 'o': rtld_putstr(name); break; #if 0 case 'm': rtld_printf("%d", sodp->sod_major); break; case 'n': rtld_printf("%d", sodp->sod_minor); break; #endif case 'p': rtld_putstr(path); break; case 'x': rtld_printf("%p", needed->obj ? needed->obj->mapbase : 0); break; } break; } ++fmt; } } } } /* * Unload a dlopened object and its dependencies from memory and from * our data structures. It is assumed that the DAG rooted in the * object has already been unreferenced, and that the object has a * reference count of 0. */ static void unload_object(Obj_Entry *root) { Obj_Entry *obj; Obj_Entry **linkp; assert(root->refcount == 0); /* * Pass over the DAG removing unreferenced objects from * appropriate lists. */ unlink_object(root); /* Unmap all objects that are no longer referenced. */ linkp = &obj_list->next; while ((obj = *linkp) != NULL) { if (obj->refcount == 0) { LD_UTRACE(UTRACE_UNLOAD_OBJECT, obj, obj->mapbase, obj->mapsize, 0, obj->path); dbg("unloading \"%s\"", obj->path); unload_filtees(root); munmap(obj->mapbase, obj->mapsize); linkmap_delete(obj); *linkp = obj->next; obj_count--; obj_free(obj); } else linkp = &obj->next; } obj_tail = linkp; } static void unlink_object(Obj_Entry *root) { Objlist_Entry *elm; if (root->refcount == 0) { /* Remove the object from the RTLD_GLOBAL list. */ objlist_remove(&list_global, root); /* Remove the object from all objects' DAG lists. */ STAILQ_FOREACH(elm, &root->dagmembers, link) { objlist_remove(&elm->obj->dldags, root); if (elm->obj != root) unlink_object(elm->obj); } } } static void ref_dag(Obj_Entry *root) { Objlist_Entry *elm; assert(root->dag_inited); STAILQ_FOREACH(elm, &root->dagmembers, link) elm->obj->refcount++; } static void unref_dag(Obj_Entry *root) { Objlist_Entry *elm; assert(root->dag_inited); STAILQ_FOREACH(elm, &root->dagmembers, link) elm->obj->refcount--; } /* * Common code for MD __tls_get_addr(). */ static void *tls_get_addr_slow(Elf_Addr **, int, size_t) __noinline; static void * tls_get_addr_slow(Elf_Addr **dtvp, int index, size_t offset) { Elf_Addr *newdtv, *dtv; RtldLockState lockstate; int to_copy; dtv = *dtvp; /* Check dtv generation in case new modules have arrived */ if (dtv[0] != tls_dtv_generation) { wlock_acquire(rtld_bind_lock, &lockstate); newdtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); to_copy = dtv[1]; if (to_copy > tls_max_index) to_copy = tls_max_index; memcpy(&newdtv[2], &dtv[2], to_copy * sizeof(Elf_Addr)); newdtv[0] = tls_dtv_generation; newdtv[1] = tls_max_index; free(dtv); lock_release(rtld_bind_lock, &lockstate); dtv = *dtvp = newdtv; } /* Dynamically allocate module TLS if necessary */ if (dtv[index + 1] == 0) { /* Signal safe, wlock will block out signals. 
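 *
 * What typically drives this path (hypothetical module code, illustration
 * only): the first access from a thread to TLS that belongs to a
 * dlopen()ed object, e.g.
 *
 *	__thread int counter;		// TLS inside a dlopen()ed object
 *
 *	int
 *	bump(void)
 *	{
 *		return (++counter);	// usually compiled to a __tls_get_addr() call
 *	}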
*/ wlock_acquire(rtld_bind_lock, &lockstate); if (!dtv[index + 1]) dtv[index + 1] = (Elf_Addr)allocate_module_tls(index); lock_release(rtld_bind_lock, &lockstate); } return ((void *)(dtv[index + 1] + offset)); } void * tls_get_addr_common(Elf_Addr **dtvp, int index, size_t offset) { Elf_Addr *dtv; dtv = *dtvp; /* Check dtv generation in case new modules have arrived */ if (__predict_true(dtv[0] == tls_dtv_generation && dtv[index + 1] != 0)) return ((void *)(dtv[index + 1] + offset)); return (tls_get_addr_slow(dtvp, index, offset)); } #if defined(__aarch64__) || defined(__arm__) || defined(__mips__) || \ defined(__powerpc__) || defined(__riscv__) /* * Allocate Static TLS using the Variant I method. */ void * allocate_tls(Obj_Entry *objs, void *oldtcb, size_t tcbsize, size_t tcbalign) { Obj_Entry *obj; char *tcb; Elf_Addr **tls; Elf_Addr *dtv; Elf_Addr addr; int i; if (oldtcb != NULL && tcbsize == TLS_TCB_SIZE) return (oldtcb); assert(tcbsize >= TLS_TCB_SIZE); tcb = xcalloc(1, tls_static_space - TLS_TCB_SIZE + tcbsize); tls = (Elf_Addr **)(tcb + tcbsize - TLS_TCB_SIZE); if (oldtcb != NULL) { memcpy(tls, oldtcb, tls_static_space); free(oldtcb); /* Adjust the DTV. */ dtv = tls[0]; for (i = 0; i < dtv[1]; i++) { if (dtv[i+2] >= (Elf_Addr)oldtcb && dtv[i+2] < (Elf_Addr)oldtcb + tls_static_space) { dtv[i+2] = dtv[i+2] - (Elf_Addr)oldtcb + (Elf_Addr)tls; } } } else { dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); tls[0] = dtv; dtv[0] = tls_dtv_generation; dtv[1] = tls_max_index; for (obj = objs; obj; obj = obj->next) { if (obj->tlsoffset > 0) { addr = (Elf_Addr)tls + obj->tlsoffset; if (obj->tlsinitsize > 0) memcpy((void*) addr, obj->tlsinit, obj->tlsinitsize); if (obj->tlssize > obj->tlsinitsize) memset((void*) (addr + obj->tlsinitsize), 0, obj->tlssize - obj->tlsinitsize); dtv[obj->tlsindex + 1] = addr; } } } return (tcb); } void free_tls(void *tcb, size_t tcbsize, size_t tcbalign) { Elf_Addr *dtv; Elf_Addr tlsstart, tlsend; int dtvsize, i; assert(tcbsize >= TLS_TCB_SIZE); tlsstart = (Elf_Addr)tcb + tcbsize - TLS_TCB_SIZE; tlsend = tlsstart + tls_static_space; dtv = *(Elf_Addr **)tlsstart; dtvsize = dtv[1]; for (i = 0; i < dtvsize; i++) { if (dtv[i+2] && (dtv[i+2] < tlsstart || dtv[i+2] >= tlsend)) { free((void*)dtv[i+2]); } } free(dtv); free(tcb); } #endif #if defined(__i386__) || defined(__amd64__) || defined(__sparc64__) /* * Allocate Static TLS using the Variant II method. */ void * allocate_tls(Obj_Entry *objs, void *oldtls, size_t tcbsize, size_t tcbalign) { Obj_Entry *obj; size_t size, ralign; char *tls; Elf_Addr *dtv, *olddtv; Elf_Addr segbase, oldsegbase, addr; int i; ralign = tcbalign; if (tls_static_max_align > ralign) ralign = tls_static_max_align; size = round(tls_static_space, ralign) + round(tcbsize, ralign); assert(tcbsize >= 2*sizeof(Elf_Addr)); tls = malloc_aligned(size, ralign); dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); segbase = (Elf_Addr)(tls + round(tls_static_space, ralign)); ((Elf_Addr*)segbase)[0] = segbase; ((Elf_Addr*)segbase)[1] = (Elf_Addr) dtv; dtv[0] = tls_dtv_generation; dtv[1] = tls_max_index; if (oldtls) { /* * Copy the static TLS block over whole. */ oldsegbase = (Elf_Addr) oldtls; memcpy((void *)(segbase - tls_static_space), (const void *)(oldsegbase - tls_static_space), tls_static_space); /* * If any dynamic TLS blocks have been created tls_get_addr(), * move them over. 
*/ olddtv = ((Elf_Addr**)oldsegbase)[1]; for (i = 0; i < olddtv[1]; i++) { if (olddtv[i+2] < oldsegbase - size || olddtv[i+2] > oldsegbase) { dtv[i+2] = olddtv[i+2]; olddtv[i+2] = 0; } } /* * We assume that this block was the one we created with * allocate_initial_tls(). */ free_tls(oldtls, 2*sizeof(Elf_Addr), sizeof(Elf_Addr)); } else { for (obj = objs; obj; obj = obj->next) { if (obj->tlsoffset) { addr = segbase - obj->tlsoffset; memset((void*) (addr + obj->tlsinitsize), 0, obj->tlssize - obj->tlsinitsize); if (obj->tlsinit) memcpy((void*) addr, obj->tlsinit, obj->tlsinitsize); dtv[obj->tlsindex + 1] = addr; } } } return (void*) segbase; } void free_tls(void *tls, size_t tcbsize, size_t tcbalign) { Elf_Addr* dtv; size_t size, ralign; int dtvsize, i; Elf_Addr tlsstart, tlsend; /* * Figure out the size of the initial TLS block so that we can * find stuff which ___tls_get_addr() allocated dynamically. */ ralign = tcbalign; if (tls_static_max_align > ralign) ralign = tls_static_max_align; size = round(tls_static_space, ralign); dtv = ((Elf_Addr**)tls)[1]; dtvsize = dtv[1]; tlsend = (Elf_Addr) tls; tlsstart = tlsend - size; for (i = 0; i < dtvsize; i++) { if (dtv[i + 2] != 0 && (dtv[i + 2] < tlsstart || dtv[i + 2] > tlsend)) { free_aligned((void *)dtv[i + 2]); } } free_aligned((void *)tlsstart); free((void*) dtv); } #endif /* * Allocate TLS block for module with given index. */ void * allocate_module_tls(int index) { Obj_Entry* obj; char* p; for (obj = obj_list; obj; obj = obj->next) { if (obj->tlsindex == index) break; } if (!obj) { _rtld_error("Can't find module with TLS index %d", index); rtld_die(); } p = malloc_aligned(obj->tlssize, obj->tlsalign); memcpy(p, obj->tlsinit, obj->tlsinitsize); memset(p + obj->tlsinitsize, 0, obj->tlssize - obj->tlsinitsize); return p; } bool allocate_tls_offset(Obj_Entry *obj) { size_t off; if (obj->tls_done) return true; if (obj->tlssize == 0) { obj->tls_done = true; return true; } if (tls_last_offset == 0) off = calculate_first_tls_offset(obj->tlssize, obj->tlsalign); else off = calculate_tls_offset(tls_last_offset, tls_last_size, obj->tlssize, obj->tlsalign); /* * If we have already fixed the size of the static TLS block, we * must stay within that size. When allocating the static TLS, we * leave a small amount of space spare to be used for dynamically * loading modules which use static TLS. */ if (tls_static_space != 0) { if (calculate_tls_end(off, obj->tlssize) > tls_static_space) return false; } else if (obj->tlsalign > tls_static_max_align) { tls_static_max_align = obj->tlsalign; } tls_last_offset = obj->tlsoffset = off; tls_last_size = obj->tlssize; obj->tls_done = true; return true; } void free_tls_offset(Obj_Entry *obj) { /* * If we were the last thing to allocate out of the static TLS * block, we give our space back to the 'allocator'. This is a * simplistic workaround to allow libGL.so.1 to be loaded and * unloaded multiple times. 
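 *
 * The usage pattern this give-back is meant to survive (hypothetical
 * driver, illustration only): repeatedly loading and unloading an object
 * that carries static TLS, which would otherwise exhaust the spare space
 * reserved in the static block.
 *
 *	#include <dlfcn.h>
 *
 *	static void
 *	churn(void)
 *	{
 *		void *h;
 *		int i;
 *
 *		for (i = 0; i < 1000; i++) {
 *			h = dlopen("libGL.so.1", RTLD_NOW | RTLD_LOCAL);
 *			if (h == NULL)
 *				break;
 *			dlclose(h);
 *		}
 *	}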
*/ if (calculate_tls_end(obj->tlsoffset, obj->tlssize) == calculate_tls_end(tls_last_offset, tls_last_size)) { tls_last_offset -= obj->tlssize; tls_last_size = 0; } } void * _rtld_allocate_tls(void *oldtls, size_t tcbsize, size_t tcbalign) { void *ret; RtldLockState lockstate; wlock_acquire(rtld_bind_lock, &lockstate); ret = allocate_tls(obj_list, oldtls, tcbsize, tcbalign); lock_release(rtld_bind_lock, &lockstate); return (ret); } void _rtld_free_tls(void *tcb, size_t tcbsize, size_t tcbalign) { RtldLockState lockstate; wlock_acquire(rtld_bind_lock, &lockstate); free_tls(tcb, tcbsize, tcbalign); lock_release(rtld_bind_lock, &lockstate); } static void object_add_name(Obj_Entry *obj, const char *name) { Name_Entry *entry; size_t len; len = strlen(name); entry = malloc(sizeof(Name_Entry) + len); if (entry != NULL) { strcpy(entry->name, name); STAILQ_INSERT_TAIL(&obj->names, entry, link); } } static int object_match_name(const Obj_Entry *obj, const char *name) { Name_Entry *entry; STAILQ_FOREACH(entry, &obj->names, link) { if (strcmp(name, entry->name) == 0) return (1); } return (0); } static Obj_Entry * locate_dependency(const Obj_Entry *obj, const char *name) { const Objlist_Entry *entry; const Needed_Entry *needed; STAILQ_FOREACH(entry, &list_main, link) { if (object_match_name(entry->obj, name)) return entry->obj; } for (needed = obj->needed; needed != NULL; needed = needed->next) { if (strcmp(obj->strtab + needed->name, name) == 0 || (needed->obj != NULL && object_match_name(needed->obj, name))) { /* * If there is DT_NEEDED for the name we are looking for, * we are all set. Note that object might not be found if * dependency was not loaded yet, so the function can * return NULL here. This is expected and handled * properly by the caller. */ return (needed->obj); } } _rtld_error("%s: Unexpected inconsistency: dependency %s not found", obj->path, name); rtld_die(); } static int check_object_provided_version(Obj_Entry *refobj, const Obj_Entry *depobj, const Elf_Vernaux *vna) { const Elf_Verdef *vd; const char *vername; vername = refobj->strtab + vna->vna_name; vd = depobj->verdef; if (vd == NULL) { _rtld_error("%s: version %s required by %s not defined", depobj->path, vername, refobj->path); return (-1); } for (;;) { if (vd->vd_version != VER_DEF_CURRENT) { _rtld_error("%s: Unsupported version %d of Elf_Verdef entry", depobj->path, vd->vd_version); return (-1); } if (vna->vna_hash == vd->vd_hash) { const Elf_Verdaux *aux = (const Elf_Verdaux *) ((char *)vd + vd->vd_aux); if (strcmp(vername, depobj->strtab + aux->vda_name) == 0) return (0); } if (vd->vd_next == 0) break; vd = (const Elf_Verdef *) ((char *)vd + vd->vd_next); } if (vna->vna_flags & VER_FLG_WEAK) return (0); _rtld_error("%s: version %s required by %s not found", depobj->path, vername, refobj->path); return (-1); } static int rtld_verify_object_versions(Obj_Entry *obj) { const Elf_Verneed *vn; const Elf_Verdef *vd; const Elf_Verdaux *vda; const Elf_Vernaux *vna; const Obj_Entry *depobj; int maxvernum, vernum; if (obj->ver_checked) return (0); obj->ver_checked = true; maxvernum = 0; /* * Walk over defined and required version records and figure out * max index used by any of them. Do very basic sanity checking * while there. 
*/ vn = obj->verneed; while (vn != NULL) { if (vn->vn_version != VER_NEED_CURRENT) { _rtld_error("%s: Unsupported version %d of Elf_Verneed entry", obj->path, vn->vn_version); return (-1); } vna = (const Elf_Vernaux *) ((char *)vn + vn->vn_aux); for (;;) { vernum = VER_NEED_IDX(vna->vna_other); if (vernum > maxvernum) maxvernum = vernum; if (vna->vna_next == 0) break; vna = (const Elf_Vernaux *) ((char *)vna + vna->vna_next); } if (vn->vn_next == 0) break; vn = (const Elf_Verneed *) ((char *)vn + vn->vn_next); } vd = obj->verdef; while (vd != NULL) { if (vd->vd_version != VER_DEF_CURRENT) { _rtld_error("%s: Unsupported version %d of Elf_Verdef entry", obj->path, vd->vd_version); return (-1); } vernum = VER_DEF_IDX(vd->vd_ndx); if (vernum > maxvernum) maxvernum = vernum; if (vd->vd_next == 0) break; vd = (const Elf_Verdef *) ((char *)vd + vd->vd_next); } if (maxvernum == 0) return (0); /* * Store version information in array indexable by version index. * Verify that object version requirements are satisfied along the * way. */ obj->vernum = maxvernum + 1; obj->vertab = xcalloc(obj->vernum, sizeof(Ver_Entry)); vd = obj->verdef; while (vd != NULL) { if ((vd->vd_flags & VER_FLG_BASE) == 0) { vernum = VER_DEF_IDX(vd->vd_ndx); assert(vernum <= maxvernum); vda = (const Elf_Verdaux *)((char *)vd + vd->vd_aux); obj->vertab[vernum].hash = vd->vd_hash; obj->vertab[vernum].name = obj->strtab + vda->vda_name; obj->vertab[vernum].file = NULL; obj->vertab[vernum].flags = 0; } if (vd->vd_next == 0) break; vd = (const Elf_Verdef *) ((char *)vd + vd->vd_next); } vn = obj->verneed; while (vn != NULL) { depobj = locate_dependency(obj, obj->strtab + vn->vn_file); if (depobj == NULL) return (-1); vna = (const Elf_Vernaux *) ((char *)vn + vn->vn_aux); for (;;) { if (check_object_provided_version(obj, depobj, vna)) return (-1); vernum = VER_NEED_IDX(vna->vna_other); assert(vernum <= maxvernum); obj->vertab[vernum].hash = vna->vna_hash; obj->vertab[vernum].name = obj->strtab + vna->vna_name; obj->vertab[vernum].file = obj->strtab + vn->vn_file; obj->vertab[vernum].flags = (vna->vna_other & VER_NEED_HIDDEN) ? VER_INFO_HIDDEN : 0; if (vna->vna_next == 0) break; vna = (const Elf_Vernaux *) ((char *)vna + vna->vna_next); } if (vn->vn_next == 0) break; vn = (const Elf_Verneed *) ((char *)vn + vn->vn_next); } return 0; } static int rtld_verify_versions(const Objlist *objlist) { Objlist_Entry *entry; int rc; rc = 0; STAILQ_FOREACH(entry, objlist, link) { /* * Skip dummy objects or objects that have their version requirements * already checked. 
*/ if (entry->obj->strtab == NULL || entry->obj->vertab != NULL) continue; if (rtld_verify_object_versions(entry->obj) == -1) { rc = -1; if (ld_tracing == NULL) break; } } if (rc == 0 || ld_tracing != NULL) rc = rtld_verify_object_versions(&obj_rtld); return rc; } const Ver_Entry * fetch_ventry(const Obj_Entry *obj, unsigned long symnum) { Elf_Versym vernum; if (obj->vertab) { vernum = VER_NDX(obj->versyms[symnum]); if (vernum >= obj->vernum) { _rtld_error("%s: symbol %s has wrong verneed value %d", obj->path, obj->strtab + symnum, vernum); } else if (obj->vertab[vernum].hash != 0) { return &obj->vertab[vernum]; } } return NULL; } int _rtld_get_stack_prot(void) { return (stack_prot); } int _rtld_is_dlopened(void *arg) { Obj_Entry *obj; RtldLockState lockstate; int res; rlock_acquire(rtld_bind_lock, &lockstate); obj = dlcheck(arg); if (obj == NULL) obj = obj_from_addr(arg); if (obj == NULL) { _rtld_error("No shared object contains address"); lock_release(rtld_bind_lock, &lockstate); return (-1); } res = obj->dlopened ? 1 : 0; lock_release(rtld_bind_lock, &lockstate); return (res); } static void map_stacks_exec(RtldLockState *lockstate) { void (*thr_map_stacks_exec)(void); if ((max_stack_flags & PF_X) == 0 || (stack_prot & PROT_EXEC) != 0) return; thr_map_stacks_exec = (void (*)(void))(uintptr_t) get_program_var_addr("__pthread_map_stacks_exec", lockstate); if (thr_map_stacks_exec != NULL) { stack_prot |= PROT_EXEC; thr_map_stacks_exec(); } } void symlook_init(SymLook *dst, const char *name) { bzero(dst, sizeof(*dst)); dst->name = name; dst->hash = elf_hash(name); dst->hash_gnu = gnu_hash(name); } static void symlook_init_from_req(SymLook *dst, const SymLook *src) { dst->name = src->name; dst->hash = src->hash; dst->hash_gnu = src->hash_gnu; dst->ventry = src->ventry; dst->flags = src->flags; dst->defobj_out = NULL; dst->sym_out = NULL; dst->lockstate = src->lockstate; } /* * Parse a file descriptor number without pulling in more of libc (e.g. atoi). */ static int parse_libdir(const char *str) { static const int RADIX = 10; /* XXXJA: possibly support hex? */ const char *orig; int fd; char c; orig = str; fd = 0; for (c = *str; c != '\0'; c = *++str) { if (c < '0' || c > '9') return (-1); fd *= RADIX; fd += c - '0'; } /* Make sure we actually parsed something. */ if (str == orig) { _rtld_error("failed to parse directory FD from '%s'", str); return (-1); } return (fd); } /* * Overrides for libc_pic-provided functions. */ int __getosreldate(void) { size_t len; int oid[2]; int error, osrel; if (osreldate != 0) return (osreldate); oid[0] = CTL_KERN; oid[1] = KERN_OSRELDATE; osrel = 0; len = sizeof(osrel); error = sysctl(oid, 2, &osrel, &len, NULL, 0); if (error == 0 && osrel > 0 && len == sizeof(osrel)) osreldate = osrel; return (osreldate); } void exit(int status) { _exit(status); } void (*__cleanup)(void); int __isthreaded = 0; int _thread_autoinit_dummy_decl = 1; /* * No unresolved symbols for rtld. 
*/ void __pthread_cxa_finalize(struct dl_phdr_info *a) { } void __stack_chk_fail(void) { _rtld_error("stack overflow detected; terminated"); rtld_die(); } __weak_reference(__stack_chk_fail, __stack_chk_fail_local); void __chk_fail(void) { _rtld_error("buffer overflow detected; terminated"); rtld_die(); } const char * rtld_strerror(int errnum) { if (errnum < 0 || errnum >= sys_nerr) return ("Unknown error"); return (sys_errlist[errnum]); } Index: projects/release-pkg/libexec/rtld-elf/rtld.h =================================================================== --- projects/release-pkg/libexec/rtld-elf/rtld.h (revision 293224) +++ projects/release-pkg/libexec/rtld-elf/rtld.h (revision 293225) @@ -1,396 +1,397 @@ /*- * Copyright 1996, 1997, 1998, 1999, 2000 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef RTLD_H /* { */ #define RTLD_H 1 #include #include #include #include #include #include #include #include #include "rtld_lock.h" #include "rtld_machdep.h" #define NEW(type) ((type *) xmalloc(sizeof(type))) #define CNEW(type) ((type *) xcalloc(1, sizeof(type))) /* We might as well do booleans like C++. */ typedef unsigned char bool; #define false 0 #define true 1 extern size_t tls_last_offset; extern size_t tls_last_size; extern size_t tls_static_space; extern int tls_dtv_generation; extern int tls_max_index; extern int npagesizes; extern size_t *pagesizes; extern int main_argc; extern char **main_argv; extern char **environ; struct stat; struct Struct_Obj_Entry; /* Lists of shared objects */ typedef struct Struct_Objlist_Entry { STAILQ_ENTRY(Struct_Objlist_Entry) link; struct Struct_Obj_Entry *obj; } Objlist_Entry; typedef STAILQ_HEAD(Struct_Objlist, Struct_Objlist_Entry) Objlist; /* Types of init and fini functions */ typedef void (*InitFunc)(void); typedef void (*InitArrFunc)(int, char **, char **); /* Lists of shared object dependencies */ typedef struct Struct_Needed_Entry { struct Struct_Needed_Entry *next; struct Struct_Obj_Entry *obj; unsigned long name; /* Offset of name in string table */ } Needed_Entry; typedef struct Struct_Name_Entry { STAILQ_ENTRY(Struct_Name_Entry) link; char name[1]; } Name_Entry; /* Lock object */ typedef struct Struct_LockInfo { void *context; /* Client context for creating locks */ void *thelock; /* The one big lock */ /* Debugging aids. 
*/ volatile int rcount; /* Number of readers holding lock */ volatile int wcount; /* Number of writers holding lock */ /* Methods */ void *(*lock_create)(void *context); void (*rlock_acquire)(void *lock); void (*wlock_acquire)(void *lock); void (*rlock_release)(void *lock); void (*wlock_release)(void *lock); void (*lock_destroy)(void *lock); void (*context_destroy)(void *context); } LockInfo; typedef struct Struct_Ver_Entry { Elf_Word hash; unsigned int flags; const char *name; const char *file; } Ver_Entry; typedef struct Struct_Sym_Match_Result { const Elf_Sym *sym_out; const Elf_Sym *vsymp; int vcount; } Sym_Match_Result; #define VER_INFO_HIDDEN 0x01 /* * Shared object descriptor. * * Items marked with "(%)" are dynamically allocated, and must be freed * when the structure is destroyed. * * CAUTION: It appears that the JDK port peeks into these structures. * It looks at "next" and "mapbase" at least. Don't add new members * near the front, until this can be straightened out. */ typedef struct Struct_Obj_Entry { /* * These two items have to be set right for compatibility with the * original ElfKit crt1.o. */ Elf_Size magic; /* Magic number (sanity check) */ Elf_Size version; /* Version number of struct format */ struct Struct_Obj_Entry *next; char *path; /* Pathname of underlying file (%) */ char *origin_path; /* Directory path of origin file */ int refcount; int dl_refcount; /* Number of times loaded by dlopen */ /* These items are computed by map_object() or by digest_phdr(). */ caddr_t mapbase; /* Base address of mapped region */ size_t mapsize; /* Size of mapped region in bytes */ size_t textsize; /* Size of text segment in bytes */ Elf_Addr vaddrbase; /* Base address in shared object file */ caddr_t relocbase; /* Relocation constant = mapbase - vaddrbase */ const Elf_Dyn *dynamic; /* Dynamic section */ caddr_t entry; /* Entry point */ const Elf_Phdr *phdr; /* Program header if it is mapped, else NULL */ size_t phsize; /* Size of program header in bytes */ const char *interp; /* Pathname of the interpreter, if any */ Elf_Word stack_flags; /* TLS information */ int tlsindex; /* Index in DTV for this module */ void *tlsinit; /* Base address of TLS init block */ size_t tlsinitsize; /* Size of TLS init block for this module */ size_t tlssize; /* Size of TLS block for this module */ size_t tlsoffset; /* Offset of static TLS block for this module */ size_t tlsalign; /* Alignment of static TLS block */ caddr_t relro_page; size_t relro_size; /* Items from the dynamic section. */ Elf_Addr *pltgot; /* PLT or GOT, depending on architecture */ const Elf_Rel *rel; /* Relocation entries */ unsigned long relsize; /* Size in bytes of relocation info */ const Elf_Rela *rela; /* Relocation entries with addend */ unsigned long relasize; /* Size in bytes of addend relocation info */ const Elf_Rel *pltrel; /* PLT relocation entries */ unsigned long pltrelsize; /* Size in bytes of PLT relocation info */ const Elf_Rela *pltrela; /* PLT relocation entries with addend */ unsigned long pltrelasize; /* Size in bytes of PLT addend reloc info */ const Elf_Sym *symtab; /* Symbol table */ const char *strtab; /* String table */ unsigned long strsize; /* Size in bytes of string table */ #ifdef __mips__ Elf_Word local_gotno; /* Number of local GOT entries */ Elf_Word symtabno; /* Number of dynamic symbols */ Elf_Word gotsym; /* First dynamic symbol in GOT */ #endif #ifdef __powerpc64__ Elf_Addr glink; /* GLINK PLT call stub section */ #endif const Elf_Verneed *verneed; /* Required versions. 
*/ Elf_Word verneednum; /* Number of entries in verneed table */ const Elf_Verdef *verdef; /* Provided versions. */ Elf_Word verdefnum; /* Number of entries in verdef table */ const Elf_Versym *versyms; /* Symbol versions table */ const Elf_Hashelt *buckets; /* Hash table buckets array */ unsigned long nbuckets; /* Number of buckets */ const Elf_Hashelt *chains; /* Hash table chain array */ unsigned long nchains; /* Number of entries in chain array */ Elf32_Word nbuckets_gnu; /* Number of GNU hash buckets*/ Elf32_Word symndx_gnu; /* 1st accessible symbol on dynsym table */ Elf32_Word maskwords_bm_gnu; /* Bloom filter words - 1 (bitmask) */ Elf32_Word shift2_gnu; /* Bloom filter shift count */ Elf32_Word dynsymcount; /* Total entries in dynsym table */ Elf_Addr *bloom_gnu; /* Bloom filter used by GNU hash func */ const Elf_Hashelt *buckets_gnu; /* GNU hash table bucket array */ const Elf_Hashelt *chain_zero_gnu; /* GNU hash table value array (Zeroed) */ char *rpath; /* Search path specified in object */ char *runpath; /* Search path with different priority */ Needed_Entry *needed; /* Shared objects needed by this one (%) */ Needed_Entry *needed_filtees; Needed_Entry *needed_aux_filtees; STAILQ_HEAD(, Struct_Name_Entry) names; /* List of names for this object we know about. */ Ver_Entry *vertab; /* Versions required /defined by this object */ int vernum; /* Number of entries in vertab */ Elf_Addr init; /* Initialization function to call */ Elf_Addr fini; /* Termination function to call */ Elf_Addr preinit_array; /* Pre-initialization array of functions */ Elf_Addr init_array; /* Initialization array of functions */ Elf_Addr fini_array; /* Termination array of functions */ int preinit_array_num; /* Number of entries in preinit_array */ int init_array_num; /* Number of entries in init_array */ int fini_array_num; /* Number of entries in fini_array */ int32_t osrel; /* OSREL note value */ bool mainprog : 1; /* True if this is the main program */ bool rtld : 1; /* True if this is the dynamic linker */ bool relocated : 1; /* True if processed by relocate_objects() */ bool ver_checked : 1; /* True if processed by rtld_verify_object_versions */ bool textrel : 1; /* True if there are relocations to text seg */ bool symbolic : 1; /* True if generated with "-Bsymbolic" */ bool bind_now : 1; /* True if all relocations should be made first */ bool traced : 1; /* Already printed in ldd trace output */ bool jmpslots_done : 1; /* Already have relocated the jump slots */ bool init_done : 1; /* Already have added object to init list */ bool tls_done : 1; /* Already allocated offset for static TLS */ bool phdr_alloc : 1; /* Phdr is allocated and needs to be freed. */ bool z_origin : 1; /* Process rpath and soname tokens */ bool z_nodelete : 1; /* Do not unload the object and dependencies */ bool z_noopen : 1; /* Do not load on dlopen */ bool z_loadfltr : 1; /* Immediately load filtees */ bool z_interpose : 1; /* Interpose all objects but main */ bool z_nodeflib : 1; /* Don't search default library path */ bool z_global : 1; /* Make the object global */ bool ref_nodel : 1; /* Refcount increased to prevent dlclose */ bool init_scanned: 1; /* Object is already on init list. */ bool on_fini_list: 1; /* Object is already on fini list. */ bool dag_inited : 1; /* Object has its DAG initialized. 
*/ bool filtees_loaded : 1; /* Filtees loaded */ bool irelative : 1; /* Object has R_MACHDEP_IRELATIVE relocs */ bool gnu_ifunc : 1; /* Object has references to STT_GNU_IFUNC */ bool non_plt_gnu_ifunc : 1; /* Object has non-plt IFUNC references */ bool crt_no_init : 1; /* Object' crt does not call _init/_fini */ bool valid_hash_sysv : 1; /* A valid System V hash hash tag is available */ bool valid_hash_gnu : 1; /* A valid GNU hash tag is available */ bool dlopened : 1; /* dlopen()-ed (vs. load statically) */ + bool writable_dynamic : 1; /* PT_DYNAMIC is writable */ struct link_map linkmap; /* For GDB and dlinfo() */ Objlist dldags; /* Object belongs to these dlopened DAGs (%) */ Objlist dagmembers; /* DAG has these members (%) */ dev_t dev; /* Object's filesystem's device */ ino_t ino; /* Object's inode number */ void *priv; /* Platform-dependent */ } Obj_Entry; #define RTLD_MAGIC 0xd550b87a #define RTLD_VERSION 1 #define RTLD_STATIC_TLS_EXTRA 128 /* Flags to be passed into symlook_ family of functions. */ #define SYMLOOK_IN_PLT 0x01 /* Lookup for PLT symbol */ #define SYMLOOK_DLSYM 0x02 /* Return newest versioned symbol. Used by dlsym. */ #define SYMLOOK_EARLY 0x04 /* Symlook is done during initialization. */ #define SYMLOOK_IFUNC 0x08 /* Allow IFUNC processing in reloc_non_plt(). */ /* Flags for load_object(). */ #define RTLD_LO_NOLOAD 0x01 /* dlopen() specified RTLD_NOLOAD. */ #define RTLD_LO_DLOPEN 0x02 /* Load_object() called from dlopen(). */ #define RTLD_LO_TRACE 0x04 /* Only tracing. */ #define RTLD_LO_NODELETE 0x08 /* Loaded object cannot be closed. */ #define RTLD_LO_FILTEES 0x10 /* Loading filtee. */ #define RTLD_LO_EARLY 0x20 /* Do not call ctors, postpone it to the initialization during the image start. */ /* * Symbol cache entry used during relocation to avoid multiple lookups * of the same symbol. */ typedef struct Struct_SymCache { const Elf_Sym *sym; /* Symbol table entry */ const Obj_Entry *obj; /* Shared object which defines it */ } SymCache; /* * This structure provides a reentrant way to keep a list of objects and * check which ones have already been processed in some way. */ typedef struct Struct_DoneList { const Obj_Entry **objs; /* Array of object pointers */ unsigned int num_alloc; /* Allocated size of the array */ unsigned int num_used; /* Number of array slots used */ } DoneList; struct Struct_RtldLockState { int lockstate; sigjmp_buf env; }; struct fill_search_info_args { int request; unsigned int flags; struct dl_serinfo *serinfo; struct dl_serpath *serpath; char *strspace; }; /* * The pack of arguments and results for the symbol lookup functions. */ typedef struct Struct_SymLook { const char *name; unsigned long hash; uint32_t hash_gnu; const Ver_Entry *ventry; int flags; const Obj_Entry *defobj_out; const Elf_Sym *sym_out; struct Struct_RtldLockState *lockstate; } SymLook; void _rtld_error(const char *, ...) __printflike(1, 2) __exported; void rtld_die(void) __dead2; const char *rtld_strerror(int); Obj_Entry *map_object(int, const char *, const struct stat *); void *xcalloc(size_t, size_t); void *xmalloc(size_t); char *xstrdup(const char *); void *malloc_aligned(size_t size, size_t align); void free_aligned(void *ptr); extern Elf_Addr _GLOBAL_OFFSET_TABLE_[]; extern Elf_Sym sym_zero; /* For resolving undefined weak refs. */ void dump_relocations(Obj_Entry *); void dump_obj_relocations(Obj_Entry *); void dump_Elf_Rel(Obj_Entry *, const Elf_Rel *, u_long); void dump_Elf_Rela(Obj_Entry *, const Elf_Rela *, u_long); /* * Function declarations. 
*/ unsigned long elf_hash(const char *); const Elf_Sym *find_symdef(unsigned long, const Obj_Entry *, const Obj_Entry **, int, SymCache *, struct Struct_RtldLockState *); void init_pltgot(Obj_Entry *); void lockdflt_init(void); void digest_notes(Obj_Entry *, Elf_Addr, Elf_Addr); void obj_free(Obj_Entry *); Obj_Entry *obj_new(void); void _rtld_bind_start(void); void *rtld_resolve_ifunc(const Obj_Entry *obj, const Elf_Sym *def); void symlook_init(SymLook *, const char *); int symlook_obj(SymLook *, const Obj_Entry *); void *tls_get_addr_common(Elf_Addr** dtvp, int index, size_t offset); void *allocate_tls(Obj_Entry *, void *, size_t, size_t); void free_tls(void *, size_t, size_t); void *allocate_module_tls(int index); bool allocate_tls_offset(Obj_Entry *obj); void free_tls_offset(Obj_Entry *obj); const Ver_Entry *fetch_ventry(const Obj_Entry *obj, unsigned long); /* * MD function declarations. */ int do_copy_relocations(Obj_Entry *); int reloc_non_plt(Obj_Entry *, Obj_Entry *, int flags, struct Struct_RtldLockState *); int reloc_plt(Obj_Entry *); int reloc_jmpslots(Obj_Entry *, int flags, struct Struct_RtldLockState *); int reloc_iresolve(Obj_Entry *, struct Struct_RtldLockState *); int reloc_gnu_ifunc(Obj_Entry *, int flags, struct Struct_RtldLockState *); void allocate_initial_tls(Obj_Entry *); #endif /* } */ Index: projects/release-pkg/release/Makefile =================================================================== --- projects/release-pkg/release/Makefile (revision 293224) +++ projects/release-pkg/release/Makefile (revision 293225) @@ -1,306 +1,306 @@ # $FreeBSD$ # # Makefile for building releases and release media. # # User-driven targets: # cdrom: Builds release CD-ROM media (disc1.iso) # dvdrom: Builds release DVD-ROM media (dvd1.iso) # memstick: Builds memory stick image (memstick.img) # mini-memstick: Builds minimal memory stick image (mini-memstick.img) # ftp: Sets up FTP distribution area (ftp) # release: Invokes real-release, vm-release, and cloudware-release targets # real-release: Build all media and FTP distribution area # vm-release: Build all virtual machine image targets # cloudware-release: Build all cloud hosting provider targets # install: Invokes the release-install and vm-install targets # release-install: Copies all release installation media into ${DESTDIR} # vm-install: Copies all virtual machine images into ${DESTDIR} # # Variables affecting the build process: # WORLDDIR: location of src tree -- must have built world and default kernel # (by default, the directory above this one) # PORTSDIR: location of ports tree to distribute (default: /usr/ports) # DOCDIR: location of doc tree (default: /usr/doc) # XTRADIR: xtra-bits-dir argument for /mkisoimages.sh # NOPKG: if set, do not distribute third-party packages # NOPORTS: if set, do not distribute ports tree # NOSRC: if set, do not distribute source tree # NODOC: if set, do not generate release documentation # WITH_DVD: if set, generate dvd1.iso # WITH_COMPRESSED_IMAGES: if set, compress installation images with xz(1) # (uncompressed images are not removed) # WITH_VMIMAGES: if set, build virtual machine images with the release # WITH_COMPRESSED_VMIMAGES: if set, compress virtual machine disk images # with xz(1) (extremely time consuming) # WITH_CLOUDWARE: if set, build cloud hosting disk images with the release # TARGET/TARGET_ARCH: architecture of built release # WORLDDIR?= ${.CURDIR}/.. 
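# A minimal usage sketch (the paths and knob values below are illustrative
# and not part of this Makefile): once world and the default kernel have
# been built in ${WORLDDIR}, the media and distribution sets listed above
# can be produced and copied out with, for example:
#
#	cd /usr/src/release
#	make release TARGET=amd64 TARGET_ARCH=amd64 NOPORTS=1 NODOC=1
#	make install DESTDIR=/var/tmp/release-out
#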
PORTSDIR?= /usr/ports DOCDIR?= /usr/doc RELNOTES_LANG?= en_US.ISO8859-1 .if !defined(TARGET) || empty(TARGET) TARGET= ${MACHINE} .endif .if !defined(TARGET_ARCH) || empty(TARGET_ARCH) .if ${TARGET} == ${MACHINE} TARGET_ARCH= ${MACHINE_ARCH} .else TARGET_ARCH= ${TARGET} .endif .endif IMAKE= ${MAKE} TARGET_ARCH=${TARGET_ARCH} TARGET=${TARGET} DISTDIR= dist # Define OSRELEASE by using newvars.sh .if !defined(OSRELEASE) || empty(OSRELEASE) .for _V in TYPE BRANCH REVISION ${_V}!= eval $$(awk '/^${_V}=/{print}' ${.CURDIR}/../sys/conf/newvers.sh); echo $$${_V} .endfor .for _V in ${TARGET_ARCH} .if !empty(TARGET:M${_V}) OSRELEASE= ${TYPE}-${REVISION}-${BRANCH}-${TARGET} VOLUME_LABEL= ${REVISION:C/[.-]/_/g}_${BRANCH:C/[.-]/_/g}_${TARGET} .else OSRELEASE= ${TYPE}-${REVISION}-${BRANCH}-${TARGET}-${TARGET_ARCH} VOLUME_LABEL= ${REVISION:C/[.-]/_/g}_${BRANCH:C/[.-]/_/g}_${TARGET_ARCH} .endif .endfor .endif .if !defined(VOLUME_LABEL) || empty(VOLUME_LABEL) VOLUME_LABEL= FreeBSD_Install .endif .if !exists(${DOCDIR}) NODOC= true .endif .if !exists(${PORTSDIR}) NOPORTS= true .endif EXTRA_PACKAGES= .if !defined(NOPORTS) EXTRA_PACKAGES+= ports.txz .endif .if !defined(NOSRC) EXTRA_PACKAGES+= src.txz .endif .if !defined(NODOC) EXTRA_PACKAGES+= reldoc .endif RELEASE_TARGETS= ftp IMAGES= .if exists(${.CURDIR}/${TARGET}/mkisoimages.sh) RELEASE_TARGETS+= cdrom IMAGES+= disc1.iso bootonly.iso . if defined(WITH_DVD) && !empty(WITH_DVD) RELEASE_TARGETS+= dvdrom IMAGES+= dvd1.iso . endif .endif .if exists(${.CURDIR}/${TARGET}/make-memstick.sh) RELEASE_TARGETS+= memstick.img RELEASE_TARGETS+= mini-memstick.img IMAGES+= memstick.img IMAGES+= mini-memstick.img .endif CLEANFILES= packagesystem *.txz MANIFEST release ${IMAGES} .if defined(WITH_COMPRESSED_IMAGES) && !empty(WITH_COMPRESSED_IMAGES) . for I in ${IMAGES} CLEANFILES+= ${I}.xz . endfor .endif .if defined(WITH_DVD) && !empty(WITH_DVD) CLEANFILES+= pkg-stage .endif CLEANDIRS= dist ftp disc1 bootonly dvd beforeclean: chflags -R noschg . .include clean: beforeclean base.txz: mkdir -p ${DISTDIR} cd ${WORLDDIR} && ${IMAKE} distributeworld DISTDIR=${.OBJDIR}/${DISTDIR} # Set up mergemaster root database sh ${.CURDIR}/scripts/mm-mtree.sh -m ${WORLDDIR} -F \ "TARGET_ARCH=${TARGET_ARCH} TARGET=${TARGET}" -D "${.OBJDIR}/${DISTDIR}/base" etcupdate extract -B -M "TARGET_ARCH=${TARGET_ARCH} TARGET=${TARGET}" \ -s ${WORLDDIR} -d "${.OBJDIR}/${DISTDIR}/base/var/db/etcupdate" # Package all components cd ${WORLDDIR} && ${IMAKE} packageworld DISTDIR=${.OBJDIR}/${DISTDIR} mv ${DISTDIR}/*.txz . kernel.txz: mkdir -p ${DISTDIR} cd ${WORLDDIR} && ${IMAKE} distributekernel packagekernel DISTDIR=${.OBJDIR}/${DISTDIR} mv ${DISTDIR}/kernel*.txz . 
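# Worked example of the OSRELEASE/VOLUME_LABEL derivation above (TYPE,
# REVISION and BRANCH come from sys/conf/newvers.sh; the values shown are
# only illustrative):
#	TARGET=amd64 TARGET_ARCH=amd64 -> OSRELEASE=FreeBSD-11.0-CURRENT-amd64
#	                                  VOLUME_LABEL=11_0_CURRENT_amd64
#	TARGET=arm TARGET_ARCH=armv6   -> OSRELEASE=FreeBSD-11.0-CURRENT-arm-armv6
#	                                  VOLUME_LABEL=11_0_CURRENT_armv6
#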
src.txz: mkdir -p ${DISTDIR}/usr ln -fs ${WORLDDIR} ${DISTDIR}/usr/src cd ${DISTDIR} && tar cLvf - --exclude .svn --exclude .zfs \ --exclude .git --exclude @ --exclude usr/src/release/dist usr/src | \ ${XZ_CMD} > ${.OBJDIR}/src.txz ports.txz: mkdir -p ${DISTDIR}/usr ln -fs ${PORTSDIR} ${DISTDIR}/usr/ports cd ${DISTDIR} && tar cLvf - \ --exclude .git --exclude .svn \ --exclude usr/ports/distfiles --exclude usr/ports/packages \ --exclude 'usr/ports/INDEX*' --exclude work usr/ports | \ ${XZ_CMD} > ${.OBJDIR}/ports.txz reldoc: cd ${.CURDIR}/doc && ${MAKE} all install clean 'FORMATS=html txt' \ INSTALL_COMPRESSED='' URLS_ABSOLUTE=YES DOCDIR=${.OBJDIR}/rdoc mkdir -p reldoc .for i in hardware readme relnotes errata ln -f rdoc/${RELNOTES_LANG}/${i}/article.txt reldoc/${i:tu}.TXT ln -f rdoc/${RELNOTES_LANG}/${i}/article.html reldoc/${i:tu}.HTM .endfor cp rdoc/${RELNOTES_LANG}/readme/docbook.css reldoc disc1: packagesystem # Install system mkdir -p ${.TARGET} cd ${WORLDDIR} && ${IMAKE} installkernel installworld distribution \ DESTDIR=${.OBJDIR}/${.TARGET} MK_RESCUE=no MK_KERNEL_SYMBOLS=no \ MK_PROFILE=no MK_SENDMAIL=no MK_TESTS=no MK_LIB32=no \ MK_DEBUG_FILES=no # Copy distfiles mkdir -p ${.TARGET}/usr/freebsd-dist - for dist in MANIFEST $$(ls *.txz | grep -v -- '-dbg'); \ + for dist in MANIFEST $$(ls *.txz | grep -vE -- '(base|lib32)-dbg'); \ do cp $${dist} ${.TARGET}/usr/freebsd-dist; \ done # Copy documentation, if generated .if !defined(NODOC) cp reldoc/* ${.TARGET} .endif # Set up installation environment ln -fs /tmp/bsdinstall_etc/resolv.conf ${.TARGET}/etc/resolv.conf echo sendmail_enable=\"NONE\" > ${.TARGET}/etc/rc.conf echo hostid_enable=\"NO\" >> ${.TARGET}/etc/rc.conf echo debug.witness.trace=0 >> ${.TARGET}/etc/sysctl.conf echo vfs.mountroot.timeout=\"10\" >> ${.TARGET}/boot/loader.conf cp ${.CURDIR}/rc.local ${.TARGET}/etc touch ${.TARGET} bootonly: packagesystem # Install system mkdir -p ${.TARGET} cd ${WORLDDIR} && ${IMAKE} installkernel installworld distribution \ DESTDIR=${.OBJDIR}/${.TARGET} MK_AMD=no MK_AT=no \ MK_GAMES=no MK_GROFF=no \ MK_INSTALLLIB=no MK_LIB32=no MK_MAIL=no \ MK_NCP=no MK_TOOLCHAIN=no MK_PROFILE=no \ MK_INSTALLIB=no MK_RESCUE=no MK_DICT=no \ MK_KERNEL_SYMBOLS=no MK_TESTS=no MK_DEBUG_FILES=no # Copy manifest only (no distfiles) to get checksums mkdir -p ${.TARGET}/usr/freebsd-dist cp MANIFEST ${.TARGET}/usr/freebsd-dist # Copy documentation, if generated .if !defined(NODOC) cp reldoc/* ${.TARGET} .endif # Set up installation environment ln -fs /tmp/bsdinstall_etc/resolv.conf ${.TARGET}/etc/resolv.conf echo sendmail_enable=\"NONE\" > ${.TARGET}/etc/rc.conf echo hostid_enable=\"NO\" >> ${.TARGET}/etc/rc.conf echo debug.witness.trace=0 >> ${.TARGET}/etc/sysctl.conf echo vfs.mountroot.timeout=\"10\" >> ${.TARGET}/boot/loader.conf cp ${.CURDIR}/rc.local ${.TARGET}/etc dvd: packagesystem # Install system mkdir -p ${.TARGET} cd ${WORLDDIR} && ${IMAKE} installkernel installworld distribution \ DESTDIR=${.OBJDIR}/${.TARGET} MK_RESCUE=no MK_KERNEL_SYMBOLS=no \ MK_TESTS=no MK_DEBUG_FILES=no # Copy distfiles mkdir -p ${.TARGET}/usr/freebsd-dist - for dist in MANIFEST $$(ls *.txz | grep -v -- '-dbg'); \ + for dist in MANIFEST $$(ls *.txz | grep -v -- '(base|lib32)-dbg'); \ do cp $${dist} ${.TARGET}/usr/freebsd-dist; \ done # Copy documentation, if generated .if !defined(NODOC) cp reldoc/* ${.TARGET} .endif # Set up installation environment ln -fs /tmp/bsdinstall_etc/resolv.conf ${.TARGET}/etc/resolv.conf echo sendmail_enable=\"NONE\" > ${.TARGET}/etc/rc.conf echo 
hostid_enable=\"NO\" >> ${.TARGET}/etc/rc.conf echo debug.witness.trace=0 >> ${.TARGET}/etc/sysctl.conf echo vfs.mountroot.timeout=\"10\" >> ${.TARGET}/boot/loader.conf cp ${.CURDIR}/rc.local ${.TARGET}/etc touch ${.TARGET} release.iso: disc1.iso disc1.iso: disc1 sh ${.CURDIR}/${TARGET}/mkisoimages.sh -b ${VOLUME_LABEL}_CD ${.TARGET} disc1 ${XTRADIR} dvd1.iso: dvd pkg-stage sh ${.CURDIR}/${TARGET}/mkisoimages.sh -b ${VOLUME_LABEL}_DVD ${.TARGET} dvd ${XTRADIR} bootonly.iso: bootonly sh ${.CURDIR}/${TARGET}/mkisoimages.sh -b ${VOLUME_LABEL}_BO ${.TARGET} bootonly ${XTRADIR} memstick: memstick.img memstick.img: disc1 sh ${.CURDIR}/${TARGET}/make-memstick.sh disc1 ${.TARGET} mini-memstick: mini-memstick.img mini-memstick.img: bootonly sh ${.CURDIR}/${TARGET}/make-memstick.sh bootonly ${.TARGET} packagesystem: base.txz kernel.txz ${EXTRA_PACKAGES} sh ${.CURDIR}/scripts/make-manifest.sh *.txz > MANIFEST touch ${.TARGET} pkg-stage: .if !defined(NOPKG) env REPOS_DIR=${.CURDIR}/pkg_repos/ \ sh ${.CURDIR}/scripts/pkg-stage.sh mkdir -p ${.OBJDIR}/dvd/packages/repos/ cp ${.CURDIR}/scripts/FreeBSD_install_cdrom.conf \ ${.OBJDIR}/dvd/packages/repos/ .endif touch ${.TARGET} cdrom: disc1.iso bootonly.iso dvdrom: dvd1.iso ftp: packagesystem rm -rf ftp mkdir -p ftp cp *.txz MANIFEST ftp release: real-release vm-release cloudware-release touch ${.OBJDIR}/${.TARGET} real-release: ${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} obj ${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} ${RELEASE_TARGETS} install: release-install vm-install cloudware-install release-install: .if defined(DESTDIR) && !empty(DESTDIR) mkdir -p ${DESTDIR} .endif cp -a ftp ${DESTDIR}/ .for I in ${IMAGES} cp -p ${I} ${DESTDIR}/${OSRELEASE}-${I} . if defined(WITH_COMPRESSED_IMAGES) && !empty(WITH_COMPRESSED_IMAGES) ${XZ_CMD} -k ${DESTDIR}/${OSRELEASE}-${I} . endif .endfor cd ${DESTDIR} && sha512 ${OSRELEASE}* > ${DESTDIR}/CHECKSUM.SHA512 cd ${DESTDIR} && sha256 ${OSRELEASE}* > ${DESTDIR}/CHECKSUM.SHA256 .include "${.CURDIR}/Makefile.vm" Index: projects/release-pkg/release/amd64/mkisoimages.sh =================================================================== --- projects/release-pkg/release/amd64/mkisoimages.sh (revision 293224) +++ projects/release-pkg/release/amd64/mkisoimages.sh (revision 293225) @@ -1,60 +1,60 @@ #!/bin/sh # # Module: mkisoimages.sh # Author: Jordan K Hubbard # Date: 22 June 2001 # # $FreeBSD$ # # This script is used by release/Makefile to build the (optional) ISO images # for a FreeBSD release. It is considered architecture dependent since each # platform has a slightly unique way of making bootable CDs. This script # is also allowed to generate any number of images since that is more of # publishing decision than anything else. # # Usage: # # mkisoimages.sh [-b] image-label image-name base-bits-dir [extra-bits-dir] # # Where -b is passed if the ISO image should be made "bootable" by # whatever standards this architecture supports (may be unsupported), # image-label is the ISO image label, image-name is the filename of the # resulting ISO image, base-bits-dir contains the image contents and # extra-bits-dir, if provided, contains additional files to be merged # into base-bits-dir as part of making the image. if [ "x$1" = "x-b" ]; then # This is highly x86-centric and will be used directly below. 
bootable="-o bootimage=i386;$4/boot/cdboot -o no-emul-boot" # Make EFI system partition (should be done with makefs in the future) dd if=/dev/zero of=efiboot.img bs=4k count=100 device=`mdconfig -a -t vnode -f efiboot.img` newfs_msdos -F 12 -m 0xf8 /dev/$device mkdir efi mount -t msdosfs /dev/$device efi mkdir -p efi/efi/boot cp "$4/boot/loader.efi" efi/efi/boot/bootx64.efi umount efi rmdir efi mdconfig -d -u $device bootable="-o bootimage=i386;efiboot.img -o no-emul-boot $bootable" shift else bootable="" fi if [ $# -lt 3 ]; then echo "Usage: $0 [-b] image-label image-name base-bits-dir [extra-bits-dir]" exit 1 fi LABEL=`echo "$1" | tr '[:lower:]' '[:upper:]'`; shift NAME="$1"; shift publisher="The FreeBSD Project. http://www.FreeBSD.org/" echo "/dev/iso9660/$LABEL / cd9660 ro 0 0" > "$1/etc/fstab" makefs -t cd9660 $bootable -o rockridge -o label="$LABEL" -o publisher="$publisher" "$NAME" "$@" -rm "$1/etc/fstab" +rm -f "$1/etc/fstab" rm -f efiboot.img Index: projects/release-pkg/release/i386/mkisoimages.sh =================================================================== --- projects/release-pkg/release/i386/mkisoimages.sh (revision 293224) +++ projects/release-pkg/release/i386/mkisoimages.sh (revision 293225) @@ -1,45 +1,45 @@ #!/bin/sh # # Module: mkisoimages.sh # Author: Jordan K Hubbard # Date: 22 June 2001 # # $FreeBSD$ # # This script is used by release/Makefile to build the (optional) ISO images # for a FreeBSD release. It is considered architecture dependent since each # platform has a slightly unique way of making bootable CDs. This script # is also allowed to generate any number of images since that is more of # publishing decision than anything else. # # Usage: # # mkisoimages.sh [-b] image-label image-name base-bits-dir [extra-bits-dir] # # Where -b is passed if the ISO image should be made "bootable" by # whatever standards this architecture supports (may be unsupported), # image-label is the ISO image label, image-name is the filename of the # resulting ISO image, base-bits-dir contains the image contents and # extra-bits-dir, if provided, contains additional files to be merged # into base-bits-dir as part of making the image. if [ "x$1" = "x-b" ]; then # This is highly x86-centric and will be used directly below. bootable="-o bootimage=i386;$4/boot/cdboot -o no-emul-boot" shift else bootable="" fi if [ $# -lt 3 ]; then echo "Usage: $0 [-b] image-label image-name base-bits-dir [extra-bits-dir]" exit 1 fi LABEL=`echo "$1" | tr '[:lower:]' '[:upper:]'`; shift NAME="$1"; shift publisher="The FreeBSD Project. http://www.FreeBSD.org/" echo "/dev/iso9660/$LABEL / cd9660 ro 0 0" > "$1/etc/fstab" makefs -t cd9660 $bootable -o rockridge -o label="$LABEL" -o publisher="$publisher" "$NAME" "$@" -rm "$1/etc/fstab" +rm -f "$1/etc/fstab" Index: projects/release-pkg/release/pc98/mkisoimages.sh =================================================================== --- projects/release-pkg/release/pc98/mkisoimages.sh (revision 293224) +++ projects/release-pkg/release/pc98/mkisoimages.sh (revision 293225) @@ -1,45 +1,45 @@ #!/bin/sh # # Module: mkisoimages.sh # Author: Jordan K Hubbard # Date: 22 June 2001 # # $FreeBSD$ # # This script is used by release/Makefile to build the (optional) ISO images # for a FreeBSD release. It is considered architecture dependent since each # platform has a slightly unique way of making bootable CDs. This script # is also allowed to generate any number of images since that is more of # publishing decision than anything else. 
# # Usage: # # mkisoimages.sh [-b] image-label image-name base-bits-dir [extra-bits-dir] # # Where -b is passed if the ISO image should be made "bootable" by # whatever standards this architecture supports (may be unsupported), # image-label is the ISO image label, image-name is the filename of the # resulting ISO image, base-bits-dir contains the image contents and # extra-bits-dir, if provided, contains additional files to be merged # into base-bits-dir as part of making the image. if [ "x$1" = "x-b" ]; then # This is highly x86-centric and will be used directly below. bootable="-o generic-bootimage=$4/boot/cdboot" shift else bootable="" fi if [ $# -lt 3 ]; then echo "Usage: $0 [-b] image-label image-name base-bits-dir [extra-bits-dir]" exit 1 fi LABEL=`echo "$1" | tr '[:lower:]' '[:upper:]'`; shift NAME="$1"; shift publisher="The FreeBSD Project. http://www.FreeBSD.org/" echo "/dev/iso9660/$LABEL / cd9660 ro 0 0" > "$1/etc/fstab" makefs -t cd9660 $bootable -o rockridge -o label="$LABEL" -o publisher="$publisher" "$NAME" "$@" -rm "$1/etc/fstab" +rm -f "$1/etc/fstab" Index: projects/release-pkg/release/powerpc/mkisoimages.sh =================================================================== --- projects/release-pkg/release/powerpc/mkisoimages.sh (revision 293224) +++ projects/release-pkg/release/powerpc/mkisoimages.sh (revision 293225) @@ -1,69 +1,69 @@ #!/bin/sh # # Module: mkisoimages.sh # Author: Jordan K Hubbard # Date: 22 June 2001 # # $FreeBSD$ # # This script is used by release/Makefile to build the (optional) ISO images # for a FreeBSD release. It is considered architecture dependent since each # platform has a slightly unique way of making bootable CDs. This script # is also allowed to generate any number of images since that is more of # publishing decision than anything else. # # Usage: # # mkisoimages.sh [-b] image-label image-name base-bits-dir [extra-bits-dir] # # Where -b is passed if the ISO image should be made "bootable" by # whatever standards this architecture supports (may be unsupported), # image-label is the ISO image label, image-name is the filename of the # resulting ISO image, base-bits-dir contains the image contents and # extra-bits-dir, if provided, contains additional files to be merged # into base-bits-dir as part of making the image. if [ "x$1" = "x-b" ]; then # Apple boot code uudecode -o /tmp/hfs-boot-block.bz2 "`dirname "$0"`/hfs-boot.bz2.uu" bzip2 -d /tmp/hfs-boot-block.bz2 OFFSET=$(hd /tmp/hfs-boot-block | grep 'Loader START' | cut -f 1 -d ' ') OFFSET=0x$(echo 0x$OFFSET | awk '{printf("%x\n",$1/512);}') dd if="$4/boot/loader" of=/tmp/hfs-boot-block seek=$OFFSET conv=notrunc bootable="-o bootimage=macppc;/tmp/hfs-boot-block -o no-emul-boot" # pSeries/PAPR boot code mkdir -p "$4/ppc/chrp" cp "$4/boot/loader" "$4/ppc/chrp" cat > "$4/ppc/bootinfo.txt" << EOF FreeBSD Install FreeBSD boot &device;:,\ppc\chrp\loader EOF bootable="$bootable -o chrp-boot" # Playstation 3 boot code echo "FreeBSD Install='/boot/loader.ps3'" > "$4/etc/kboot.conf" shift else bootable="" fi if [ $# -lt 3 ]; then echo "Usage: $0 [-b] image-label image-name base-bits-dir [extra-bits-dir]" exit 1 fi LABEL=`echo "$1" | tr '[:lower:]' '[:upper:]'`; shift NAME="$1"; shift publisher="The FreeBSD Project. 
http://www.FreeBSD.org/" echo "/dev/iso9660/$LABEL / cd9660 ro 0 0" > "$1/etc/fstab" makefs -t cd9660 $bootable -o rockridge -o label="$LABEL" -o publisher="$publisher" "$NAME" "$@" -rm "$1/etc/fstab" -rm /tmp/hfs-boot-block +rm -f "$1/etc/fstab" +rm -f /tmp/hfs-boot-block rm -rf "$1/ppc" Index: projects/release-pkg/release/scripts/make-manifest.sh =================================================================== --- projects/release-pkg/release/scripts/make-manifest.sh (revision 293224) +++ projects/release-pkg/release/scripts/make-manifest.sh (revision 293225) @@ -1,26 +1,72 @@ #!/bin/sh # make-manifest.sh: create checksums and package descriptions for the installer # # Usage: make-manifest.sh foo1.txz foo2.txz ... # # The output file looks like this (tab-delimited): # foo1.txz SHA256-checksum Number-of-files foo1 Description Install-by-default # # $FreeBSD$ -desc_base="Base system (MANDATORY)" -desc_kernel="Kernel (MANDATORY)" -desc_doc="Additional documentation" -doc_default=off -desc_lib32="32-bit compatibility libraries" -desc_ports="Ports tree" -desc_src="System source code" -desc_tests="Test suite" -src_default=off -tests_default=off +base="Base system" +doc="Additional Documentation" +kernel="Kernel" +ports="Ports tree" +src="System source tree" +lib32="32-bit compatibility libraries" +tests="Test suite" -for i in $*; do - echo "`basename $i` `sha256 -q $i` `tar tvf $i | wc -l | tr -d ' '` `basename $i .txz` \"`eval echo \\\$desc_$(basename $i .txz)`\" `eval echo \\\${$(basename $i .txz)_default:-on}`" +desc_base="${base} (MANDATORY)" +desc_base_dbg="${base} (Debugging)" +desc_doc="${doc}" +desc_kernel="${kernel} (MANDATORY)" +desc_kernel_dbg="${kernel} (Debugging)" +desc_kernel_alt="Alternate ${kernel}" +desc_kernel_alt_dbg="Alternate ${kernel} (Debugging)" +desc_lib32="${lib32}" +desc_lib32_dbg="${lib32} (Debugging)" +desc_ports="${ports}" +desc_src="${src}" +desc_tests="${tests}" + +default_doc=off +default_src=off +default_tests=off +default_base_dbg=off +default_lib32_dbg=off +default_kernel_alt=off +default_kernel_dbg=on +default_kernel_alt_dbg=off + +for i in ${*}; do + dist="${i}" + distname="${i%%.txz}" + distname="$(echo ${distname} | tr '-' '_')" + distname="$(echo ${distname} | tr 'kernel.' 'kernel_')" + hash="$(sha256 -q ${i})" + nfiles="$(tar tvf ${i} | wc -l | tr -d ' ')" + default="$(eval echo \${default_${distname}:-on})" + desc="$(eval echo \"\${desc_${distname}}\")" + + case ${i} in + kernel-dbg.txz) + desc="${desc_kernel_dbg}" + ;; + kernel.*-dbg.txz) + desc="$(eval echo \"${desc_kernel_alt_dbg}\")" + desc="${desc}: $(eval echo ${i%%-dbg.txz} | cut -f 2 -d '.')" + default="$(eval echo \"${default_kernel_alt_dbg}\")" + ;; + kernel.*.txz) + desc="$(eval echo \"${desc_kernel_alt}\")" + desc="${desc}: $(eval echo ${i%%.txz} | cut -f 2 -d '.')" + default="$(eval echo \"${default_kernel_alt}\")" + ;; + *) + ;; + esac + + printf "${dist}\t${hash}\t${nfiles}\t${distname}\t\"${desc}\"\t${default}\n" done Index: projects/release-pkg/release/sparc64/mkisoimages.sh =================================================================== --- projects/release-pkg/release/sparc64/mkisoimages.sh (revision 293224) +++ projects/release-pkg/release/sparc64/mkisoimages.sh (revision 293225) @@ -1,84 +1,84 @@ #!/bin/sh # # Module: mkisoimages.sh # Author: Jordan K Hubbard # Date: 22 June 2001 # # $FreeBSD$ # # This script is used by release/Makefile to build the (optional) ISO images # for a FreeBSD release. 
It is considered architecture dependent since each # platform has a slightly unique way of making bootable CDs. This script # is also allowed to generate any number of images since that is more of # publishing decision than anything else. # # Usage: # # mkisoimages.sh [-b] image-label image-name base-bits-dir [extra-bits-dir] # # Where -b is passed if the ISO image should be made "bootable" by # whatever standards this architecture supports (may be unsupported), # image-label is the ISO image label, image-name is the filename of the # resulting ISO image, base-bits-dir contains the image contents and # extra-bits-dir, if provided, contains additional files to be merged # into base-bits-dir as part of making the image. if [ $# -lt 3 ]; then echo "Usage: $0 [-b] image-label image-name base-bits-dir [extra-bits-dir]" > /dev/stderr exit 1 fi case "$1" in -b) BOPT="$1"; shift ;; esac LABEL=`echo "$1" | tr '[:lower:]' '[:upper:]'`; shift NAME="$1"; shift BASEBITSDIR="$1" # Create an ISO image publisher="The FreeBSD Project. http://www.FreeBSD.org/" echo "/dev/iso9660/$LABEL / cd9660 ro 0 0" > "$BASEBITSDIR/etc/fstab" makefs -t cd9660 -o rockridge -o label="$LABEL" -o publisher="$publisher" "$NAME.tmp" "$@" -rm "$BASEBITSDIR/etc/fstab" +rm -f "$BASEBITSDIR/etc/fstab" if [ "x$BOPT" != "x-b" ]; then mv "$NAME.tmp" "$NAME" exit 0 fi TMPIMGDIR=`mktemp -d /tmp/bootfs.XXXXXXXX` || exit 1 BOOTFSDIR="$TMPIMGDIR/bootfs" BOOTFSIMG="$TMPIMGDIR/bootfs.img" # Create a boot filesystem mkdir -p "$BOOTFSDIR/boot" cp -p "$BASEBITSDIR/boot/loader" "$BOOTFSDIR/boot" makefs -t ffs -B be -M 512k "$BOOTFSIMG" "$BOOTFSDIR" dd if="$BASEBITSDIR/boot/boot1" of="$BOOTFSIMG" bs=512 conv=notrunc,sync # Create a boot ISO image : ${CYLSIZE:=640} ISOSIZE=$(stat -f %z "$NAME.tmp") ISOBLKS=$((($ISOSIZE + 511) / 512)) ISOCYLS=$((($ISOBLKS + ($CYLSIZE - 1)) / $CYLSIZE)) BOOTFSSIZE=$(stat -f %z "$BOOTFSIMG") BOOTFSBLKS=$((($BOOTFSSIZE + 511) / 512)) BOOTFSCYLS=$((($BOOTFSBLKS + ($CYLSIZE - 1)) / $CYLSIZE)) ENDCYL=$(($ISOCYLS + $BOOTFSCYLS)) NSECTS=$(($ENDCYL * 1 * $CYLSIZE)) dd if="$NAME.tmp" of="$NAME" bs="${CYLSIZE}b" conv=notrunc,sync dd if="$BOOTFSIMG" of="$NAME" bs="${CYLSIZE}b" seek=$ISOCYLS conv=notrunc,sync # The number of alternative cylinders is always 2. dd if=/dev/zero of="$NAME" bs="${CYLSIZE}b" seek=$ENDCYL count=2 conv=notrunc,sync rm -rf "$NAME.tmp" "$TMPIMGDIR" # Write VTOC8 label to boot ISO image MD=`mdconfig -a -t vnode -S 512 -y 1 -x "$CYLSIZE" -f "$NAME"` gpart create -s VTOC8 $MD # !4: usr, for ISO image part gpart add -i 1 -s "$(($ISOCYLS * $CYLSIZE * 512))b" -t \!4 $MD # !2: root, for bootfs part. gpart add -i 6 -s "$(($BOOTFSCYLS * $CYLSIZE * 512))b" -t \!2 $MD mdconfig -d -u ${MD#md} Index: projects/release-pkg/share/man/man4/ioat.4 =================================================================== --- projects/release-pkg/share/man/man4/ioat.4 (revision 293224) +++ projects/release-pkg/share/man/man4/ioat.4 (revision 293225) @@ -1,247 +1,249 @@ .\" Copyright (c) 2015 EMC / Isilon Storage Division .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. 
Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd December 17, 2015 +.Dd January 5, 2016 .Dt IOAT 4 .Os .Sh NAME .Nm I/OAT .Nd Intel I/O Acceleration Technology .Sh SYNOPSIS To compile this driver into your kernel, place the following line in your kernel configuration file: .Bd -ragged -offset indent .Cd "device ioat" .Ed .Pp Or, to load the driver as a module at boot, place the following line in .Xr loader.conf 5 : .Bd -literal -offset indent ioat_load="YES" .Ed .Pp In .Xr loader.conf 5 : .Pp .Cd hw.ioat.force_legacy_interrupts=0 .Pp In .Xr loader.conf 5 or .Xr sysctl.conf 5 : .Pp .Cd hw.ioat.enable_ioat_test=0 .Cd hw.ioat.debug_level=0 (only critical errors; maximum of 3) .Pp .Ft typedef void .Fn (*bus_dmaengine_callback_t) "void *arg" "int error" .Pp .Ft bus_dmaengine_t .Fn ioat_get_dmaengine "uint32_t channel_index" .Ft void .Fn ioat_put_dmaengine "bus_dmaengine_t dmaengine" .Ft int .Fn ioat_get_hwversion "bus_dmaengine_t dmaengine" +.Ft size_t +.Fn ioat_get_max_io_size "bus_dmaengine_t dmaengine" .Ft int .Fn ioat_set_interrupt_coalesce "bus_dmaengine_t dmaengine" "uint16_t delay" .Ft uint16_t .Fn ioat_get_max_coalesce_period "bus_dmaengine_t dmaengine" .Ft void .Fn ioat_acquire "bus_dmaengine_t dmaengine" .Ft void .Fn ioat_release "bus_dmaengine_t dmaengine" .Ft struct bus_dmadesc * .Fo ioat_copy .Fa "bus_dmaengine_t dmaengine" .Fa "bus_addr_t dst" .Fa "bus_addr_t src" .Fa "bus_size_t len" .Fa "bus_dmaengine_callback_t callback_fn" .Fa "void *callback_arg" .Fa "uint32_t flags" .Fc .Ft struct bus_dmadesc * .Fo ioat_copy_8k_aligned .Fa "bus_dmaengine_t dmaengine" .Fa "bus_addr_t dst1" .Fa "bus_addr_t dst2" .Fa "bus_addr_t src1" .Fa "bus_addr_t src2" .Fa "bus_dmaengine_callback_t callback_fn" .Fa "void *callback_arg" .Fa "uint32_t flags" .Fc .Ft struct bus_dmadesc * .Fo ioat_blockfill .Fa "bus_dmaengine_t dmaengine" .Fa "bus_addr_t dst" .Fa "uint64_t fillpattern" .Fa "bus_size_t len" .Fa "bus_dmaengine_callback_t callback_fn" .Fa "void *callback_arg" .Fa "uint32_t flags" .Fc .Ft struct bus_dmadesc * .Fo ioat_null .Fa "bus_dmaengine_t dmaengine" .Fa "bus_dmaengine_callback_t callback_fn" .Fa "void *callback_arg" .Fa "uint32_t flags" .Fc .Sh DESCRIPTION The .Nm driver provides a kernel API to a variety of DMA engines on some Intel server platforms. .Pp There is a number of DMA channels per CPU package. (Typically 4 or 8.) Each may be used independently. Operations on a single channel proceed sequentially. .Pp Blockfill operations can be used to write a 64-bit pattern to memory. 
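.\" A minimal C sketch of the copy KPI declared in the SYNOPSIS above; the
.\" channel number, the bus addresses "dst" and "src", and the length "len"
.\" are assumed to be set up by the caller, error handling is omitted, and
.\" DMA_EN_INT is the completion-interrupt flag discussed below:
.\"
.\"	static void
.\"	copy_done(void *arg, int error)
.\"	{
.\"		/* Invoked by the driver when the copy has completed. */
.\"	}
.\"
.\"	bus_dmaengine_t dmaengine;
.\"
.\"	dmaengine = ioat_get_dmaengine(0);
.\"	if (dmaengine != NULL) {
.\"		ioat_acquire(dmaengine);
.\"		(void)ioat_copy(dmaengine, dst, src, len, copy_done, NULL,
.\"		    DMA_EN_INT);
.\"		ioat_release(dmaengine);
.\"		/* ...later, when completely done with the channel: */
.\"		ioat_put_dmaengine(dmaengine);
.\"	}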
.Pp Copy operations can be used to offload memory copies to the DMA engines. .Pp Null operations do nothing, but may be used to test the interrupt and callback mechanism. .Pp All operations can optionally trigger an interrupt at completion with the .Ar DMA_EN_INT flag. For example, a user might submit multiple operations to the same channel and only enable an interrupt and callback for the last operation. .Pp The hardware can delay and coalesce interrupts on a given channel for a configurable period of time, in microseconds. This may be desired to reduce the processing and interrupt overhead per descriptor, especially for workflows consisting of many small operations. Software can control this on a per-channel basis with the .Fn ioat_set_interrupt_coalesce API. The .Fn ioat_get_max_coalesce_period API can be used to determine the maximum coalescing period supported by the hardware, in microseconds. Current platforms support up to a 16.383 millisecond coalescing period. Optimal configuration will vary by workflow and desired operation latency. .Pp All operations are safe to use in a non-blocking context with the .Ar DMA_NO_WAIT flag. (Of course, allocations may fail and operations requested with .Ar DMA_NO_WAIT may return NULL.) .Pp All operations, as well as .Fn ioat_get_dmaengine , can return NULL in special circumstances. For example, if the .Nm driver is being unloaded, or the administrator has induced a hardware reset, or a usage error has resulted in a hardware error state that needs to be recovered from. .Pp It is invalid to attempt to submit new DMA operations in a .Fa bus_dmaengine_callback_t context. .Sh USAGE A typical user will lookup the DMA engine object for a given channel with .Fn ioat_get_dmaengine . When the user wants to offload a copy, they will first .Fn ioat_acquire the .Ar bus_dmaengine_t object for exclusive access to enqueue operations on that channel. Then, they will submit one or more operations using .Fn ioat_blockfill , .Fn ioat_copy , or .Fn ioat_null . After queuing one or more individual DMA operations, they will .Fn ioat_release the .Ar bus_dmaengine_t to drop their exclusive access to the channel. The routine they provided for the .Fa callback_fn argument will be invoked with the provided .Fa callback_arg when the operation is complete. When they are finished with the .Ar bus_dmaengine_t , the user should .Fn ioat_put_dmaengine . .Pp Users MUST NOT block between .Fn ioat_acquire and .Fn ioat_release . Users SHOULD NOT hold .Ar bus_dmaengine_t references for a very long time to enable fault recovery and kernel module unload. .Pp For an example of usage, see .Pa src/sys/dev/ioat/ioat_test.c . .Sh FILES .Bl -tag .It Pa /dev/ioat_test test device for .Xr ioatcontrol 8 .El .Sh SEE ALSO .Xr ioatcontrol 8 .Sh HISTORY The .Nm driver first appeared in .Fx 11.0 . .Sh AUTHORS The .Nm driver was developed by .An \&Jim Harris Aq Mt jimharris@FreeBSD.org , .An \&Carl Delsey Aq Mt carl.r.delsey@intel.com , and .An \&Conrad Meyer Aq Mt cem@FreeBSD.org . This manual page was written by .An \&Conrad Meyer Aq Mt cem@FreeBSD.org . .Sh CAVEATS Copy operation takes bus addresses as parameters, not virtual addresses. .Pp Buffers for individual copy operations must be physically contiguous. .Pp Copies larger than max transfer size (1MB, but may vary by hardware) are not supported. Future versions will likely support this by breaking up the transfer into smaller sizes. .Sh BUGS The .Nm driver only supports blockfill, copy, and null operations at this time. 
The driver does not yet support advanced DMA modes, such as XOR, that some I/OAT devices support. Index: projects/release-pkg/share/man/man4 =================================================================== --- projects/release-pkg/share/man/man4 (revision 293224) +++ projects/release-pkg/share/man/man4 (revision 293225) Property changes on: projects/release-pkg/share/man/man4 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/share/man/man4:r289119,289158,289371-289384,293171-293224 Index: projects/release-pkg/share =================================================================== --- projects/release-pkg/share (revision 293224) +++ projects/release-pkg/share (revision 293225) Property changes on: projects/release-pkg/share ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/share:r289119,289371,289383-289384,293171-293224 Index: projects/release-pkg/sys/dev/ioat/ioat.c =================================================================== --- projects/release-pkg/sys/dev/ioat/ioat.c (revision 293224) +++ projects/release-pkg/sys/dev/ioat/ioat.c (revision 293225) @@ -1,1847 +1,1856 @@ /*- * Copyright (C) 2012 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ioat.h" #include "ioat_hw.h" #include "ioat_internal.h" #define IOAT_INTR_TIMO (hz / 10) #define IOAT_REFLK (&ioat->submit_lock) static int ioat_probe(device_t device); static int ioat_attach(device_t device); static int ioat_detach(device_t device); static int ioat_setup_intr(struct ioat_softc *ioat); static int ioat_teardown_intr(struct ioat_softc *ioat); static int ioat3_attach(device_t device); static int ioat_start_channel(struct ioat_softc *ioat); static int ioat_map_pci_bar(struct ioat_softc *ioat); static void ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error); static void ioat_interrupt_handler(void *arg); static boolean_t ioat_model_resets_msix(struct ioat_softc *ioat); static int chanerr_to_errno(uint32_t); static void ioat_process_events(struct ioat_softc *ioat); static inline uint32_t ioat_get_active(struct ioat_softc *ioat); static inline uint32_t ioat_get_ring_space(struct ioat_softc *ioat); static void ioat_free_ring(struct ioat_softc *, uint32_t size, struct ioat_descriptor **); static void ioat_free_ring_entry(struct ioat_softc *ioat, struct ioat_descriptor *desc); static struct ioat_descriptor *ioat_alloc_ring_entry(struct ioat_softc *, int mflags); static int ioat_reserve_space(struct ioat_softc *, uint32_t, int mflags); static struct ioat_descriptor *ioat_get_ring_entry(struct ioat_softc *ioat, uint32_t index); static struct ioat_descriptor **ioat_prealloc_ring(struct ioat_softc *, uint32_t size, boolean_t need_dscr, int mflags); static int ring_grow(struct ioat_softc *, uint32_t oldorder, struct ioat_descriptor **); static int ring_shrink(struct ioat_softc *, uint32_t oldorder, struct ioat_descriptor **); static void ioat_halted_debug(struct ioat_softc *, uint32_t); static void ioat_timer_callback(void *arg); static void dump_descriptor(void *hw_desc); static void ioat_submit_single(struct ioat_softc *ioat); static void ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg, int error); static int ioat_reset_hw(struct ioat_softc *ioat); static void ioat_setup_sysctl(device_t device); static int sysctl_handle_reset(SYSCTL_HANDLER_ARGS); static inline struct ioat_softc *ioat_get(struct ioat_softc *, enum ioat_ref_kind); static inline void ioat_put(struct ioat_softc *, enum ioat_ref_kind); static inline void _ioat_putn(struct ioat_softc *, uint32_t, enum ioat_ref_kind, boolean_t); static inline void ioat_putn(struct ioat_softc *, uint32_t, enum ioat_ref_kind); static inline void ioat_putn_locked(struct ioat_softc *, uint32_t, enum ioat_ref_kind); static void ioat_drain_locked(struct ioat_softc *); #define ioat_log_message(v, ...) do { \ if ((v) <= g_ioat_debug_level) { \ device_printf(ioat->device, __VA_ARGS__); \ } \ } while (0) MALLOC_DEFINE(M_IOAT, "ioat", "ioat driver memory allocations"); SYSCTL_NODE(_hw, OID_AUTO, ioat, CTLFLAG_RD, 0, "ioat node"); static int g_force_legacy_interrupts; SYSCTL_INT(_hw_ioat, OID_AUTO, force_legacy_interrupts, CTLFLAG_RDTUN, &g_force_legacy_interrupts, 0, "Set to non-zero to force MSI-X disabled"); int g_ioat_debug_level = 0; SYSCTL_INT(_hw_ioat, OID_AUTO, debug_level, CTLFLAG_RWTUN, &g_ioat_debug_level, 0, "Set log level (0-3) for ioat(4). 
Higher is more verbose."); /* * OS <-> Driver interface structures */ static device_method_t ioat_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ioat_probe), DEVMETHOD(device_attach, ioat_attach), DEVMETHOD(device_detach, ioat_detach), { 0, 0 } }; static driver_t ioat_pci_driver = { "ioat", ioat_pci_methods, sizeof(struct ioat_softc), }; static devclass_t ioat_devclass; DRIVER_MODULE(ioat, pci, ioat_pci_driver, ioat_devclass, 0, 0); MODULE_VERSION(ioat, 1); /* * Private data structures */ static struct ioat_softc *ioat_channel[IOAT_MAX_CHANNELS]; static int ioat_channel_index = 0; SYSCTL_INT(_hw_ioat, OID_AUTO, channels, CTLFLAG_RD, &ioat_channel_index, 0, "Number of IOAT channels attached"); static struct _pcsid { u_int32_t type; const char *desc; } pci_ids[] = { { 0x34308086, "TBG IOAT Ch0" }, { 0x34318086, "TBG IOAT Ch1" }, { 0x34328086, "TBG IOAT Ch2" }, { 0x34338086, "TBG IOAT Ch3" }, { 0x34298086, "TBG IOAT Ch4" }, { 0x342a8086, "TBG IOAT Ch5" }, { 0x342b8086, "TBG IOAT Ch6" }, { 0x342c8086, "TBG IOAT Ch7" }, { 0x37108086, "JSF IOAT Ch0" }, { 0x37118086, "JSF IOAT Ch1" }, { 0x37128086, "JSF IOAT Ch2" }, { 0x37138086, "JSF IOAT Ch3" }, { 0x37148086, "JSF IOAT Ch4" }, { 0x37158086, "JSF IOAT Ch5" }, { 0x37168086, "JSF IOAT Ch6" }, { 0x37178086, "JSF IOAT Ch7" }, { 0x37188086, "JSF IOAT Ch0 (RAID)" }, { 0x37198086, "JSF IOAT Ch1 (RAID)" }, { 0x3c208086, "SNB IOAT Ch0" }, { 0x3c218086, "SNB IOAT Ch1" }, { 0x3c228086, "SNB IOAT Ch2" }, { 0x3c238086, "SNB IOAT Ch3" }, { 0x3c248086, "SNB IOAT Ch4" }, { 0x3c258086, "SNB IOAT Ch5" }, { 0x3c268086, "SNB IOAT Ch6" }, { 0x3c278086, "SNB IOAT Ch7" }, { 0x3c2e8086, "SNB IOAT Ch0 (RAID)" }, { 0x3c2f8086, "SNB IOAT Ch1 (RAID)" }, { 0x0e208086, "IVB IOAT Ch0" }, { 0x0e218086, "IVB IOAT Ch1" }, { 0x0e228086, "IVB IOAT Ch2" }, { 0x0e238086, "IVB IOAT Ch3" }, { 0x0e248086, "IVB IOAT Ch4" }, { 0x0e258086, "IVB IOAT Ch5" }, { 0x0e268086, "IVB IOAT Ch6" }, { 0x0e278086, "IVB IOAT Ch7" }, { 0x0e2e8086, "IVB IOAT Ch0 (RAID)" }, { 0x0e2f8086, "IVB IOAT Ch1 (RAID)" }, { 0x2f208086, "HSW IOAT Ch0" }, { 0x2f218086, "HSW IOAT Ch1" }, { 0x2f228086, "HSW IOAT Ch2" }, { 0x2f238086, "HSW IOAT Ch3" }, { 0x2f248086, "HSW IOAT Ch4" }, { 0x2f258086, "HSW IOAT Ch5" }, { 0x2f268086, "HSW IOAT Ch6" }, { 0x2f278086, "HSW IOAT Ch7" }, { 0x2f2e8086, "HSW IOAT Ch0 (RAID)" }, { 0x2f2f8086, "HSW IOAT Ch1 (RAID)" }, { 0x0c508086, "BWD IOAT Ch0" }, { 0x0c518086, "BWD IOAT Ch1" }, { 0x0c528086, "BWD IOAT Ch2" }, { 0x0c538086, "BWD IOAT Ch3" }, { 0x6f508086, "BDXDE IOAT Ch0" }, { 0x6f518086, "BDXDE IOAT Ch1" }, { 0x6f528086, "BDXDE IOAT Ch2" }, { 0x6f538086, "BDXDE IOAT Ch3" }, { 0x6f208086, "BDX IOAT Ch0" }, { 0x6f218086, "BDX IOAT Ch1" }, { 0x6f228086, "BDX IOAT Ch2" }, { 0x6f238086, "BDX IOAT Ch3" }, { 0x6f248086, "BDX IOAT Ch4" }, { 0x6f258086, "BDX IOAT Ch5" }, { 0x6f268086, "BDX IOAT Ch6" }, { 0x6f278086, "BDX IOAT Ch7" }, { 0x6f2e8086, "BDX IOAT Ch0 (RAID)" }, { 0x6f2f8086, "BDX IOAT Ch1 (RAID)" }, { 0x00000000, NULL } }; /* * OS <-> Driver linkage functions */ static int ioat_probe(device_t device) { struct _pcsid *ep; u_int32_t type; type = pci_get_devid(device); for (ep = pci_ids; ep->type; ep++) { if (ep->type == type) { device_set_desc(device, ep->desc); return (0); } } return (ENXIO); } static int ioat_attach(device_t device) { struct ioat_softc *ioat; int error; ioat = DEVICE2SOFTC(device); ioat->device = device; error = ioat_map_pci_bar(ioat); if (error != 0) goto err; ioat->version = ioat_read_cbver(ioat); if (ioat->version < IOAT_VER_3_0) { error = 
ENODEV; goto err; } error = ioat3_attach(device); if (error != 0) goto err; error = pci_enable_busmaster(device); if (error != 0) goto err; error = ioat_setup_intr(ioat); if (error != 0) goto err; error = ioat_reset_hw(ioat); if (error != 0) goto err; ioat_process_events(ioat); ioat_setup_sysctl(device); ioat->chan_idx = ioat_channel_index; ioat_channel[ioat_channel_index++] = ioat; ioat_test_attach(); err: if (error != 0) ioat_detach(device); return (error); } static int ioat_detach(device_t device) { struct ioat_softc *ioat; ioat = DEVICE2SOFTC(device); ioat_test_detach(); mtx_lock(IOAT_REFLK); ioat->quiescing = TRUE; ioat_channel[ioat->chan_idx] = NULL; ioat_drain_locked(ioat); mtx_unlock(IOAT_REFLK); ioat_teardown_intr(ioat); callout_drain(&ioat->timer); pci_disable_busmaster(device); if (ioat->pci_resource != NULL) bus_release_resource(device, SYS_RES_MEMORY, ioat->pci_resource_id, ioat->pci_resource); if (ioat->ring != NULL) ioat_free_ring(ioat, 1 << ioat->ring_size_order, ioat->ring); if (ioat->comp_update != NULL) { bus_dmamap_unload(ioat->comp_update_tag, ioat->comp_update_map); bus_dmamem_free(ioat->comp_update_tag, ioat->comp_update, ioat->comp_update_map); bus_dma_tag_destroy(ioat->comp_update_tag); } bus_dma_tag_destroy(ioat->hw_desc_tag); return (0); } static int ioat_teardown_intr(struct ioat_softc *ioat) { if (ioat->tag != NULL) bus_teardown_intr(ioat->device, ioat->res, ioat->tag); if (ioat->res != NULL) bus_release_resource(ioat->device, SYS_RES_IRQ, rman_get_rid(ioat->res), ioat->res); pci_release_msi(ioat->device); return (0); } static int ioat_start_channel(struct ioat_softc *ioat) { uint64_t status; uint32_t chanerr; int i; ioat_acquire(&ioat->dmaengine); ioat_null(&ioat->dmaengine, NULL, NULL, 0); ioat_release(&ioat->dmaengine); for (i = 0; i < 100; i++) { DELAY(1); status = ioat_get_chansts(ioat); if (is_ioat_idle(status)) return (0); } chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET); ioat_log_message(0, "could not start channel: " "status = %#jx error = %b\n", (uintmax_t)status, (int)chanerr, IOAT_CHANERR_STR); return (ENXIO); } /* * Initialize Hardware */ static int ioat3_attach(device_t device) { struct ioat_softc *ioat; struct ioat_descriptor **ring; struct ioat_descriptor *next; struct ioat_dma_hw_descriptor *dma_hw_desc; int i, num_descriptors; int error; uint8_t xfercap; error = 0; ioat = DEVICE2SOFTC(device); ioat->capabilities = ioat_read_dmacapability(ioat); ioat_log_message(1, "Capabilities: %b\n", (int)ioat->capabilities, IOAT_DMACAP_STR); xfercap = ioat_read_xfercap(ioat); ioat->max_xfer_size = 1 << xfercap; ioat->intrdelay_supported = (ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) & IOAT_INTRDELAY_SUPPORTED) != 0; if (ioat->intrdelay_supported) ioat->intrdelay_max = IOAT_INTRDELAY_US_MASK; /* TODO: need to check DCA here if we ever do XOR/PQ */ mtx_init(&ioat->submit_lock, "ioat_submit", NULL, MTX_DEF); mtx_init(&ioat->cleanup_lock, "ioat_cleanup", NULL, MTX_DEF); callout_init(&ioat->timer, 1); /* Establish lock order for Witness */ mtx_lock(&ioat->submit_lock); mtx_lock(&ioat->cleanup_lock); mtx_unlock(&ioat->cleanup_lock); mtx_unlock(&ioat->submit_lock); ioat->is_resize_pending = FALSE; ioat->is_completion_pending = FALSE; ioat->is_reset_pending = FALSE; ioat->is_channel_running = FALSE; bus_dma_tag_create(bus_get_dma_tag(ioat->device), sizeof(uint64_t), 0x0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(uint64_t), 1, sizeof(uint64_t), 0, NULL, NULL, &ioat->comp_update_tag); error = bus_dmamem_alloc(ioat->comp_update_tag, (void 
**)&ioat->comp_update, BUS_DMA_ZERO, &ioat->comp_update_map); if (ioat->comp_update == NULL) return (ENOMEM); error = bus_dmamap_load(ioat->comp_update_tag, ioat->comp_update_map, ioat->comp_update, sizeof(uint64_t), ioat_comp_update_map, ioat, 0); if (error != 0) return (error); ioat->ring_size_order = IOAT_MIN_ORDER; num_descriptors = 1 << ioat->ring_size_order; bus_dma_tag_create(bus_get_dma_tag(ioat->device), 0x40, 0x0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(struct ioat_dma_hw_descriptor), 1, sizeof(struct ioat_dma_hw_descriptor), 0, NULL, NULL, &ioat->hw_desc_tag); ioat->ring = malloc(num_descriptors * sizeof(*ring), M_IOAT, M_ZERO | M_WAITOK); if (ioat->ring == NULL) return (ENOMEM); ring = ioat->ring; for (i = 0; i < num_descriptors; i++) { ring[i] = ioat_alloc_ring_entry(ioat, M_WAITOK); if (ring[i] == NULL) return (ENOMEM); ring[i]->id = i; } for (i = 0; i < num_descriptors - 1; i++) { next = ring[i + 1]; dma_hw_desc = ring[i]->u.dma; dma_hw_desc->next = next->hw_desc_bus_addr; } ring[i]->u.dma->next = ring[0]->hw_desc_bus_addr; ioat->head = ioat->hw_head = 0; ioat->tail = 0; ioat->last_seen = 0; return (0); } static int ioat_map_pci_bar(struct ioat_softc *ioat) { ioat->pci_resource_id = PCIR_BAR(0); ioat->pci_resource = bus_alloc_resource_any(ioat->device, SYS_RES_MEMORY, &ioat->pci_resource_id, RF_ACTIVE); if (ioat->pci_resource == NULL) { ioat_log_message(0, "unable to allocate pci resource\n"); return (ENODEV); } ioat->pci_bus_tag = rman_get_bustag(ioat->pci_resource); ioat->pci_bus_handle = rman_get_bushandle(ioat->pci_resource); return (0); } static void ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg, int error) { struct ioat_softc *ioat = arg; KASSERT(error == 0, ("%s: error:%d", __func__, error)); ioat->comp_update_bus_addr = seg[0].ds_addr; } static void ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *baddr; KASSERT(error == 0, ("%s: error:%d", __func__, error)); baddr = arg; *baddr = segs->ds_addr; } /* * Interrupt setup and handlers */ static int ioat_setup_intr(struct ioat_softc *ioat) { uint32_t num_vectors; int error; boolean_t use_msix; boolean_t force_legacy_interrupts; use_msix = FALSE; force_legacy_interrupts = FALSE; if (!g_force_legacy_interrupts && pci_msix_count(ioat->device) >= 1) { num_vectors = 1; pci_alloc_msix(ioat->device, &num_vectors); if (num_vectors == 1) use_msix = TRUE; } if (use_msix) { ioat->rid = 1; ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ, &ioat->rid, RF_ACTIVE); } else { ioat->rid = 0; ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ, &ioat->rid, RF_SHAREABLE | RF_ACTIVE); } if (ioat->res == NULL) { ioat_log_message(0, "bus_alloc_resource failed\n"); return (ENOMEM); } ioat->tag = NULL; error = bus_setup_intr(ioat->device, ioat->res, INTR_MPSAFE | INTR_TYPE_MISC, NULL, ioat_interrupt_handler, ioat, &ioat->tag); if (error != 0) { ioat_log_message(0, "bus_setup_intr failed\n"); return (error); } ioat_write_intrctrl(ioat, IOAT_INTRCTRL_MASTER_INT_EN); return (0); } static boolean_t ioat_model_resets_msix(struct ioat_softc *ioat) { u_int32_t pciid; pciid = pci_get_devid(ioat->device); switch (pciid) { /* BWD: */ case 0x0c508086: case 0x0c518086: case 0x0c528086: case 0x0c538086: /* BDXDE: */ case 0x6f508086: case 0x6f518086: case 0x6f528086: case 0x6f538086: return (TRUE); } return (FALSE); } static void ioat_interrupt_handler(void *arg) { struct ioat_softc *ioat = arg; ioat->stats.interrupts++; ioat_process_events(ioat); } static int 
chanerr_to_errno(uint32_t chanerr) { if (chanerr == 0) return (0); if ((chanerr & (IOAT_CHANERR_XSADDERR | IOAT_CHANERR_XDADDERR)) != 0) return (EFAULT); if ((chanerr & (IOAT_CHANERR_RDERR | IOAT_CHANERR_WDERR)) != 0) return (EIO); /* This one is probably our fault: */ if ((chanerr & IOAT_CHANERR_NDADDERR) != 0) return (EIO); return (EIO); } static void ioat_process_events(struct ioat_softc *ioat) { struct ioat_descriptor *desc; struct bus_dmadesc *dmadesc; uint64_t comp_update, status; uint32_t completed, chanerr; int error; mtx_lock(&ioat->cleanup_lock); completed = 0; comp_update = *ioat->comp_update; status = comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK; CTR0(KTR_IOAT, __func__); if (status == ioat->last_seen) goto out; while (1) { desc = ioat_get_ring_entry(ioat, ioat->tail); dmadesc = &desc->bus_dmadesc; CTR1(KTR_IOAT, "completing desc %d", ioat->tail); if (dmadesc->callback_fn != NULL) dmadesc->callback_fn(dmadesc->callback_arg, 0); completed++; ioat->tail++; if (desc->hw_desc_bus_addr == status) break; } ioat->last_seen = desc->hw_desc_bus_addr; if (ioat->head == ioat->tail) { ioat->is_completion_pending = FALSE; callout_reset(&ioat->timer, IOAT_INTR_TIMO, ioat_timer_callback, ioat); } ioat->stats.descriptors_processed += completed; out: ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN); mtx_unlock(&ioat->cleanup_lock); ioat_putn(ioat, completed, IOAT_ACTIVE_DESCR_REF); wakeup(&ioat->tail); if (!is_ioat_halted(comp_update)) return; ioat->stats.channel_halts++; /* * Fatal programming error on this DMA channel. Flush any outstanding * work with error status and restart the engine. */ ioat_log_message(0, "Channel halted due to fatal programming error\n"); mtx_lock(&ioat->submit_lock); mtx_lock(&ioat->cleanup_lock); ioat->quiescing = TRUE; chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET); ioat_halted_debug(ioat, chanerr); ioat->stats.last_halt_chanerr = chanerr; while (ioat_get_active(ioat) > 0) { desc = ioat_get_ring_entry(ioat, ioat->tail); dmadesc = &desc->bus_dmadesc; CTR1(KTR_IOAT, "completing err desc %d", ioat->tail); if (dmadesc->callback_fn != NULL) dmadesc->callback_fn(dmadesc->callback_arg, chanerr_to_errno(chanerr)); ioat_putn_locked(ioat, 1, IOAT_ACTIVE_DESCR_REF); ioat->tail++; ioat->stats.descriptors_processed++; ioat->stats.descriptors_error++; } /* Clear error status */ ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr); mtx_unlock(&ioat->cleanup_lock); mtx_unlock(&ioat->submit_lock); ioat_log_message(0, "Resetting channel to recover from error\n"); error = ioat_reset_hw(ioat); KASSERT(error == 0, ("%s: reset failed: %d", __func__, error)); } /* * User API functions */ bus_dmaengine_t ioat_get_dmaengine(uint32_t index) { struct ioat_softc *sc; if (index >= ioat_channel_index) return (NULL); sc = ioat_channel[index]; if (sc == NULL || sc->quiescing) return (NULL); return (&ioat_get(sc, IOAT_DMAENGINE_REF)->dmaengine); } void ioat_put_dmaengine(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); ioat_put(ioat, IOAT_DMAENGINE_REF); } int ioat_get_hwversion(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); return (ioat->version); } +size_t +ioat_get_max_io_size(bus_dmaengine_t dmaengine) +{ + struct ioat_softc *ioat; + + ioat = to_ioat_softc(dmaengine); + return (ioat->max_xfer_size); +} + int ioat_set_interrupt_coalesce(bus_dmaengine_t dmaengine, uint16_t delay) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); if (!ioat->intrdelay_supported) return (ENODEV); if (delay > 
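The user API here is reference-counted: ioat_get_dmaengine() takes a reference that ioat_put_dmaengine() drops, and this revision adds ioat_get_max_io_size() so a consumer can learn the channel's transfer limit before building operations. A minimal consumer sketch follows; it is illustrative only and not part of the patch: the function name, the error policy, and the include paths are assumptions.

/*
 * Hypothetical consumer: look up channel 0, record its limits, and
 * configure interrupt coalescing clamped to the hardware maximum.
 */
#include <sys/param.h>
#include <sys/errno.h>
#include <dev/ioat/ioat.h>	/* include path assumed */

static int
example_channel_setup(size_t *max_io, uint16_t want_delay_us)
{
	bus_dmaengine_t dma;
	uint16_t max_delay;
	int error;

	dma = ioat_get_dmaengine(0);		/* takes a reference */
	if (dma == NULL)
		return (ENXIO);			/* no such channel, or quiescing */

	*max_io = ioat_get_max_io_size(dma);	/* added by this revision */

	/*
	 * Clamp the requested delay; ioat_get_max_coalesce_period() returns 0
	 * when coalescing is unsupported, and the setter then returns ENODEV.
	 */
	max_delay = ioat_get_max_coalesce_period(dma);
	if (want_delay_us > max_delay)
		want_delay_us = max_delay;
	error = ioat_set_interrupt_coalesce(dma, want_delay_us);
	if (error == ENODEV)
		error = 0;

	ioat_put_dmaengine(dma);		/* drop the reference */
	return (error);
}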
ioat->intrdelay_max) return (ERANGE); ioat_write_2(ioat, IOAT_INTRDELAY_OFFSET, delay); ioat->cached_intrdelay = ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) & IOAT_INTRDELAY_US_MASK; return (0); } uint16_t ioat_get_max_coalesce_period(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); return (ioat->intrdelay_max); } void ioat_acquire(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); mtx_lock(&ioat->submit_lock); CTR0(KTR_IOAT, __func__); } void ioat_release(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); CTR0(KTR_IOAT, __func__); ioat_write_2(ioat, IOAT_DMACOUNT_OFFSET, (uint16_t)ioat->hw_head); mtx_unlock(&ioat->submit_lock); } static struct ioat_descriptor * ioat_op_generic(struct ioat_softc *ioat, uint8_t op, uint32_t size, uint64_t src, uint64_t dst, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_generic_hw_descriptor *hw_desc; struct ioat_descriptor *desc; int mflags; mtx_assert(&ioat->submit_lock, MA_OWNED); KASSERT((flags & ~DMA_ALL_FLAGS) == 0, ("Unrecognized flag(s): %#x", flags & ~DMA_ALL_FLAGS)); if ((flags & DMA_NO_WAIT) != 0) mflags = M_NOWAIT; else mflags = M_WAITOK; if (size > ioat->max_xfer_size) { ioat_log_message(0, "%s: max_xfer_size = %d, requested = %u\n", __func__, ioat->max_xfer_size, (unsigned)size); return (NULL); } if (ioat_reserve_space(ioat, 1, mflags) != 0) return (NULL); desc = ioat_get_ring_entry(ioat, ioat->head); hw_desc = desc->u.generic; hw_desc->u.control_raw = 0; hw_desc->u.control_generic.op = op; hw_desc->u.control_generic.completion_update = 1; if ((flags & DMA_INT_EN) != 0) hw_desc->u.control_generic.int_enable = 1; hw_desc->size = size; hw_desc->src_addr = src; hw_desc->dest_addr = dst; desc->bus_dmadesc.callback_fn = callback_fn; desc->bus_dmadesc.callback_arg = callback_arg; return (desc); } struct bus_dmadesc * ioat_null(bus_dmaengine_t dmaengine, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_dma_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; CTR0(KTR_IOAT, __func__); ioat = to_ioat_softc(dmaengine); desc = ioat_op_generic(ioat, IOAT_OP_COPY, 8, 0, 0, callback_fn, callback_arg, flags); if (desc == NULL) return (NULL); hw_desc = desc->u.dma; hw_desc->u.control.null = 1; ioat_submit_single(ioat); return (&desc->bus_dmadesc); } struct bus_dmadesc * ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst, bus_addr_t src, bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_dma_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; CTR0(KTR_IOAT, __func__); ioat = to_ioat_softc(dmaengine); if (((src | dst) & (0xffffull << 48)) != 0) { ioat_log_message(0, "%s: High 16 bits of src/dst invalid\n", __func__); return (NULL); } desc = ioat_op_generic(ioat, IOAT_OP_COPY, len, src, dst, callback_fn, callback_arg, flags); if (desc == NULL) return (NULL); hw_desc = desc->u.dma; if (g_ioat_debug_level >= 3) dump_descriptor(hw_desc); ioat_submit_single(ioat); return (&desc->bus_dmadesc); } struct bus_dmadesc * ioat_copy_8k_aligned(bus_dmaengine_t dmaengine, bus_addr_t dst1, bus_addr_t dst2, bus_addr_t src1, bus_addr_t src2, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_dma_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; CTR0(KTR_IOAT, __func__); ioat = to_ioat_softc(dmaengine); if (((src1 | src2 | 
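ioat_acquire() and ioat_release() bracket submission: descriptors queued in between are handed to the hardware when ioat_release() writes the updated DMACOUNT, so DMA_INT_EN is normally requested only on the last descriptor of a batch. A hedged sketch of that pattern follows; it is illustrative only, with hypothetical names (example_done, example_copy_batch), deliberately minimal error handling, and the assumption that dst and src are already valid, dmamap-loaded bus addresses.

/* Illustrative only; assumes the same headers as the sketch above. */
static void
example_done(void *arg, int error)
{
	/*
	 * Runs from ioat_process_events(); error is nonzero only when the
	 * channel halted and outstanding work was flushed with
	 * chanerr_to_errno().
	 */
	if (error != 0)
		printf("ioat example: copy failed: %d\n", error);
	wakeup(arg);	/* 'arg' is whatever was passed as callback_arg */
}

static int
example_copy_batch(bus_dmaengine_t dma, bus_addr_t dst, bus_addr_t src,
    bus_size_t len, void *waitchan)
{
	struct bus_dmadesc *d;

	ioat_acquire(dma);
	/* First half: no interrupt requested for this descriptor. */
	d = ioat_copy(dma, dst, src, len / 2, NULL, NULL, DMA_NO_WAIT);
	if (d == NULL)
		goto fail;
	/* Last descriptor of the batch: interrupt + callback. */
	d = ioat_copy(dma, dst + len / 2, src + len / 2, len - len / 2,
	    example_done, waitchan, DMA_INT_EN | DMA_NO_WAIT);
	if (d == NULL)
		goto fail;
	ioat_release(dma);	/* writes DMACOUNT; hardware proceeds */
	return (0);
fail:
	ioat_release(dma);	/* anything already queued still completes */
	return (ENOMEM);	/* ring full or allocation failed (DMA_NO_WAIT) */
}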
dst1 | dst2) & (0xffffull << 48)) != 0) { ioat_log_message(0, "%s: High 16 bits of src/dst invalid\n", __func__); return (NULL); } if (((src1 | src2 | dst1 | dst2) & PAGE_MASK) != 0) { ioat_log_message(0, "%s: Addresses must be page-aligned\n", __func__); return (NULL); } desc = ioat_op_generic(ioat, IOAT_OP_COPY, 2 * PAGE_SIZE, src1, dst1, callback_fn, callback_arg, flags); if (desc == NULL) return (NULL); hw_desc = desc->u.dma; if (src2 != src1 + PAGE_SIZE) { hw_desc->u.control.src_page_break = 1; hw_desc->next_src_addr = src2; } if (dst2 != dst1 + PAGE_SIZE) { hw_desc->u.control.dest_page_break = 1; hw_desc->next_dest_addr = dst2; } if (g_ioat_debug_level >= 3) dump_descriptor(hw_desc); ioat_submit_single(ioat); return (&desc->bus_dmadesc); } struct bus_dmadesc * ioat_blockfill(bus_dmaengine_t dmaengine, bus_addr_t dst, uint64_t fillpattern, bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_fill_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; CTR0(KTR_IOAT, __func__); ioat = to_ioat_softc(dmaengine); if ((ioat->capabilities & IOAT_DMACAP_BFILL) == 0) { ioat_log_message(0, "%s: Device lacks BFILL capability\n", __func__); return (NULL); } if ((dst & (0xffffull << 48)) != 0) { ioat_log_message(0, "%s: High 16 bits of dst invalid\n", __func__); return (NULL); } desc = ioat_op_generic(ioat, IOAT_OP_FILL, len, fillpattern, dst, callback_fn, callback_arg, flags); if (desc == NULL) return (NULL); hw_desc = desc->u.fill; if (g_ioat_debug_level >= 3) dump_descriptor(hw_desc); ioat_submit_single(ioat); return (&desc->bus_dmadesc); } /* * Ring Management */ static inline uint32_t ioat_get_active(struct ioat_softc *ioat) { return ((ioat->head - ioat->tail) & ((1 << ioat->ring_size_order) - 1)); } static inline uint32_t ioat_get_ring_space(struct ioat_softc *ioat) { return ((1 << ioat->ring_size_order) - ioat_get_active(ioat) - 1); } static struct ioat_descriptor * ioat_alloc_ring_entry(struct ioat_softc *ioat, int mflags) { struct ioat_generic_hw_descriptor *hw_desc; struct ioat_descriptor *desc; int error, busdmaflag; error = ENOMEM; hw_desc = NULL; if ((mflags & M_WAITOK) != 0) busdmaflag = BUS_DMA_WAITOK; else busdmaflag = BUS_DMA_NOWAIT; desc = malloc(sizeof(*desc), M_IOAT, mflags); if (desc == NULL) goto out; bus_dmamem_alloc(ioat->hw_desc_tag, (void **)&hw_desc, BUS_DMA_ZERO | busdmaflag, &ioat->hw_desc_map); if (hw_desc == NULL) goto out; memset(&desc->bus_dmadesc, 0, sizeof(desc->bus_dmadesc)); desc->u.generic = hw_desc; error = bus_dmamap_load(ioat->hw_desc_tag, ioat->hw_desc_map, hw_desc, sizeof(*hw_desc), ioat_dmamap_cb, &desc->hw_desc_bus_addr, busdmaflag); if (error) goto out; out: if (error) { ioat_free_ring_entry(ioat, desc); return (NULL); } return (desc); } static void ioat_free_ring_entry(struct ioat_softc *ioat, struct ioat_descriptor *desc) { if (desc == NULL) return; if (desc->u.generic) bus_dmamem_free(ioat->hw_desc_tag, desc->u.generic, ioat->hw_desc_map); free(desc, M_IOAT); } /* * Reserves space in this IOAT descriptor ring by ensuring enough slots remain * for 'num_descs'. * * If mflags contains M_WAITOK, blocks until enough space is available. * * Returns zero on success, or an errno on error. If num_descs is beyond the * maximum ring size, returns EINVAL; if allocation would block and mflags * contains M_NOWAIT, returns EAGAIN. * * Must be called with the submit_lock held; returns with the lock held. The * lock may be dropped to allocate the ring.
* * (The submit_lock is needed to add any entries to the ring, so callers are * assured enough room is available.) */ static int ioat_reserve_space(struct ioat_softc *ioat, uint32_t num_descs, int mflags) { struct ioat_descriptor **new_ring; uint32_t order; int error; mtx_assert(&ioat->submit_lock, MA_OWNED); error = 0; if (num_descs < 1 || num_descs > (1 << IOAT_MAX_ORDER)) { error = EINVAL; goto out; } if (ioat->quiescing) { error = ENXIO; goto out; } for (;;) { if (ioat_get_ring_space(ioat) >= num_descs) goto out; order = ioat->ring_size_order; if (ioat->is_resize_pending || order == IOAT_MAX_ORDER) { if ((mflags & M_WAITOK) != 0) { msleep(&ioat->tail, &ioat->submit_lock, 0, "ioat_rsz", 0); continue; } error = EAGAIN; break; } ioat->is_resize_pending = TRUE; for (;;) { mtx_unlock(&ioat->submit_lock); new_ring = ioat_prealloc_ring(ioat, 1 << (order + 1), TRUE, mflags); mtx_lock(&ioat->submit_lock); KASSERT(ioat->ring_size_order == order, ("is_resize_pending should protect order")); if (new_ring == NULL) { KASSERT((mflags & M_WAITOK) == 0, ("allocation failed")); error = EAGAIN; break; } error = ring_grow(ioat, order, new_ring); if (error == 0) break; } ioat->is_resize_pending = FALSE; wakeup(&ioat->tail); if (error) break; } out: mtx_assert(&ioat->submit_lock, MA_OWNED); return (error); } static struct ioat_descriptor ** ioat_prealloc_ring(struct ioat_softc *ioat, uint32_t size, boolean_t need_dscr, int mflags) { struct ioat_descriptor **ring; uint32_t i; int error; KASSERT(size > 0 && powerof2(size), ("bogus size")); ring = malloc(size * sizeof(*ring), M_IOAT, M_ZERO | mflags); if (ring == NULL) return (NULL); if (need_dscr) { error = ENOMEM; for (i = size / 2; i < size; i++) { ring[i] = ioat_alloc_ring_entry(ioat, mflags); if (ring[i] == NULL) goto out; ring[i]->id = i; } } error = 0; out: if (error != 0 && ring != NULL) { ioat_free_ring(ioat, size, ring); ring = NULL; } return (ring); } static void ioat_free_ring(struct ioat_softc *ioat, uint32_t size, struct ioat_descriptor **ring) { uint32_t i; for (i = 0; i < size; i++) { if (ring[i] != NULL) ioat_free_ring_entry(ioat, ring[i]); } free(ring, M_IOAT); } static struct ioat_descriptor * ioat_get_ring_entry(struct ioat_softc *ioat, uint32_t index) { return (ioat->ring[index % (1 << ioat->ring_size_order)]); } static int ring_grow(struct ioat_softc *ioat, uint32_t oldorder, struct ioat_descriptor **newring) { struct ioat_descriptor *tmp, *next; struct ioat_dma_hw_descriptor *hw; uint32_t oldsize, newsize, head, tail, i, end; int error; CTR0(KTR_IOAT, __func__); mtx_assert(&ioat->submit_lock, MA_OWNED); if (oldorder != ioat->ring_size_order || oldorder >= IOAT_MAX_ORDER) { error = EINVAL; goto out; } oldsize = (1 << oldorder); newsize = (1 << (oldorder + 1)); mtx_lock(&ioat->cleanup_lock); head = ioat->head & (oldsize - 1); tail = ioat->tail & (oldsize - 1); /* Copy old descriptors to new ring */ for (i = 0; i < oldsize; i++) newring[i] = ioat->ring[i]; /* * If head has wrapped but tail hasn't, we must swap some descriptors * around so that tail can increment directly to head. 
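 *
 * As a concrete example (arbitrary numbers): with oldsize = 32, a
 * free-running ioat->tail of 60 and ioat->head of 70, the masked indices
 * are tail = 28 and head = 6, so the 10 active descriptors occupy slots
 * 28-31 and 0-5 of the old ring.  The swap below moves entries 0 through
 * head into slots oldsize + 0 through oldsize + head and then bumps head
 * to 38, so the active span becomes contiguous (tail = 28 up to head = 38)
 * in the doubled ring.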
*/ if (head < tail) { for (i = 0; i <= head; i++) { tmp = newring[oldsize + i]; newring[oldsize + i] = newring[i]; newring[oldsize + i]->id = oldsize + i; newring[i] = tmp; newring[i]->id = i; } head += oldsize; } KASSERT(head >= tail, ("invariants")); /* Head didn't wrap; we only need to link in oldsize..newsize */ if (head < oldsize) { i = oldsize - 1; end = newsize; } else { /* Head did wrap; link newhead..newsize and 0..oldhead */ i = head; end = newsize + (head - oldsize) + 1; } /* * Fix up hardware ring, being careful not to trample the active * section (tail -> head). */ for (; i < end; i++) { KASSERT((i & (newsize - 1)) < tail || (i & (newsize - 1)) >= head, ("trampling snake")); next = newring[(i + 1) & (newsize - 1)]; hw = newring[i & (newsize - 1)]->u.dma; hw->next = next->hw_desc_bus_addr; } free(ioat->ring, M_IOAT); ioat->ring = newring; ioat->ring_size_order = oldorder + 1; ioat->tail = tail; ioat->head = head; error = 0; mtx_unlock(&ioat->cleanup_lock); out: if (error) ioat_free_ring(ioat, (1 << (oldorder + 1)), newring); return (error); } static int ring_shrink(struct ioat_softc *ioat, uint32_t oldorder, struct ioat_descriptor **newring) { struct ioat_dma_hw_descriptor *hw; struct ioat_descriptor *ent, *next; uint32_t oldsize, newsize, current_idx, new_idx, i; int error; CTR0(KTR_IOAT, __func__); mtx_assert(&ioat->submit_lock, MA_OWNED); if (oldorder != ioat->ring_size_order || oldorder <= IOAT_MIN_ORDER) { error = EINVAL; goto out_unlocked; } oldsize = (1 << oldorder); newsize = (1 << (oldorder - 1)); mtx_lock(&ioat->cleanup_lock); /* Can't shrink below current active set! */ if (ioat_get_active(ioat) >= newsize) { error = ENOMEM; goto out; } /* * Copy current descriptors to the new ring, dropping the removed * descriptors. */ for (i = 0; i < newsize; i++) { current_idx = (ioat->tail + i) & (oldsize - 1); new_idx = (ioat->tail + i) & (newsize - 1); newring[new_idx] = ioat->ring[current_idx]; newring[new_idx]->id = new_idx; } /* Free deleted descriptors */ for (i = newsize; i < oldsize; i++) { ent = ioat_get_ring_entry(ioat, ioat->tail + i); ioat_free_ring_entry(ioat, ent); } /* Fix up hardware ring. */ hw = newring[(ioat->tail + newsize - 1) & (newsize - 1)]->u.dma; next = newring[(ioat->tail + newsize) & (newsize - 1)]; hw->next = next->hw_desc_bus_addr; free(ioat->ring, M_IOAT); ioat->ring = newring; ioat->ring_size_order = oldorder - 1; error = 0; out: mtx_unlock(&ioat->cleanup_lock); out_unlocked: if (error) ioat_free_ring(ioat, (1 << (oldorder - 1)), newring); return (error); } static void ioat_halted_debug(struct ioat_softc *ioat, uint32_t chanerr) { struct ioat_descriptor *desc; ioat_log_message(0, "Channel halted (%b)\n", (int)chanerr, IOAT_CHANERR_STR); if (chanerr == 0) return; mtx_assert(&ioat->cleanup_lock, MA_OWNED); desc = ioat_get_ring_entry(ioat, ioat->tail + 0); dump_descriptor(desc->u.raw); desc = ioat_get_ring_entry(ioat, ioat->tail + 1); dump_descriptor(desc->u.raw); } static void ioat_timer_callback(void *arg) { struct ioat_descriptor **newring; struct ioat_softc *ioat; uint32_t order; ioat = arg; ioat_log_message(1, "%s\n", __func__); if (ioat->is_completion_pending) { ioat_process_events(ioat); return; } /* Slowly scale the ring down if idle. 
*/ mtx_lock(&ioat->submit_lock); order = ioat->ring_size_order; if (ioat->is_resize_pending || order == IOAT_MIN_ORDER) { mtx_unlock(&ioat->submit_lock); goto out; } ioat->is_resize_pending = TRUE; mtx_unlock(&ioat->submit_lock); newring = ioat_prealloc_ring(ioat, 1 << (order - 1), FALSE, M_NOWAIT); mtx_lock(&ioat->submit_lock); KASSERT(ioat->ring_size_order == order, ("resize_pending protects order")); if (newring != NULL) ring_shrink(ioat, order, newring); ioat->is_resize_pending = FALSE; mtx_unlock(&ioat->submit_lock); out: if (ioat->ring_size_order > IOAT_MIN_ORDER) callout_reset(&ioat->timer, 10 * hz, ioat_timer_callback, ioat); } /* * Support Functions */ static void ioat_submit_single(struct ioat_softc *ioat) { ioat_get(ioat, IOAT_ACTIVE_DESCR_REF); atomic_add_rel_int(&ioat->head, 1); atomic_add_rel_int(&ioat->hw_head, 1); if (!ioat->is_completion_pending) { ioat->is_completion_pending = TRUE; callout_reset(&ioat->timer, IOAT_INTR_TIMO, ioat_timer_callback, ioat); } ioat->stats.descriptors_submitted++; } static int ioat_reset_hw(struct ioat_softc *ioat) { uint64_t status; uint32_t chanerr; unsigned timeout; int error; mtx_lock(IOAT_REFLK); ioat->quiescing = TRUE; ioat_drain_locked(ioat); mtx_unlock(IOAT_REFLK); status = ioat_get_chansts(ioat); if (is_ioat_active(status) || is_ioat_idle(status)) ioat_suspend(ioat); /* Wait at most 20 ms */ for (timeout = 0; (is_ioat_active(status) || is_ioat_idle(status)) && timeout < 20; timeout++) { DELAY(1000); status = ioat_get_chansts(ioat); } if (timeout == 20) { error = ETIMEDOUT; goto out; } KASSERT(ioat_get_active(ioat) == 0, ("active after quiesce")); chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET); ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr); /* * IOAT v3 workaround - CHANERRMSK_INT with 3E07h to masks out errors * that can cause stability issues for IOAT v3. */ pci_write_config(ioat->device, IOAT_CFG_CHANERRMASK_INT_OFFSET, 0x3e07, 4); chanerr = pci_read_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, 4); pci_write_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, chanerr, 4); /* * BDXDE and BWD models reset MSI-X registers on device reset. * Save/restore their contents manually. */ if (ioat_model_resets_msix(ioat)) { ioat_log_message(1, "device resets MSI-X registers; saving\n"); pci_save_state(ioat->device); } ioat_reset(ioat); /* Wait at most 20 ms */ for (timeout = 0; ioat_reset_pending(ioat) && timeout < 20; timeout++) DELAY(1000); if (timeout == 20) { error = ETIMEDOUT; goto out; } if (ioat_model_resets_msix(ioat)) { ioat_log_message(1, "device resets registers; restored\n"); pci_restore_state(ioat->device); } /* Reset attempts to return the hardware to "halted." */ status = ioat_get_chansts(ioat); if (is_ioat_active(status) || is_ioat_idle(status)) { /* So this really shouldn't happen... */ ioat_log_message(0, "Device is active after a reset?\n"); ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN); error = 0; goto out; } chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET); if (chanerr != 0) { mtx_lock(&ioat->cleanup_lock); ioat_halted_debug(ioat, chanerr); mtx_unlock(&ioat->cleanup_lock); error = EIO; goto out; } /* * Bring device back online after reset. Writing CHAINADDR brings the * device back to active. * * The internal ring counter resets to zero, so we have to start over * at zero as well. 
*/ ioat->tail = ioat->head = ioat->hw_head = 0; ioat->last_seen = 0; ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN); ioat_write_chancmp(ioat, ioat->comp_update_bus_addr); ioat_write_chainaddr(ioat, ioat->ring[0]->hw_desc_bus_addr); error = 0; out: mtx_lock(IOAT_REFLK); ioat->quiescing = FALSE; mtx_unlock(IOAT_REFLK); if (error == 0) error = ioat_start_channel(ioat); return (error); } static int sysctl_handle_chansts(SYSCTL_HANDLER_ARGS) { struct ioat_softc *ioat; struct sbuf sb; uint64_t status; int error; ioat = arg1; status = ioat_get_chansts(ioat) & IOAT_CHANSTS_STATUS; sbuf_new_for_sysctl(&sb, NULL, 256, req); switch (status) { case IOAT_CHANSTS_ACTIVE: sbuf_printf(&sb, "ACTIVE"); break; case IOAT_CHANSTS_IDLE: sbuf_printf(&sb, "IDLE"); break; case IOAT_CHANSTS_SUSPENDED: sbuf_printf(&sb, "SUSPENDED"); break; case IOAT_CHANSTS_HALTED: sbuf_printf(&sb, "HALTED"); break; case IOAT_CHANSTS_ARMED: sbuf_printf(&sb, "ARMED"); break; default: sbuf_printf(&sb, "UNKNOWN"); break; } error = sbuf_finish(&sb); sbuf_delete(&sb); if (error != 0 || req->newptr == NULL) return (error); return (EINVAL); } static int sysctl_handle_dpi(SYSCTL_HANDLER_ARGS) { struct ioat_softc *ioat; struct sbuf sb; #define PRECISION "1" const uintmax_t factor = 10; uintmax_t rate; int error; ioat = arg1; sbuf_new_for_sysctl(&sb, NULL, 16, req); if (ioat->stats.interrupts == 0) { sbuf_printf(&sb, "NaN"); goto out; } rate = ioat->stats.descriptors_processed * factor / ioat->stats.interrupts; sbuf_printf(&sb, "%ju.%." PRECISION "ju", rate / factor, rate % factor); #undef PRECISION out: error = sbuf_finish(&sb); sbuf_delete(&sb); if (error != 0 || req->newptr == NULL) return (error); return (EINVAL); } static int sysctl_handle_error(SYSCTL_HANDLER_ARGS) { struct ioat_descriptor *desc; struct ioat_softc *ioat; int error, arg; ioat = arg1; arg = 0; error = SYSCTL_OUT(req, &arg, sizeof(arg)); if (error != 0 || req->newptr == NULL) return (error); error = SYSCTL_IN(req, &arg, sizeof(arg)); if (error != 0) return (error); if (arg != 0) { ioat_acquire(&ioat->dmaengine); desc = ioat_op_generic(ioat, IOAT_OP_COPY, 1, 0xffff000000000000ull, 0xffff000000000000ull, NULL, NULL, 0); if (desc == NULL) error = ENOMEM; else ioat_submit_single(ioat); ioat_release(&ioat->dmaengine); } return (error); } static int sysctl_handle_reset(SYSCTL_HANDLER_ARGS) { struct ioat_softc *ioat; int error, arg; ioat = arg1; arg = 0; error = SYSCTL_OUT(req, &arg, sizeof(arg)); if (error != 0 || req->newptr == NULL) return (error); error = SYSCTL_IN(req, &arg, sizeof(arg)); if (error != 0) return (error); if (arg != 0) error = ioat_reset_hw(ioat); return (error); } static void dump_descriptor(void *hw_desc) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 8; j++) printf("%08x ", ((uint32_t *)hw_desc)[i * 8 + j]); printf("\n"); } } static void ioat_setup_sysctl(device_t device) { struct sysctl_oid_list *par, *statpar, *state, *hammer; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree, *tmp; struct ioat_softc *ioat; ioat = DEVICE2SOFTC(device); ctx = device_get_sysctl_ctx(device); tree = device_get_sysctl_tree(device); par = SYSCTL_CHILDREN(tree); SYSCTL_ADD_INT(ctx, par, OID_AUTO, "version", CTLFLAG_RD, &ioat->version, 0, "HW version (0xMM form)"); SYSCTL_ADD_UINT(ctx, par, OID_AUTO, "max_xfer_size", CTLFLAG_RD, &ioat->max_xfer_size, 0, "HW maximum transfer size"); SYSCTL_ADD_INT(ctx, par, OID_AUTO, "intrdelay_supported", CTLFLAG_RD, &ioat->intrdelay_supported, 0, "Is INTRDELAY supported"); SYSCTL_ADD_U16(ctx, par, OID_AUTO, "intrdelay_max", CTLFLAG_RD, 
&ioat->intrdelay_max, 0, "Maximum configurable INTRDELAY on this channel (microseconds)"); tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "state", CTLFLAG_RD, NULL, "IOAT channel internal state"); state = SYSCTL_CHILDREN(tmp); SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "ring_size_order", CTLFLAG_RD, &ioat->ring_size_order, 0, "SW descriptor ring size order"); SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "head", CTLFLAG_RD, &ioat->head, 0, "SW descriptor head pointer index"); SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "tail", CTLFLAG_RD, &ioat->tail, 0, "SW descriptor tail pointer index"); SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "hw_head", CTLFLAG_RD, &ioat->hw_head, 0, "HW DMACOUNT"); SYSCTL_ADD_UQUAD(ctx, state, OID_AUTO, "last_completion", CTLFLAG_RD, ioat->comp_update, "HW addr of last completion"); SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_resize_pending", CTLFLAG_RD, &ioat->is_resize_pending, 0, "resize pending"); SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_completion_pending", CTLFLAG_RD, &ioat->is_completion_pending, 0, "completion pending"); SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_reset_pending", CTLFLAG_RD, &ioat->is_reset_pending, 0, "reset pending"); SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_channel_running", CTLFLAG_RD, &ioat->is_channel_running, 0, "channel running"); SYSCTL_ADD_PROC(ctx, state, OID_AUTO, "chansts", CTLTYPE_STRING | CTLFLAG_RD, ioat, 0, sysctl_handle_chansts, "A", "String of the channel status"); SYSCTL_ADD_U16(ctx, state, OID_AUTO, "intrdelay", CTLFLAG_RD, &ioat->cached_intrdelay, 0, "Current INTRDELAY on this channel (cached, microseconds)"); tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "hammer", CTLFLAG_RD, NULL, "Big hammers (mostly for testing)"); hammer = SYSCTL_CHILDREN(tmp); SYSCTL_ADD_PROC(ctx, hammer, OID_AUTO, "force_hw_reset", CTLTYPE_INT | CTLFLAG_RW, ioat, 0, sysctl_handle_reset, "I", "Set to non-zero to reset the hardware"); SYSCTL_ADD_PROC(ctx, hammer, OID_AUTO, "force_hw_error", CTLTYPE_INT | CTLFLAG_RW, ioat, 0, sysctl_handle_error, "I", "Set to non-zero to inject a recoverable hardware error"); tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "stats", CTLFLAG_RD, NULL, "IOAT channel statistics"); statpar = SYSCTL_CHILDREN(tmp); SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "interrupts", CTLFLAG_RW, &ioat->stats.interrupts, "Number of interrupts processed on this channel"); SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "descriptors", CTLFLAG_RW, &ioat->stats.descriptors_processed, "Number of descriptors processed on this channel"); SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "submitted", CTLFLAG_RW, &ioat->stats.descriptors_submitted, "Number of descriptors submitted to this channel"); SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "errored", CTLFLAG_RW, &ioat->stats.descriptors_error, "Number of descriptors failed by channel errors"); SYSCTL_ADD_U32(ctx, statpar, OID_AUTO, "halts", CTLFLAG_RW, &ioat->stats.channel_halts, 0, "Number of times the channel has halted"); SYSCTL_ADD_U32(ctx, statpar, OID_AUTO, "last_halt_chanerr", CTLFLAG_RW, &ioat->stats.last_halt_chanerr, 0, "The raw CHANERR when the channel was last halted"); SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "desc_per_interrupt", CTLTYPE_STRING | CTLFLAG_RD, ioat, 0, sysctl_handle_dpi, "A", "Descriptors per interrupt"); } static inline struct ioat_softc * ioat_get(struct ioat_softc *ioat, enum ioat_ref_kind kind) { uint32_t old; KASSERT(kind < IOAT_NUM_REF_KINDS, ("bogus")); old = atomic_fetchadd_32(&ioat->refcnt, 1); KASSERT(old < UINT32_MAX, ("refcnt overflow")); #ifdef INVARIANTS old = atomic_fetchadd_32(&ioat->refkinds[kind], 1); 
KASSERT(old < UINT32_MAX, ("refcnt kind overflow")); #endif return (ioat); } static inline void ioat_putn(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind) { _ioat_putn(ioat, n, kind, FALSE); } static inline void ioat_putn_locked(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind) { _ioat_putn(ioat, n, kind, TRUE); } static inline void _ioat_putn(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind, boolean_t locked) { uint32_t old; KASSERT(kind < IOAT_NUM_REF_KINDS, ("bogus")); if (n == 0) return; #ifdef INVARIANTS old = atomic_fetchadd_32(&ioat->refkinds[kind], -n); KASSERT(old >= n, ("refcnt kind underflow")); #endif /* Skip acquiring the lock if resulting refcnt > 0. */ for (;;) { old = ioat->refcnt; if (old <= n) break; if (atomic_cmpset_32(&ioat->refcnt, old, old - n)) return; } if (locked) mtx_assert(IOAT_REFLK, MA_OWNED); else mtx_lock(IOAT_REFLK); old = atomic_fetchadd_32(&ioat->refcnt, -n); KASSERT(old >= n, ("refcnt error")); if (old == n) wakeup(IOAT_REFLK); if (!locked) mtx_unlock(IOAT_REFLK); } static inline void ioat_put(struct ioat_softc *ioat, enum ioat_ref_kind kind) { ioat_putn(ioat, 1, kind); } static void ioat_drain_locked(struct ioat_softc *ioat) { mtx_assert(IOAT_REFLK, MA_OWNED); while (ioat->refcnt > 0) msleep(IOAT_REFLK, IOAT_REFLK, 0, "ioat_drain", 0); } Index: projects/release-pkg/sys/dev/ioat/ioat.h =================================================================== --- projects/release-pkg/sys/dev/ioat/ioat.h (revision 293224) +++ projects/release-pkg/sys/dev/ioat/ioat.h (revision 293225) @@ -1,141 +1,142 @@ /*- * Copyright (C) 2012 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ __FBSDID("$FreeBSD$"); #ifndef __IOAT_H__ #define __IOAT_H__ #include #include /* * This file defines the public interface to the IOAT driver. */ /* * Enables an interrupt for this operation. Typically, you would only enable * this on the last operation in a group */ #define DMA_INT_EN 0x1 /* * Like M_NOWAIT. Operations will return NULL if they cannot allocate a * descriptor without blocking. */ #define DMA_NO_WAIT 0x2 #define DMA_ALL_FLAGS (DMA_INT_EN | DMA_NO_WAIT) /* * Hardware revision number. Different hardware revisions support different * features. For example, 3.2 cannot read from MMIO space, while 3.3 can. 
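 * Consumers that need an MMIO-capable channel can therefore check that
 * ioat_get_hwversion() reports at least IOAT_VER_3_3 before relying on it.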
*/ #define IOAT_VER_3_0 0x30 #define IOAT_VER_3_2 0x32 #define IOAT_VER_3_3 0x33 typedef void *bus_dmaengine_t; struct bus_dmadesc; typedef void (*bus_dmaengine_callback_t)(void *arg, int error); /* * Called first to acquire a reference to the DMA channel */ bus_dmaengine_t ioat_get_dmaengine(uint32_t channel_index); /* Release the DMA channel */ void ioat_put_dmaengine(bus_dmaengine_t dmaengine); /* Check the DMA engine's HW version */ int ioat_get_hwversion(bus_dmaengine_t dmaengine); +size_t ioat_get_max_io_size(bus_dmaengine_t dmaengine); /* * Set interrupt coalescing on a DMA channel. * * The argument is in microseconds. A zero value disables coalescing. Any * other value delays interrupt generation for N microseconds to provide * opportunity to coalesce multiple operations into a single interrupt. * * Returns an error status, or zero on success. * * - ERANGE if the given value exceeds the delay supported by the hardware. * (All current hardware supports a maximum of 0x3fff microseconds delay.) * - ENODEV if the hardware does not support interrupt coalescing. */ int ioat_set_interrupt_coalesce(bus_dmaengine_t dmaengine, uint16_t delay); /* * Return the maximum supported coalescing period, for use in * ioat_set_interrupt_coalesce(). If the hardware does not support coalescing, * returns zero. */ uint16_t ioat_get_max_coalesce_period(bus_dmaengine_t dmaengine); /* * Acquire must be called before issuing an operation to perform. Release is * called after. Multiple operations can be issued within the context of one * acquire and release */ void ioat_acquire(bus_dmaengine_t dmaengine); void ioat_release(bus_dmaengine_t dmaengine); /* * Issue a blockfill operation. The 64-bit pattern 'fillpattern' is written to * 'len' physically contiguous bytes at 'dst'. * * Only supported on devices with the BFILL capability. */ struct bus_dmadesc *ioat_blockfill(bus_dmaengine_t dmaengine, bus_addr_t dst, uint64_t fillpattern, bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags); /* Issues the copy data operation */ struct bus_dmadesc *ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst, bus_addr_t src, bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags); /* * Issue a copy data operation, with constraints: * - src1, src2, dst1, dst2 are all page-aligned addresses * - The quantity to copy is exactly 2 pages; * - src1 -> dst1, src2 -> dst2 * * Why use this instead of normal _copy()? You can copy two non-contiguous * pages (src, dst, or both) with one descriptor. */ struct bus_dmadesc *ioat_copy_8k_aligned(bus_dmaengine_t dmaengine, bus_addr_t dst1, bus_addr_t dst2, bus_addr_t src1, bus_addr_t src2, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags); /* * Issues a null operation. This issues the operation to the hardware, but the * hardware doesn't do anything with it. */ struct bus_dmadesc *ioat_null(bus_dmaengine_t dmaengine, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags); #endif /* __IOAT_H__ */ Index: projects/release-pkg/sys/dev/iwm/if_iwm.c =================================================================== --- projects/release-pkg/sys/dev/iwm/if_iwm.c (revision 293224) +++ projects/release-pkg/sys/dev/iwm/if_iwm.c (revision 293225) @@ -1,5004 +1,5008 @@ /* $OpenBSD: if_iwm.c,v 1.39 2015/03/23 00:35:19 jsg Exp $ */ /* * Copyright (c) 2014 genua mbh * Copyright (c) 2014 Fixup Software Ltd. 
* * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /*- * Based on BSD-licensed source modules in the Linux iwlwifi driver, * which were used as the reference documentation for this implementation. * * Driver version we are currently based off of is * Linux 3.14.3 (tag id a2df521e42b1d9a23f620ac79dbfe8655a8391dd) * *********************************************************************** * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * Copyright(c) 2007 - 2013 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, * USA * * The full GNU General Public License is included in this distribution * in the file called COPYING. * * Contact Information: * Intel Linux Wireless * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 * * * BSD LICENSE * * Copyright(c) 2005 - 2013 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2007-2010 Damien Bergamini * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include const uint8_t iwm_nvm_channels[] = { /* 2.4 GHz */ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 5 GHz */ 36, 40, 44 , 48, 52, 56, 60, 64, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 149, 153, 157, 161, 165 }; #define IWM_NUM_2GHZ_CHANNELS 14 /* * XXX For now, there's simply a fixed set of rate table entries * that are populated. 
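 * (The 'rate' field below is in 500 kb/s units, matching net80211 rate
 * codes: 2 is 1 Mb/s, 22 is 11 Mb/s, 108 is 54 Mb/s.)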
*/ const struct iwm_rate { uint8_t rate; uint8_t plcp; } iwm_rates[] = { { 2, IWM_RATE_1M_PLCP }, { 4, IWM_RATE_2M_PLCP }, { 11, IWM_RATE_5M_PLCP }, { 22, IWM_RATE_11M_PLCP }, { 12, IWM_RATE_6M_PLCP }, { 18, IWM_RATE_9M_PLCP }, { 24, IWM_RATE_12M_PLCP }, { 36, IWM_RATE_18M_PLCP }, { 48, IWM_RATE_24M_PLCP }, { 72, IWM_RATE_36M_PLCP }, { 96, IWM_RATE_48M_PLCP }, { 108, IWM_RATE_54M_PLCP }, }; #define IWM_RIDX_CCK 0 #define IWM_RIDX_OFDM 4 #define IWM_RIDX_MAX (nitems(iwm_rates)-1) #define IWM_RIDX_IS_CCK(_i_) ((_i_) < IWM_RIDX_OFDM) #define IWM_RIDX_IS_OFDM(_i_) ((_i_) >= IWM_RIDX_OFDM) static int iwm_store_cscheme(struct iwm_softc *, const uint8_t *, size_t); static int iwm_firmware_store_section(struct iwm_softc *, enum iwm_ucode_type, const uint8_t *, size_t); static int iwm_set_default_calib(struct iwm_softc *, const void *); static void iwm_fw_info_free(struct iwm_fw_info *); static int iwm_read_firmware(struct iwm_softc *, enum iwm_ucode_type); static void iwm_dma_map_addr(void *, bus_dma_segment_t *, int, int); static int iwm_dma_contig_alloc(bus_dma_tag_t, struct iwm_dma_info *, bus_size_t, bus_size_t); static void iwm_dma_contig_free(struct iwm_dma_info *); static int iwm_alloc_fwmem(struct iwm_softc *); static void iwm_free_fwmem(struct iwm_softc *); static int iwm_alloc_sched(struct iwm_softc *); static void iwm_free_sched(struct iwm_softc *); static int iwm_alloc_kw(struct iwm_softc *); static void iwm_free_kw(struct iwm_softc *); static int iwm_alloc_ict(struct iwm_softc *); static void iwm_free_ict(struct iwm_softc *); static int iwm_alloc_rx_ring(struct iwm_softc *, struct iwm_rx_ring *); static void iwm_reset_rx_ring(struct iwm_softc *, struct iwm_rx_ring *); static void iwm_free_rx_ring(struct iwm_softc *, struct iwm_rx_ring *); static int iwm_alloc_tx_ring(struct iwm_softc *, struct iwm_tx_ring *, int); static void iwm_reset_tx_ring(struct iwm_softc *, struct iwm_tx_ring *); static void iwm_free_tx_ring(struct iwm_softc *, struct iwm_tx_ring *); static void iwm_enable_interrupts(struct iwm_softc *); static void iwm_restore_interrupts(struct iwm_softc *); static void iwm_disable_interrupts(struct iwm_softc *); static void iwm_ict_reset(struct iwm_softc *); static int iwm_allow_mcast(struct ieee80211vap *, struct iwm_softc *); static void iwm_stop_device(struct iwm_softc *); static void iwm_mvm_nic_config(struct iwm_softc *); static int iwm_nic_rx_init(struct iwm_softc *); static int iwm_nic_tx_init(struct iwm_softc *); static int iwm_nic_init(struct iwm_softc *); static void iwm_enable_txq(struct iwm_softc *, int, int); static int iwm_post_alive(struct iwm_softc *); static int iwm_nvm_read_chunk(struct iwm_softc *, uint16_t, uint16_t, uint16_t, uint8_t *, uint16_t *); static int iwm_nvm_read_section(struct iwm_softc *, uint16_t, uint8_t *, uint16_t *); static void iwm_init_channel_map(struct iwm_softc *, const uint16_t * const); static int iwm_parse_nvm_data(struct iwm_softc *, const uint16_t *, const uint16_t *, const uint16_t *, uint8_t, uint8_t); struct iwm_nvm_section; static int iwm_parse_nvm_sections(struct iwm_softc *, struct iwm_nvm_section *); static int iwm_nvm_init(struct iwm_softc *); static int iwm_firmware_load_chunk(struct iwm_softc *, uint32_t, const uint8_t *, uint32_t); static int iwm_load_firmware(struct iwm_softc *, enum iwm_ucode_type); static int iwm_start_fw(struct iwm_softc *, enum iwm_ucode_type); static int iwm_fw_alive(struct iwm_softc *, uint32_t); static int iwm_send_tx_ant_cfg(struct iwm_softc *, uint8_t); static int iwm_send_phy_cfg_cmd(struct 
iwm_softc *); static int iwm_mvm_load_ucode_wait_alive(struct iwm_softc *, enum iwm_ucode_type); static int iwm_run_init_mvm_ucode(struct iwm_softc *, int); static int iwm_rx_addbuf(struct iwm_softc *, int, int); static int iwm_mvm_calc_rssi(struct iwm_softc *, struct iwm_rx_phy_info *); static int iwm_mvm_get_signal_strength(struct iwm_softc *, struct iwm_rx_phy_info *); static void iwm_mvm_rx_rx_phy_cmd(struct iwm_softc *, struct iwm_rx_packet *, struct iwm_rx_data *); static int iwm_get_noise(const struct iwm_mvm_statistics_rx_non_phy *); static void iwm_mvm_rx_rx_mpdu(struct iwm_softc *, struct iwm_rx_packet *, struct iwm_rx_data *); static int iwm_mvm_rx_tx_cmd_single(struct iwm_softc *, struct iwm_rx_packet *, struct iwm_node *); static void iwm_mvm_rx_tx_cmd(struct iwm_softc *, struct iwm_rx_packet *, struct iwm_rx_data *); static void iwm_cmd_done(struct iwm_softc *, struct iwm_rx_packet *); #if 0 static void iwm_update_sched(struct iwm_softc *, int, int, uint8_t, uint16_t); #endif static const struct iwm_rate * iwm_tx_fill_cmd(struct iwm_softc *, struct iwm_node *, struct ieee80211_frame *, struct iwm_tx_cmd *); static int iwm_tx(struct iwm_softc *, struct mbuf *, struct ieee80211_node *, int); static int iwm_raw_xmit(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static void iwm_mvm_add_sta_cmd_v6_to_v5(struct iwm_mvm_add_sta_cmd_v6 *, struct iwm_mvm_add_sta_cmd_v5 *); static int iwm_mvm_send_add_sta_cmd_status(struct iwm_softc *, struct iwm_mvm_add_sta_cmd_v6 *, int *); static int iwm_mvm_sta_send_to_fw(struct iwm_softc *, struct iwm_node *, int); static int iwm_mvm_add_sta(struct iwm_softc *, struct iwm_node *); static int iwm_mvm_update_sta(struct iwm_softc *, struct iwm_node *); static int iwm_mvm_add_int_sta_common(struct iwm_softc *, struct iwm_int_sta *, const uint8_t *, uint16_t, uint16_t); static int iwm_mvm_add_aux_sta(struct iwm_softc *); static int iwm_mvm_update_quotas(struct iwm_softc *, struct iwm_node *); static int iwm_auth(struct ieee80211vap *, struct iwm_softc *); static int iwm_assoc(struct ieee80211vap *, struct iwm_softc *); static int iwm_release(struct iwm_softc *, struct iwm_node *); static struct ieee80211_node * iwm_node_alloc(struct ieee80211vap *, const uint8_t[IEEE80211_ADDR_LEN]); static void iwm_setrates(struct iwm_softc *, struct iwm_node *); static int iwm_media_change(struct ifnet *); static int iwm_newstate(struct ieee80211vap *, enum ieee80211_state, int); static void iwm_endscan_cb(void *, int); static int iwm_init_hw(struct iwm_softc *); static void iwm_init(struct iwm_softc *); static void iwm_start(struct iwm_softc *); static void iwm_stop(struct iwm_softc *); static void iwm_watchdog(void *); static void iwm_parent(struct ieee80211com *); #ifdef IWM_DEBUG static const char * iwm_desc_lookup(uint32_t); static void iwm_nic_error(struct iwm_softc *); #endif static void iwm_notif_intr(struct iwm_softc *); static void iwm_intr(void *); static int iwm_attach(device_t); static void iwm_preinit(void *); static int iwm_detach_local(struct iwm_softc *sc, int); static void iwm_init_task(void *); static void iwm_radiotap_attach(struct iwm_softc *); static struct ieee80211vap * iwm_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void iwm_vap_delete(struct ieee80211vap *); static void iwm_scan_start(struct ieee80211com *); static void iwm_scan_end(struct ieee80211com *); static void 
iwm_update_mcast(struct ieee80211com *); static void iwm_set_channel(struct ieee80211com *); static void iwm_scan_curchan(struct ieee80211_scan_state *, unsigned long); static void iwm_scan_mindwell(struct ieee80211_scan_state *); static int iwm_detach(device_t); /* * Firmware parser. */ static int iwm_store_cscheme(struct iwm_softc *sc, const uint8_t *data, size_t dlen) { const struct iwm_fw_cscheme_list *l = (const void *)data; if (dlen < sizeof(*l) || dlen < sizeof(l->size) + l->size * sizeof(*l->cs)) return EINVAL; /* we don't actually store anything for now, always use s/w crypto */ return 0; } static int iwm_firmware_store_section(struct iwm_softc *sc, enum iwm_ucode_type type, const uint8_t *data, size_t dlen) { struct iwm_fw_sects *fws; struct iwm_fw_onesect *fwone; if (type >= IWM_UCODE_TYPE_MAX) return EINVAL; if (dlen < sizeof(uint32_t)) return EINVAL; fws = &sc->sc_fw.fw_sects[type]; if (fws->fw_count >= IWM_UCODE_SECT_MAX) return EINVAL; fwone = &fws->fw_sect[fws->fw_count]; /* first 32bit are device load offset */ memcpy(&fwone->fws_devoff, data, sizeof(uint32_t)); /* rest is data */ fwone->fws_data = data + sizeof(uint32_t); fwone->fws_len = dlen - sizeof(uint32_t); fws->fw_count++; fws->fw_totlen += fwone->fws_len; return 0; } /* iwlwifi: iwl-drv.c */ struct iwm_tlv_calib_data { uint32_t ucode_type; struct iwm_tlv_calib_ctrl calib; } __packed; static int iwm_set_default_calib(struct iwm_softc *sc, const void *data) { const struct iwm_tlv_calib_data *def_calib = data; uint32_t ucode_type = le32toh(def_calib->ucode_type); if (ucode_type >= IWM_UCODE_TYPE_MAX) { device_printf(sc->sc_dev, "Wrong ucode_type %u for default " "calibration.\n", ucode_type); return EINVAL; } sc->sc_default_calib[ucode_type].flow_trigger = def_calib->calib.flow_trigger; sc->sc_default_calib[ucode_type].event_trigger = def_calib->calib.event_trigger; return 0; } static void iwm_fw_info_free(struct iwm_fw_info *fw) { firmware_put(fw->fw_fp, FIRMWARE_UNLOAD); fw->fw_fp = NULL; /* don't touch fw->fw_status */ memset(fw->fw_sects, 0, sizeof(fw->fw_sects)); } static int iwm_read_firmware(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { struct iwm_fw_info *fw = &sc->sc_fw; const struct iwm_tlv_ucode_header *uhdr; struct iwm_ucode_tlv tlv; enum iwm_ucode_tlv_type tlv_type; const struct firmware *fwp; const uint8_t *data; int error = 0; size_t len; if (fw->fw_status == IWM_FW_STATUS_DONE && ucode_type != IWM_UCODE_TYPE_INIT) return 0; while (fw->fw_status == IWM_FW_STATUS_INPROGRESS) msleep(&sc->sc_fw, &sc->sc_mtx, 0, "iwmfwp", 0); fw->fw_status = IWM_FW_STATUS_INPROGRESS; if (fw->fw_fp != NULL) iwm_fw_info_free(fw); /* * Load firmware into driver memory. * fw_fp will be set. 
*/ IWM_UNLOCK(sc); fwp = firmware_get(sc->sc_fwname); IWM_LOCK(sc); if (fwp == NULL) { device_printf(sc->sc_dev, "could not read firmware %s (error %d)\n", sc->sc_fwname, error); goto out; } fw->fw_fp = fwp; /* * Parse firmware contents */ uhdr = (const void *)fw->fw_fp->data; if (*(const uint32_t *)fw->fw_fp->data != 0 || le32toh(uhdr->magic) != IWM_TLV_UCODE_MAGIC) { device_printf(sc->sc_dev, "invalid firmware %s\n", sc->sc_fwname); error = EINVAL; goto out; } sc->sc_fwver = le32toh(uhdr->ver); data = uhdr->data; len = fw->fw_fp->datasize - sizeof(*uhdr); while (len >= sizeof(tlv)) { size_t tlv_len; const void *tlv_data; memcpy(&tlv, data, sizeof(tlv)); tlv_len = le32toh(tlv.length); tlv_type = le32toh(tlv.type); len -= sizeof(tlv); data += sizeof(tlv); tlv_data = data; if (len < tlv_len) { device_printf(sc->sc_dev, "firmware too short: %zu bytes\n", len); error = EINVAL; goto parse_out; } switch ((int)tlv_type) { case IWM_UCODE_TLV_PROBE_MAX_LEN: if (tlv_len < sizeof(uint32_t)) { device_printf(sc->sc_dev, "%s: PROBE_MAX_LEN (%d) < sizeof(uint32_t)\n", __func__, (int) tlv_len); error = EINVAL; goto parse_out; } sc->sc_capa_max_probe_len = le32toh(*(const uint32_t *)tlv_data); /* limit it to something sensible */ if (sc->sc_capa_max_probe_len > (1<<16)) { IWM_DPRINTF(sc, IWM_DEBUG_FIRMWARE_TLV, "%s: IWM_UCODE_TLV_PROBE_MAX_LEN " "ridiculous\n", __func__); error = EINVAL; goto parse_out; } break; case IWM_UCODE_TLV_PAN: if (tlv_len) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_PAN: tlv_len (%d) > 0\n", __func__, (int) tlv_len); error = EINVAL; goto parse_out; } sc->sc_capaflags |= IWM_UCODE_TLV_FLAGS_PAN; break; case IWM_UCODE_TLV_FLAGS: if (tlv_len < sizeof(uint32_t)) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_FLAGS: tlv_len (%d) < sizeof(uint32_t)\n", __func__, (int) tlv_len); error = EINVAL; goto parse_out; } /* * Apparently there can be many flags, but Linux driver * parses only the first one, and so do we. * * XXX: why does this override IWM_UCODE_TLV_PAN? * Intentional or a bug? Observations from * current firmware file: * 1) TLV_PAN is parsed first * 2) TLV_FLAGS contains TLV_FLAGS_PAN * ==> this resets TLV_PAN to itself... 
hnnnk */ sc->sc_capaflags = le32toh(*(const uint32_t *)tlv_data); break; case IWM_UCODE_TLV_CSCHEME: if ((error = iwm_store_cscheme(sc, tlv_data, tlv_len)) != 0) { device_printf(sc->sc_dev, "%s: iwm_store_cscheme(): returned %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_NUM_OF_CPU: if (tlv_len != sizeof(uint32_t)) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_NUM_OF_CPU: tlv_len (%d) < sizeof(uint32_t)\n", __func__, (int) tlv_len); error = EINVAL; goto parse_out; } if (le32toh(*(const uint32_t*)tlv_data) != 1) { device_printf(sc->sc_dev, "%s: driver supports " "only TLV_NUM_OF_CPU == 1", __func__); error = EINVAL; goto parse_out; } break; case IWM_UCODE_TLV_SEC_RT: if ((error = iwm_firmware_store_section(sc, IWM_UCODE_TYPE_REGULAR, tlv_data, tlv_len)) != 0) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TYPE_REGULAR: iwm_firmware_store_section() failed; %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_SEC_INIT: if ((error = iwm_firmware_store_section(sc, IWM_UCODE_TYPE_INIT, tlv_data, tlv_len)) != 0) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TYPE_INIT: iwm_firmware_store_section() failed; %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_SEC_WOWLAN: if ((error = iwm_firmware_store_section(sc, IWM_UCODE_TYPE_WOW, tlv_data, tlv_len)) != 0) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TYPE_WOW: iwm_firmware_store_section() failed; %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_DEF_CALIB: if (tlv_len != sizeof(struct iwm_tlv_calib_data)) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_DEV_CALIB: tlv_len (%d) < sizeof(iwm_tlv_calib_data) (%d)\n", __func__, (int) tlv_len, (int) sizeof(struct iwm_tlv_calib_data)); error = EINVAL; goto parse_out; } if ((error = iwm_set_default_calib(sc, tlv_data)) != 0) { device_printf(sc->sc_dev, "%s: iwm_set_default_calib() failed: %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_PHY_SKU: if (tlv_len != sizeof(uint32_t)) { error = EINVAL; device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_PHY_SKU: tlv_len (%d) < sizeof(uint32_t)\n", __func__, (int) tlv_len); goto parse_out; } sc->sc_fw_phy_config = le32toh(*(const uint32_t *)tlv_data); break; case IWM_UCODE_TLV_API_CHANGES_SET: case IWM_UCODE_TLV_ENABLED_CAPABILITIES: /* ignore, not used by current driver */ break; default: device_printf(sc->sc_dev, "%s: unknown firmware section %d, abort\n", __func__, tlv_type); error = EINVAL; goto parse_out; } len -= roundup(tlv_len, 4); data += roundup(tlv_len, 4); } KASSERT(error == 0, ("unhandled error")); parse_out: if (error) { device_printf(sc->sc_dev, "firmware parse error %d, " "section type %d\n", error, tlv_type); } if (!(sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_PM_CMD_SUPPORT)) { device_printf(sc->sc_dev, "device uses unsupported power ops\n"); error = ENOTSUP; } out: if (error) { fw->fw_status = IWM_FW_STATUS_NONE; if (fw->fw_fp != NULL) iwm_fw_info_free(fw); } else fw->fw_status = IWM_FW_STATUS_DONE; wakeup(&sc->sc_fw); return error; } /* * DMA resource routines */ static void iwm_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { if (error != 0) return; KASSERT(nsegs == 1, ("too many DMA segments, %d should be 1", nsegs)); *(bus_addr_t *)arg = segs[0].ds_addr; } static int iwm_dma_contig_alloc(bus_dma_tag_t tag, struct iwm_dma_info *dma, bus_size_t size, bus_size_t alignment) { int error; dma->tag = NULL; dma->size = size; error = bus_dma_tag_create(tag, alignment, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size, 0, NULL, 
NULL, &dma->tag); if (error != 0) goto fail; error = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr, BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, &dma->map); if (error != 0) goto fail; error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size, iwm_dma_map_addr, &dma->paddr, BUS_DMA_NOWAIT); if (error != 0) goto fail; bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); return 0; fail: iwm_dma_contig_free(dma); return error; } static void iwm_dma_contig_free(struct iwm_dma_info *dma) { if (dma->map != NULL) { if (dma->vaddr != NULL) { bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->tag, dma->map); bus_dmamem_free(dma->tag, dma->vaddr, dma->map); dma->vaddr = NULL; } bus_dmamap_destroy(dma->tag, dma->map); dma->map = NULL; } if (dma->tag != NULL) { bus_dma_tag_destroy(dma->tag); dma->tag = NULL; } } /* fwmem is used to load firmware onto the card */ static int iwm_alloc_fwmem(struct iwm_softc *sc) { /* Must be aligned on a 16-byte boundary. */ return iwm_dma_contig_alloc(sc->sc_dmat, &sc->fw_dma, sc->sc_fwdmasegsz, 16); } static void iwm_free_fwmem(struct iwm_softc *sc) { iwm_dma_contig_free(&sc->fw_dma); } /* tx scheduler rings. not used? */ static int iwm_alloc_sched(struct iwm_softc *sc) { int rv; /* TX scheduler rings must be aligned on a 1KB boundary. */ rv = iwm_dma_contig_alloc(sc->sc_dmat, &sc->sched_dma, nitems(sc->txq) * sizeof(struct iwm_agn_scd_bc_tbl), 1024); return rv; } static void iwm_free_sched(struct iwm_softc *sc) { iwm_dma_contig_free(&sc->sched_dma); } /* keep-warm page is used internally by the card. see iwl-fh.h for more info */ static int iwm_alloc_kw(struct iwm_softc *sc) { return iwm_dma_contig_alloc(sc->sc_dmat, &sc->kw_dma, 4096, 4096); } static void iwm_free_kw(struct iwm_softc *sc) { iwm_dma_contig_free(&sc->kw_dma); } /* interrupt cause table */ static int iwm_alloc_ict(struct iwm_softc *sc) { return iwm_dma_contig_alloc(sc->sc_dmat, &sc->ict_dma, IWM_ICT_SIZE, 1<<IWM_ICT_PADDR_SHIFT); } static void iwm_free_ict(struct iwm_softc *sc) { iwm_dma_contig_free(&sc->ict_dma); } static int iwm_alloc_rx_ring(struct iwm_softc *sc, struct iwm_rx_ring *ring) { bus_size_t size; int i, error; ring->cur = 0; /* Allocate RX descriptors (256-byte aligned). */ size = IWM_RX_RING_COUNT * sizeof(uint32_t); error = iwm_dma_contig_alloc(sc->sc_dmat, &ring->desc_dma, size, 256); if (error != 0) { device_printf(sc->sc_dev, "could not allocate RX ring DMA memory\n"); goto fail; } ring->desc = ring->desc_dma.vaddr; /* Allocate RX status area (16-byte aligned). */ error = iwm_dma_contig_alloc(sc->sc_dmat, &ring->stat_dma, sizeof(*ring->stat), 16); if (error != 0) { device_printf(sc->sc_dev, "could not allocate RX status DMA memory\n"); goto fail; } ring->stat = ring->stat_dma.vaddr; /* Create RX buffer DMA tag. */ error = bus_dma_tag_create(sc->sc_dmat, 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, IWM_RBUF_SIZE, 1, IWM_RBUF_SIZE, 0, NULL, NULL, &ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create RX buf DMA tag, error %d\n", __func__, error); goto fail; } /* * Allocate and map RX buffers. */ for (i = 0; i < IWM_RX_RING_COUNT; i++) { if ((error = iwm_rx_addbuf(sc, IWM_RBUF_SIZE, i)) != 0) { goto fail; } } return 0; fail: iwm_free_rx_ring(sc, ring); return error; } static void iwm_reset_rx_ring(struct iwm_softc *sc, struct iwm_rx_ring *ring) { /* XXX print out if we can't lock the NIC? */ if (iwm_nic_lock(sc)) { /* XXX handle if RX stop doesn't finish?
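* For now the return value of iwm_pcie_rx_stop() is simply ignored and the ring state below is reset regardless.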
*/ (void) iwm_pcie_rx_stop(sc); iwm_nic_unlock(sc); } /* Reset the ring state */ ring->cur = 0; memset(sc->rxq.stat, 0, sizeof(*sc->rxq.stat)); } static void iwm_free_rx_ring(struct iwm_softc *sc, struct iwm_rx_ring *ring) { int i; iwm_dma_contig_free(&ring->desc_dma); iwm_dma_contig_free(&ring->stat_dma); for (i = 0; i < IWM_RX_RING_COUNT; i++) { struct iwm_rx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } if (data->map != NULL) { bus_dmamap_destroy(ring->data_dmat, data->map); data->map = NULL; } } if (ring->data_dmat != NULL) { bus_dma_tag_destroy(ring->data_dmat); ring->data_dmat = NULL; } } static int iwm_alloc_tx_ring(struct iwm_softc *sc, struct iwm_tx_ring *ring, int qid) { bus_addr_t paddr; bus_size_t size; int i, error; ring->qid = qid; ring->queued = 0; ring->cur = 0; /* Allocate TX descriptors (256-byte aligned). */ size = IWM_TX_RING_COUNT * sizeof (struct iwm_tfd); error = iwm_dma_contig_alloc(sc->sc_dmat, &ring->desc_dma, size, 256); if (error != 0) { device_printf(sc->sc_dev, "could not allocate TX ring DMA memory\n"); goto fail; } ring->desc = ring->desc_dma.vaddr; /* * We only use rings 0 through 9 (4 EDCA + cmd) so there is no need * to allocate commands space for other rings. */ if (qid > IWM_MVM_CMD_QUEUE) return 0; size = IWM_TX_RING_COUNT * sizeof(struct iwm_device_cmd); error = iwm_dma_contig_alloc(sc->sc_dmat, &ring->cmd_dma, size, 4); if (error != 0) { device_printf(sc->sc_dev, "could not allocate TX cmd DMA memory\n"); goto fail; } ring->cmd = ring->cmd_dma.vaddr; error = bus_dma_tag_create(sc->sc_dmat, 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, IWM_MAX_SCATTER - 2, MCLBYTES, 0, NULL, NULL, &ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "could not create TX buf DMA tag\n"); goto fail; } paddr = ring->cmd_dma.paddr; for (i = 0; i < IWM_TX_RING_COUNT; i++) { struct iwm_tx_data *data = &ring->data[i]; data->cmd_paddr = paddr; data->scratch_paddr = paddr + sizeof(struct iwm_cmd_header) + offsetof(struct iwm_tx_cmd, scratch); paddr += sizeof(struct iwm_device_cmd); error = bus_dmamap_create(ring->data_dmat, 0, &data->map); if (error != 0) { device_printf(sc->sc_dev, "could not create TX buf DMA map\n"); goto fail; } } KASSERT(paddr == ring->cmd_dma.paddr + size, ("invalid physical address")); return 0; fail: iwm_free_tx_ring(sc, ring); return error; } static void iwm_reset_tx_ring(struct iwm_softc *sc, struct iwm_tx_ring *ring) { int i; for (i = 0; i < IWM_TX_RING_COUNT; i++) { struct iwm_tx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } } /* Clear TX descriptors. 
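* The descriptor array is zeroed and the map synced (PREWRITE) so the device sees the cleared entries before the ring is reused.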
*/ memset(ring->desc, 0, ring->desc_dma.size); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); sc->qfullmsk &= ~(1 << ring->qid); ring->queued = 0; ring->cur = 0; } static void iwm_free_tx_ring(struct iwm_softc *sc, struct iwm_tx_ring *ring) { int i; iwm_dma_contig_free(&ring->desc_dma); iwm_dma_contig_free(&ring->cmd_dma); for (i = 0; i < IWM_TX_RING_COUNT; i++) { struct iwm_tx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } if (data->map != NULL) { bus_dmamap_destroy(ring->data_dmat, data->map); data->map = NULL; } } if (ring->data_dmat != NULL) { bus_dma_tag_destroy(ring->data_dmat); ring->data_dmat = NULL; } } /* * High-level hardware frobbing routines */ static void iwm_enable_interrupts(struct iwm_softc *sc) { sc->sc_intmask = IWM_CSR_INI_SET_MASK; IWM_WRITE(sc, IWM_CSR_INT_MASK, sc->sc_intmask); } static void iwm_restore_interrupts(struct iwm_softc *sc) { IWM_WRITE(sc, IWM_CSR_INT_MASK, sc->sc_intmask); } static void iwm_disable_interrupts(struct iwm_softc *sc) { /* disable interrupts */ IWM_WRITE(sc, IWM_CSR_INT_MASK, 0); /* acknowledge all interrupts */ IWM_WRITE(sc, IWM_CSR_INT, ~0); IWM_WRITE(sc, IWM_CSR_FH_INT_STATUS, ~0); } static void iwm_ict_reset(struct iwm_softc *sc) { iwm_disable_interrupts(sc); /* Reset ICT table. */ memset(sc->ict_dma.vaddr, 0, IWM_ICT_SIZE); sc->ict_cur = 0; /* Set physical address of ICT table (4KB aligned). */ IWM_WRITE(sc, IWM_CSR_DRAM_INT_TBL_REG, IWM_CSR_DRAM_INT_TBL_ENABLE | IWM_CSR_DRAM_INIT_TBL_WRAP_CHECK | sc->ict_dma.paddr >> IWM_ICT_PADDR_SHIFT); /* Switch to ICT interrupt mode in driver. */ sc->sc_flags |= IWM_FLAG_USE_ICT; /* Re-enable interrupts. */ IWM_WRITE(sc, IWM_CSR_INT, ~0); iwm_enable_interrupts(sc); } /* iwlwifi pcie/trans.c */ /* * Since this .. hard-resets things, it's time to actually * mark the first vap (if any) as having no mac context. * It's annoying, but since the driver is potentially being * stop/start'ed whilst active (thanks openbsd port!) we * have to correctly track this. */ static void iwm_stop_device(struct iwm_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); int chnl, ntries; int qid; /* tell the device to stop sending interrupts */ iwm_disable_interrupts(sc); /* * FreeBSD-local: mark the first vap as not-uploaded, * so the next transition through auth/assoc * will correctly populate the MAC context. */ if (vap) { struct iwm_vap *iv = IWM_VAP(vap); iv->is_uploaded = 0; } /* device going down, Stop using ICT table */ sc->sc_flags &= ~IWM_FLAG_USE_ICT; /* stop tx and rx. tx and rx bits, as usual, are from if_iwn */ iwm_write_prph(sc, IWM_SCD_TXFACT, 0); /* Stop all DMA channels. */ if (iwm_nic_lock(sc)) { for (chnl = 0; chnl < IWM_FH_TCSR_CHNL_NUM; chnl++) { IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_CONFIG_REG(chnl), 0); for (ntries = 0; ntries < 200; ntries++) { uint32_t r; r = IWM_READ(sc, IWM_FH_TSSR_TX_STATUS_REG); if (r & IWM_FH_TSSR_TX_STATUS_REG_MSK_CHNL_IDLE( chnl)) break; DELAY(20); } } iwm_nic_unlock(sc); } /* Stop RX ring. */ iwm_reset_rx_ring(sc, &sc->rxq); /* Reset all TX rings. 
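* iwm_reset_tx_ring() also frees any mbufs that were still queued for transmission.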
*/ for (qid = 0; qid < nitems(sc->txq); qid++) iwm_reset_tx_ring(sc, &sc->txq[qid]); /* * Power-down device's busmaster DMA clocks */ iwm_write_prph(sc, IWM_APMG_CLK_DIS_REG, IWM_APMG_CLK_VAL_DMA_CLK_RQT); DELAY(5); /* Make sure (redundant) we've released our request to stay awake */ IWM_CLRBITS(sc, IWM_CSR_GP_CNTRL, IWM_CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); /* Stop the device, and put it in low power state */ iwm_apm_stop(sc); /* Upon stop, the APM issues an interrupt if HW RF kill is set. * Clean again the interrupt here */ iwm_disable_interrupts(sc); /* stop and reset the on-board processor */ IWM_WRITE(sc, IWM_CSR_RESET, IWM_CSR_RESET_REG_FLAG_NEVO_RESET); /* * Even if we stop the HW, we still want the RF kill * interrupt */ iwm_enable_rfkill_int(sc); iwm_check_rfkill(sc); } /* iwlwifi: mvm/ops.c */ static void iwm_mvm_nic_config(struct iwm_softc *sc) { uint8_t radio_cfg_type, radio_cfg_step, radio_cfg_dash; uint32_t reg_val = 0; radio_cfg_type = (sc->sc_fw_phy_config & IWM_FW_PHY_CFG_RADIO_TYPE) >> IWM_FW_PHY_CFG_RADIO_TYPE_POS; radio_cfg_step = (sc->sc_fw_phy_config & IWM_FW_PHY_CFG_RADIO_STEP) >> IWM_FW_PHY_CFG_RADIO_STEP_POS; radio_cfg_dash = (sc->sc_fw_phy_config & IWM_FW_PHY_CFG_RADIO_DASH) >> IWM_FW_PHY_CFG_RADIO_DASH_POS; /* SKU control */ reg_val |= IWM_CSR_HW_REV_STEP(sc->sc_hw_rev) << IWM_CSR_HW_IF_CONFIG_REG_POS_MAC_STEP; reg_val |= IWM_CSR_HW_REV_DASH(sc->sc_hw_rev) << IWM_CSR_HW_IF_CONFIG_REG_POS_MAC_DASH; /* radio configuration */ reg_val |= radio_cfg_type << IWM_CSR_HW_IF_CONFIG_REG_POS_PHY_TYPE; reg_val |= radio_cfg_step << IWM_CSR_HW_IF_CONFIG_REG_POS_PHY_STEP; reg_val |= radio_cfg_dash << IWM_CSR_HW_IF_CONFIG_REG_POS_PHY_DASH; IWM_WRITE(sc, IWM_CSR_HW_IF_CONFIG_REG, reg_val); IWM_DPRINTF(sc, IWM_DEBUG_RESET, "Radio type=0x%x-0x%x-0x%x\n", radio_cfg_type, radio_cfg_step, radio_cfg_dash); /* * W/A : NIC is stuck in a reset state after Early PCIe power off * (PCIe power is lost before PERST# is asserted), causing ME FW * to lose ownership and not being able to obtain it back. */ iwm_set_bits_mask_prph(sc, IWM_APMG_PS_CTRL_REG, IWM_APMG_PS_CTRL_EARLY_PWR_OFF_RESET_DIS, ~IWM_APMG_PS_CTRL_EARLY_PWR_OFF_RESET_DIS); } static int iwm_nic_rx_init(struct iwm_softc *sc) { if (!iwm_nic_lock(sc)) return EBUSY; /* * Initialize RX ring. This is from the iwn driver. */ memset(sc->rxq.stat, 0, sizeof(*sc->rxq.stat)); /* stop DMA */ IWM_WRITE(sc, IWM_FH_MEM_RCSR_CHNL0_CONFIG_REG, 0); IWM_WRITE(sc, IWM_FH_MEM_RCSR_CHNL0_RBDCB_WPTR, 0); IWM_WRITE(sc, IWM_FH_MEM_RCSR_CHNL0_FLUSH_RB_REQ, 0); IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_RDPTR, 0); IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_RBDCB_WPTR_REG, 0); /* Set physical address of RX ring (256-byte aligned). */ IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_RBDCB_BASE_REG, sc->rxq.desc_dma.paddr >> 8); /* Set physical address of RX status (16-byte aligned). */ IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_STTS_WPTR_REG, sc->rxq.stat_dma.paddr >> 4); /* Enable RX. */ /* * Note: Linux driver also sets this: * (IWM_RX_RB_TIMEOUT << IWM_FH_RCSR_RX_CONFIG_REG_IRQ_RBTH_POS) | * * It causes weird behavior. YMMV. 
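* Here the RB timeout threshold is simply left out of the config value written below.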
*/ IWM_WRITE(sc, IWM_FH_MEM_RCSR_CHNL0_CONFIG_REG, IWM_FH_RCSR_RX_CONFIG_CHNL_EN_ENABLE_VAL | IWM_FH_RCSR_CHNL0_RX_IGNORE_RXF_EMPTY | /* HW bug */ IWM_FH_RCSR_CHNL0_RX_CONFIG_IRQ_DEST_INT_HOST_VAL | IWM_FH_RCSR_RX_CONFIG_REG_VAL_RB_SIZE_4K | IWM_RX_QUEUE_SIZE_LOG << IWM_FH_RCSR_RX_CONFIG_RBDCB_SIZE_POS); IWM_WRITE_1(sc, IWM_CSR_INT_COALESCING, IWM_HOST_INT_TIMEOUT_DEF); /* W/A for interrupt coalescing bug in 7260 and 3160 */ if (sc->host_interrupt_operation_mode) IWM_SETBITS(sc, IWM_CSR_INT_COALESCING, IWM_HOST_INT_OPER_MODE); /* * Thus sayeth el jefe (iwlwifi) via a comment: * * This value should initially be 0 (before preparing any * RBs), should be 8 after preparing the first 8 RBs (for example) */ IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_WPTR, 8); iwm_nic_unlock(sc); return 0; } static int iwm_nic_tx_init(struct iwm_softc *sc) { int qid; if (!iwm_nic_lock(sc)) return EBUSY; /* Deactivate TX scheduler. */ iwm_write_prph(sc, IWM_SCD_TXFACT, 0); /* Set physical address of "keep warm" page (16-byte aligned). */ IWM_WRITE(sc, IWM_FH_KW_MEM_ADDR_REG, sc->kw_dma.paddr >> 4); /* Initialize TX rings. */ for (qid = 0; qid < nitems(sc->txq); qid++) { struct iwm_tx_ring *txq = &sc->txq[qid]; /* Set physical address of TX ring (256-byte aligned). */ IWM_WRITE(sc, IWM_FH_MEM_CBBC_QUEUE(qid), txq->desc_dma.paddr >> 8); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "%s: loading ring %d descriptors (%p) at %lx\n", __func__, qid, txq->desc, (unsigned long) (txq->desc_dma.paddr >> 8)); } iwm_nic_unlock(sc); return 0; } static int iwm_nic_init(struct iwm_softc *sc) { int error; iwm_apm_init(sc); iwm_set_pwr(sc); iwm_mvm_nic_config(sc); if ((error = iwm_nic_rx_init(sc)) != 0) return error; /* * Ditto for TX, from iwn */ if ((error = iwm_nic_tx_init(sc)) != 0) return error; IWM_DPRINTF(sc, IWM_DEBUG_RESET, "%s: shadow registers enabled\n", __func__); IWM_SETBITS(sc, IWM_CSR_MAC_SHADOW_REG_CTRL, 0x800fffff); return 0; } enum iwm_mvm_tx_fifo { IWM_MVM_TX_FIFO_BK = 0, IWM_MVM_TX_FIFO_BE, IWM_MVM_TX_FIFO_VI, IWM_MVM_TX_FIFO_VO, IWM_MVM_TX_FIFO_MCAST = 5, }; const uint8_t iwm_mvm_ac_to_tx_fifo[] = { IWM_MVM_TX_FIFO_VO, IWM_MVM_TX_FIFO_VI, IWM_MVM_TX_FIFO_BE, IWM_MVM_TX_FIFO_BK, }; static void iwm_enable_txq(struct iwm_softc *sc, int qid, int fifo) { if (!iwm_nic_lock(sc)) { device_printf(sc->sc_dev, "%s: cannot enable txq %d\n", __func__, qid); return; /* XXX return EBUSY */ } /* unactivate before configuration */ iwm_write_prph(sc, IWM_SCD_QUEUE_STATUS_BITS(qid), (0 << IWM_SCD_QUEUE_STTS_REG_POS_ACTIVE) | (1 << IWM_SCD_QUEUE_STTS_REG_POS_SCD_ACT_EN)); if (qid != IWM_MVM_CMD_QUEUE) { iwm_set_bits_prph(sc, IWM_SCD_QUEUECHAIN_SEL, (1 << qid)); } iwm_clear_bits_prph(sc, IWM_SCD_AGGR_SEL, (1 << qid)); IWM_WRITE(sc, IWM_HBUS_TARG_WRPTR, qid << 8 | 0); iwm_write_prph(sc, IWM_SCD_QUEUE_RDPTR(qid), 0); iwm_write_mem32(sc, sc->sched_base + IWM_SCD_CONTEXT_QUEUE_OFFSET(qid), 0); /* Set scheduler window size and frame limit. 
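* Both values are packed into the second 32-bit word of this queue's scheduler context entry.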
*/ iwm_write_mem32(sc, sc->sched_base + IWM_SCD_CONTEXT_QUEUE_OFFSET(qid) + sizeof(uint32_t), ((IWM_FRAME_LIMIT << IWM_SCD_QUEUE_CTX_REG2_WIN_SIZE_POS) & IWM_SCD_QUEUE_CTX_REG2_WIN_SIZE_MSK) | ((IWM_FRAME_LIMIT << IWM_SCD_QUEUE_CTX_REG2_FRAME_LIMIT_POS) & IWM_SCD_QUEUE_CTX_REG2_FRAME_LIMIT_MSK)); iwm_write_prph(sc, IWM_SCD_QUEUE_STATUS_BITS(qid), (1 << IWM_SCD_QUEUE_STTS_REG_POS_ACTIVE) | (fifo << IWM_SCD_QUEUE_STTS_REG_POS_TXF) | (1 << IWM_SCD_QUEUE_STTS_REG_POS_WSL) | IWM_SCD_QUEUE_STTS_REG_MSK); iwm_nic_unlock(sc); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "%s: enabled txq %d FIFO %d\n", __func__, qid, fifo); } static int iwm_post_alive(struct iwm_softc *sc) { int nwords; int error, chnl; if (!iwm_nic_lock(sc)) return EBUSY; if (sc->sched_base != iwm_read_prph(sc, IWM_SCD_SRAM_BASE_ADDR)) { device_printf(sc->sc_dev, "%s: sched addr mismatch", __func__); error = EINVAL; goto out; } iwm_ict_reset(sc); /* Clear TX scheduler state in SRAM. */ nwords = (IWM_SCD_TRANS_TBL_MEM_UPPER_BOUND - IWM_SCD_CONTEXT_MEM_LOWER_BOUND) / sizeof(uint32_t); error = iwm_write_mem(sc, sc->sched_base + IWM_SCD_CONTEXT_MEM_LOWER_BOUND, NULL, nwords); if (error) goto out; /* Set physical address of TX scheduler rings (1KB aligned). */ iwm_write_prph(sc, IWM_SCD_DRAM_BASE_ADDR, sc->sched_dma.paddr >> 10); iwm_write_prph(sc, IWM_SCD_CHAINEXT_EN, 0); /* enable command channel */ iwm_enable_txq(sc, IWM_MVM_CMD_QUEUE, 7); iwm_write_prph(sc, IWM_SCD_TXFACT, 0xff); /* Enable DMA channels. */ for (chnl = 0; chnl < IWM_FH_TCSR_CHNL_NUM; chnl++) { IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_CONFIG_REG(chnl), IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE | IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_ENABLE); } IWM_SETBITS(sc, IWM_FH_TX_CHICKEN_BITS_REG, IWM_FH_TX_CHICKEN_BITS_SCD_AUTO_RETRY_EN); /* Enable L1-Active */ iwm_clear_bits_prph(sc, IWM_APMG_PCIDEV_STT_REG, IWM_APMG_PCIDEV_STT_VAL_L1_ACT_DIS); out: iwm_nic_unlock(sc); return error; } /* * NVM read access and content parsing. We do not support * external NVM or writing NVM. 
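* Only the sections listed in nvm_to_read[] below are fetched.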
* iwlwifi/mvm/nvm.c */ /* list of NVM sections we are allowed/need to read */ const int nvm_to_read[] = { IWM_NVM_SECTION_TYPE_HW, IWM_NVM_SECTION_TYPE_SW, IWM_NVM_SECTION_TYPE_CALIBRATION, IWM_NVM_SECTION_TYPE_PRODUCTION, }; /* Default NVM size to read */ #define IWM_NVM_DEFAULT_CHUNK_SIZE (2*1024) #define IWM_MAX_NVM_SECTION_SIZE 7000 #define IWM_NVM_WRITE_OPCODE 1 #define IWM_NVM_READ_OPCODE 0 static int iwm_nvm_read_chunk(struct iwm_softc *sc, uint16_t section, uint16_t offset, uint16_t length, uint8_t *data, uint16_t *len) { offset = 0; struct iwm_nvm_access_cmd nvm_access_cmd = { .offset = htole16(offset), .length = htole16(length), .type = htole16(section), .op_code = IWM_NVM_READ_OPCODE, }; struct iwm_nvm_access_resp *nvm_resp; struct iwm_rx_packet *pkt; struct iwm_host_cmd cmd = { .id = IWM_NVM_ACCESS_CMD, .flags = IWM_CMD_SYNC | IWM_CMD_WANT_SKB | IWM_CMD_SEND_IN_RFKILL, .data = { &nvm_access_cmd, }, }; int ret, bytes_read, offset_read; uint8_t *resp_data; cmd.len[0] = sizeof(struct iwm_nvm_access_cmd); ret = iwm_send_cmd(sc, &cmd); if (ret) return ret; pkt = cmd.resp_pkt; if (pkt->hdr.flags & IWM_CMD_FAILED_MSK) { device_printf(sc->sc_dev, "%s: Bad return from IWM_NVM_ACCES_COMMAND (0x%08X)\n", __func__, pkt->hdr.flags); ret = EIO; goto exit; } /* Extract NVM response */ nvm_resp = (void *)pkt->data; ret = le16toh(nvm_resp->status); bytes_read = le16toh(nvm_resp->length); offset_read = le16toh(nvm_resp->offset); resp_data = nvm_resp->data; if (ret) { device_printf(sc->sc_dev, "%s: NVM access command failed with status %d\n", __func__, ret); ret = EINVAL; goto exit; } if (offset_read != offset) { device_printf(sc->sc_dev, "%s: NVM ACCESS response with invalid offset %d\n", __func__, offset_read); ret = EINVAL; goto exit; } memcpy(data + offset, resp_data, bytes_read); *len = bytes_read; exit: iwm_free_resp(sc, &cmd); return ret; } /* * Reads an NVM section completely. * NICs prior to 7000 family doesn't have a real NVM, but just read * section 0 which is the EEPROM. Because the EEPROM reading is unlimited * by uCode, we need to manually check in this case that we don't * overflow and try to read more than the EEPROM size. * For 7000 family NICs, we supply the maximal size we can read, and * the uCode fills the response with as much data as we can, * without overflowing, so no check is needed. 
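* The loop in iwm_nvm_read_section() below relies on this: it keeps requesting chunks until a read returns less data than was asked for.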
*/ static int iwm_nvm_read_section(struct iwm_softc *sc, uint16_t section, uint8_t *data, uint16_t *len) { uint16_t length, seglen; int error; /* Set nvm section read length */ length = seglen = IWM_NVM_DEFAULT_CHUNK_SIZE; *len = 0; /* Read the NVM until exhausted (reading less than requested) */ while (seglen == length) { error = iwm_nvm_read_chunk(sc, section, *len, length, data, &seglen); if (error) { device_printf(sc->sc_dev, "Cannot read NVM from section " "%d offset %d, length %d\n", section, *len, length); return error; } *len += seglen; } IWM_DPRINTF(sc, IWM_DEBUG_RESET, "NVM section %d read completed\n", section); return 0; } /* * BEGIN IWM_NVM_PARSE */ /* iwlwifi/iwl-nvm-parse.c */ /* NVM offsets (in words) definitions */ enum wkp_nvm_offsets { /* NVM HW-Section offset (in words) definitions */ IWM_HW_ADDR = 0x15, /* NVM SW-Section offset (in words) definitions */ IWM_NVM_SW_SECTION = 0x1C0, IWM_NVM_VERSION = 0, IWM_RADIO_CFG = 1, IWM_SKU = 2, IWM_N_HW_ADDRS = 3, IWM_NVM_CHANNELS = 0x1E0 - IWM_NVM_SW_SECTION, /* NVM calibration section offset (in words) definitions */ IWM_NVM_CALIB_SECTION = 0x2B8, IWM_XTAL_CALIB = 0x316 - IWM_NVM_CALIB_SECTION }; /* SKU Capabilities (actual values from NVM definition) */ enum nvm_sku_bits { IWM_NVM_SKU_CAP_BAND_24GHZ = (1 << 0), IWM_NVM_SKU_CAP_BAND_52GHZ = (1 << 1), IWM_NVM_SKU_CAP_11N_ENABLE = (1 << 2), IWM_NVM_SKU_CAP_11AC_ENABLE = (1 << 3), }; /* radio config bits (actual values from NVM definition) */ #define IWM_NVM_RF_CFG_DASH_MSK(x) (x & 0x3) /* bits 0-1 */ #define IWM_NVM_RF_CFG_STEP_MSK(x) ((x >> 2) & 0x3) /* bits 2-3 */ #define IWM_NVM_RF_CFG_TYPE_MSK(x) ((x >> 4) & 0x3) /* bits 4-5 */ #define IWM_NVM_RF_CFG_PNUM_MSK(x) ((x >> 6) & 0x3) /* bits 6-7 */ #define IWM_NVM_RF_CFG_TX_ANT_MSK(x) ((x >> 8) & 0xF) /* bits 8-11 */ #define IWM_NVM_RF_CFG_RX_ANT_MSK(x) ((x >> 12) & 0xF) /* bits 12-15 */ #define DEFAULT_MAX_TX_POWER 16 /** * enum iwm_nvm_channel_flags - channel flags in NVM * @IWM_NVM_CHANNEL_VALID: channel is usable for this SKU/geo * @IWM_NVM_CHANNEL_IBSS: usable as an IBSS channel * @IWM_NVM_CHANNEL_ACTIVE: active scanning allowed * @IWM_NVM_CHANNEL_RADAR: radar detection required * @IWM_NVM_CHANNEL_DFS: dynamic freq selection candidate * @IWM_NVM_CHANNEL_WIDE: 20 MHz channel okay (?) * @IWM_NVM_CHANNEL_40MHZ: 40 MHz channel okay (?) * @IWM_NVM_CHANNEL_80MHZ: 80 MHz channel okay (?) * @IWM_NVM_CHANNEL_160MHZ: 160 MHz channel okay (?) */ enum iwm_nvm_channel_flags { IWM_NVM_CHANNEL_VALID = (1 << 0), IWM_NVM_CHANNEL_IBSS = (1 << 1), IWM_NVM_CHANNEL_ACTIVE = (1 << 3), IWM_NVM_CHANNEL_RADAR = (1 << 4), IWM_NVM_CHANNEL_DFS = (1 << 7), IWM_NVM_CHANNEL_WIDE = (1 << 8), IWM_NVM_CHANNEL_40MHZ = (1 << 9), IWM_NVM_CHANNEL_80MHZ = (1 << 10), IWM_NVM_CHANNEL_160MHZ = (1 << 11), }; /* * Add a channel to the net80211 channel list. * * ieee is the ieee channel number * ch_idx is channel index. * mode is the channel mode - CHAN_A, CHAN_B, CHAN_G. * ch_flags is the iwm channel flags. * * Return 0 on OK, < 0 on error. */ static int iwm_init_net80211_channel(struct iwm_softc *sc, int ieee, int ch_idx, int mode, uint16_t ch_flags) { /* XXX for now, no overflow checking! 
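* ic_nchans is incremented without being checked against the size of the ic_channels[] array.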
*/ struct ieee80211com *ic = &sc->sc_ic; int is_5ghz, flags; struct ieee80211_channel *channel; channel = &ic->ic_channels[ic->ic_nchans++]; channel->ic_ieee = ieee; is_5ghz = ch_idx >= IWM_NUM_2GHZ_CHANNELS; if (!is_5ghz) { flags = IEEE80211_CHAN_2GHZ; channel->ic_flags = mode; } else { flags = IEEE80211_CHAN_5GHZ; channel->ic_flags = mode; } channel->ic_freq = ieee80211_ieee2mhz(ieee, flags); if (!(ch_flags & IWM_NVM_CHANNEL_ACTIVE)) channel->ic_flags |= IEEE80211_CHAN_PASSIVE; return (0); } static void iwm_init_channel_map(struct iwm_softc *sc, const uint16_t * const nvm_ch_flags) { struct ieee80211com *ic = &sc->sc_ic; struct iwm_nvm_data *data = &sc->sc_nvm; int ch_idx; uint16_t ch_flags; int hw_value; for (ch_idx = 0; ch_idx < nitems(iwm_nvm_channels); ch_idx++) { ch_flags = le16_to_cpup(nvm_ch_flags + ch_idx); if (ch_idx >= IWM_NUM_2GHZ_CHANNELS && !data->sku_cap_band_52GHz_enable) ch_flags &= ~IWM_NVM_CHANNEL_VALID; if (!(ch_flags & IWM_NVM_CHANNEL_VALID)) { IWM_DPRINTF(sc, IWM_DEBUG_EEPROM, "Ch. %d Flags %x [%sGHz] - No traffic\n", iwm_nvm_channels[ch_idx], ch_flags, (ch_idx >= IWM_NUM_2GHZ_CHANNELS) ? "5.2" : "2.4"); continue; } hw_value = iwm_nvm_channels[ch_idx]; /* 5GHz? */ if (ch_idx >= IWM_NUM_2GHZ_CHANNELS) { (void) iwm_init_net80211_channel(sc, hw_value, ch_idx, IEEE80211_CHAN_A, ch_flags); } else { (void) iwm_init_net80211_channel(sc, hw_value, ch_idx, IEEE80211_CHAN_B, ch_flags); /* If it's not channel 13, also add 11g */ if (hw_value != 13) (void) iwm_init_net80211_channel(sc, hw_value, ch_idx, IEEE80211_CHAN_G, ch_flags); } IWM_DPRINTF(sc, IWM_DEBUG_EEPROM, "Ch. %d Flags %x [%sGHz] - Added\n", iwm_nvm_channels[ch_idx], ch_flags, (ch_idx >= IWM_NUM_2GHZ_CHANNELS) ? "5.2" : "2.4"); } ieee80211_sort_channels(ic->ic_channels, ic->ic_nchans); } static int iwm_parse_nvm_data(struct iwm_softc *sc, const uint16_t *nvm_hw, const uint16_t *nvm_sw, const uint16_t *nvm_calib, uint8_t tx_chains, uint8_t rx_chains) { struct iwm_nvm_data *data = &sc->sc_nvm; uint8_t hw_addr[IEEE80211_ADDR_LEN]; uint16_t radio_cfg, sku; data->nvm_version = le16_to_cpup(nvm_sw + IWM_NVM_VERSION); radio_cfg = le16_to_cpup(nvm_sw + IWM_RADIO_CFG); data->radio_cfg_type = IWM_NVM_RF_CFG_TYPE_MSK(radio_cfg); data->radio_cfg_step = IWM_NVM_RF_CFG_STEP_MSK(radio_cfg); data->radio_cfg_dash = IWM_NVM_RF_CFG_DASH_MSK(radio_cfg); data->radio_cfg_pnum = IWM_NVM_RF_CFG_PNUM_MSK(radio_cfg); data->valid_tx_ant = IWM_NVM_RF_CFG_TX_ANT_MSK(radio_cfg); data->valid_rx_ant = IWM_NVM_RF_CFG_RX_ANT_MSK(radio_cfg); sku = le16_to_cpup(nvm_sw + IWM_SKU); data->sku_cap_band_24GHz_enable = sku & IWM_NVM_SKU_CAP_BAND_24GHZ; data->sku_cap_band_52GHz_enable = sku & IWM_NVM_SKU_CAP_BAND_52GHZ; data->sku_cap_11n_enable = 0; if (!data->valid_tx_ant || !data->valid_rx_ant) { device_printf(sc->sc_dev, "%s: invalid antennas (0x%x, 0x%x)\n", __func__, data->valid_tx_ant, data->valid_rx_ant); return EINVAL; } data->n_hw_addrs = le16_to_cpup(nvm_sw + IWM_N_HW_ADDRS); data->xtal_calib[0] = *(nvm_calib + IWM_XTAL_CALIB); data->xtal_calib[1] = *(nvm_calib + IWM_XTAL_CALIB + 1); /* The byte order is little endian 16 bit, meaning 214365 */ IEEE80211_ADDR_COPY(hw_addr, nvm_hw + IWM_HW_ADDR); data->hw_addr[0] = hw_addr[1]; data->hw_addr[1] = hw_addr[0]; data->hw_addr[2] = hw_addr[3]; data->hw_addr[3] = hw_addr[2]; data->hw_addr[4] = hw_addr[5]; data->hw_addr[5] = hw_addr[4]; iwm_init_channel_map(sc, &nvm_sw[IWM_NVM_CHANNELS]); data->calib_version = 255; /* TODO: this value will prevent some checks from failing, we need to check if this field is 
still needed, and if it does, where is it in the NVM */ return 0; } /* * END NVM PARSE */ struct iwm_nvm_section { uint16_t length; const uint8_t *data; }; static int iwm_parse_nvm_sections(struct iwm_softc *sc, struct iwm_nvm_section *sections) { const uint16_t *hw, *sw, *calib; /* Checking for required sections */ if (!sections[IWM_NVM_SECTION_TYPE_SW].data || !sections[IWM_NVM_SECTION_TYPE_HW].data) { device_printf(sc->sc_dev, "%s: Can't parse empty NVM sections\n", __func__); return ENOENT; } hw = (const uint16_t *)sections[IWM_NVM_SECTION_TYPE_HW].data; sw = (const uint16_t *)sections[IWM_NVM_SECTION_TYPE_SW].data; calib = (const uint16_t *)sections[IWM_NVM_SECTION_TYPE_CALIBRATION].data; return iwm_parse_nvm_data(sc, hw, sw, calib, IWM_FW_VALID_TX_ANT(sc), IWM_FW_VALID_RX_ANT(sc)); } static int iwm_nvm_init(struct iwm_softc *sc) { struct iwm_nvm_section nvm_sections[IWM_NVM_NUM_OF_SECTIONS]; int i, section, error; uint16_t len; uint8_t *nvm_buffer, *temp; /* Read From FW NVM */ IWM_DPRINTF(sc, IWM_DEBUG_EEPROM, "%s: Read NVM\n", __func__); /* TODO: find correct NVM max size for a section */ nvm_buffer = malloc(IWM_OTP_LOW_IMAGE_SIZE, M_DEVBUF, M_NOWAIT); if (nvm_buffer == NULL) return (ENOMEM); for (i = 0; i < nitems(nvm_to_read); i++) { section = nvm_to_read[i]; KASSERT(section <= nitems(nvm_sections), ("too many sections")); error = iwm_nvm_read_section(sc, section, nvm_buffer, &len); if (error) break; temp = malloc(len, M_DEVBUF, M_NOWAIT); if (temp == NULL) { error = ENOMEM; break; } memcpy(temp, nvm_buffer, len); nvm_sections[section].data = temp; nvm_sections[section].length = len; } free(nvm_buffer, M_DEVBUF); if (error) return error; return iwm_parse_nvm_sections(sc, nvm_sections); } /* * Firmware loading gunk. This is kind of a weird hybrid between the * iwn driver and the Linux iwlwifi driver. */ static int iwm_firmware_load_chunk(struct iwm_softc *sc, uint32_t dst_addr, const uint8_t *section, uint32_t byte_cnt) { struct iwm_dma_info *dma = &sc->fw_dma; int error; /* Copy firmware section into pre-allocated DMA-safe memory. 
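* The bounce buffer fw_dma was sized to sc_fwdmasegsz in iwm_alloc_fwmem(), so each chunk is assumed to fit.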
*/ memcpy(dma->vaddr, section, byte_cnt); bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); if (!iwm_nic_lock(sc)) return EBUSY; sc->sc_fw_chunk_done = 0; IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_CONFIG_REG(IWM_FH_SRVC_CHNL), IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_PAUSE); IWM_WRITE(sc, IWM_FH_SRVC_CHNL_SRAM_ADDR_REG(IWM_FH_SRVC_CHNL), dst_addr); IWM_WRITE(sc, IWM_FH_TFDIB_CTRL0_REG(IWM_FH_SRVC_CHNL), dma->paddr & IWM_FH_MEM_TFDIB_DRAM_ADDR_LSB_MSK); IWM_WRITE(sc, IWM_FH_TFDIB_CTRL1_REG(IWM_FH_SRVC_CHNL), (iwm_get_dma_hi_addr(dma->paddr) << IWM_FH_MEM_TFDIB_REG1_ADDR_BITSHIFT) | byte_cnt); IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_BUF_STS_REG(IWM_FH_SRVC_CHNL), 1 << IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_POS_TB_NUM | 1 << IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_POS_TB_IDX | IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_VAL_TFDB_VALID); IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_CONFIG_REG(IWM_FH_SRVC_CHNL), IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE | IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_DISABLE | IWM_FH_TCSR_TX_CONFIG_REG_VAL_CIRQ_HOST_ENDTFD); iwm_nic_unlock(sc); /* wait 1s for this segment to load */ while (!sc->sc_fw_chunk_done) if ((error = msleep(&sc->sc_fw, &sc->sc_mtx, 0, "iwmfw", hz)) != 0) break; return error; } static int iwm_load_firmware(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { struct iwm_fw_sects *fws; int error, i, w; const void *data; uint32_t dlen; uint32_t offset; sc->sc_uc.uc_intr = 0; fws = &sc->sc_fw.fw_sects[ucode_type]; for (i = 0; i < fws->fw_count; i++) { data = fws->fw_sect[i].fws_data; dlen = fws->fw_sect[i].fws_len; offset = fws->fw_sect[i].fws_devoff; IWM_DPRINTF(sc, IWM_DEBUG_FIRMWARE_TLV, "LOAD FIRMWARE type %d offset %u len %d\n", ucode_type, offset, dlen); error = iwm_firmware_load_chunk(sc, offset, data, dlen); if (error) { device_printf(sc->sc_dev, "%s: chunk %u of %u returned error %02d\n", __func__, i, fws->fw_count, error); return error; } } /* wait for the firmware to load */ IWM_WRITE(sc, IWM_CSR_RESET, 0); for (w = 0; !sc->sc_uc.uc_intr && w < 10; w++) { error = msleep(&sc->sc_uc, &sc->sc_mtx, 0, "iwmuc", hz/10); } return error; } /* iwlwifi: pcie/trans.c */ static int iwm_start_fw(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { int error; IWM_WRITE(sc, IWM_CSR_INT, ~0); if ((error = iwm_nic_init(sc)) != 0) { device_printf(sc->sc_dev, "unable to init nic\n"); return error; } /* make sure rfkill handshake bits are cleared */ IWM_WRITE(sc, IWM_CSR_UCODE_DRV_GP1_CLR, IWM_CSR_UCODE_SW_BIT_RFKILL); IWM_WRITE(sc, IWM_CSR_UCODE_DRV_GP1_CLR, IWM_CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED); /* clear (again), then enable host interrupts */ IWM_WRITE(sc, IWM_CSR_INT, ~0); iwm_enable_interrupts(sc); /* really make sure rfkill handshake bits are cleared */ /* maybe we should write a few times more? 
just to make sure */ IWM_WRITE(sc, IWM_CSR_UCODE_DRV_GP1_CLR, IWM_CSR_UCODE_SW_BIT_RFKILL); IWM_WRITE(sc, IWM_CSR_UCODE_DRV_GP1_CLR, IWM_CSR_UCODE_SW_BIT_RFKILL); /* Load the given image to the HW */ return iwm_load_firmware(sc, ucode_type); } static int iwm_fw_alive(struct iwm_softc *sc, uint32_t sched_base) { return iwm_post_alive(sc); } static int iwm_send_tx_ant_cfg(struct iwm_softc *sc, uint8_t valid_tx_ant) { struct iwm_tx_ant_cfg_cmd tx_ant_cmd = { .valid = htole32(valid_tx_ant), }; return iwm_mvm_send_cmd_pdu(sc, IWM_TX_ANT_CONFIGURATION_CMD, IWM_CMD_SYNC, sizeof(tx_ant_cmd), &tx_ant_cmd); } /* iwlwifi: mvm/fw.c */ static int iwm_send_phy_cfg_cmd(struct iwm_softc *sc) { struct iwm_phy_cfg_cmd phy_cfg_cmd; enum iwm_ucode_type ucode_type = sc->sc_uc_current; /* Set parameters */ phy_cfg_cmd.phy_cfg = htole32(sc->sc_fw_phy_config); phy_cfg_cmd.calib_control.event_trigger = sc->sc_default_calib[ucode_type].event_trigger; phy_cfg_cmd.calib_control.flow_trigger = sc->sc_default_calib[ucode_type].flow_trigger; IWM_DPRINTF(sc, IWM_DEBUG_CMD | IWM_DEBUG_RESET, "Sending Phy CFG command: 0x%x\n", phy_cfg_cmd.phy_cfg); return iwm_mvm_send_cmd_pdu(sc, IWM_PHY_CONFIGURATION_CMD, IWM_CMD_SYNC, sizeof(phy_cfg_cmd), &phy_cfg_cmd); } static int iwm_mvm_load_ucode_wait_alive(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { enum iwm_ucode_type old_type = sc->sc_uc_current; int error; if ((error = iwm_read_firmware(sc, ucode_type)) != 0) return error; sc->sc_uc_current = ucode_type; error = iwm_start_fw(sc, ucode_type); - iwm_fw_info_free(&sc->sc_fw); if (error) { sc->sc_uc_current = old_type; return error; } return iwm_fw_alive(sc, sc->sched_base); } /* * mvm misc bits */ /* * follows iwlwifi/fw.c */ static int iwm_run_init_mvm_ucode(struct iwm_softc *sc, int justnvm) { int error; /* do not operate with rfkill switch turned on */ if ((sc->sc_flags & IWM_FLAG_RFKILL) && !justnvm) { device_printf(sc->sc_dev, "radio is disabled by hardware switch\n"); return EPERM; } sc->sc_init_complete = 0; if ((error = iwm_mvm_load_ucode_wait_alive(sc, IWM_UCODE_TYPE_INIT)) != 0) return error; if (justnvm) { if ((error = iwm_nvm_init(sc)) != 0) { device_printf(sc->sc_dev, "failed to read nvm\n"); return error; } IEEE80211_ADDR_COPY(sc->sc_ic.ic_macaddr, sc->sc_nvm.hw_addr); sc->sc_scan_cmd_len = sizeof(struct iwm_scan_cmd) + sc->sc_capa_max_probe_len + IWM_MAX_NUM_SCAN_CHANNELS * sizeof(struct iwm_scan_channel); sc->sc_scan_cmd = malloc(sc->sc_scan_cmd_len, M_DEVBUF, M_NOWAIT); if (sc->sc_scan_cmd == NULL) return (ENOMEM); return 0; } /* Send TX valid antennas before triggering calibrations */ if ((error = iwm_send_tx_ant_cfg(sc, IWM_FW_VALID_TX_ANT(sc))) != 0) return error; /* * Send phy configurations command to init uCode * to start the 16.0 uCode init image internal calibrations. 
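* Completion is reported asynchronously; the loop further below sleeps on sc_init_complete until the init-complete notification arrives from the firmware.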
*/ if ((error = iwm_send_phy_cfg_cmd(sc)) != 0 ) { device_printf(sc->sc_dev, "%s: failed to run internal calibration: %d\n", __func__, error); return error; } /* * Nothing to do but wait for the init complete notification * from the firmware */ while (!sc->sc_init_complete) if ((error = msleep(&sc->sc_init_complete, &sc->sc_mtx, 0, "iwminit", 2*hz)) != 0) break; return error; } /* * receive side */ /* (re)stock rx ring, called at init-time and at runtime */ static int iwm_rx_addbuf(struct iwm_softc *sc, int size, int idx) { struct iwm_rx_ring *ring = &sc->rxq; struct iwm_rx_data *data = &ring->data[idx]; struct mbuf *m; int error; bus_addr_t paddr; m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, IWM_RBUF_SIZE); if (m == NULL) return ENOBUFS; if (data->m != NULL) bus_dmamap_unload(ring->data_dmat, data->map); m->m_len = m->m_pkthdr.len = m->m_ext.ext_size; error = bus_dmamap_create(ring->data_dmat, 0, &data->map); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create RX buf DMA map, error %d\n", __func__, error); goto fail; } data->m = m; error = bus_dmamap_load(ring->data_dmat, data->map, mtod(data->m, void *), IWM_RBUF_SIZE, iwm_dma_map_addr, &paddr, BUS_DMA_NOWAIT); if (error != 0 && error != EFBIG) { device_printf(sc->sc_dev, "%s: can't not map mbuf, error %d\n", __func__, error); goto fail; } bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_PREREAD); /* Update RX descriptor. */ ring->desc[idx] = htole32(paddr >> 8); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); return 0; fail: return error; } /* iwlwifi: mvm/rx.c */ #define IWM_RSSI_OFFSET 50 static int iwm_mvm_calc_rssi(struct iwm_softc *sc, struct iwm_rx_phy_info *phy_info) { int rssi_a, rssi_b, rssi_a_dbm, rssi_b_dbm, max_rssi_dbm; uint32_t agc_a, agc_b; uint32_t val; val = le32toh(phy_info->non_cfg_phy[IWM_RX_INFO_AGC_IDX]); agc_a = (val & IWM_OFDM_AGC_A_MSK) >> IWM_OFDM_AGC_A_POS; agc_b = (val & IWM_OFDM_AGC_B_MSK) >> IWM_OFDM_AGC_B_POS; val = le32toh(phy_info->non_cfg_phy[IWM_RX_INFO_RSSI_AB_IDX]); rssi_a = (val & IWM_OFDM_RSSI_INBAND_A_MSK) >> IWM_OFDM_RSSI_A_POS; rssi_b = (val & IWM_OFDM_RSSI_INBAND_B_MSK) >> IWM_OFDM_RSSI_B_POS; /* * dBm = rssi dB - agc dB - constant. * Higher AGC (higher radio gain) means lower signal. */ rssi_a_dbm = rssi_a - IWM_RSSI_OFFSET - agc_a; rssi_b_dbm = rssi_b - IWM_RSSI_OFFSET - agc_b; max_rssi_dbm = MAX(rssi_a_dbm, rssi_b_dbm); IWM_DPRINTF(sc, IWM_DEBUG_RECV, "Rssi In A %d B %d Max %d AGCA %d AGCB %d\n", rssi_a_dbm, rssi_b_dbm, max_rssi_dbm, agc_a, agc_b); return max_rssi_dbm; } /* iwlwifi: mvm/rx.c */ /* * iwm_mvm_get_signal_strength - use new rx PHY INFO API * values are reported by the fw as positive values - need to negate * to obtain their dBM. Account for missing antennas by replacing 0 * values by -256dBm: practically 0 power and a non-feasible 8 bit value. */ static int iwm_mvm_get_signal_strength(struct iwm_softc *sc, struct iwm_rx_phy_info *phy_info) { int energy_a, energy_b, energy_c, max_energy; uint32_t val; val = le32toh(phy_info->non_cfg_phy[IWM_RX_INFO_ENERGY_ANT_ABC_IDX]); energy_a = (val & IWM_RX_INFO_ENERGY_ANT_A_MSK) >> IWM_RX_INFO_ENERGY_ANT_A_POS; energy_a = energy_a ? -energy_a : -256; energy_b = (val & IWM_RX_INFO_ENERGY_ANT_B_MSK) >> IWM_RX_INFO_ENERGY_ANT_B_POS; energy_b = energy_b ? -energy_b : -256; energy_c = (val & IWM_RX_INFO_ENERGY_ANT_C_MSK) >> IWM_RX_INFO_ENERGY_ANT_C_POS; energy_c = energy_c ? 
-energy_c : -256; max_energy = MAX(energy_a, energy_b); max_energy = MAX(max_energy, energy_c); IWM_DPRINTF(sc, IWM_DEBUG_RECV, "energy In A %d B %d C %d , and max %d\n", energy_a, energy_b, energy_c, max_energy); return max_energy; } static void iwm_mvm_rx_rx_phy_cmd(struct iwm_softc *sc, struct iwm_rx_packet *pkt, struct iwm_rx_data *data) { struct iwm_rx_phy_info *phy_info = (void *)pkt->data; IWM_DPRINTF(sc, IWM_DEBUG_RECV, "received PHY stats\n"); bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); memcpy(&sc->sc_last_phy_info, phy_info, sizeof(sc->sc_last_phy_info)); } /* * Retrieve the average noise (in dBm) among receivers. */ static int iwm_get_noise(const struct iwm_mvm_statistics_rx_non_phy *stats) { int i, total, nbant, noise; total = nbant = noise = 0; for (i = 0; i < 3; i++) { noise = le32toh(stats->beacon_silence_rssi[i]) & 0xff; if (noise) { total += noise; nbant++; } } /* There should be at least one antenna but check anyway. */ return (nbant == 0) ? -127 : (total / nbant) - 107; } /* * iwm_mvm_rx_rx_mpdu - IWM_REPLY_RX_MPDU_CMD handler * * Handles the actual data of the Rx packet from the fw */ static void iwm_mvm_rx_rx_mpdu(struct iwm_softc *sc, struct iwm_rx_packet *pkt, struct iwm_rx_data *data) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct ieee80211_frame *wh; struct ieee80211_node *ni; struct ieee80211_rx_stats rxs; struct mbuf *m; struct iwm_rx_phy_info *phy_info; struct iwm_rx_mpdu_res_start *rx_res; uint32_t len; uint32_t rx_pkt_status; int rssi; bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); phy_info = &sc->sc_last_phy_info; rx_res = (struct iwm_rx_mpdu_res_start *)pkt->data; wh = (struct ieee80211_frame *)(pkt->data + sizeof(*rx_res)); len = le16toh(rx_res->byte_count); rx_pkt_status = le32toh(*(uint32_t *)(pkt->data + sizeof(*rx_res) + len)); m = data->m; m->m_data = pkt->data + sizeof(*rx_res); m->m_pkthdr.len = m->m_len = len; if (__predict_false(phy_info->cfg_phy_cnt > 20)) { device_printf(sc->sc_dev, "dsp size out of range [0,20]: %d\n", phy_info->cfg_phy_cnt); return; } if (!(rx_pkt_status & IWM_RX_MPDU_RES_STATUS_CRC_OK) || !(rx_pkt_status & IWM_RX_MPDU_RES_STATUS_OVERRUN_OK)) { IWM_DPRINTF(sc, IWM_DEBUG_RECV, "Bad CRC or FIFO: 0x%08X.\n", rx_pkt_status); return; /* drop */ } if (sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_RX_ENERGY_API) { rssi = iwm_mvm_get_signal_strength(sc, phy_info); } else { rssi = iwm_mvm_calc_rssi(sc, phy_info); } rssi = (0 - IWM_MIN_DBM) + rssi; /* normalize */ rssi = MIN(rssi, sc->sc_max_rssi); /* clip to max. 100% */ /* replenish ring for the buffer we're going to feed to the sharks */ if (iwm_rx_addbuf(sc, IWM_RBUF_SIZE, sc->rxq.cur) != 0) { device_printf(sc->sc_dev, "%s: unable to add more buffers\n", __func__); return; } ni = ieee80211_find_rxnode(ic, (struct ieee80211_frame_min *)wh); IWM_DPRINTF(sc, IWM_DEBUG_RECV, "%s: phy_info: channel=%d, flags=0x%08x\n", __func__, le16toh(phy_info->channel), le16toh(phy_info->phy_flags)); /* * Populate an RX state struct with the provided information. 
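* net80211 consumes these fields (channel, frequency, RSSI and noise floor) via ieee80211_input_mimo() below.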
*/ bzero(&rxs, sizeof(rxs)); rxs.r_flags |= IEEE80211_R_IEEE | IEEE80211_R_FREQ; rxs.r_flags |= IEEE80211_R_NF | IEEE80211_R_RSSI; rxs.c_ieee = le16toh(phy_info->channel); if (le16toh(phy_info->phy_flags & IWM_RX_RES_PHY_FLAGS_BAND_24)) { rxs.c_freq = ieee80211_ieee2mhz(rxs.c_ieee, IEEE80211_CHAN_2GHZ); } else { rxs.c_freq = ieee80211_ieee2mhz(rxs.c_ieee, IEEE80211_CHAN_5GHZ); } rxs.rssi = rssi - sc->sc_noise; rxs.nf = sc->sc_noise; if (ieee80211_radiotap_active_vap(vap)) { struct iwm_rx_radiotap_header *tap = &sc->sc_rxtap; tap->wr_flags = 0; if (phy_info->phy_flags & htole16(IWM_PHY_INFO_FLAG_SHPREAMBLE)) tap->wr_flags |= IEEE80211_RADIOTAP_F_SHORTPRE; tap->wr_chan_freq = htole16(rxs.c_freq); /* XXX only if ic->ic_curchan->ic_ieee == rxs.c_ieee */ tap->wr_chan_flags = htole16(ic->ic_curchan->ic_flags); tap->wr_dbm_antsignal = (int8_t)rssi; tap->wr_dbm_antnoise = (int8_t)sc->sc_noise; tap->wr_tsft = phy_info->system_timestamp; switch (phy_info->rate) { /* CCK rates. */ case 10: tap->wr_rate = 2; break; case 20: tap->wr_rate = 4; break; case 55: tap->wr_rate = 11; break; case 110: tap->wr_rate = 22; break; /* OFDM rates. */ case 0xd: tap->wr_rate = 12; break; case 0xf: tap->wr_rate = 18; break; case 0x5: tap->wr_rate = 24; break; case 0x7: tap->wr_rate = 36; break; case 0x9: tap->wr_rate = 48; break; case 0xb: tap->wr_rate = 72; break; case 0x1: tap->wr_rate = 96; break; case 0x3: tap->wr_rate = 108; break; /* Unknown rate: should not happen. */ default: tap->wr_rate = 0; } } IWM_UNLOCK(sc); if (ni != NULL) { IWM_DPRINTF(sc, IWM_DEBUG_RECV, "input m %p\n", m); ieee80211_input_mimo(ni, m, &rxs); ieee80211_free_node(ni); } else { IWM_DPRINTF(sc, IWM_DEBUG_RECV, "inputall m %p\n", m); ieee80211_input_mimo_all(ic, m, &rxs); } IWM_LOCK(sc); } static int iwm_mvm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_rx_packet *pkt, struct iwm_node *in) { struct iwm_mvm_tx_resp *tx_resp = (void *)pkt->data; struct ieee80211_node *ni = &in->in_ni; struct ieee80211vap *vap = ni->ni_vap; int status = le16toh(tx_resp->status.status) & IWM_TX_STATUS_MSK; int failack = tx_resp->failure_frame; KASSERT(tx_resp->frame_count == 1, ("too many frames")); /* Update rate control statistics. */ if (status != IWM_TX_STATUS_SUCCESS && status != IWM_TX_STATUS_DIRECT_DONE) { ieee80211_ratectl_tx_complete(vap, ni, IEEE80211_RATECTL_TX_FAILURE, &failack, NULL); return (1); } else { ieee80211_ratectl_tx_complete(vap, ni, IEEE80211_RATECTL_TX_SUCCESS, &failack, NULL); return (0); } } static void iwm_mvm_rx_tx_cmd(struct iwm_softc *sc, struct iwm_rx_packet *pkt, struct iwm_rx_data *data) { struct iwm_cmd_header *cmd_hdr = &pkt->hdr; int idx = cmd_hdr->idx; int qid = cmd_hdr->qid; struct iwm_tx_ring *ring = &sc->txq[qid]; struct iwm_tx_data *txd = &ring->data[idx]; struct iwm_node *in = txd->in; struct mbuf *m = txd->m; int status; KASSERT(txd->done == 0, ("txd not done")); KASSERT(txd->in != NULL, ("txd without node")); KASSERT(txd->m != NULL, ("txd without mbuf")); bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); sc->sc_tx_timer = 0; status = iwm_mvm_rx_tx_cmd_single(sc, pkt, in); /* Unmap and free mbuf. 
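* ieee80211_tx_complete() below hands the mbuf back to net80211, which takes care of freeing it.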
*/ bus_dmamap_sync(ring->data_dmat, txd->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, txd->map); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "free txd %p, in %p\n", txd, txd->in); txd->done = 1; txd->m = NULL; txd->in = NULL; ieee80211_tx_complete(&in->in_ni, m, status); if (--ring->queued < IWM_TX_RING_LOMARK) { sc->qfullmsk &= ~(1 << ring->qid); if (sc->qfullmsk == 0) { /* * Well, we're in interrupt context, but then again * I guess net80211 does all sorts of stunts in * interrupt context, so maybe this is no biggie. */ iwm_start(sc); } } } /* * transmit side */ /* * Process a "command done" firmware notification. This is where we wakeup * processes waiting for a synchronous command completion. * from if_iwn */ static void iwm_cmd_done(struct iwm_softc *sc, struct iwm_rx_packet *pkt) { struct iwm_tx_ring *ring = &sc->txq[IWM_MVM_CMD_QUEUE]; struct iwm_tx_data *data; if (pkt->hdr.qid != IWM_MVM_CMD_QUEUE) { return; /* Not a command ack. */ } data = &ring->data[pkt->hdr.idx]; /* If the command was mapped in an mbuf, free it. */ if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } wakeup(&ring->desc[pkt->hdr.idx]); } #if 0 /* * necessary only for block ack mode */ void iwm_update_sched(struct iwm_softc *sc, int qid, int idx, uint8_t sta_id, uint16_t len) { struct iwm_agn_scd_bc_tbl *scd_bc_tbl; uint16_t w_val; scd_bc_tbl = sc->sched_dma.vaddr; len += 8; /* magic numbers came naturally from paris */ if (sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_DW_BC_TABLE) len = roundup(len, 4) / 4; w_val = htole16(sta_id << 12 | len); /* Update TX scheduler. */ scd_bc_tbl[qid].tfd_offset[idx] = w_val; bus_dmamap_sync(sc->sched_dma.tag, sc->sched_dma.map, BUS_DMASYNC_PREWRITE); /* I really wonder what this is ?!? */ if (idx < IWM_TFD_QUEUE_SIZE_BC_DUP) { scd_bc_tbl[qid].tfd_offset[IWM_TFD_QUEUE_SIZE_MAX + idx] = w_val; bus_dmamap_sync(sc->sched_dma.tag, sc->sched_dma.map, BUS_DMASYNC_PREWRITE); } } #endif /* * Take an 802.11 (non-n) rate, find the relevant rate * table entry. return the index into in_ridx[]. * * The caller then uses that index back into in_ridx * to figure out the rate index programmed /into/ * the firmware for this given node. */ static int iwm_tx_rateidx_lookup(struct iwm_softc *sc, struct iwm_node *in, uint8_t rate) { int i; uint8_t r; for (i = 0; i < nitems(in->in_ridx); i++) { r = iwm_rates[in->in_ridx[i]].rate; if (rate == r) return (i); } /* XXX Return the first */ /* XXX TODO: have it return the /lowest/ */ return (0); } /* * Fill in various bit for management frames, and leave them * unfilled for data frames (firmware takes care of that). * Return the selected TX rate. */ static const struct iwm_rate * iwm_tx_fill_cmd(struct iwm_softc *sc, struct iwm_node *in, struct ieee80211_frame *wh, struct iwm_tx_cmd *tx) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_node *ni = &in->in_ni; const struct iwm_rate *rinfo; int type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; int ridx, rate_flags; tx->rts_retry_limit = IWM_RTS_DFAULT_RETRY_LIMIT; tx->data_retry_limit = IWM_DEFAULT_TX_RETRY; /* * XXX TODO: everything about the rate selection here is terrible! 
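* Data frames use the node's rate-control table (via iwm_tx_rateidx_lookup()); all other frames fall back to the lowest CCK or OFDM rate.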
*/ if (type == IEEE80211_FC0_TYPE_DATA) { int i; /* for data frames, use RS table */ (void) ieee80211_ratectl_rate(ni, NULL, 0); i = iwm_tx_rateidx_lookup(sc, in, ni->ni_txrate); ridx = in->in_ridx[i]; /* This is the index into the programmed table */ tx->initial_rate_index = i; tx->tx_flags |= htole32(IWM_TX_CMD_FLG_STA_RATE); IWM_DPRINTF(sc, IWM_DEBUG_XMIT | IWM_DEBUG_TXRATE, "%s: start with i=%d, txrate %d\n", __func__, i, iwm_rates[ridx].rate); /* XXX no rate_n_flags? */ return &iwm_rates[ridx]; } /* * For non-data, use the lowest supported rate for the given * operational mode. * * Note: there may not be any rate control information available. * This driver currently assumes if we're transmitting data * frames, use the rate control table. Grr. * * XXX TODO: use the configured rate for the traffic type! */ if (ic->ic_curmode == IEEE80211_MODE_11A) { /* * XXX this assumes the mode is either 11a or not 11a; * definitely won't work for 11n. */ ridx = IWM_RIDX_OFDM; } else { ridx = IWM_RIDX_CCK; } rinfo = &iwm_rates[ridx]; IWM_DPRINTF(sc, IWM_DEBUG_TXRATE, "%s: ridx=%d; rate=%d, CCK=%d\n", __func__, ridx, rinfo->rate, !! (IWM_RIDX_IS_CCK(ridx)) ); /* XXX TODO: hard-coded TX antenna? */ rate_flags = 1 << IWM_RATE_MCS_ANT_POS; if (IWM_RIDX_IS_CCK(ridx)) rate_flags |= IWM_RATE_MCS_CCK_MSK; /* XXX hard-coded tx rate */ tx->rate_n_flags = htole32(rate_flags | rinfo->plcp); return rinfo; } #define TB0_SIZE 16 static int iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ieee80211_node *ni, int ac) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct iwm_node *in = IWM_NODE(ni); struct iwm_tx_ring *ring; struct iwm_tx_data *data; struct iwm_tfd *desc; struct iwm_device_cmd *cmd; struct iwm_tx_cmd *tx; struct ieee80211_frame *wh; struct ieee80211_key *k = NULL; struct mbuf *m1; const struct iwm_rate *rinfo; uint32_t flags; u_int hdrlen; bus_dma_segment_t *seg, segs[IWM_MAX_SCATTER]; int nsegs; uint8_t tid, type; int i, totlen, error, pad; wh = mtod(m, struct ieee80211_frame *); hdrlen = ieee80211_anyhdrsize(wh); type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; tid = 0; ring = &sc->txq[ac]; desc = &ring->desc[ring->cur]; memset(desc, 0, sizeof(*desc)); data = &ring->data[ring->cur]; /* Fill out iwm_tx_cmd to send to the firmware */ cmd = &ring->cmd[ring->cur]; cmd->hdr.code = IWM_TX_CMD; cmd->hdr.flags = 0; cmd->hdr.qid = ring->qid; cmd->hdr.idx = ring->cur; tx = (void *)cmd->data; memset(tx, 0, sizeof(*tx)); rinfo = iwm_tx_fill_cmd(sc, in, wh, tx); /* Encrypt the frame if need be. */ if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { /* Retrieve key for TX && do software encryption. */ k = ieee80211_crypto_encap(ni, m); if (k == NULL) { m_freem(m); return (ENOBUFS); } /* 802.11 header may have moved. 
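* The pointer is therefore re-read from the (possibly reallocated) mbuf before it is used again below.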
*/ wh = mtod(m, struct ieee80211_frame *); } if (ieee80211_radiotap_active_vap(vap)) { struct iwm_tx_radiotap_header *tap = &sc->sc_txtap; tap->wt_flags = 0; tap->wt_chan_freq = htole16(ni->ni_chan->ic_freq); tap->wt_chan_flags = htole16(ni->ni_chan->ic_flags); tap->wt_rate = rinfo->rate; if (k != NULL) tap->wt_flags |= IEEE80211_RADIOTAP_F_WEP; ieee80211_radiotap_tx(vap, m); } totlen = m->m_pkthdr.len; flags = 0; if (!IEEE80211_IS_MULTICAST(wh->i_addr1)) { flags |= IWM_TX_CMD_FLG_ACK; } if (type != IEEE80211_FC0_TYPE_DATA && (totlen + IEEE80211_CRC_LEN > vap->iv_rtsthreshold) && !IEEE80211_IS_MULTICAST(wh->i_addr1)) { flags |= IWM_TX_CMD_FLG_PROT_REQUIRE; } if (IEEE80211_IS_MULTICAST(wh->i_addr1) || type != IEEE80211_FC0_TYPE_DATA) tx->sta_id = sc->sc_aux_sta.sta_id; else tx->sta_id = IWM_STATION_ID; if (type == IEEE80211_FC0_TYPE_MGT) { uint8_t subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK; if (subtype == IEEE80211_FC0_SUBTYPE_ASSOC_REQ || subtype == IEEE80211_FC0_SUBTYPE_REASSOC_REQ) tx->pm_frame_timeout = htole16(3); else tx->pm_frame_timeout = htole16(2); } else { tx->pm_frame_timeout = htole16(0); } if (hdrlen & 3) { /* First segment length must be a multiple of 4. */ flags |= IWM_TX_CMD_FLG_MH_PAD; pad = 4 - (hdrlen & 3); } else pad = 0; tx->driver_txop = 0; tx->next_frame_len = 0; tx->len = htole16(totlen); tx->tid_tspec = tid; tx->life_time = htole32(IWM_TX_CMD_LIFE_TIME_INFINITE); /* Set physical address of "scratch area". */ tx->dram_lsb_ptr = htole32(data->scratch_paddr); tx->dram_msb_ptr = iwm_get_dma_hi_addr(data->scratch_paddr); /* Copy 802.11 header in TX command. */ memcpy(((uint8_t *)tx) + sizeof(*tx), wh, hdrlen); flags |= IWM_TX_CMD_FLG_BT_DIS | IWM_TX_CMD_FLG_SEQ_CTL; tx->sec_ctl = 0; tx->tx_flags |= htole32(flags); /* Trim 802.11 header. */ m_adj(m, hdrlen); error = bus_dmamap_load_mbuf_sg(ring->data_dmat, data->map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { if (error != EFBIG) { device_printf(sc->sc_dev, "can't map mbuf (error %d)\n", error); m_freem(m); return error; } /* Too many DMA segments, linearize mbuf. */ m1 = m_collapse(m, M_NOWAIT, IWM_MAX_SCATTER - 2); if (m1 == NULL) { device_printf(sc->sc_dev, "%s: could not defrag mbuf\n", __func__); m_freem(m); return (ENOBUFS); } m = m1; error = bus_dmamap_load_mbuf_sg(ring->data_dmat, data->map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "can't map mbuf (error %d)\n", error); m_freem(m); return error; } } data->m = m; data->in = in; data->done = 0; IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "sending txd %p, in %p\n", data, data->in); KASSERT(data->in != NULL, ("node is NULL")); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "sending data: qid=%d idx=%d len=%d nsegs=%d\n", ring->qid, ring->cur, totlen, nsegs); /* Fill TX descriptor. */ desc->num_tbs = 2 + nsegs; desc->tbs[0].lo = htole32(data->cmd_paddr); desc->tbs[0].hi_n_len = htole16(iwm_get_dma_hi_addr(data->cmd_paddr)) | (TB0_SIZE << 4); desc->tbs[1].lo = htole32(data->cmd_paddr + TB0_SIZE); desc->tbs[1].hi_n_len = htole16(iwm_get_dma_hi_addr(data->cmd_paddr)) | ((sizeof(struct iwm_cmd_header) + sizeof(*tx) + hdrlen + pad - TB0_SIZE) << 4); /* Other DMA segments are for data payload. 
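* tbs[0] and tbs[1] already describe the command header and 802.11 header (split at TB0_SIZE); the payload segments are appended starting at tbs[2].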
*/ for (i = 0; i < nsegs; i++) { seg = &segs[i]; desc->tbs[i+2].lo = htole32(seg->ds_addr); desc->tbs[i+2].hi_n_len = \ htole16(iwm_get_dma_hi_addr(seg->ds_addr)) | ((seg->ds_len) << 4); } bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->cmd_dma.tag, ring->cmd_dma.map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); #if 0 iwm_update_sched(sc, ring->qid, ring->cur, tx->sta_id, le16toh(tx->len)); #endif /* Kick TX ring. */ ring->cur = (ring->cur + 1) % IWM_TX_RING_COUNT; IWM_WRITE(sc, IWM_HBUS_TARG_WRPTR, ring->qid << 8 | ring->cur); /* Mark TX ring as full if we reach a certain threshold. */ if (++ring->queued > IWM_TX_RING_HIMARK) { sc->qfullmsk |= 1 << ring->qid; } return 0; } static int iwm_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { struct ieee80211com *ic = ni->ni_ic; struct iwm_softc *sc = ic->ic_softc; int error = 0; IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "->%s begin\n", __func__); if ((sc->sc_flags & IWM_FLAG_HW_INITED) == 0) { m_freem(m); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "<-%s not RUNNING\n", __func__); return (ENETDOWN); } IWM_LOCK(sc); /* XXX fix this */ if (params == NULL) { error = iwm_tx(sc, m, ni, 0); } else { error = iwm_tx(sc, m, ni, 0); } sc->sc_tx_timer = 5; IWM_UNLOCK(sc); return (error); } /* * mvm/tx.c */ #if 0 /* * Note that there are transports that buffer frames before they reach * the firmware. This means that after flush_tx_path is called, the * queue might not be empty. The race-free way to handle this is to: * 1) set the station as draining * 2) flush the Tx path * 3) wait for the transport queues to be empty */ int iwm_mvm_flush_tx_path(struct iwm_softc *sc, int tfd_msk, int sync) { struct iwm_tx_path_flush_cmd flush_cmd = { .queues_ctl = htole32(tfd_msk), .flush_ctl = htole16(IWM_DUMP_TX_FIFO_FLUSH), }; int ret; ret = iwm_mvm_send_cmd_pdu(sc, IWM_TXPATH_FLUSH, sync ? 
IWM_CMD_SYNC : IWM_CMD_ASYNC, sizeof(flush_cmd), &flush_cmd); if (ret) device_printf(sc->sc_dev, "Flushing tx queue failed: %d\n", ret); return ret; } #endif /* * BEGIN mvm/sta.c */ static void iwm_mvm_add_sta_cmd_v6_to_v5(struct iwm_mvm_add_sta_cmd_v6 *cmd_v6, struct iwm_mvm_add_sta_cmd_v5 *cmd_v5) { memset(cmd_v5, 0, sizeof(*cmd_v5)); cmd_v5->add_modify = cmd_v6->add_modify; cmd_v5->tid_disable_tx = cmd_v6->tid_disable_tx; cmd_v5->mac_id_n_color = cmd_v6->mac_id_n_color; IEEE80211_ADDR_COPY(cmd_v5->addr, cmd_v6->addr); cmd_v5->sta_id = cmd_v6->sta_id; cmd_v5->modify_mask = cmd_v6->modify_mask; cmd_v5->station_flags = cmd_v6->station_flags; cmd_v5->station_flags_msk = cmd_v6->station_flags_msk; cmd_v5->add_immediate_ba_tid = cmd_v6->add_immediate_ba_tid; cmd_v5->remove_immediate_ba_tid = cmd_v6->remove_immediate_ba_tid; cmd_v5->add_immediate_ba_ssn = cmd_v6->add_immediate_ba_ssn; cmd_v5->sleep_tx_count = cmd_v6->sleep_tx_count; cmd_v5->sleep_state_flags = cmd_v6->sleep_state_flags; cmd_v5->assoc_id = cmd_v6->assoc_id; cmd_v5->beamform_flags = cmd_v6->beamform_flags; cmd_v5->tfd_queue_msk = cmd_v6->tfd_queue_msk; } static int iwm_mvm_send_add_sta_cmd_status(struct iwm_softc *sc, struct iwm_mvm_add_sta_cmd_v6 *cmd, int *status) { struct iwm_mvm_add_sta_cmd_v5 cmd_v5; if (sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_STA_KEY_CMD) { return iwm_mvm_send_cmd_pdu_status(sc, IWM_ADD_STA, sizeof(*cmd), cmd, status); } iwm_mvm_add_sta_cmd_v6_to_v5(cmd, &cmd_v5); return iwm_mvm_send_cmd_pdu_status(sc, IWM_ADD_STA, sizeof(cmd_v5), &cmd_v5, status); } /* send station add/update command to firmware */ static int iwm_mvm_sta_send_to_fw(struct iwm_softc *sc, struct iwm_node *in, int update) { struct iwm_mvm_add_sta_cmd_v6 add_sta_cmd; int ret; uint32_t status; memset(&add_sta_cmd, 0, sizeof(add_sta_cmd)); add_sta_cmd.sta_id = IWM_STATION_ID; add_sta_cmd.mac_id_n_color = htole32(IWM_FW_CMD_ID_AND_COLOR(IWM_DEFAULT_MACID, IWM_DEFAULT_COLOR)); if (!update) { add_sta_cmd.tfd_queue_msk = htole32(0xf); IEEE80211_ADDR_COPY(&add_sta_cmd.addr, in->in_ni.ni_bssid); } add_sta_cmd.add_modify = update ? 
1 : 0; add_sta_cmd.station_flags_msk |= htole32(IWM_STA_FLG_FAT_EN_MSK | IWM_STA_FLG_MIMO_EN_MSK); status = IWM_ADD_STA_SUCCESS; ret = iwm_mvm_send_add_sta_cmd_status(sc, &add_sta_cmd, &status); if (ret) return ret; switch (status) { case IWM_ADD_STA_SUCCESS: break; default: ret = EIO; device_printf(sc->sc_dev, "IWM_ADD_STA failed\n"); break; } return ret; } static int iwm_mvm_add_sta(struct iwm_softc *sc, struct iwm_node *in) { int ret; ret = iwm_mvm_sta_send_to_fw(sc, in, 0); if (ret) return ret; return 0; } static int iwm_mvm_update_sta(struct iwm_softc *sc, struct iwm_node *in) { return iwm_mvm_sta_send_to_fw(sc, in, 1); } static int iwm_mvm_add_int_sta_common(struct iwm_softc *sc, struct iwm_int_sta *sta, const uint8_t *addr, uint16_t mac_id, uint16_t color) { struct iwm_mvm_add_sta_cmd_v6 cmd; int ret; uint32_t status; memset(&cmd, 0, sizeof(cmd)); cmd.sta_id = sta->sta_id; cmd.mac_id_n_color = htole32(IWM_FW_CMD_ID_AND_COLOR(mac_id, color)); cmd.tfd_queue_msk = htole32(sta->tfd_queue_msk); if (addr) IEEE80211_ADDR_COPY(cmd.addr, addr); ret = iwm_mvm_send_add_sta_cmd_status(sc, &cmd, &status); if (ret) return ret; switch (status) { case IWM_ADD_STA_SUCCESS: IWM_DPRINTF(sc, IWM_DEBUG_RESET, "%s: Internal station added.\n", __func__); return 0; default: device_printf(sc->sc_dev, "%s: Add internal station failed, status=0x%x\n", __func__, status); ret = EIO; break; } return ret; } static int iwm_mvm_add_aux_sta(struct iwm_softc *sc) { int ret; sc->sc_aux_sta.sta_id = 3; sc->sc_aux_sta.tfd_queue_msk = 0; ret = iwm_mvm_add_int_sta_common(sc, &sc->sc_aux_sta, NULL, IWM_MAC_INDEX_AUX, 0); if (ret) memset(&sc->sc_aux_sta, 0, sizeof(sc->sc_aux_sta)); return ret; } /* * END mvm/sta.c */ /* * BEGIN mvm/quota.c */ static int iwm_mvm_update_quotas(struct iwm_softc *sc, struct iwm_node *in) { struct iwm_time_quota_cmd cmd; int i, idx, ret, num_active_macs, quota, quota_rem; int colors[IWM_MAX_BINDINGS] = { -1, -1, -1, -1, }; int n_ifs[IWM_MAX_BINDINGS] = {0, }; uint16_t id; memset(&cmd, 0, sizeof(cmd)); /* currently, PHY ID == binding ID */ if (in) { id = in->in_phyctxt->id; KASSERT(id < IWM_MAX_BINDINGS, ("invalid id")); colors[id] = in->in_phyctxt->color; if (1) n_ifs[id] = 1; } /* * The FW's scheduling session consists of * IWM_MVM_MAX_QUOTA fragments. Divide these fragments * equally between all the bindings that require quota */ num_active_macs = 0; for (i = 0; i < IWM_MAX_BINDINGS; i++) { cmd.quotas[i].id_and_color = htole32(IWM_FW_CTXT_INVALID); num_active_macs += n_ifs[i]; } quota = 0; quota_rem = 0; if (num_active_macs) { quota = IWM_MVM_MAX_QUOTA / num_active_macs; quota_rem = IWM_MVM_MAX_QUOTA % num_active_macs; } for (idx = 0, i = 0; i < IWM_MAX_BINDINGS; i++) { if (colors[i] < 0) continue; cmd.quotas[idx].id_and_color = htole32(IWM_FW_CMD_ID_AND_COLOR(i, colors[i])); if (n_ifs[i] <= 0) { cmd.quotas[idx].quota = htole32(0); cmd.quotas[idx].max_duration = htole32(0); } else { cmd.quotas[idx].quota = htole32(quota * n_ifs[i]); cmd.quotas[idx].max_duration = htole32(0); } idx++; } /* Give the remainder of the session to the first binding */ cmd.quotas[0].quota = htole32(le32toh(cmd.quotas[0].quota) + quota_rem); ret = iwm_mvm_send_cmd_pdu(sc, IWM_TIME_QUOTA_CMD, IWM_CMD_SYNC, sizeof(cmd), &cmd); if (ret) device_printf(sc->sc_dev, "%s: Failed to send quota: %d\n", __func__, ret); return ret; } /* * END mvm/quota.c */ /* * ieee80211 routines */ /* * Change to AUTH state in 80211 state machine. Roughly matches what * Linux does in bss_info_changed(). 
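 *
 * The sequence below: program the multicast filter, add or update the
 * MAC context, point PHY context 0 at the BSS channel, bind the
 * interface to it, add the firmware station, and finally request
 * session protection long enough for the authentication exchange.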
*/ static int iwm_auth(struct ieee80211vap *vap, struct iwm_softc *sc) { struct ieee80211_node *ni; struct iwm_node *in; struct iwm_vap *iv = IWM_VAP(vap); uint32_t duration; uint32_t min_duration; int error; /* * XXX i have a feeling that the vap node is being * freed from underneath us. Grr. */ ni = ieee80211_ref_node(vap->iv_bss); in = IWM_NODE(ni); IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_STATE, "%s: called; vap=%p, bss ni=%p\n", __func__, vap, ni); in->in_assoc = 0; error = iwm_allow_mcast(vap, sc); if (error) { device_printf(sc->sc_dev, "%s: failed to set multicast\n", __func__); goto out; } /* * This is where it deviates from what Linux does. * * Linux iwlwifi doesn't reset the nic each time, nor does it * call ctxt_add() here. Instead, it adds it during vap creation, * and always does a mac_ctx_changed(). * * The OpenBSD port doesn't attempt to do that - it resets things * at odd states and does the add here. * * So, until the state handling is fixed (i.e., we never reset * the NIC except for a firmware failure, which should drag * the NIC back to IDLE, re-setup and re-add all the mac/phy * contexts that are required), let's do a dirty hack here. */ if (iv->is_uploaded) { if ((error = iwm_mvm_mac_ctxt_changed(sc, vap)) != 0) { device_printf(sc->sc_dev, "%s: failed to add MAC\n", __func__); goto out; } } else { if ((error = iwm_mvm_mac_ctxt_add(sc, vap)) != 0) { device_printf(sc->sc_dev, "%s: failed to add MAC\n", __func__); goto out; } } if ((error = iwm_mvm_phy_ctxt_changed(sc, &sc->sc_phyctxt[0], in->in_ni.ni_chan, 1, 1)) != 0) { device_printf(sc->sc_dev, "%s: failed add phy ctxt\n", __func__); goto out; } in->in_phyctxt = &sc->sc_phyctxt[0]; if ((error = iwm_mvm_binding_add_vif(sc, in)) != 0) { device_printf(sc->sc_dev, "%s: binding cmd\n", __func__); goto out; } if ((error = iwm_mvm_add_sta(sc, in)) != 0) { device_printf(sc->sc_dev, "%s: failed to add MAC\n", __func__); goto out; } /* a bit superfluous?
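 * sc_auth_prot is a small handshake with the interrupt path: 1 means a
 * session-protection time event has been requested, 2 means the firmware
 * reported that the event started, 0 means the protection window was
 * missed, and -1 means the request was denied (see the
 * IWM_TIME_EVENT_NOTIFICATION case in iwm_notif_intr()).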
*/ while (sc->sc_auth_prot) msleep(&sc->sc_auth_prot, &sc->sc_mtx, 0, "iwmauth", 0); sc->sc_auth_prot = 1; duration = min(IWM_MVM_TE_SESSION_PROTECTION_MAX_TIME_MS, 200 + in->in_ni.ni_intval); min_duration = min(IWM_MVM_TE_SESSION_PROTECTION_MIN_TIME_MS, 100 + in->in_ni.ni_intval); iwm_mvm_protect_session(sc, in, duration, min_duration, 500); IWM_DPRINTF(sc, IWM_DEBUG_RESET, "%s: waiting for auth_prot\n", __func__); while (sc->sc_auth_prot != 2) { /* * well, meh, but if the kernel is sleeping for half a * second, we have bigger problems */ if (sc->sc_auth_prot == 0) { device_printf(sc->sc_dev, "%s: missed auth window!\n", __func__); error = ETIMEDOUT; goto out; } else if (sc->sc_auth_prot == -1) { device_printf(sc->sc_dev, "%s: no time event, denied!\n", __func__); sc->sc_auth_prot = 0; error = EAUTH; goto out; } msleep(&sc->sc_auth_prot, &sc->sc_mtx, 0, "iwmau2", 0); } IWM_DPRINTF(sc, IWM_DEBUG_RESET, "<-%s\n", __func__); error = 0; out: ieee80211_free_node(ni); return (error); } static int iwm_assoc(struct ieee80211vap *vap, struct iwm_softc *sc) { struct iwm_node *in = IWM_NODE(vap->iv_bss); int error; if ((error = iwm_mvm_update_sta(sc, in)) != 0) { device_printf(sc->sc_dev, "%s: failed to update STA\n", __func__); return error; } in->in_assoc = 1; if ((error = iwm_mvm_mac_ctxt_changed(sc, vap)) != 0) { device_printf(sc->sc_dev, "%s: failed to update MAC\n", __func__); return error; } return 0; } static int iwm_release(struct iwm_softc *sc, struct iwm_node *in) { /* * Ok, so *technically* the proper set of calls for going * from RUN back to SCAN is: * * iwm_mvm_power_mac_disable(sc, in); * iwm_mvm_mac_ctxt_changed(sc, in); * iwm_mvm_rm_sta(sc, in); * iwm_mvm_update_quotas(sc, NULL); * iwm_mvm_mac_ctxt_changed(sc, in); * iwm_mvm_binding_remove_vif(sc, in); * iwm_mvm_mac_ctxt_remove(sc, in); * * However, that freezes the device not matter which permutations * and modifications are attempted. Obviously, this driver is missing * something since it works in the Linux driver, but figuring out what * is missing is a little more complicated. Now, since we're going * back to nothing anyway, we'll just do a complete device reset. * Up your's, device! */ //iwm_mvm_flush_tx_path(sc, 0xf, 1); iwm_stop_device(sc); iwm_init_hw(sc); if (in) in->in_assoc = 0; return 0; #if 0 int error; iwm_mvm_power_mac_disable(sc, in); if ((error = iwm_mvm_mac_ctxt_changed(sc, in)) != 0) { device_printf(sc->sc_dev, "mac ctxt change fail 1 %d\n", error); return error; } if ((error = iwm_mvm_rm_sta(sc, in)) != 0) { device_printf(sc->sc_dev, "sta remove fail %d\n", error); return error; } error = iwm_mvm_rm_sta(sc, in); in->in_assoc = 0; iwm_mvm_update_quotas(sc, NULL); if ((error = iwm_mvm_mac_ctxt_changed(sc, in)) != 0) { device_printf(sc->sc_dev, "mac ctxt change fail 2 %d\n", error); return error; } iwm_mvm_binding_remove_vif(sc, in); iwm_mvm_mac_ctxt_remove(sc, in); return error; #endif } static struct ieee80211_node * iwm_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { return malloc(sizeof (struct iwm_node), M_80211_NODE, M_NOWAIT | M_ZERO); } static void iwm_setrates(struct iwm_softc *sc, struct iwm_node *in) { struct ieee80211_node *ni = &in->in_ni; struct iwm_lq_cmd *lq = &in->in_lq; int nrates = ni->ni_rates.rs_nrates; int i, ridx, tab = 0; int txant = 0; if (nrates > nitems(lq->rs_table)) { device_printf(sc->sc_dev, "%s: node supports %d rates, driver handles " "only %zu\n", __func__, nrates, nitems(lq->rs_table)); return; } /* * XXX .. 
and most of iwm_node is not initialised explicitly; * it's all just 0x0 passed to the firmware. */ /* first figure out which rates we should support */ /* XXX TODO: this isn't 11n aware /at all/ */ memset(&in->in_ridx, -1, sizeof(in->in_ridx)); IWM_DPRINTF(sc, IWM_DEBUG_TXRATE, "%s: nrates=%d\n", __func__, nrates); for (i = 0; i < nrates; i++) { int rate = ni->ni_rates.rs_rates[i] & IEEE80211_RATE_VAL; /* Map 802.11 rate to HW rate index. */ for (ridx = 0; ridx <= IWM_RIDX_MAX; ridx++) if (iwm_rates[ridx].rate == rate) break; if (ridx > IWM_RIDX_MAX) { device_printf(sc->sc_dev, "%s: WARNING: device rate for %d not found!\n", __func__, rate); } else { IWM_DPRINTF(sc, IWM_DEBUG_TXRATE, "%s: rate: i: %d, rate=%d, ridx=%d\n", __func__, i, rate, ridx); in->in_ridx[i] = ridx; } } /* then construct a lq_cmd based on those */ memset(lq, 0, sizeof(*lq)); lq->sta_id = IWM_STATION_ID; /* * are these used? (we don't do SISO or MIMO) * need to set them to non-zero, though, or we get an error. */ lq->single_stream_ant_msk = 1; lq->dual_stream_ant_msk = 1; /* * Build the actual rate selection table. * The lowest bits are the rates. Additionally, * CCK needs bit 9 to be set. The rest of the bits * we add to the table select the tx antenna * Note that we add the rates in the highest rate first * (opposite of ni_rates). */ /* * XXX TODO: this should be looping over the min of nrates * and LQ_MAX_RETRY_NUM. Sigh. */ for (i = 0; i < nrates; i++) { int nextant; if (txant == 0) txant = IWM_FW_VALID_TX_ANT(sc); nextant = 1<<(ffs(txant)-1); txant &= ~nextant; /* * Map the rate id into a rate index into * our hardware table containing the * configuration to use for this rate. */ ridx = in->in_ridx[(nrates-1)-i]; tab = iwm_rates[ridx].plcp; tab |= nextant << IWM_RATE_MCS_ANT_POS; if (IWM_RIDX_IS_CCK(ridx)) tab |= IWM_RATE_MCS_CCK_MSK; IWM_DPRINTF(sc, IWM_DEBUG_TXRATE, "station rate i=%d, rate=%d, hw=%x\n", i, iwm_rates[ridx].rate, tab); lq->rs_table[i] = htole32(tab); } /* then fill the rest with the lowest possible rate */ for (i = nrates; i < nitems(lq->rs_table); i++) { KASSERT(tab != 0, ("invalid tab")); lq->rs_table[i] = htole32(tab); } } static int iwm_media_change(struct ifnet *ifp) { struct ieee80211vap *vap = ifp->if_softc; struct ieee80211com *ic = vap->iv_ic; struct iwm_softc *sc = ic->ic_softc; int error; error = ieee80211_media_change(ifp); if (error != ENETRESET) return error; IWM_LOCK(sc); if (ic->ic_nrunning > 0) { iwm_stop(sc); iwm_init(sc); } IWM_UNLOCK(sc); return error; } static int iwm_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct iwm_vap *ivp = IWM_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct iwm_softc *sc = ic->ic_softc; struct iwm_node *in; int error; IWM_DPRINTF(sc, IWM_DEBUG_STATE, "switching state %s -> %s\n", ieee80211_state_name[vap->iv_state], ieee80211_state_name[nstate]); IEEE80211_UNLOCK(ic); IWM_LOCK(sc); /* disable beacon filtering if we're hopping out of RUN */ if (vap->iv_state == IEEE80211_S_RUN && nstate != vap->iv_state) { iwm_mvm_disable_beacon_filter(sc); if (((in = IWM_NODE(vap->iv_bss)) != NULL)) in->in_assoc = 0; iwm_release(sc, NULL); /* * It's impossible to directly go RUN->SCAN. If we iwm_release() * above then the card will be completely reinitialized, * so the driver must do everything necessary to bring the card * from INIT to SCAN. * * Additionally, upon receiving deauth frame from AP, * OpenBSD 802.11 stack puts the driver in IEEE80211_S_AUTH * state. 
This will also fail with this driver, so bring the FSM * from IEEE80211_S_RUN to IEEE80211_S_SCAN in this case as well. * * XXX TODO: fix this for FreeBSD! */ if (nstate == IEEE80211_S_SCAN || nstate == IEEE80211_S_AUTH || nstate == IEEE80211_S_ASSOC) { IWM_DPRINTF(sc, IWM_DEBUG_STATE, "Force transition to INIT; MGT=%d\n", arg); IWM_UNLOCK(sc); IEEE80211_LOCK(ic); vap->iv_newstate(vap, IEEE80211_S_INIT, arg); IWM_DPRINTF(sc, IWM_DEBUG_STATE, "Going INIT->SCAN\n"); nstate = IEEE80211_S_SCAN; IEEE80211_UNLOCK(ic); IWM_LOCK(sc); } } switch (nstate) { case IEEE80211_S_INIT: sc->sc_scanband = 0; break; case IEEE80211_S_AUTH: if ((error = iwm_auth(vap, sc)) != 0) { device_printf(sc->sc_dev, "%s: could not move to auth state: %d\n", __func__, error); break; } break; case IEEE80211_S_ASSOC: if ((error = iwm_assoc(vap, sc)) != 0) { device_printf(sc->sc_dev, "%s: failed to associate: %d\n", __func__, error); break; } break; case IEEE80211_S_RUN: { struct iwm_host_cmd cmd = { .id = IWM_LQ_CMD, .len = { sizeof(in->in_lq), }, .flags = IWM_CMD_SYNC, }; /* Update the association state, now we have it all */ /* (eg associd comes in at this point */ error = iwm_assoc(vap, sc); if (error != 0) { device_printf(sc->sc_dev, "%s: failed to update association state: %d\n", __func__, error); break; } in = IWM_NODE(vap->iv_bss); iwm_mvm_power_mac_update_mode(sc, in); iwm_mvm_enable_beacon_filter(sc, in); iwm_mvm_update_quotas(sc, in); iwm_setrates(sc, in); cmd.data[0] = &in->in_lq; if ((error = iwm_send_cmd(sc, &cmd)) != 0) { device_printf(sc->sc_dev, "%s: IWM_LQ_CMD failed\n", __func__); } break; } default: break; } IWM_UNLOCK(sc); IEEE80211_LOCK(ic); return (ivp->iv_newstate(vap, nstate, arg)); } void iwm_endscan_cb(void *arg, int pending) { struct iwm_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; int done; int error; IWM_DPRINTF(sc, IWM_DEBUG_SCAN | IWM_DEBUG_TRACE, "%s: scan ended\n", __func__); IWM_LOCK(sc); if (sc->sc_scanband == IEEE80211_CHAN_2GHZ && sc->sc_nvm.sku_cap_band_52GHz_enable) { done = 0; if ((error = iwm_mvm_scan_request(sc, IEEE80211_CHAN_5GHZ, 0, NULL, 0)) != 0) { device_printf(sc->sc_dev, "could not initiate scan\n"); done = 1; } } else { done = 1; } if (done) { IWM_UNLOCK(sc); ieee80211_scan_done(TAILQ_FIRST(&ic->ic_vaps)); IWM_LOCK(sc); sc->sc_scanband = 0; } IWM_UNLOCK(sc); } static int iwm_init_hw(struct iwm_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; int error, i, qid; if ((error = iwm_start_hw(sc)) != 0) return error; if ((error = iwm_run_init_mvm_ucode(sc, 0)) != 0) { return error; } /* * should stop and start HW since that INIT * image just loaded */ iwm_stop_device(sc); if ((error = iwm_start_hw(sc)) != 0) { device_printf(sc->sc_dev, "could not initialize hardware\n"); return error; } /* omstart, this time with the regular firmware */ error = iwm_mvm_load_ucode_wait_alive(sc, IWM_UCODE_TYPE_REGULAR); if (error) { device_printf(sc->sc_dev, "could not load firmware\n"); goto error; } if ((error = iwm_send_tx_ant_cfg(sc, IWM_FW_VALID_TX_ANT(sc))) != 0) goto error; /* Send phy db control command and then phy db calibration*/ if ((error = iwm_send_phy_db_data(sc)) != 0) goto error; if ((error = iwm_send_phy_cfg_cmd(sc)) != 0) goto error; /* Add auxiliary station for scanning */ if ((error = iwm_mvm_add_aux_sta(sc)) != 0) goto error; for (i = 0; i < IWM_NUM_PHY_CTX; i++) { /* * The channel used here isn't relevant as it's * going to be overwritten in the other flows. * For now use the first channel we have. 
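 * (The BSS channel is programmed later, via iwm_mvm_phy_ctxt_changed()
 * in iwm_auth(), once it is actually known.)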
*/ if ((error = iwm_mvm_phy_ctxt_add(sc, &sc->sc_phyctxt[i], &ic->ic_channels[1], 1, 1)) != 0) goto error; } error = iwm_mvm_power_update_device(sc); if (error) goto error; /* Mark TX rings as active. */ for (qid = 0; qid < 4; qid++) { iwm_enable_txq(sc, qid, qid); } return 0; error: iwm_stop_device(sc); return error; } /* Allow multicast from our BSSID. */ static int iwm_allow_mcast(struct ieee80211vap *vap, struct iwm_softc *sc) { struct ieee80211_node *ni = vap->iv_bss; struct iwm_mcast_filter_cmd *cmd; size_t size; int error; size = roundup(sizeof(*cmd), 4); cmd = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO); if (cmd == NULL) return ENOMEM; cmd->filter_own = 1; cmd->port_id = 0; cmd->count = 0; cmd->pass_all = 1; IEEE80211_ADDR_COPY(cmd->bssid, ni->ni_bssid); error = iwm_mvm_send_cmd_pdu(sc, IWM_MCAST_FILTER_CMD, IWM_CMD_SYNC, size, cmd); free(cmd, M_DEVBUF); return (error); } static void iwm_init(struct iwm_softc *sc) { int error; if (sc->sc_flags & IWM_FLAG_HW_INITED) { return; } sc->sc_generation++; sc->sc_flags &= ~IWM_FLAG_STOPPED; if ((error = iwm_init_hw(sc)) != 0) { iwm_stop(sc); return; } /* * Ok, firmware loaded and we are jogging */ sc->sc_flags |= IWM_FLAG_HW_INITED; callout_reset(&sc->sc_watchdog_to, hz, iwm_watchdog, sc); } static int iwm_transmit(struct ieee80211com *ic, struct mbuf *m) { struct iwm_softc *sc; int error; sc = ic->ic_softc; IWM_LOCK(sc); if ((sc->sc_flags & IWM_FLAG_HW_INITED) == 0) { IWM_UNLOCK(sc); return (ENXIO); } error = mbufq_enqueue(&sc->sc_snd, m); if (error) { IWM_UNLOCK(sc); return (error); } iwm_start(sc); IWM_UNLOCK(sc); return (0); } /* * Dequeue packets from sendq and call send. */ static void iwm_start(struct iwm_softc *sc) { struct ieee80211_node *ni; struct mbuf *m; int ac = 0; IWM_DPRINTF(sc, IWM_DEBUG_XMIT | IWM_DEBUG_TRACE, "->%s\n", __func__); while (sc->qfullmsk == 0 && (m = mbufq_dequeue(&sc->sc_snd)) != NULL) { ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; if (iwm_tx(sc, m, ni, ac) != 0) { if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); ieee80211_free_node(ni); continue; } sc->sc_tx_timer = 15; } IWM_DPRINTF(sc, IWM_DEBUG_XMIT | IWM_DEBUG_TRACE, "<-%s\n", __func__); } static void iwm_stop(struct iwm_softc *sc) { sc->sc_flags &= ~IWM_FLAG_HW_INITED; sc->sc_flags |= IWM_FLAG_STOPPED; sc->sc_generation++; sc->sc_scanband = 0; sc->sc_auth_prot = 0; sc->sc_tx_timer = 0; iwm_stop_device(sc); } static void iwm_watchdog(void *arg) { struct iwm_softc *sc = arg; if (sc->sc_tx_timer > 0) { if (--sc->sc_tx_timer == 0) { device_printf(sc->sc_dev, "device timeout\n"); #ifdef IWM_DEBUG iwm_nic_error(sc); #endif iwm_stop(sc); counter_u64_add(sc->sc_ic.ic_oerrors, 1); return; } } callout_reset(&sc->sc_watchdog_to, hz, iwm_watchdog, sc); } static void iwm_parent(struct ieee80211com *ic) { struct iwm_softc *sc = ic->ic_softc; int startall = 0; IWM_LOCK(sc); if (ic->ic_nrunning > 0) { if (!(sc->sc_flags & IWM_FLAG_HW_INITED)) { iwm_init(sc); startall = 1; } } else if (sc->sc_flags & IWM_FLAG_HW_INITED) iwm_stop(sc); IWM_UNLOCK(sc); if (startall) ieee80211_start_all(ic); } /* * The interrupt side of things */ /* * error dumping routines are from iwlwifi/mvm/utils.c */ /* * Note: This structure is read from the device with IO accesses, * and the reading already does the endian conversion. As it is * read with uint32_t-sized accesses, any members with a different size * need to be ordered correctly though! 
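 * (As it happens, every member below is a uint32_t, so no special
 * ordering is needed here.)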
*/ struct iwm_error_event_table { uint32_t valid; /* (nonzero) valid, (0) log is empty */ uint32_t error_id; /* type of error */ uint32_t pc; /* program counter */ uint32_t blink1; /* branch link */ uint32_t blink2; /* branch link */ uint32_t ilink1; /* interrupt link */ uint32_t ilink2; /* interrupt link */ uint32_t data1; /* error-specific data */ uint32_t data2; /* error-specific data */ uint32_t data3; /* error-specific data */ uint32_t bcon_time; /* beacon timer */ uint32_t tsf_low; /* network timestamp function timer */ uint32_t tsf_hi; /* network timestamp function timer */ uint32_t gp1; /* GP1 timer register */ uint32_t gp2; /* GP2 timer register */ uint32_t gp3; /* GP3 timer register */ uint32_t ucode_ver; /* uCode version */ uint32_t hw_ver; /* HW Silicon version */ uint32_t brd_ver; /* HW board version */ uint32_t log_pc; /* log program counter */ uint32_t frame_ptr; /* frame pointer */ uint32_t stack_ptr; /* stack pointer */ uint32_t hcmd; /* last host command header */ uint32_t isr0; /* isr status register LMPM_NIC_ISR0: * rxtx_flag */ uint32_t isr1; /* isr status register LMPM_NIC_ISR1: * host_flag */ uint32_t isr2; /* isr status register LMPM_NIC_ISR2: * enc_flag */ uint32_t isr3; /* isr status register LMPM_NIC_ISR3: * time_flag */ uint32_t isr4; /* isr status register LMPM_NIC_ISR4: * wico interrupt */ uint32_t isr_pref; /* isr status register LMPM_NIC_PREF_STAT */ uint32_t wait_event; /* wait event() caller address */ uint32_t l2p_control; /* L2pControlField */ uint32_t l2p_duration; /* L2pDurationField */ uint32_t l2p_mhvalid; /* L2pMhValidBits */ uint32_t l2p_addr_match; /* L2pAddrMatchStat */ uint32_t lmpm_pmg_sel; /* indicate which clocks are turned on * (LMPM_PMG_SEL) */ uint32_t u_timestamp; /* indicate when the date and time of the * compilation */ uint32_t flow_handler; /* FH read/write pointers, RX credit */ } __packed; #define ERROR_START_OFFSET (1 * sizeof(uint32_t)) #define ERROR_ELEM_SIZE (7 * sizeof(uint32_t)) #ifdef IWM_DEBUG struct { const char *name; uint8_t num; } advanced_lookup[] = { { "NMI_INTERRUPT_WDG", 0x34 }, { "SYSASSERT", 0x35 }, { "UCODE_VERSION_MISMATCH", 0x37 }, { "BAD_COMMAND", 0x38 }, { "NMI_INTERRUPT_DATA_ACTION_PT", 0x3C }, { "FATAL_ERROR", 0x3D }, { "NMI_TRM_HW_ERR", 0x46 }, { "NMI_INTERRUPT_TRM", 0x4C }, { "NMI_INTERRUPT_BREAK_POINT", 0x54 }, { "NMI_INTERRUPT_WDG_RXF_FULL", 0x5C }, { "NMI_INTERRUPT_WDG_NO_RBD_RXF_FULL", 0x64 }, { "NMI_INTERRUPT_HOST", 0x66 }, { "NMI_INTERRUPT_ACTION_PT", 0x7C }, { "NMI_INTERRUPT_UNKNOWN", 0x84 }, { "NMI_INTERRUPT_INST_ACTION_PT", 0x86 }, { "ADVANCED_SYSASSERT", 0 }, }; static const char * iwm_desc_lookup(uint32_t num) { int i; for (i = 0; i < nitems(advanced_lookup) - 1; i++) if (advanced_lookup[i].num == num) return advanced_lookup[i].name; /* No entry matches 'num', so it is the last: ADVANCED_SYSASSERT */ return advanced_lookup[i].name; } /* * Support for dumping the error log seemed like a good idea ... * but it's mostly hex junk and the only sensible thing is the * hw/ucode revision (which we know anyway). Since it's here, * I'll just leave it in, just in case e.g. the Intel guys want to * help us decipher some "ADVANCED_SYSASSERT" later. 
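 * iwm_desc_lookup() below translates the firmware's error_id into a
 * name via advanced_lookup[]; any unknown value falls through to the
 * final "ADVANCED_SYSASSERT" entry.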
*/ static void iwm_nic_error(struct iwm_softc *sc) { struct iwm_error_event_table table; uint32_t base; device_printf(sc->sc_dev, "dumping device error log\n"); base = sc->sc_uc.uc_error_event_table; if (base < 0x800000 || base >= 0x80C000) { device_printf(sc->sc_dev, "Not valid error log pointer 0x%08x\n", base); return; } if (iwm_read_mem(sc, base, &table, sizeof(table)/sizeof(uint32_t)) != 0) { device_printf(sc->sc_dev, "reading errlog failed\n"); return; } if (!table.valid) { device_printf(sc->sc_dev, "errlog not found, skipping\n"); return; } if (ERROR_START_OFFSET <= table.valid * ERROR_ELEM_SIZE) { device_printf(sc->sc_dev, "Start IWL Error Log Dump:\n"); device_printf(sc->sc_dev, "Status: 0x%x, count: %d\n", sc->sc_flags, table.valid); } device_printf(sc->sc_dev, "0x%08X | %-28s\n", table.error_id, iwm_desc_lookup(table.error_id)); device_printf(sc->sc_dev, "%08X | uPc\n", table.pc); device_printf(sc->sc_dev, "%08X | branchlink1\n", table.blink1); device_printf(sc->sc_dev, "%08X | branchlink2\n", table.blink2); device_printf(sc->sc_dev, "%08X | interruptlink1\n", table.ilink1); device_printf(sc->sc_dev, "%08X | interruptlink2\n", table.ilink2); device_printf(sc->sc_dev, "%08X | data1\n", table.data1); device_printf(sc->sc_dev, "%08X | data2\n", table.data2); device_printf(sc->sc_dev, "%08X | data3\n", table.data3); device_printf(sc->sc_dev, "%08X | beacon time\n", table.bcon_time); device_printf(sc->sc_dev, "%08X | tsf low\n", table.tsf_low); device_printf(sc->sc_dev, "%08X | tsf hi\n", table.tsf_hi); device_printf(sc->sc_dev, "%08X | time gp1\n", table.gp1); device_printf(sc->sc_dev, "%08X | time gp2\n", table.gp2); device_printf(sc->sc_dev, "%08X | time gp3\n", table.gp3); device_printf(sc->sc_dev, "%08X | uCode version\n", table.ucode_ver); device_printf(sc->sc_dev, "%08X | hw version\n", table.hw_ver); device_printf(sc->sc_dev, "%08X | board version\n", table.brd_ver); device_printf(sc->sc_dev, "%08X | hcmd\n", table.hcmd); device_printf(sc->sc_dev, "%08X | isr0\n", table.isr0); device_printf(sc->sc_dev, "%08X | isr1\n", table.isr1); device_printf(sc->sc_dev, "%08X | isr2\n", table.isr2); device_printf(sc->sc_dev, "%08X | isr3\n", table.isr3); device_printf(sc->sc_dev, "%08X | isr4\n", table.isr4); device_printf(sc->sc_dev, "%08X | isr_pref\n", table.isr_pref); device_printf(sc->sc_dev, "%08X | wait_event\n", table.wait_event); device_printf(sc->sc_dev, "%08X | l2p_control\n", table.l2p_control); device_printf(sc->sc_dev, "%08X | l2p_duration\n", table.l2p_duration); device_printf(sc->sc_dev, "%08X | l2p_mhvalid\n", table.l2p_mhvalid); device_printf(sc->sc_dev, "%08X | l2p_addr_match\n", table.l2p_addr_match); device_printf(sc->sc_dev, "%08X | lmpm_pmg_sel\n", table.lmpm_pmg_sel); device_printf(sc->sc_dev, "%08X | timestamp\n", table.u_timestamp); device_printf(sc->sc_dev, "%08X | flow_handler\n", table.flow_handler); } #endif #define SYNC_RESP_STRUCT(_var_, _pkt_) \ do { \ bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD);\ _var_ = (void *)((_pkt_)+1); \ } while (/*CONSTCOND*/0) #define SYNC_RESP_PTR(_ptr_, _len_, _pkt_) \ do { \ bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD);\ _ptr_ = (void *)((_pkt_)+1); \ } while (/*CONSTCOND*/0) #define ADVANCE_RXQ(sc) (sc->rxq.cur = (sc->rxq.cur + 1) % IWM_RX_RING_COUNT); /* * Process an IWM_CSR_INT_BIT_FH_RX or IWM_CSR_INT_BIT_SW_RX interrupt. 
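 * Walk the RX ring from rxq.cur up to the closed_rb_num index found in
 * the shared status page, dispatch each packet on pkt->hdr.code, and
 * finally hand the (8-aligned) read pointer back to the firmware.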
* Basic structure from if_iwn */ static void iwm_notif_intr(struct iwm_softc *sc) { uint16_t hw; bus_dmamap_sync(sc->rxq.stat_dma.tag, sc->rxq.stat_dma.map, BUS_DMASYNC_POSTREAD); hw = le16toh(sc->rxq.stat->closed_rb_num) & 0xfff; while (sc->rxq.cur != hw) { struct iwm_rx_ring *ring = &sc->rxq; struct iwm_rx_data *data = &sc->rxq.data[sc->rxq.cur]; struct iwm_rx_packet *pkt; struct iwm_cmd_response *cresp; int qid, idx; bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); pkt = mtod(data->m, struct iwm_rx_packet *); qid = pkt->hdr.qid & ~0x80; idx = pkt->hdr.idx; IWM_DPRINTF(sc, IWM_DEBUG_INTR, "rx packet qid=%d idx=%d flags=%x type=%x %d %d\n", pkt->hdr.qid & ~0x80, pkt->hdr.idx, pkt->hdr.flags, pkt->hdr.code, sc->rxq.cur, hw); /* * randomly get these from the firmware, no idea why. * they at least seem harmless, so just ignore them for now */ if (__predict_false((pkt->hdr.code == 0 && qid == 0 && idx == 0) || pkt->len_n_flags == htole32(0x55550000))) { ADVANCE_RXQ(sc); continue; } switch (pkt->hdr.code) { case IWM_REPLY_RX_PHY_CMD: iwm_mvm_rx_rx_phy_cmd(sc, pkt, data); break; case IWM_REPLY_RX_MPDU_CMD: iwm_mvm_rx_rx_mpdu(sc, pkt, data); break; case IWM_TX_CMD: iwm_mvm_rx_tx_cmd(sc, pkt, data); break; case IWM_MISSED_BEACONS_NOTIFICATION: { struct iwm_missed_beacons_notif *resp; int missed; /* XXX look at mac_id to determine interface ID */ struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); SYNC_RESP_STRUCT(resp, pkt); missed = le32toh(resp->consec_missed_beacons); IWM_DPRINTF(sc, IWM_DEBUG_BEACON | IWM_DEBUG_STATE, "%s: MISSED_BEACON: mac_id=%d, " "consec_since_last_rx=%d, consec=%d, num_expect=%d " "num_rx=%d\n", __func__, le32toh(resp->mac_id), le32toh(resp->consec_missed_beacons_since_last_rx), le32toh(resp->consec_missed_beacons), le32toh(resp->num_expected_beacons), le32toh(resp->num_recvd_beacons)); /* Be paranoid */ if (vap == NULL) break; /* XXX no net80211 locking? 
*/ if (vap->iv_state == IEEE80211_S_RUN && (ic->ic_flags & IEEE80211_F_SCAN) == 0) { if (missed > vap->iv_bmissthreshold) { /* XXX bad locking; turn into task */ IWM_UNLOCK(sc); ieee80211_beacon_miss(ic); IWM_LOCK(sc); } } break; } case IWM_MVM_ALIVE: { struct iwm_mvm_alive_resp *resp; SYNC_RESP_STRUCT(resp, pkt); sc->sc_uc.uc_error_event_table = le32toh(resp->error_event_table_ptr); sc->sc_uc.uc_log_event_table = le32toh(resp->log_event_table_ptr); sc->sched_base = le32toh(resp->scd_base_ptr); sc->sc_uc.uc_ok = resp->status == IWM_ALIVE_STATUS_OK; sc->sc_uc.uc_intr = 1; wakeup(&sc->sc_uc); break; } case IWM_CALIB_RES_NOTIF_PHY_DB: { struct iwm_calib_res_notif_phy_db *phy_db_notif; SYNC_RESP_STRUCT(phy_db_notif, pkt); iwm_phy_db_set_section(sc, phy_db_notif); break; } case IWM_STATISTICS_NOTIFICATION: { struct iwm_notif_statistics *stats; SYNC_RESP_STRUCT(stats, pkt); memcpy(&sc->sc_stats, stats, sizeof(sc->sc_stats)); sc->sc_noise = iwm_get_noise(&stats->rx.general); break; } case IWM_NVM_ACCESS_CMD: if (sc->sc_wantresp == ((qid << 16) | idx)) { bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); memcpy(sc->sc_cmd_resp, pkt, sizeof(sc->sc_cmd_resp)); } break; case IWM_PHY_CONFIGURATION_CMD: case IWM_TX_ANT_CONFIGURATION_CMD: case IWM_ADD_STA: case IWM_MAC_CONTEXT_CMD: case IWM_REPLY_SF_CFG_CMD: case IWM_POWER_TABLE_CMD: case IWM_PHY_CONTEXT_CMD: case IWM_BINDING_CONTEXT_CMD: case IWM_TIME_EVENT_CMD: case IWM_SCAN_REQUEST_CMD: case IWM_REPLY_BEACON_FILTERING_CMD: case IWM_MAC_PM_POWER_TABLE: case IWM_TIME_QUOTA_CMD: case IWM_REMOVE_STA: case IWM_TXPATH_FLUSH: case IWM_LQ_CMD: SYNC_RESP_STRUCT(cresp, pkt); if (sc->sc_wantresp == ((qid << 16) | idx)) { memcpy(sc->sc_cmd_resp, pkt, sizeof(*pkt)+sizeof(*cresp)); } break; /* ignore */ case 0x6c: /* IWM_PHY_DB_CMD, no idea why it's not in fw-api.h */ break; case IWM_INIT_COMPLETE_NOTIF: sc->sc_init_complete = 1; wakeup(&sc->sc_init_complete); break; case IWM_SCAN_COMPLETE_NOTIFICATION: { struct iwm_scan_complete_notif *notif; SYNC_RESP_STRUCT(notif, pkt); taskqueue_enqueue(sc->sc_tq, &sc->sc_es_task); break; } case IWM_REPLY_ERROR: { struct iwm_error_resp *resp; SYNC_RESP_STRUCT(resp, pkt); device_printf(sc->sc_dev, "firmware error 0x%x, cmd 0x%x\n", le32toh(resp->error_type), resp->cmd_id); break; } case IWM_TIME_EVENT_NOTIFICATION: { struct iwm_time_event_notif *notif; SYNC_RESP_STRUCT(notif, pkt); if (notif->status) { if (le32toh(notif->action) & IWM_TE_V2_NOTIF_HOST_EVENT_START) sc->sc_auth_prot = 2; else sc->sc_auth_prot = 0; } else { sc->sc_auth_prot = -1; } IWM_DPRINTF(sc, IWM_DEBUG_INTR, "%s: time event notification auth_prot=%d\n", __func__, sc->sc_auth_prot); wakeup(&sc->sc_auth_prot); break; } case IWM_MCAST_FILTER_CMD: break; default: device_printf(sc->sc_dev, "frame %d/%d %x UNHANDLED (this should " "not happen)\n", qid, idx, pkt->len_n_flags); break; } /* * Why test bit 0x80? The Linux driver: * * There is one exception: uCode sets bit 15 when it * originates the response/notification, i.e. when the * response/notification is not a direct response to a * command sent by the driver. For example, uCode issues * IWM_REPLY_RX when it sends a received frame to the driver; * it is not a direct response to any driver command. * * Ok, so since when is 7 == 15? Well, the Linux driver * uses a slightly different format for pkt->hdr, and "qid" * is actually the upper byte of a two-byte field. 
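 * Since "qid" here is only the upper byte of that field, the firmware's
 * bit 15 shows up as bit 7, which is what the test below checks; direct
 * responses to driver commands (bit clear) are completed through
 * iwm_cmd_done().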
*/ if (!(pkt->hdr.qid & (1 << 7))) { iwm_cmd_done(sc, pkt); } ADVANCE_RXQ(sc); } IWM_CLRBITS(sc, IWM_CSR_GP_CNTRL, IWM_CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); /* * Tell the firmware what we have processed. * Seems like the hardware gets upset unless we align * the write by 8?? */ hw = (hw == 0) ? IWM_RX_RING_COUNT - 1 : hw - 1; IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_WPTR, hw & ~7); } static void iwm_intr(void *arg) { struct iwm_softc *sc = arg; int handled = 0; int r1, r2, rv = 0; int isperiodic = 0; IWM_LOCK(sc); IWM_WRITE(sc, IWM_CSR_INT_MASK, 0); if (sc->sc_flags & IWM_FLAG_USE_ICT) { uint32_t *ict = sc->ict_dma.vaddr; int tmp; tmp = htole32(ict[sc->ict_cur]); if (!tmp) goto out_ena; /* * ok, there was something. keep plowing until we have all. */ r1 = r2 = 0; while (tmp) { r1 |= tmp; ict[sc->ict_cur] = 0; sc->ict_cur = (sc->ict_cur+1) % IWM_ICT_COUNT; tmp = htole32(ict[sc->ict_cur]); } /* this is where the fun begins. don't ask */ if (r1 == 0xffffffff) r1 = 0; /* i am not expected to understand this */ if (r1 & 0xc0000) r1 |= 0x8000; r1 = (0xff & r1) | ((0xff00 & r1) << 16); } else { r1 = IWM_READ(sc, IWM_CSR_INT); /* "hardware gone" (where, fishing?) */ if (r1 == 0xffffffff || (r1 & 0xfffffff0) == 0xa5a5a5a0) goto out; r2 = IWM_READ(sc, IWM_CSR_FH_INT_STATUS); } if (r1 == 0 && r2 == 0) { goto out_ena; } IWM_WRITE(sc, IWM_CSR_INT, r1 | ~sc->sc_intmask); /* ignored */ handled |= (r1 & (IWM_CSR_INT_BIT_ALIVE /*| IWM_CSR_INT_BIT_SCD*/)); if (r1 & IWM_CSR_INT_BIT_SW_ERR) { #ifdef IWM_DEBUG int i; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); iwm_nic_error(sc); /* Dump driver status (TX and RX rings) while we're here. */ device_printf(sc->sc_dev, "driver status:\n"); for (i = 0; i < IWM_MVM_MAX_QUEUES; i++) { struct iwm_tx_ring *ring = &sc->txq[i]; device_printf(sc->sc_dev, " tx ring %2d: qid=%-2d cur=%-3d " "queued=%-3d\n", i, ring->qid, ring->cur, ring->queued); } device_printf(sc->sc_dev, " rx ring: cur=%d\n", sc->rxq.cur); device_printf(sc->sc_dev, " 802.11 state %d\n", vap->iv_state); #endif device_printf(sc->sc_dev, "fatal firmware error\n"); iwm_stop(sc); rv = 1; goto out; } if (r1 & IWM_CSR_INT_BIT_HW_ERR) { handled |= IWM_CSR_INT_BIT_HW_ERR; device_printf(sc->sc_dev, "hardware error, stopping device\n"); iwm_stop(sc); rv = 1; goto out; } /* firmware chunk loaded */ if (r1 & IWM_CSR_INT_BIT_FH_TX) { IWM_WRITE(sc, IWM_CSR_FH_INT_STATUS, IWM_CSR_FH_INT_TX_MASK); handled |= IWM_CSR_INT_BIT_FH_TX; sc->sc_fw_chunk_done = 1; wakeup(&sc->sc_fw); } if (r1 & IWM_CSR_INT_BIT_RF_KILL) { handled |= IWM_CSR_INT_BIT_RF_KILL; if (iwm_check_rfkill(sc)) { device_printf(sc->sc_dev, "%s: rfkill switch, disabling interface\n", __func__); iwm_stop(sc); } } /* * The Linux driver uses periodic interrupts to avoid races. * We cargo-cult like it's going out of fashion. 
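 * Roughly: a periodic interrupt with no RX work pending disables
 * further periodic interrupts, while servicing a normal FH_RX/SW_RX
 * interrupt re-arms them once iwm_notif_intr() has run.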
*/ if (r1 & IWM_CSR_INT_BIT_RX_PERIODIC) { handled |= IWM_CSR_INT_BIT_RX_PERIODIC; IWM_WRITE(sc, IWM_CSR_INT, IWM_CSR_INT_BIT_RX_PERIODIC); if ((r1 & (IWM_CSR_INT_BIT_FH_RX | IWM_CSR_INT_BIT_SW_RX)) == 0) IWM_WRITE_1(sc, IWM_CSR_INT_PERIODIC_REG, IWM_CSR_INT_PERIODIC_DIS); isperiodic = 1; } if ((r1 & (IWM_CSR_INT_BIT_FH_RX | IWM_CSR_INT_BIT_SW_RX)) || isperiodic) { handled |= (IWM_CSR_INT_BIT_FH_RX | IWM_CSR_INT_BIT_SW_RX); IWM_WRITE(sc, IWM_CSR_FH_INT_STATUS, IWM_CSR_FH_INT_RX_MASK); iwm_notif_intr(sc); /* enable periodic interrupt, see above */ if (r1 & (IWM_CSR_INT_BIT_FH_RX | IWM_CSR_INT_BIT_SW_RX) && !isperiodic) IWM_WRITE_1(sc, IWM_CSR_INT_PERIODIC_REG, IWM_CSR_INT_PERIODIC_ENA); } if (__predict_false(r1 & ~handled)) IWM_DPRINTF(sc, IWM_DEBUG_INTR, "%s: unhandled interrupts: %x\n", __func__, r1); rv = 1; out_ena: iwm_restore_interrupts(sc); out: IWM_UNLOCK(sc); return; } /* * Autoconf glue-sniffing */ #define PCI_VENDOR_INTEL 0x8086 #define PCI_PRODUCT_INTEL_WL_3160_1 0x08b3 #define PCI_PRODUCT_INTEL_WL_3160_2 0x08b4 #define PCI_PRODUCT_INTEL_WL_7260_1 0x08b1 #define PCI_PRODUCT_INTEL_WL_7260_2 0x08b2 #define PCI_PRODUCT_INTEL_WL_7265_1 0x095a #define PCI_PRODUCT_INTEL_WL_7265_2 0x095b static const struct iwm_devices { uint16_t device; const char *name; } iwm_devices[] = { { PCI_PRODUCT_INTEL_WL_3160_1, "Intel Dual Band Wireless AC 3160" }, { PCI_PRODUCT_INTEL_WL_3160_2, "Intel Dual Band Wireless AC 3160" }, { PCI_PRODUCT_INTEL_WL_7260_1, "Intel Dual Band Wireless AC 7260" }, { PCI_PRODUCT_INTEL_WL_7260_2, "Intel Dual Band Wireless AC 7260" }, { PCI_PRODUCT_INTEL_WL_7265_1, "Intel Dual Band Wireless AC 7265" }, { PCI_PRODUCT_INTEL_WL_7265_2, "Intel Dual Band Wireless AC 7265" }, }; static int iwm_probe(device_t dev) { int i; for (i = 0; i < nitems(iwm_devices); i++) if (pci_get_vendor(dev) == PCI_VENDOR_INTEL && pci_get_device(dev) == iwm_devices[i].device) { device_set_desc(dev, iwm_devices[i].name); return (BUS_PROBE_DEFAULT); } return (ENXIO); } static int iwm_dev_check(device_t dev) { struct iwm_softc *sc; sc = device_get_softc(dev); switch (pci_get_device(dev)) { case PCI_PRODUCT_INTEL_WL_3160_1: case PCI_PRODUCT_INTEL_WL_3160_2: sc->sc_fwname = "iwm3160fw"; sc->host_interrupt_operation_mode = 1; return (0); case PCI_PRODUCT_INTEL_WL_7260_1: case PCI_PRODUCT_INTEL_WL_7260_2: sc->sc_fwname = "iwm7260fw"; sc->host_interrupt_operation_mode = 1; return (0); case PCI_PRODUCT_INTEL_WL_7265_1: case PCI_PRODUCT_INTEL_WL_7265_2: sc->sc_fwname = "iwm7265fw"; sc->host_interrupt_operation_mode = 0; return (0); default: device_printf(dev, "unknown adapter type\n"); return ENXIO; } } static int iwm_pci_attach(device_t dev) { struct iwm_softc *sc; int count, error, rid; uint16_t reg; sc = device_get_softc(dev); /* Clear device-specific "PCI retry timeout" register (41h). */ reg = pci_read_config(dev, 0x40, sizeof(reg)); pci_write_config(dev, 0x40, reg & ~0xff00, sizeof(reg)); /* Enable bus-mastering and hardware bug workaround. */ pci_enable_busmaster(dev); reg = pci_read_config(dev, PCIR_STATUS, sizeof(reg)); /* if !MSI */ if (reg & PCIM_STATUS_INTxSTATE) { reg &= ~PCIM_STATUS_INTxSTATE; } pci_write_config(dev, PCIR_STATUS, reg, sizeof(reg)); rid = PCIR_BAR(0); sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->sc_mem == NULL) { device_printf(sc->sc_dev, "can't map mem space\n"); return (ENXIO); } sc->sc_st = rman_get_bustag(sc->sc_mem); sc->sc_sh = rman_get_bushandle(sc->sc_mem); /* Install interrupt handler. 
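 * Prefer a single MSI vector (rid 1); fall back to a shared legacy
 * INTx interrupt (rid 0) if MSI allocation fails.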
*/ count = 1; rid = 0; if (pci_alloc_msi(dev, &count) == 0) rid = 1; sc->sc_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | (rid != 0 ? 0 : RF_SHAREABLE)); if (sc->sc_irq == NULL) { device_printf(dev, "can't map interrupt\n"); return (ENXIO); } error = bus_setup_intr(dev, sc->sc_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, iwm_intr, sc, &sc->sc_ih); if (sc->sc_ih == NULL) { device_printf(dev, "can't establish interrupt"); return (ENXIO); } sc->sc_dmat = bus_get_dma_tag(sc->sc_dev); return (0); } static void iwm_pci_detach(device_t dev) { struct iwm_softc *sc = device_get_softc(dev); if (sc->sc_irq != NULL) { bus_teardown_intr(dev, sc->sc_irq, sc->sc_ih); bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->sc_irq), sc->sc_irq); pci_release_msi(dev); } if (sc->sc_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->sc_mem), sc->sc_mem); } static int iwm_attach(device_t dev) { struct iwm_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; int error; int txq_i, i; sc->sc_dev = dev; IWM_LOCK_INIT(sc); mbufq_init(&sc->sc_snd, ifqmaxlen); callout_init_mtx(&sc->sc_watchdog_to, &sc->sc_mtx, 0); TASK_INIT(&sc->sc_es_task, 0, iwm_endscan_cb, sc); sc->sc_tq = taskqueue_create("iwm_taskq", M_WAITOK, taskqueue_thread_enqueue, &sc->sc_tq); error = taskqueue_start_threads(&sc->sc_tq, 1, 0, "iwm_taskq"); if (error != 0) { device_printf(dev, "can't start threads, error %d\n", error); goto fail; } /* PCI attach */ error = iwm_pci_attach(dev); if (error != 0) goto fail; sc->sc_wantresp = -1; /* Check device type */ error = iwm_dev_check(dev); if (error != 0) goto fail; sc->sc_fwdmasegsz = IWM_FWDMASEGSZ; /* * We now start fiddling with the hardware */ sc->sc_hw_rev = IWM_READ(sc, IWM_CSR_HW_REV); if (iwm_prepare_card_hw(sc) != 0) { device_printf(dev, "could not initialize hardware\n"); goto fail; } /* Allocate DMA memory for firmware transfers. */ if ((error = iwm_alloc_fwmem(sc)) != 0) { device_printf(dev, "could not allocate memory for firmware\n"); goto fail; } /* Allocate "Keep Warm" page. */ if ((error = iwm_alloc_kw(sc)) != 0) { device_printf(dev, "could not allocate keep warm page\n"); goto fail; } /* We use ICT interrupts */ if ((error = iwm_alloc_ict(sc)) != 0) { device_printf(dev, "could not allocate ICT table\n"); goto fail; } /* Allocate TX scheduler "rings". */ if ((error = iwm_alloc_sched(sc)) != 0) { device_printf(dev, "could not allocate TX scheduler rings\n"); goto fail; } /* Allocate TX rings */ for (txq_i = 0; txq_i < nitems(sc->txq); txq_i++) { if ((error = iwm_alloc_tx_ring(sc, &sc->txq[txq_i], txq_i)) != 0) { device_printf(dev, "could not allocate TX ring %d\n", txq_i); goto fail; } } /* Allocate RX ring. */ if ((error = iwm_alloc_rx_ring(sc, &sc->rxq)) != 0) { device_printf(dev, "could not allocate RX ring\n"); goto fail; } /* Clear pending interrupts. */ IWM_WRITE(sc, IWM_CSR_INT, 0xffffffff); ic->ic_softc = sc; ic->ic_name = device_get_nameunit(sc->sc_dev); ic->ic_phytype = IEEE80211_T_OFDM; /* not only, but not used */ ic->ic_opmode = IEEE80211_M_STA; /* default to BSS mode */ /* Set device capabilities. 
*/ ic->ic_caps = IEEE80211_C_STA | IEEE80211_C_WPA | /* WPA/RSN */ IEEE80211_C_WME | IEEE80211_C_SHSLOT | /* short slot time supported */ IEEE80211_C_SHPREAMBLE /* short preamble supported */ // IEEE80211_C_BGSCAN /* capable of bg scanning */ ; for (i = 0; i < nitems(sc->sc_phyctxt); i++) { sc->sc_phyctxt[i].id = i; sc->sc_phyctxt[i].color = 0; sc->sc_phyctxt[i].ref = 0; sc->sc_phyctxt[i].channel = NULL; } /* Max RSSI */ sc->sc_max_rssi = IWM_MAX_DBM - IWM_MIN_DBM; sc->sc_preinit_hook.ich_func = iwm_preinit; sc->sc_preinit_hook.ich_arg = sc; if (config_intrhook_establish(&sc->sc_preinit_hook) != 0) { device_printf(dev, "config_intrhook_establish failed\n"); goto fail; } #ifdef IWM_DEBUG SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLFLAG_RW, &sc->sc_debug, 0, "control debugging"); #endif IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "<-%s\n", __func__); return 0; /* Free allocated memory if something failed during attachment. */ fail: iwm_detach_local(sc, 0); return ENXIO; } static int iwm_update_edca(struct ieee80211com *ic) { struct iwm_softc *sc = ic->ic_softc; device_printf(sc->sc_dev, "%s: called\n", __func__); return (0); } static void iwm_preinit(void *arg) { struct iwm_softc *sc = arg; device_t dev = sc->sc_dev; struct ieee80211com *ic = &sc->sc_ic; int error; IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "->%s\n", __func__); IWM_LOCK(sc); if ((error = iwm_start_hw(sc)) != 0) { device_printf(dev, "could not initialize hardware\n"); IWM_UNLOCK(sc); goto fail; } error = iwm_run_init_mvm_ucode(sc, 1); iwm_stop_device(sc); if (error) { IWM_UNLOCK(sc); goto fail; } device_printf(dev, "revision: 0x%x, firmware %d.%d (API ver. %d)\n", sc->sc_hw_rev & IWM_CSR_HW_REV_TYPE_MSK, IWM_UCODE_MAJOR(sc->sc_fwver), IWM_UCODE_MINOR(sc->sc_fwver), IWM_UCODE_API(sc->sc_fwver)); /* not all hardware can do 5GHz band */ if (!sc->sc_nvm.sku_cap_band_52GHz_enable) memset(&ic->ic_sup_rates[IEEE80211_MODE_11A], 0, sizeof(ic->ic_sup_rates[IEEE80211_MODE_11A])); IWM_UNLOCK(sc); /* * At this point we've committed - if we fail to do setup, * we now also have to tear down the net80211 state. */ ieee80211_ifattach(ic); ic->ic_vap_create = iwm_vap_create; ic->ic_vap_delete = iwm_vap_delete; ic->ic_raw_xmit = iwm_raw_xmit; ic->ic_node_alloc = iwm_node_alloc; ic->ic_scan_start = iwm_scan_start; ic->ic_scan_end = iwm_scan_end; ic->ic_update_mcast = iwm_update_mcast; ic->ic_set_channel = iwm_set_channel; ic->ic_scan_curchan = iwm_scan_curchan; ic->ic_scan_mindwell = iwm_scan_mindwell; ic->ic_wme.wme_update = iwm_update_edca; ic->ic_parent = iwm_parent; ic->ic_transmit = iwm_transmit; iwm_radiotap_attach(sc); if (bootverbose) ieee80211_announce(ic); IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "<-%s\n", __func__); config_intrhook_disestablish(&sc->sc_preinit_hook); return; fail: config_intrhook_disestablish(&sc->sc_preinit_hook); iwm_detach_local(sc, 0); } /* * Attach the interface to 802.11 radiotap. 
*/ static void iwm_radiotap_attach(struct iwm_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "->%s begin\n", __func__); ieee80211_radiotap_attach(ic, &sc->sc_txtap.wt_ihdr, sizeof(sc->sc_txtap), IWM_TX_RADIOTAP_PRESENT, &sc->sc_rxtap.wr_ihdr, sizeof(sc->sc_rxtap), IWM_RX_RADIOTAP_PRESENT); IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "->%s end\n", __func__); } static struct ieee80211vap * iwm_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct iwm_vap *ivp; struct ieee80211vap *vap; if (!TAILQ_EMPTY(&ic->ic_vaps)) /* only one at a time */ return NULL; ivp = malloc(sizeof(struct iwm_vap), M_80211_VAP, M_WAITOK | M_ZERO); vap = &ivp->iv_vap; ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); vap->iv_bmissthreshold = 10; /* override default */ /* Override with driver methods. */ ivp->iv_newstate = vap->iv_newstate; vap->iv_newstate = iwm_newstate; ieee80211_ratectl_init(vap); /* Complete setup. */ ieee80211_vap_attach(vap, iwm_media_change, ieee80211_media_status, mac); ic->ic_opmode = opmode; return vap; } static void iwm_vap_delete(struct ieee80211vap *vap) { struct iwm_vap *ivp = IWM_VAP(vap); ieee80211_ratectl_deinit(vap); ieee80211_vap_detach(vap); free(ivp, M_80211_VAP); } static void iwm_scan_start(struct ieee80211com *ic) { struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct iwm_softc *sc = ic->ic_softc; int error; if (sc->sc_scanband) return; IWM_LOCK(sc); error = iwm_mvm_scan_request(sc, IEEE80211_CHAN_2GHZ, 0, NULL, 0); if (error) { device_printf(sc->sc_dev, "could not initiate scan\n"); IWM_UNLOCK(sc); ieee80211_cancel_scan(vap); } else IWM_UNLOCK(sc); } static void iwm_scan_end(struct ieee80211com *ic) { } static void iwm_update_mcast(struct ieee80211com *ic) { } static void iwm_set_channel(struct ieee80211com *ic) { } static void iwm_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell) { } static void iwm_scan_mindwell(struct ieee80211_scan_state *ss) { return; } void iwm_init_task(void *arg1) { struct iwm_softc *sc = arg1; IWM_LOCK(sc); while (sc->sc_flags & IWM_FLAG_BUSY) msleep(&sc->sc_flags, &sc->sc_mtx, 0, "iwmpwr", 0); sc->sc_flags |= IWM_FLAG_BUSY; iwm_stop(sc); if (sc->sc_ic.ic_nrunning > 0) iwm_init(sc); sc->sc_flags &= ~IWM_FLAG_BUSY; wakeup(&sc->sc_flags); IWM_UNLOCK(sc); } static int iwm_resume(device_t dev) { uint16_t reg; /* Clear device-specific "PCI retry timeout" register (41h). 
*/ reg = pci_read_config(dev, 0x40, sizeof(reg)); pci_write_config(dev, 0x40, reg & ~0xff00, sizeof(reg)); iwm_init_task(device_get_softc(dev)); return 0; } static int iwm_suspend(device_t dev) { struct iwm_softc *sc = device_get_softc(dev); if (sc->sc_ic.ic_nrunning > 0) { IWM_LOCK(sc); iwm_stop(sc); IWM_UNLOCK(sc); } return (0); } static int iwm_detach_local(struct iwm_softc *sc, int do_net80211) { + struct iwm_fw_info *fw = &sc->sc_fw; device_t dev = sc->sc_dev; int i; if (sc->sc_tq) { taskqueue_drain_all(sc->sc_tq); taskqueue_free(sc->sc_tq); } callout_drain(&sc->sc_watchdog_to); iwm_stop_device(sc); if (do_net80211) ieee80211_ifdetach(&sc->sc_ic); /* Free descriptor rings */ for (i = 0; i < nitems(sc->txq); i++) iwm_free_tx_ring(sc, &sc->txq[i]); + + /* Free firmware */ + if (fw->fw_fp != NULL) + iwm_fw_info_free(fw); /* Free scheduler */ iwm_free_sched(sc); if (sc->ict_dma.vaddr != NULL) iwm_free_ict(sc); if (sc->kw_dma.vaddr != NULL) iwm_free_kw(sc); if (sc->fw_dma.vaddr != NULL) iwm_free_fwmem(sc); /* Finished with the hardware - detach things */ iwm_pci_detach(dev); mbufq_drain(&sc->sc_snd); IWM_LOCK_DESTROY(sc); return (0); } static int iwm_detach(device_t dev) { struct iwm_softc *sc = device_get_softc(dev); return (iwm_detach_local(sc, 1)); } static device_method_t iwm_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, iwm_probe), DEVMETHOD(device_attach, iwm_attach), DEVMETHOD(device_detach, iwm_detach), DEVMETHOD(device_suspend, iwm_suspend), DEVMETHOD(device_resume, iwm_resume), DEVMETHOD_END }; static driver_t iwm_pci_driver = { "iwm", iwm_pci_methods, sizeof (struct iwm_softc) }; static devclass_t iwm_devclass; DRIVER_MODULE(iwm, pci, iwm_pci_driver, iwm_devclass, NULL, NULL); MODULE_DEPEND(iwm, firmware, 1, 1, 1); MODULE_DEPEND(iwm, pci, 1, 1, 1); MODULE_DEPEND(iwm, wlan, 1, 1, 1); Index: projects/release-pkg/sys/kern/vfs_default.c =================================================================== --- projects/release-pkg/sys/kern/vfs_default.c (revision 293224) +++ projects/release-pkg/sys/kern/vfs_default.c (revision 293225) @@ -1,1307 +1,1301 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed * to Berkeley by John Heidemann of the UCLA Ficus project. * * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int vop_nolookup(struct vop_lookup_args *); static int vop_norename(struct vop_rename_args *); static int vop_nostrategy(struct vop_strategy_args *); static int get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf, int dirbuflen, off_t *off, char **cpos, int *len, int *eofflag, struct thread *td); static int dirent_exists(struct vnode *vp, const char *dirname, struct thread *td); #define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4) static int vop_stdis_text(struct vop_is_text_args *ap); static int vop_stdset_text(struct vop_set_text_args *ap); static int vop_stdunset_text(struct vop_unset_text_args *ap); static int vop_stdget_writecount(struct vop_get_writecount_args *ap); static int vop_stdadd_writecount(struct vop_add_writecount_args *ap); static int vop_stdgetpages_async(struct vop_getpages_async_args *ap); /* * This vnode table stores what we want to do if the filesystem doesn't * implement a particular VOP. * * If there is no specific entry here, we will return EOPNOTSUPP. * * Note that every filesystem has to implement either vop_access * or vop_accessx; failing to do so will result in immediate crash * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(), * which calls vop_stdaccess() etc. */ struct vop_vector default_vnodeops = { .vop_default = NULL, .vop_bypass = VOP_EOPNOTSUPP, .vop_access = vop_stdaccess, .vop_accessx = vop_stdaccessx, .vop_advise = vop_stdadvise, .vop_advlock = vop_stdadvlock, .vop_advlockasync = vop_stdadvlockasync, .vop_advlockpurge = vop_stdadvlockpurge, .vop_allocate = vop_stdallocate, .vop_bmap = vop_stdbmap, .vop_close = VOP_NULL, .vop_fsync = VOP_NULL, .vop_getpages = vop_stdgetpages, .vop_getpages_async = vop_stdgetpages_async, .vop_getwritemount = vop_stdgetwritemount, .vop_inactive = VOP_NULL, .vop_ioctl = VOP_ENOTTY, .vop_kqfilter = vop_stdkqfilter, .vop_islocked = vop_stdislocked, .vop_lock1 = vop_stdlock, .vop_lookup = vop_nolookup, .vop_open = VOP_NULL, .vop_pathconf = VOP_EINVAL, .vop_poll = vop_nopoll, .vop_putpages = vop_stdputpages, .vop_readlink = VOP_EINVAL, .vop_rename = vop_norename, .vop_revoke = VOP_PANIC, .vop_strategy = vop_nostrategy, .vop_unlock = vop_stdunlock, .vop_vptocnp = vop_stdvptocnp, .vop_vptofh = vop_stdvptofh, .vop_unp_bind = vop_stdunp_bind, .vop_unp_connect = vop_stdunp_connect, .vop_unp_detach = vop_stdunp_detach, .vop_is_text = vop_stdis_text, .vop_set_text = vop_stdset_text, .vop_unset_text = vop_stdunset_text, .vop_get_writecount = vop_stdget_writecount, .vop_add_writecount = vop_stdadd_writecount, }; /* * Series of placeholder functions for various error returns for * VOPs. 
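 * Each simply returns the errno its name suggests (EOPNOTSUPP, EBADF,
 * ENOTTY, EINVAL, ENOENT) or, in the case of vop_null, success.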
*/ int vop_eopnotsupp(struct vop_generic_args *ap) { /* printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name); */ return (EOPNOTSUPP); } int vop_ebadf(struct vop_generic_args *ap) { return (EBADF); } int vop_enotty(struct vop_generic_args *ap) { return (ENOTTY); } int vop_einval(struct vop_generic_args *ap) { return (EINVAL); } int vop_enoent(struct vop_generic_args *ap) { return (ENOENT); } int vop_null(struct vop_generic_args *ap) { return (0); } /* * Helper function to panic on some bad VOPs in some filesystems. */ int vop_panic(struct vop_generic_args *ap) { panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name); } /* * vop_std and vop_no are default functions for use by * filesystems that need the "default reasonable" implementation for a * particular operation. * * The documentation for the operations they implement exists (if it exists) * in the VOP_(9) manpage (all uppercase). */ /* * Default vop for filesystems that do not support name lookup */ static int vop_nolookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { *ap->a_vpp = NULL; return (ENOTDIR); } /* * vop_norename: * * Handle unlock and reference counting for arguments of vop_rename * for filesystems that do not implement rename operation. */ static int vop_norename(struct vop_rename_args *ap) { vop_rename_fail(ap); return (EOPNOTSUPP); } /* * vop_nostrategy: * * Strategy routine for VFS devices that have none. * * BIO_ERROR and B_INVAL must be cleared prior to calling any strategy * routine. Typically this is done for a BIO_READ strategy call. * Typically B_INVAL is assumed to already be clear prior to a write * and should not be cleared manually unless you just made the buffer * invalid. BIO_ERROR should be cleared either way. */ static int vop_nostrategy (struct vop_strategy_args *ap) { printf("No strategy for buffer at %p\n", ap->a_bp); vprint("vnode", ap->a_vp); ap->a_bp->b_ioflags |= BIO_ERROR; ap->a_bp->b_error = EOPNOTSUPP; bufdone(ap->a_bp); return (EOPNOTSUPP); } static int get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf, int dirbuflen, off_t *off, char **cpos, int *len, int *eofflag, struct thread *td) { int error, reclen; struct uio uio; struct iovec iov; struct dirent *dp; KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); if (*len == 0) { iov.iov_base = dirbuf; iov.iov_len = dirbuflen; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = *off; uio.uio_resid = dirbuflen; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = td; *eofflag = 0; #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error == 0) #endif error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag, NULL, NULL); if (error) return (error); *off = uio.uio_offset; *cpos = dirbuf; *len = (dirbuflen - uio.uio_resid); if (*len == 0) return (ENOENT); } dp = (struct dirent *)(*cpos); reclen = dp->d_reclen; *dpp = dp; /* check for malformed directory.. */ if (reclen < DIRENT_MINSIZE) return (EINVAL); *cpos += reclen; *len -= reclen; return (0); } /* * Check if a named file exists in a given directory vnode. 
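 * The directory is read via get_next_dirent(); an entry only counts as
 * a match if it is not a whiteout, has a non-zero inode number and its
 * name equals "dirname".  Returns 1 if found, 0 otherwise.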
*/ static int dirent_exists(struct vnode *vp, const char *dirname, struct thread *td) { char *dirbuf, *cpos; int error, eofflag, dirbuflen, len, found; off_t off; struct dirent *dp; struct vattr va; KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); found = 0; error = VOP_GETATTR(vp, &va, td->td_ucred); if (error) return (found); dirbuflen = DEV_BSIZE; if (dirbuflen < va.va_blocksize) dirbuflen = va.va_blocksize; dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); off = 0; len = 0; do { error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off, &cpos, &len, &eofflag, td); if (error) goto out; if (dp->d_type != DT_WHT && dp->d_fileno != 0 && strcmp(dp->d_name, dirname) == 0) { found = 1; goto out; } } while (len > 0 || !eofflag); out: free(dirbuf, M_TEMP); return (found); } int vop_stdaccess(struct vop_access_args *ap) { KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0, ("invalid bit in accmode")); return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td)); } int vop_stdaccessx(struct vop_accessx_args *ap) { int error; accmode_t accmode = ap->a_accmode; error = vfs_unixify_accmode(&accmode); if (error != 0) return (error); if (accmode == 0) return (0); return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td)); } /* * Advisory record locking support */ int vop_stdadvlock(struct vop_advlock_args *ap) { struct vnode *vp; struct vattr vattr; int error; vp = ap->a_vp; if (ap->a_fl->l_whence == SEEK_END) { /* * The NFSv4 server must avoid doing a vn_lock() here, since it * can deadlock the nfsd threads, due to a LOR. Fortunately * the NFSv4 server always uses SEEK_SET and this code is * only required for the SEEK_END case. */ vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); VOP_UNLOCK(vp, 0); if (error) return (error); } else vattr.va_size = 0; return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size)); } int vop_stdadvlockasync(struct vop_advlockasync_args *ap) { struct vnode *vp; struct vattr vattr; int error; vp = ap->a_vp; if (ap->a_fl->l_whence == SEEK_END) { /* The size argument is only needed for SEEK_END. */ vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); VOP_UNLOCK(vp, 0); if (error) return (error); } else vattr.va_size = 0; return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size)); } int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap) { struct vnode *vp; vp = ap->a_vp; lf_purgelocks(vp, &vp->v_lockf); return (0); } /* * vop_stdpathconf: * * Standard implementation of POSIX pathconf, to get information about limits * for a filesystem. * Override per filesystem for the case where the filesystem has smaller * limits. */ int vop_stdpathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { switch (ap->a_name) { case _PC_NAME_MAX: *ap->a_retval = NAME_MAX; return (0); case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; return (0); case _PC_LINK_MAX: *ap->a_retval = LINK_MAX; return (0); case _PC_MAX_CANON: *ap->a_retval = MAX_CANON; return (0); case _PC_MAX_INPUT: *ap->a_retval = MAX_INPUT; return (0); case _PC_PIPE_BUF: *ap->a_retval = PIPE_BUF; return (0); case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; return (0); case _PC_VDISABLE: *ap->a_retval = _POSIX_VDISABLE; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * Standard lock, unlock and islocked functions. 
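 *
 * These forward to the lockmgr lock that vp->v_vnlock points at (by default
 * the vnode's embedded v_lock), so a filesystem that leaves v_vnlock alone
 * gets working vnode locking without providing its own lock/unlock VOPs.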
*/ int vop_stdlock(ap) struct vop_lock1_args /* { struct vnode *a_vp; int a_flags; char *file; int line; } */ *ap; { struct vnode *vp = ap->a_vp; return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp), LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file, ap->a_line)); } /* See above. */ int vop_stdunlock(ap) struct vop_unlock_args /* { struct vnode *a_vp; int a_flags; } */ *ap; { struct vnode *vp = ap->a_vp; return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp))); } /* See above. */ int vop_stdislocked(ap) struct vop_islocked_args /* { struct vnode *a_vp; } */ *ap; { return (lockstatus(ap->a_vp->v_vnlock)); } /* * Return true for select/poll. */ int vop_nopoll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct thread *a_td; } */ *ap; { return (poll_no_poll(ap->a_events)); } /* * Implement poll for local filesystems that support it. */ int vop_stdpoll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct thread *a_td; } */ *ap; { if (ap->a_events & ~POLLSTANDARD) return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events)); return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); } /* * Return our mount point, as we will take charge of the writes. */ int vop_stdgetwritemount(ap) struct vop_getwritemount_args /* { struct vnode *a_vp; struct mount **a_mpp; } */ *ap; { struct mount *mp; /* * XXX Since this is called unlocked we may be recycled while * attempting to ref the mount. If this is the case or mountpoint * will be set to NULL. We only have to prevent this call from * returning with a ref to an incorrect mountpoint. It is not * harmful to return with a ref to our previous mountpoint. */ mp = ap->a_vp->v_mount; if (mp != NULL) { vfs_ref(mp); if (mp != ap->a_vp->v_mount) { vfs_rel(mp); mp = NULL; } } *(ap->a_mpp) = mp; return (0); } /* XXX Needs good comment and VOP_BMAP(9) manpage */ int vop_stdbmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct bufobj **a_bop; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { if (ap->a_bop != NULL) *ap->a_bop = &ap->a_vp->v_bufobj; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize); if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } int vop_stdfsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; struct buf *bp; struct bufobj *bo; struct buf *nbp; int error = 0; int maxretry = 1000; /* large, arbitrarily chosen */ bo = &vp->v_bufobj; BO_LOCK(bo); loop1: /* * MARK/SCAN initialization to avoid infinite loops. */ TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) { bp->b_vflags &= ~BV_SCANNED; bp->b_error = 0; } /* * Flush all dirty buffers associated with a vnode. 
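 * Each buffer is marked BV_SCANNED as it is visited; whenever the bufobj
 * lock is dropped to write a buffer, the scan restarts from the head of the
 * dirty list, and the mark keeps already-handled buffers from being
 * revisited, so the restart cannot loop forever.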
*/ loop2: TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if ((bp->b_vflags & BV_SCANNED) != 0) continue; bp->b_vflags |= BV_SCANNED; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { if (ap->a_waitfor != MNT_WAIT) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL, BO_LOCKPTR(bo)) != 0) { BO_LOCK(bo); goto loop1; } BO_LOCK(bo); } BO_UNLOCK(bo); KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); if ((bp->b_flags & B_DELWRI) == 0) panic("fsync: not dirty"); if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) { vfs_bio_awrite(bp); } else { bremfree(bp); bawrite(bp); } BO_LOCK(bo); goto loop2; } /* * If synchronous the caller expects us to completely resolve all * dirty buffers in the system. Wait for in-progress I/O to * complete (which could include background bitmap writes), then * retry if dirty blocks still exist. */ if (ap->a_waitfor == MNT_WAIT) { bufobj_wwait(bo, 0, 0); if (bo->bo_dirty.bv_cnt > 0) { /* * If we are unable to write any of these buffers * then we fail now rather than trying endlessly * to write them out. */ TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) if ((error = bp->b_error) == 0) continue; if (error == 0 && --maxretry >= 0) goto loop1; error = EAGAIN; } } BO_UNLOCK(bo); if (error == EAGAIN) vprint("fsync: giving up on dirty", vp); return (error); } /* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */ int vop_stdgetpages(ap) struct vop_getpages_args /* { struct vnode *a_vp; vm_page_t *a_m; int a_count; int *a_rbehind; int *a_rahead; } */ *ap; { return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL); } static int vop_stdgetpages_async(struct vop_getpages_async_args *ap) { int error; error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead); ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error); return (error); } int vop_stdkqfilter(struct vop_kqfilter_args *ap) { return vfs_kqfilter(ap); } /* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). 
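 * Until then, note that the default simply hands the request to
 * vnode_pager_generic_putpages(), as the function below shows.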
*/ int vop_stdputpages(ap) struct vop_putpages_args /* { struct vnode *a_vp; vm_page_t *a_m; int a_count; int a_sync; int *a_rtvals; } */ *ap; { return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals); } int vop_stdvptofh(struct vop_vptofh_args *ap) { return (EOPNOTSUPP); } int vop_stdvptocnp(struct vop_vptocnp_args *ap) { struct vnode *vp = ap->a_vp; struct vnode **dvp = ap->a_vpp; struct ucred *cred = ap->a_cred; char *buf = ap->a_buf; int *buflen = ap->a_buflen; char *dirbuf, *cpos; int i, error, eofflag, dirbuflen, flags, locked, len, covered; off_t off; ino_t fileno; struct vattr va; struct nameidata nd; struct thread *td; struct dirent *dp; struct vnode *mvp; i = *buflen; error = 0; covered = 0; td = curthread; if (vp->v_type != VDIR) return (ENOENT); error = VOP_GETATTR(vp, &va, cred); if (error) return (error); VREF(vp); locked = VOP_ISLOCKED(vp); VOP_UNLOCK(vp, 0); NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, "..", vp, td); flags = FREAD; error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL); if (error) { vn_lock(vp, locked | LK_RETRY); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); mvp = *dvp = nd.ni_vp; if (vp->v_mount != (*dvp)->v_mount && ((*dvp)->v_vflag & VV_ROOT) && ((*dvp)->v_mount->mnt_flag & MNT_UNION)) { *dvp = (*dvp)->v_mount->mnt_vnodecovered; VREF(mvp); VOP_UNLOCK(mvp, 0); vn_close(mvp, FREAD, cred, td); VREF(*dvp); vn_lock(*dvp, LK_SHARED | LK_RETRY); covered = 1; } fileno = va.va_fileid; dirbuflen = DEV_BSIZE; if (dirbuflen < va.va_blocksize) dirbuflen = va.va_blocksize; dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); if ((*dvp)->v_type != VDIR) { error = ENOENT; goto out; } off = 0; len = 0; do { /* call VOP_READDIR of parent */ error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off, &cpos, &len, &eofflag, td); if (error) goto out; if ((dp->d_type != DT_WHT) && (dp->d_fileno == fileno)) { if (covered) { VOP_UNLOCK(*dvp, 0); vn_lock(mvp, LK_SHARED | LK_RETRY); if (dirent_exists(mvp, dp->d_name, td)) { error = ENOENT; VOP_UNLOCK(mvp, 0); vn_lock(*dvp, LK_SHARED | LK_RETRY); goto out; } VOP_UNLOCK(mvp, 0); vn_lock(*dvp, LK_SHARED | LK_RETRY); } i -= dp->d_namlen; if (i < 0) { error = ENOMEM; goto out; } if (dp->d_namlen == 1 && dp->d_name[0] == '.') { error = ENOENT; } else { bcopy(dp->d_name, buf + i, dp->d_namlen); error = 0; } goto out; } } while (len > 0 || !eofflag); error = ENOENT; out: free(dirbuf, M_TEMP); if (!error) { *buflen = i; vref(*dvp); } if (covered) { vput(*dvp); vrele(mvp); } else { VOP_UNLOCK(mvp, 0); vn_close(mvp, FREAD, cred, td); } vn_lock(vp, locked | LK_RETRY); return (error); } int vop_stdallocate(struct vop_allocate_args *ap) { #ifdef __notyet__ struct statfs sfs; #endif struct iovec aiov; struct vattr vattr, *vap; struct uio auio; off_t fsize, len, cur, offset; uint8_t *buf; struct thread *td; struct vnode *vp; size_t iosize; int error; buf = NULL; error = 0; td = curthread; vap = &vattr; vp = ap->a_vp; len = *ap->a_len; offset = *ap->a_offset; error = VOP_GETATTR(vp, vap, td->td_ucred); if (error != 0) goto out; fsize = vap->va_size; iosize = vap->va_blocksize; if (iosize == 0) iosize = BLKDEV_IOSIZE; if (iosize > MAXPHYS) iosize = MAXPHYS; buf = malloc(iosize, M_TEMP, M_WAITOK); #ifdef __notyet__ /* * Check if the filesystem sets f_maxfilesize; if not use * VOP_SETATTR to perform the check. 
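 * (The VOP_SETATTR fallback grows the file to offset + len and immediately
 * restores the original size, so a filesystem that enforces a maximum file
 * size can reject an over-large request, typically with EFBIG, before any
 * data is written.)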
*/ error = VFS_STATFS(vp->v_mount, &sfs, td); if (error != 0) goto out; if (sfs.f_maxfilesize) { if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize || offset + len > sfs.f_maxfilesize) { error = EFBIG; goto out; } } else #endif if (offset + len > vap->va_size) { /* * Test offset + len against the filesystem's maxfilesize. */ VATTR_NULL(vap); vap->va_size = offset + len; error = VOP_SETATTR(vp, vap, td->td_ucred); if (error != 0) goto out; VATTR_NULL(vap); vap->va_size = fsize; error = VOP_SETATTR(vp, vap, td->td_ucred); if (error != 0) goto out; } for (;;) { /* * Read and write back anything below the nominal file * size. There's currently no way outside the filesystem * to know whether this area is sparse or not. */ cur = iosize; if ((offset % iosize) != 0) cur -= (offset % iosize); if (cur > len) cur = len; if (offset < fsize) { aiov.iov_base = buf; aiov.iov_len = cur; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = offset; auio.uio_resid = cur; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_td = td; error = VOP_READ(vp, &auio, 0, td->td_ucred); if (error != 0) break; if (auio.uio_resid > 0) { bzero(buf + cur - auio.uio_resid, auio.uio_resid); } } else { bzero(buf, cur); } aiov.iov_base = buf; aiov.iov_len = cur; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = offset; auio.uio_resid = cur; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; auio.uio_td = td; error = VOP_WRITE(vp, &auio, 0, td->td_ucred); if (error != 0) break; len -= cur; offset += cur; if (len == 0) break; if (should_yield()) break; } out: *ap->a_len = len; *ap->a_offset = offset; free(buf, M_TEMP); return (error); } int vop_stdadvise(struct vop_advise_args *ap) { struct vnode *vp; struct bufobj *bo; daddr_t startn, endn; off_t start, end; int bsize, error; vp = ap->a_vp; switch (ap->a_advice) { case POSIX_FADV_WILLNEED: /* * Do nothing for now. Filesystems should provide a * custom method which starts an asynchronous read of * the requested region. */ error = 0; break; case POSIX_FADV_DONTNEED: error = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { VOP_UNLOCK(vp, 0); break; } /* * Deactivate pages in the specified range from the backing VM * object. Pages that are resident in the buffer cache will * remain wired until their corresponding buffers are released * below. 
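 * (The buffers covering the range are released further down via
 * bnoreuselist(), which sets B_RELBUF, and B_NOREUSE for VMIO buffers,
 * before releasing them, so the pages can be reclaimed once any pending
 * writes complete.)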
*/ if (vp->v_object != NULL) { start = trunc_page(ap->a_start); end = round_page(ap->a_end); VM_OBJECT_WLOCK(vp->v_object); vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start), OFF_TO_IDX(end)); VM_OBJECT_WUNLOCK(vp->v_object); } bo = &vp->v_bufobj; BO_RLOCK(bo); bsize = vp->v_bufobj.bo_bsize; startn = ap->a_start / bsize; endn = ap->a_end / bsize; - for (;;) { - error = bnoreuselist(&bo->bo_clean, bo, startn, endn); - if (error == EAGAIN) - continue; + error = bnoreuselist(&bo->bo_clean, bo, startn, endn); + if (error == 0) error = bnoreuselist(&bo->bo_dirty, bo, startn, endn); - if (error == EAGAIN) - continue; - break; - } BO_RUNLOCK(bo); VOP_UNLOCK(vp, 0); break; default: error = EINVAL; break; } return (error); } int vop_stdunp_bind(struct vop_unp_bind_args *ap) { ap->a_vp->v_socket = ap->a_socket; return (0); } int vop_stdunp_connect(struct vop_unp_connect_args *ap) { *ap->a_socket = ap->a_vp->v_socket; return (0); } int vop_stdunp_detach(struct vop_unp_detach_args *ap) { ap->a_vp->v_socket = NULL; return (0); } static int vop_stdis_text(struct vop_is_text_args *ap) { return ((ap->a_vp->v_vflag & VV_TEXT) != 0); } static int vop_stdset_text(struct vop_set_text_args *ap) { ap->a_vp->v_vflag |= VV_TEXT; return (0); } static int vop_stdunset_text(struct vop_unset_text_args *ap) { ap->a_vp->v_vflag &= ~VV_TEXT; return (0); } static int vop_stdget_writecount(struct vop_get_writecount_args *ap) { *ap->a_writecount = ap->a_vp->v_writecount; return (0); } static int vop_stdadd_writecount(struct vop_add_writecount_args *ap) { ap->a_vp->v_writecount += ap->a_inc; return (0); } /* * vfs default ops * used to fill the vfs function table to get reasonable default return values. */ int vfs_stdroot (mp, flags, vpp) struct mount *mp; int flags; struct vnode **vpp; { return (EOPNOTSUPP); } int vfs_stdstatfs (mp, sbp) struct mount *mp; struct statfs *sbp; { return (EOPNOTSUPP); } int vfs_stdquotactl (mp, cmds, uid, arg) struct mount *mp; int cmds; uid_t uid; void *arg; { return (EOPNOTSUPP); } int vfs_stdsync(mp, waitfor) struct mount *mp; int waitfor; { struct vnode *vp, *mvp; struct thread *td; int error, lockreq, allerror = 0; td = curthread; lockreq = LK_EXCLUSIVE | LK_INTERLOCK; if (waitfor != MNT_WAIT) lockreq |= LK_NOWAIT; /* * Force stale buffer cache information to be flushed. 
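 * This walks every vnode on the mount, calls VOP_FSYNC() on those with
 * dirty buffers, and restarts the whole scan if a vnode disappears
 * (vget() returning ENOENT) while the list is being traversed.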
*/ loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { if (vp->v_bufobj.bo_dirty.bv_cnt == 0) { VI_UNLOCK(vp); continue; } if ((error = vget(vp, lockreq, td)) != 0) { if (error == ENOENT) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } continue; } error = VOP_FSYNC(vp, waitfor, td); if (error) allerror = error; vput(vp); } return (allerror); } int vfs_stdnosync (mp, waitfor) struct mount *mp; int waitfor; { return (0); } int vfs_stdvget (mp, ino, flags, vpp) struct mount *mp; ino_t ino; int flags; struct vnode **vpp; { return (EOPNOTSUPP); } int vfs_stdfhtovp (mp, fhp, flags, vpp) struct mount *mp; struct fid *fhp; int flags; struct vnode **vpp; { return (EOPNOTSUPP); } int vfs_stdinit (vfsp) struct vfsconf *vfsp; { return (0); } int vfs_stduninit (vfsp) struct vfsconf *vfsp; { return(0); } int vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname) struct mount *mp; int cmd; struct vnode *filename_vp; int attrnamespace; const char *attrname; { if (filename_vp != NULL) VOP_UNLOCK(filename_vp, 0); return (EOPNOTSUPP); } int vfs_stdsysctl(mp, op, req) struct mount *mp; fsctlop_t op; struct sysctl_req *req; { return (EOPNOTSUPP); } /* end of vfs default ops */ Index: projects/release-pkg/sys/kern/vfs_subr.c =================================================================== --- projects/release-pkg/sys/kern/vfs_subr.c (revision 293224) +++ projects/release-pkg/sys/kern/vfs_subr.c (revision 293225) @@ -1,5228 +1,5232 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 */ /* * External virtual filesystem routines */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif static void delmntque(struct vnode *vp); static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo); static void syncer_shutdown(void *arg, int howto); static int vtryrecycle(struct vnode *vp); static void v_init_counters(struct vnode *); static void v_incr_usecount(struct vnode *); static void v_incr_devcount(struct vnode *); static void v_decr_devcount(struct vnode *); static void vnlru_free(int); static void vgonel(struct vnode *); static void vfs_knllock(void *arg); static void vfs_knlunlock(void *arg); static void vfs_knl_assert_locked(void *arg); static void vfs_knl_assert_unlocked(void *arg); static void destroy_vpollinfo(struct vpollinfo *vi); /* * Number of vnodes in existence. Increased whenever getnewvnode() * allocates a new vnode, decreased in vdropl() for VI_DOOMED vnode. */ static unsigned long numvnodes; SYSCTL_ULONG(_vfs, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "Number of vnodes in existence"); static u_long vnodes_created; SYSCTL_ULONG(_vfs, OID_AUTO, vnodes_created, CTLFLAG_RD, &vnodes_created, 0, "Number of vnodes created by getnewvnode"); /* * Conversion tables for conversion from vnode types to inode formats * and back. */ enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; int vttoif_tab[10] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, S_IFMT }; /* * List of vnodes that are ready for recycling. */ static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* * "Free" vnode target. Free vnodes are rarely completely free, but are * just ones that are cheap to recycle. Usually they are for files which * have been stat'd but not read; these usually have inode and namecache * data attached to them. This target is the preferred minimum size of a * sub-cache consisting mostly of such files. The system balances the size * of this sub-cache with its complement to try to prevent either from * thrashing while the other is relatively inactive. The targets express * a preference for the best balance. * * "Above" this target there are 2 further targets (watermarks) related * to recyling of free vnodes. In the best-operating case, the cache is * exactly full, the free list has size between vlowat and vhiwat above the * free target, and recycling from it and normal use maintains this state. * Sometimes the free list is below vlowat or even empty, but this state * is even better for immediate use provided the cache is not full. * Otherwise, vnlru_proc() runs to reclaim enough vnodes (usually non-free * ones) to reach one of these states. The watermarks are currently hard- * coded as 4% and 9% of the available space higher. These and the default * of 25% for wantfreevnodes are too large if the memory size is large. * E.g., 9% of 75% of MAXVNODES is more than 566000 vnodes to reclaim * whenever vnlru_proc() becomes active. 
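 *
 * As a rough worked example of the vspace() arithmetic used below: with
 * desiredvnodes = 100000 and wantfreevnodes = 25000, gapvnodes is 75000,
 * vhiwat is about 6800 (gapvnodes / 11, i.e. ~9%) and vlowat about 3400,
 * so vnlru_proc() is woken once the slack reported by vspace() drops below
 * ~3400 vnodes and keeps reclaiming until it is back above ~6800.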
*/ static u_long wantfreevnodes; SYSCTL_ULONG(_vfs, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "Target for minimum number of \"free\" vnodes"); static u_long freevnodes; SYSCTL_ULONG(_vfs, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "Number of \"free\" vnodes"); static u_long recycles_count; SYSCTL_ULONG(_vfs, OID_AUTO, recycles, CTLFLAG_RD, &recycles_count, 0, "Number of vnodes recycled to meet vnode cache targets"); /* * Various variables used for debugging the new implementation of * reassignbuf(). * XXX these are probably of (very) limited utility now. */ static int reassignbufcalls; SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, "Number of calls to reassignbuf"); static u_long free_owe_inact; SYSCTL_ULONG(_vfs, OID_AUTO, free_owe_inact, CTLFLAG_RD, &free_owe_inact, 0, "Number of times free vnodes kept on active list due to VFS " "owing inactivation"); /* To keep more than one thread at a time from running vfs_getnewfsid */ static struct mtx mntid_mtx; /* * Lock for any access to the following: * vnode_free_list * numvnodes * freevnodes */ static struct mtx vnode_free_list_mtx; /* Publicly exported FS */ struct nfs_public nfs_pub; static uma_zone_t buf_trie_zone; /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */ static uma_zone_t vnode_zone; static uma_zone_t vnodepoll_zone; /* * The workitem queue. * * It is useful to delay writes of file data and filesystem metadata * for tens of seconds so that quickly created and deleted files need * not waste disk bandwidth being created and removed. To realize this, * we append vnodes to a "workitem" queue. When running with a soft * updates implementation, most pending metadata dependencies should * not wait for more than a few seconds. Thus, mounted on block devices * are delayed only about a half the time that file data is delayed. * Similarly, directory updates are more critical, so are only delayed * about a third the time that file data is delayed. Thus, there are * SYNCER_MAXDELAY queues that are processed round-robin at a rate of * one each second (driven off the filesystem syncer process). The * syncer_delayno variable indicates the next queue that is to be processed. 
* Items that need to be processed soon are placed in this queue: * * syncer_workitem_pending[syncer_delayno] * * A delay of fifteen seconds is done by placing the request fifteen * entries later in the queue: * * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] * */ static int syncer_delayno; static long syncer_mask; LIST_HEAD(synclist, bufobj); static struct synclist *syncer_workitem_pending; /* * The sync_mtx protects: * bo->bo_synclist * sync_vnode_count * syncer_delayno * syncer_state * syncer_workitem_pending * syncer_worklist_len * rushjob */ static struct mtx sync_mtx; static struct cv sync_wakeup; #define SYNCER_MAXDELAY 32 static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ static int syncdelay = 30; /* max time to delay syncing data */ static int filedelay = 30; /* time to delay syncing files */ SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "Time to delay syncing files (in seconds)"); static int dirdelay = 29; /* time to delay syncing directories */ SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "Time to delay syncing directories (in seconds)"); static int metadelay = 28; /* time to delay syncing metadata */ SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "Time to delay syncing metadata (in seconds)"); static int rushjob; /* number of slots to run ASAP */ static int stat_rush_requests; /* number of times I/O speeded up */ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "Number of times I/O speeded up (rush requests)"); /* * When shutting down the syncer, run it at four times normal speed. */ #define SYNCER_SHUTDOWN_SPEEDUP 4 static int sync_vnode_count; static int syncer_worklist_len; static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY } syncer_state; /* Target for maximum number of vnodes. */ int desiredvnodes; static int gapvnodes; /* gap between wanted and desired */ static int vhiwat; /* enough extras after expansion */ static int vlowat; /* minimal extras before expansion */ static int vstir; /* nonzero to stir non-free vnodes */ static volatile int vsmalltrigger = 8; /* pref to keep if > this many pages */ static int sysctl_update_desiredvnodes(SYSCTL_HANDLER_ARGS) { int error, old_desiredvnodes; old_desiredvnodes = desiredvnodes; if ((error = sysctl_handle_int(oidp, arg1, arg2, req)) != 0) return (error); if (old_desiredvnodes != desiredvnodes) { wantfreevnodes = desiredvnodes / 4; /* XXX locking seems to be incomplete. */ vfs_hash_changesize(desiredvnodes); cache_changesize(desiredvnodes); } return (0); } SYSCTL_PROC(_kern, KERN_MAXVNODES, maxvnodes, CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, &desiredvnodes, 0, sysctl_update_desiredvnodes, "I", "Target for maximum number of vnodes"); SYSCTL_ULONG(_kern, OID_AUTO, minvnodes, CTLFLAG_RW, &wantfreevnodes, 0, "Old name for vfs.wantfreevnodes (legacy)"); static int vnlru_nowhere; SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW, &vnlru_nowhere, 0, "Number of times the vnlru process ran without success"); /* Shift count for (uintptr_t)vp to initialize vp->v_hash. */ static int vnsz2log; /* * Support for the bufobj clean & dirty pctrie. */ static void * buf_trie_alloc(struct pctrie *ptree) { return uma_zalloc(buf_trie_zone, M_NOWAIT); } static void buf_trie_free(struct pctrie *ptree, void *node) { uma_zfree(buf_trie_zone, node); } PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free); /* * Initialize the vnode management data structures. 
* * Reevaluate the following cap on the number of vnodes after the physical * memory size exceeds 512GB. In the limit, as the physical memory size * grows, the ratio of the memory size in KB to to vnodes approaches 64:1. */ #ifndef MAXVNODES_MAX #define MAXVNODES_MAX (512 * 1024 * 1024 / 64) /* 8M */ #endif /* * Initialize a vnode as it first enters the zone. */ static int vnode_init(void *mem, int size, int flags) { struct vnode *vp; struct bufobj *bo; vp = mem; bzero(vp, size); /* * Setup locks. */ vp->v_vnlock = &vp->v_lock; mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF); /* * By default, don't allow shared locks unless filesystems opt-in. */ lockinit(vp->v_vnlock, PVFS, "vnode", VLKTIMEOUT, LK_NOSHARE | LK_IS_VNODE); /* * Initialize bufobj. */ bo = &vp->v_bufobj; bo->__bo_vnode = vp; rw_init(BO_LOCKPTR(bo), "bufobj interlock"); bo->bo_private = vp; TAILQ_INIT(&bo->bo_clean.bv_hd); TAILQ_INIT(&bo->bo_dirty.bv_hd); /* * Initialize namecache. */ LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); /* * Initialize rangelocks. */ rangelock_init(&vp->v_rl); return (0); } /* * Free a vnode when it is cleared from the zone. */ static void vnode_fini(void *mem, int size) { struct vnode *vp; struct bufobj *bo; vp = mem; rangelock_destroy(&vp->v_rl); lockdestroy(vp->v_vnlock); mtx_destroy(&vp->v_interlock); bo = &vp->v_bufobj; rw_destroy(BO_LOCKPTR(bo)); } static void vntblinit(void *dummy __unused) { u_int i; int physvnodes, virtvnodes; /* * Desiredvnodes is a function of the physical memory size and the * kernel's heap size. Generally speaking, it scales with the * physical memory size. The ratio of desiredvnodes to the physical * memory size is 1:16 until desiredvnodes exceeds 98,304. * Thereafter, the * marginal ratio of desiredvnodes to the physical memory size is * 1:64. However, desiredvnodes is limited by the kernel's heap * size. The memory required by desiredvnodes vnodes and vm objects * must not exceed 1/7th of the kernel's heap size. */ physvnodes = maxproc + pgtok(vm_cnt.v_page_count) / 64 + 3 * min(98304 * 16, pgtok(vm_cnt.v_page_count)) / 64; virtvnodes = vm_kmem_size / (7 * (sizeof(struct vm_object) + sizeof(struct vnode))); desiredvnodes = min(physvnodes, virtvnodes); if (desiredvnodes > MAXVNODES_MAX) { if (bootverbose) printf("Reducing kern.maxvnodes %d -> %d\n", desiredvnodes, MAXVNODES_MAX); desiredvnodes = MAXVNODES_MAX; } wantfreevnodes = desiredvnodes / 4; mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF); TAILQ_INIT(&vnode_free_list); mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF); vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, vnode_init, vnode_fini, UMA_ALIGN_PTR, 0); vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* * Preallocate enough nodes to support one-per buf so that * we can not fail an insert. reassignbuf() callers can not * tolerate the insertion failure. */ buf_trie_zone = uma_zcreate("BUF TRIE", pctrie_node_size(), NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); uma_prealloc(buf_trie_zone, nbuf); /* * Initialize the filesystem syncer. */ syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, &syncer_mask); syncer_maxdelay = syncer_mask + 1; mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF); cv_init(&sync_wakeup, "syncer"); for (i = 1; i <= sizeof(struct vnode); i <<= 1) vnsz2log++; vnsz2log--; } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL); /* * Mark a mount point as busy. 
Used to synchronize access and to delay * unmounting. Eventually, mountlist_mtx is not released on failure. * * vfs_busy() is a custom lock, it can block the caller. * vfs_busy() only sleeps if the unmount is active on the mount point. * For a mountpoint mp, vfs_busy-enforced lock is before lock of any * vnode belonging to mp. * * Lookup uses vfs_busy() to traverse mount points. * root fs var fs * / vnode lock A / vnode lock (/var) D * /var vnode lock B /log vnode lock(/var/log) E * vfs_busy lock C vfs_busy lock F * * Within each file system, the lock order is C->A->B and F->D->E. * * When traversing across mounts, the system follows that lock order: * * C->A->B * | * +->F->D->E * * The lookup() process for namei("/var") illustrates the process: * VOP_LOOKUP() obtains B while A is held * vfs_busy() obtains a shared lock on F while A and B are held * vput() releases lock on B * vput() releases lock on A * VFS_ROOT() obtains lock on D while shared lock on F is held * vfs_unbusy() releases shared lock on F * vn_lock() obtains lock on deadfs vnode vp_crossmp instead of A. * Attempt to lock A (instead of vp_crossmp) while D is held would * violate the global order, causing deadlocks. * * dounmount() locks B while F is drained. */ int vfs_busy(struct mount *mp, int flags) { MPASS((flags & ~MBF_MASK) == 0); CTR3(KTR_VFS, "%s: mp %p with flags %d", __func__, mp, flags); MNT_ILOCK(mp); MNT_REF(mp); /* * If mount point is currenly being unmounted, sleep until the * mount point fate is decided. If thread doing the unmounting fails, * it will clear MNTK_UNMOUNT flag before waking us up, indicating * that this mount point has survived the unmount attempt and vfs_busy * should retry. Otherwise the unmounter thread will set MNTK_REFEXPIRE * flag in addition to MNTK_UNMOUNT, indicating that mount point is * about to be really destroyed. vfs_busy needs to release its * reference on the mount point in this case and return with ENOENT, * telling the caller that mount mount it tried to busy is no longer * valid. */ while (mp->mnt_kern_flag & MNTK_UNMOUNT) { if (flags & MBF_NOWAIT || mp->mnt_kern_flag & MNTK_REFEXPIRE) { MNT_REL(mp); MNT_IUNLOCK(mp); CTR1(KTR_VFS, "%s: failed busying before sleeping", __func__); return (ENOENT); } if (flags & MBF_MNTLSTLOCK) mtx_unlock(&mountlist_mtx); mp->mnt_kern_flag |= MNTK_MWAIT; msleep(mp, MNT_MTX(mp), PVFS | PDROP, "vfs_busy", 0); if (flags & MBF_MNTLSTLOCK) mtx_lock(&mountlist_mtx); MNT_ILOCK(mp); } if (flags & MBF_MNTLSTLOCK) mtx_unlock(&mountlist_mtx); mp->mnt_lockref++; MNT_IUNLOCK(mp); return (0); } /* * Free a busy filesystem. */ void vfs_unbusy(struct mount *mp) { CTR2(KTR_VFS, "%s: mp %p", __func__, mp); MNT_ILOCK(mp); MNT_REL(mp); KASSERT(mp->mnt_lockref > 0, ("negative mnt_lockref")); mp->mnt_lockref--; if (mp->mnt_lockref == 0 && (mp->mnt_kern_flag & MNTK_DRAINING) != 0) { MPASS(mp->mnt_kern_flag & MNTK_UNMOUNT); CTR1(KTR_VFS, "%s: waking up waiters", __func__); mp->mnt_kern_flag &= ~MNTK_DRAINING; wakeup(&mp->mnt_lockref); } MNT_IUNLOCK(mp); } /* * Lookup a mount point by filesystem identifier. 
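 * Returns a referenced mount point, or NULL if no mounted filesystem
 * matches; the caller is expected to drop the reference with vfs_rel(),
 * as the vfs_getnewfsid() loop further down does.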
*/ struct mount * vfs_getvfs(fsid_t *fsid) { struct mount *mp; CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid); mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { vfs_ref(mp); mtx_unlock(&mountlist_mtx); return (mp); } } mtx_unlock(&mountlist_mtx); CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid); return ((struct mount *) 0); } /* * Lookup a mount point by filesystem identifier, busying it before * returning. * * To avoid congestion on mountlist_mtx, implement simple direct-mapped * cache for popular filesystem identifiers. The cache is lockess, using * the fact that struct mount's are never freed. In worst case we may * get pointer to unmounted or even different filesystem, so we have to * check what we got, and go slow way if so. */ struct mount * vfs_busyfs(fsid_t *fsid) { #define FSID_CACHE_SIZE 256 typedef struct mount * volatile vmp_t; static vmp_t cache[FSID_CACHE_SIZE]; struct mount *mp; int error; uint32_t hash; CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid); hash = fsid->val[0] ^ fsid->val[1]; hash = (hash >> 16 ^ hash) & (FSID_CACHE_SIZE - 1); mp = cache[hash]; if (mp == NULL || mp->mnt_stat.f_fsid.val[0] != fsid->val[0] || mp->mnt_stat.f_fsid.val[1] != fsid->val[1]) goto slow; if (vfs_busy(mp, 0) != 0) { cache[hash] = NULL; goto slow; } if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) return (mp); else vfs_unbusy(mp); slow: mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { error = vfs_busy(mp, MBF_MNTLSTLOCK); if (error) { cache[hash] = NULL; mtx_unlock(&mountlist_mtx); return (NULL); } cache[hash] = mp; return (mp); } } CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid); mtx_unlock(&mountlist_mtx); return ((struct mount *) 0); } /* * Check if a user can access privileged mount options. */ int vfs_suser(struct mount *mp, struct thread *td) { int error; /* * If the thread is jailed, but this is not a jail-friendly file * system, deny immediately. */ if (!(mp->mnt_vfc->vfc_flags & VFCF_JAIL) && jailed(td->td_ucred)) return (EPERM); /* * If the file system was mounted outside the jail of the calling * thread, deny immediately. */ if (prison_check(td->td_ucred, mp->mnt_cred) != 0) return (EPERM); /* * If file system supports delegated administration, we don't check * for the PRIV_VFS_MOUNT_OWNER privilege - it will be better verified * by the file system itself. * If this is not the user that did original mount, we check for * the PRIV_VFS_MOUNT_OWNER privilege. */ if (!(mp->mnt_vfc->vfc_flags & VFCF_DELEGADMIN) && mp->mnt_cred->cr_uid != td->td_ucred->cr_uid) { if ((error = priv_check(td, PRIV_VFS_MOUNT_OWNER)) != 0) return (error); } return (0); } /* * Get a new unique fsid. Try to make its val[0] unique, since this value * will be used to create fake device numbers for stat(). Also try (but * not so hard) make its val[0] unique mod 2^16, since some emulators only * support 16-bit device numbers. We end up with unique val[0]'s for the * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls. * * Keep in mind that several mounts may be running in parallel. Starting * the search one past where the previous search terminated is both a * micro-optimization and a defense against returning the same fsid to * different mounts. 
*/ void vfs_getnewfsid(struct mount *mp) { static uint16_t mntid_base; struct mount *nmp; fsid_t tfsid; int mtype; CTR2(KTR_VFS, "%s: mp %p", __func__, mp); mtx_lock(&mntid_mtx); mtype = mp->mnt_vfc->vfc_typenum; tfsid.val[1] = mtype; mtype = (mtype & 0xFF) << 24; for (;;) { tfsid.val[0] = makedev(255, mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF)); mntid_base++; if ((nmp = vfs_getvfs(&tfsid)) == NULL) break; vfs_rel(nmp); } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; mp->mnt_stat.f_fsid.val[1] = tfsid.val[1]; mtx_unlock(&mntid_mtx); } /* * Knob to control the precision of file timestamps: * * 0 = seconds only; nanoseconds zeroed. * 1 = seconds and nanoseconds, accurate within 1/HZ. * 2 = seconds and nanoseconds, truncated to microseconds. * >=3 = seconds and nanoseconds, maximum precision. */ enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC }; static int timestamp_precision = TSP_USEC; SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW, ×tamp_precision, 0, "File timestamp precision (0: seconds, " "1: sec + ns accurate to 1/HZ, 2: sec + ns truncated to ms, " "3+: sec + ns (max. precision))"); /* * Get a current timestamp. */ void vfs_timestamp(struct timespec *tsp) { struct timeval tv; switch (timestamp_precision) { case TSP_SEC: tsp->tv_sec = time_second; tsp->tv_nsec = 0; break; case TSP_HZ: getnanotime(tsp); break; case TSP_USEC: microtime(&tv); TIMEVAL_TO_TIMESPEC(&tv, tsp); break; case TSP_NSEC: default: nanotime(tsp); break; } } /* * Set vnode attributes to VNOVAL */ void vattr_null(struct vattr *vap) { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = VNOVAL; vap->va_nlink = VNOVAL; vap->va_uid = VNOVAL; vap->va_gid = VNOVAL; vap->va_fsid = VNOVAL; vap->va_fileid = VNOVAL; vap->va_blocksize = VNOVAL; vap->va_rdev = VNOVAL; vap->va_atime.tv_sec = VNOVAL; vap->va_atime.tv_nsec = VNOVAL; vap->va_mtime.tv_sec = VNOVAL; vap->va_mtime.tv_nsec = VNOVAL; vap->va_ctime.tv_sec = VNOVAL; vap->va_ctime.tv_nsec = VNOVAL; vap->va_birthtime.tv_sec = VNOVAL; vap->va_birthtime.tv_nsec = VNOVAL; vap->va_flags = VNOVAL; vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * This routine is called when we have too many vnodes. It attempts * to free vnodes and will potentially free vnodes that still * have VM backing store (VM backing store is typically the cause * of a vnode blowout so we want to do this). Therefore, this operation * is not considered cheap. * * A number of conditions may prevent a vnode from being reclaimed. * the buffer cache may have references on the vnode, a directory * vnode may still have references due to the namei cache representing * underlying files, or the vnode may be in active use. It is not * desireable to reuse such vnodes. These conditions may cause the * number of vnodes to reach some minimum value regardless of what * you set kern.maxvnodes to. Do not set kern.maxvnodes too low. */ static int vlrureclaim(struct mount *mp, int reclaim_nc_src, int trigger) { struct vnode *vp; int count, done, target; done = 0; vn_start_write(NULL, &mp, V_WAIT); MNT_ILOCK(mp); count = mp->mnt_nvnodelistsize; target = count * (int64_t)gapvnodes / imax(desiredvnodes, 1); target = target / 10 + 1; while (count != 0 && done < target) { vp = TAILQ_FIRST(&mp->mnt_nvnodelist); while (vp != NULL && vp->v_type == VMARKER) vp = TAILQ_NEXT(vp, v_nmntvnodes); if (vp == NULL) break; /* * XXX LRU is completely broken for non-free vnodes. 
First * by calling here in mountpoint order, then by moving * unselected vnodes to the end here, and most grossly by * removing the vlruvp() function that was supposed to * maintain the order. (This function was born broken * since syncer problems prevented it doing anything.) The * order is closer to LRC (C = Created). * * LRU reclaiming of vnodes seems to have last worked in * FreeBSD-3 where LRU wasn't mentioned under any spelling. * Then there was no hold count, and inactive vnodes were * simply put on the free list in LRU order. The separate * lists also break LRU. We prefer to reclaim from the * free list for technical reasons. This tends to thrash * the free list to keep very unrecently used held vnodes. * The problem is mitigated by keeping the free list large. */ TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); --count; if (!VI_TRYLOCK(vp)) goto next_iter; /* * If it's been deconstructed already, it's still * referenced, or it exceeds the trigger, skip it. * Also skip free vnodes. We are trying to make space * to expand the free list, not reduce it. */ if (vp->v_usecount || (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) || ((vp->v_iflag & VI_FREE) != 0) || (vp->v_iflag & VI_DOOMED) != 0 || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VI_UNLOCK(vp); goto next_iter; } MNT_IUNLOCK(mp); vholdl(vp); if (VOP_LOCK(vp, LK_INTERLOCK|LK_EXCLUSIVE|LK_NOWAIT)) { vdrop(vp); goto next_iter_mntunlocked; } VI_LOCK(vp); /* * v_usecount may have been bumped after VOP_LOCK() dropped * the vnode interlock and before it was locked again. * * It is not necessary to recheck VI_DOOMED because it can * only be set by another thread that holds both the vnode * lock and vnode interlock. If another thread has the * vnode lock before we get to VOP_LOCK() and obtains the * vnode interlock after VOP_LOCK() drops the vnode * interlock, the other thread will be unable to drop the * vnode lock before our VOP_LOCK() call fails. */ if (vp->v_usecount || (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) || (vp->v_iflag & VI_FREE) != 0 || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VOP_UNLOCK(vp, LK_INTERLOCK); vdrop(vp); goto next_iter_mntunlocked; } KASSERT((vp->v_iflag & VI_DOOMED) == 0, ("VI_DOOMED unexpectedly detected in vlrureclaim()")); atomic_add_long(&recycles_count, 1); vgonel(vp); VOP_UNLOCK(vp, 0); vdropl(vp); done++; next_iter_mntunlocked: if (!should_yield()) goto relock_mnt; goto yield; next_iter: if (!should_yield()) continue; MNT_IUNLOCK(mp); yield: kern_yield(PRI_USER); relock_mnt: MNT_ILOCK(mp); } MNT_IUNLOCK(mp); vn_finished_write(mp); return done; } /* * Attempt to reduce the free list by the requested amount. */ static void vnlru_free(int count) { struct vnode *vp; mtx_assert(&vnode_free_list_mtx, MA_OWNED); for (; count > 0; count--) { vp = TAILQ_FIRST(&vnode_free_list); /* * The list can be modified while the free_list_mtx * has been dropped and vp could be NULL here. */ if (!vp) break; VNASSERT(vp->v_op != NULL, vp, ("vnlru_free: vnode already reclaimed.")); KASSERT((vp->v_iflag & VI_FREE) != 0, ("Removing vnode not on freelist")); KASSERT((vp->v_iflag & VI_ACTIVE) == 0, ("Mangling active vnode")); TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist); /* * Don't recycle if we can't get the interlock. 
*/ if (!VI_TRYLOCK(vp)) { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist); continue; } VNASSERT((vp->v_iflag & VI_FREE) != 0 && vp->v_holdcnt == 0, vp, ("vp inconsistent on freelist")); /* * The clear of VI_FREE prevents activation of the * vnode. There is no sense in putting the vnode on * the mount point active list, only to remove it * later during recycling. Inline the relevant part * of vholdl(), to avoid triggering assertions or * activating. */ freevnodes--; vp->v_iflag &= ~VI_FREE; refcount_acquire(&vp->v_holdcnt); mtx_unlock(&vnode_free_list_mtx); VI_UNLOCK(vp); vtryrecycle(vp); /* * If the recycle succeeded this vdrop will actually free * the vnode. If not it will simply place it back on * the free list. */ vdrop(vp); mtx_lock(&vnode_free_list_mtx); } } /* XXX some names and initialization are bad for limits and watermarks. */ static int vspace(void) { int space; gapvnodes = imax(desiredvnodes - wantfreevnodes, 100); vhiwat = gapvnodes / 11; /* 9% -- just under the 10% in vlrureclaim() */ vlowat = vhiwat / 2; if (numvnodes > desiredvnodes) return (0); space = desiredvnodes - numvnodes; if (freevnodes > wantfreevnodes) space += freevnodes - wantfreevnodes; return (space); } /* * Attempt to recycle vnodes in a context that is always safe to block. * Calling vlrureclaim() from the bowels of filesystem code has some * interesting deadlock problems. */ static struct proc *vnlruproc; static int vnlruproc_sig; static void vnlru_proc(void) { struct mount *mp, *nmp; unsigned long ofreevnodes, onumvnodes; int done, force, reclaim_nc_src, trigger, usevnodes; EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, vnlruproc, SHUTDOWN_PRI_FIRST); force = 0; for (;;) { kproc_suspend_check(vnlruproc); mtx_lock(&vnode_free_list_mtx); /* * If numvnodes is too large (due to desiredvnodes being * adjusted using its sysctl, or emergency growth), first * try to reduce it by discarding from the free list. */ if (numvnodes > desiredvnodes && freevnodes > 0) vnlru_free(ulmin(numvnodes - desiredvnodes, freevnodes)); /* * Sleep if the vnode cache is in a good state. This is * when it is not over-full and has space for about a 4% * or 9% expansion (by growing its size or inexcessively * reducing its free list). Otherwise, try to reclaim * space for a 10% expansion. */ if (vstir && force == 0) { force = 1; vstir = 0; } if (vspace() >= vlowat && force == 0) { vnlruproc_sig = 0; wakeup(&vnlruproc_sig); msleep(vnlruproc, &vnode_free_list_mtx, PVFS|PDROP, "vlruwt", hz); continue; } mtx_unlock(&vnode_free_list_mtx); done = 0; ofreevnodes = freevnodes; onumvnodes = numvnodes; /* * Calculate parameters for recycling. These are the same * throughout the loop to give some semblance of fairness. * The trigger point is to avoid recycling vnodes with lots * of resident pages. We aren't trying to free memory; we * are trying to recycle or at least free vnodes. */ if (numvnodes <= desiredvnodes) usevnodes = numvnodes - freevnodes; else usevnodes = numvnodes; if (usevnodes <= 0) usevnodes = 1; /* * The trigger value is chosen to give a conservatively * large value to ensure that it alone doesn't prevent * making progress. The value can easily be so large that * it is effectively infinite in some congested and * misconfigured cases, and this is necessary. Normally * it is about 8 to 100 (pages), which is quite large.
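 * For instance, on a machine with roughly one million physical pages
 * (about 4GB of RAM with 4K pages) and 20000 in-use vnodes, the
 * computation below yields a trigger of roughly 100 pages.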
*/ trigger = vm_cnt.v_page_count * 2 / usevnodes; if (force < 2) trigger = vsmalltrigger; reclaim_nc_src = force >= 3; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } done += vlrureclaim(mp, reclaim_nc_src, trigger); mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp); } mtx_unlock(&mountlist_mtx); if (onumvnodes > desiredvnodes && numvnodes <= desiredvnodes) uma_reclaim(); if (done == 0) { if (force == 0 || force == 1) { force = 2; continue; } if (force == 2) { force = 3; continue; } force = 0; vnlru_nowhere++; tsleep(vnlruproc, PPAUSE, "vlrup", hz * 3); } else kern_yield(PRI_USER); /* * After becoming active to expand above low water, keep * active until above high water. */ force = vspace() < vhiwat; } } static struct kproc_desc vnlru_kp = { "vnlru", vnlru_proc, &vnlruproc }; SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &vnlru_kp); /* * Routines having to do with the management of the vnode table. */ /* * Try to recycle a freed vnode. We abort if anyone picks up a reference * before we actually vgone(). This function must be called with the vnode * held to prevent the vnode from being returned to the free list midway * through vgone(). */ static int vtryrecycle(struct vnode *vp) { struct mount *vnmp; CTR2(KTR_VFS, "%s: vp %p", __func__, vp); VNASSERT(vp->v_holdcnt, vp, ("vtryrecycle: Recycling vp %p without a reference.", vp)); /* * This vnode may found and locked via some other list, if so we * can't recycle it yet. */ if (VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { CTR2(KTR_VFS, "%s: impossible to recycle, vp %p lock is already held", __func__, vp); return (EWOULDBLOCK); } /* * Don't recycle if its filesystem is being suspended. */ if (vn_start_write(vp, &vnmp, V_NOWAIT) != 0) { VOP_UNLOCK(vp, 0); CTR2(KTR_VFS, "%s: impossible to recycle, cannot start the write for %p", __func__, vp); return (EBUSY); } /* * If we got this far, we need to acquire the interlock and see if * anyone picked up this vnode from another list. If not, we will * mark it with DOOMED via vgonel() so that anyone who does find it * will skip over it. */ VI_LOCK(vp); if (vp->v_usecount) { VOP_UNLOCK(vp, LK_INTERLOCK); vn_finished_write(vnmp); CTR2(KTR_VFS, "%s: impossible to recycle, %p is already referenced", __func__, vp); return (EBUSY); } if ((vp->v_iflag & VI_DOOMED) == 0) { atomic_add_long(&recycles_count, 1); vgonel(vp); } VOP_UNLOCK(vp, LK_INTERLOCK); vn_finished_write(vnmp); return (0); } static void vcheckspace(void) { if (vspace() < vlowat && vnlruproc_sig == 0) { vnlruproc_sig = 1; wakeup(vnlruproc); } } /* * Wait if necessary for space for a new vnode. */ static int getnewvnode_wait(int suspended) { mtx_assert(&vnode_free_list_mtx, MA_OWNED); if (numvnodes >= desiredvnodes) { if (suspended) { /* * The file system is being suspended. We cannot * risk a deadlock here, so allow allocation of * another vnode even if this would give too many. */ return (0); } if (vnlruproc_sig == 0) { vnlruproc_sig = 1; /* avoid unnecessary wakeups */ wakeup(vnlruproc); } msleep(&vnlruproc_sig, &vnode_free_list_mtx, PVFS, "vlruwk", hz); } /* Post-adjust like the pre-adjust in getnewvnode(). */ if (numvnodes + 1 > desiredvnodes && freevnodes > 1) vnlru_free(1); return (numvnodes >= desiredvnodes ? ENFILE : 0); } /* * This hack is fragile, and probably not needed any more now that the * watermark handling works. 
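 * It pre-charges numvnodes by 'count' and records the credit in
 * td->td_vp_reserv, so that getnewvnode() calls made later by this thread,
 * typically from contexts where waiting for vnodes is undesirable, can
 * draw on the reservation instead of blocking.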
*/ void getnewvnode_reserve(u_int count) { struct thread *td; /* Pre-adjust like the pre-adjust in getnewvnode(), with any count. */ /* XXX no longer so quick, but this part is not racy. */ mtx_lock(&vnode_free_list_mtx); if (numvnodes + count > desiredvnodes && freevnodes > wantfreevnodes) vnlru_free(ulmin(numvnodes + count - desiredvnodes, freevnodes - wantfreevnodes)); mtx_unlock(&vnode_free_list_mtx); td = curthread; /* First try to be quick and racy. */ if (atomic_fetchadd_long(&numvnodes, count) + count <= desiredvnodes) { td->td_vp_reserv += count; vcheckspace(); /* XXX no longer so quick, but more racy */ return; } else atomic_subtract_long(&numvnodes, count); mtx_lock(&vnode_free_list_mtx); while (count > 0) { if (getnewvnode_wait(0) == 0) { count--; td->td_vp_reserv++; atomic_add_long(&numvnodes, 1); } } vcheckspace(); mtx_unlock(&vnode_free_list_mtx); } /* * This hack is fragile, especially if desiredvnodes or wantvnodes are * misconfigured or changed significantly. Reducing desiredvnodes below * the reserved amount should cause bizarre behaviour like reducing it * below the number of active vnodes -- the system will try to reduce * numvnodes to match, but should fail, so the subtraction below should * not overflow. */ void getnewvnode_drop_reserve(void) { struct thread *td; td = curthread; atomic_subtract_long(&numvnodes, td->td_vp_reserv); td->td_vp_reserv = 0; } /* * Return the next vnode from the free list. */ int getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, struct vnode **vpp) { struct vnode *vp; struct thread *td; struct lock_object *lo; static int cyclecount; int error; CTR3(KTR_VFS, "%s: mp %p with tag %s", __func__, mp, tag); vp = NULL; td = curthread; if (td->td_vp_reserv > 0) { td->td_vp_reserv -= 1; goto alloc; } mtx_lock(&vnode_free_list_mtx); if (numvnodes < desiredvnodes) cyclecount = 0; else if (cyclecount++ >= freevnodes) { cyclecount = 0; vstir = 1; } /* * Grow the vnode cache if it will not be above its target max * after growing. Otherwise, if the free list is nonempty, try * to reclaim 1 item from it before growing the cache (possibly * above its target max if the reclamation failed or is delayed). * Otherwise, wait for some space. In all cases, schedule * vnlru_proc() if we are getting short of space. The watermarks * should be chosen so that we never wait or even reclaim from * the free list to below its target minimum. */ if (numvnodes + 1 <= desiredvnodes) ; else if (freevnodes > 0) vnlru_free(1); else { error = getnewvnode_wait(mp != NULL && (mp->mnt_kern_flag & MNTK_SUSPEND)); #if 0 /* XXX Not all VFS_VGET/ffs_vget callers check returns. */ if (error != 0) { mtx_unlock(&vnode_free_list_mtx); return (error); } #endif } vcheckspace(); atomic_add_long(&numvnodes, 1); mtx_unlock(&vnode_free_list_mtx); alloc: atomic_add_long(&vnodes_created, 1); vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK); /* * Locks are given the generic name "vnode" when created. * Follow the historic practice of using the filesystem * name when they are allocated, e.g., "zfs", "ufs", "nfs", etc. * * Locks live in a witness group keyed on their name. Thus, * when a lock is renamed, it must also move from the witness * group of its old name to the witness group of its new name. * * The change only needs to be made when the vnode moves * from one filesystem type to another. We ensure that each * filesystem uses a single static name pointer for its tag so * that we can compare pointers rather than doing a strcmp().
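 * For example, a vnode recycled from a "ufs" mount to a "zfs" mount has
 * its lock renamed from "ufs" to "zfs" here and is moved to the matching
 * witness group by the WITNESS_DESTROY()/WITNESS_INIT() pair below.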
*/ lo = &vp->v_vnlock->lock_object; if (lo->lo_name != tag) { lo->lo_name = tag; WITNESS_DESTROY(lo); WITNESS_INIT(lo, tag); } /* * By default, don't allow shared locks unless filesystems opt-in. */ vp->v_vnlock->lock_object.lo_flags |= LK_NOSHARE; /* * Finalize various vnode identity bits. */ KASSERT(vp->v_object == NULL, ("stale v_object %p", vp)); KASSERT(vp->v_lockf == NULL, ("stale v_lockf %p", vp)); KASSERT(vp->v_pollinfo == NULL, ("stale v_pollinfo %p", vp)); vp->v_type = VNON; vp->v_tag = tag; vp->v_op = vops; v_init_counters(vp); vp->v_bufobj.bo_ops = &buf_ops_bio; #ifdef MAC mac_vnode_init(vp); if (mp != NULL && (mp->mnt_flag & MNT_MULTILABEL) == 0) mac_vnode_associate_singlelabel(mp, vp); else if (mp == NULL && vops != &dead_vnodeops) printf("NULL mp in getnewvnode()\n"); #endif if (mp != NULL) { vp->v_bufobj.bo_bsize = mp->mnt_stat.f_iosize; if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0) vp->v_vflag |= VV_NOKNOTE; } /* * For the filesystems which do not use vfs_hash_insert(), * still initialize v_hash to have vfs_hash_index() useful. * E.g., nullfs uses vfs_hash_index() on the lower vnode for * its own hashing. */ vp->v_hash = (uintptr_t)vp >> vnsz2log; *vpp = vp; return (0); } /* * Delete from old mount point vnode list, if on one. */ static void delmntque(struct vnode *vp) { struct mount *mp; int active; mp = vp->v_mount; if (mp == NULL) return; MNT_ILOCK(mp); VI_LOCK(vp); KASSERT(mp->mnt_activevnodelistsize <= mp->mnt_nvnodelistsize, ("Active vnode list size %d > Vnode list size %d", mp->mnt_activevnodelistsize, mp->mnt_nvnodelistsize)); active = vp->v_iflag & VI_ACTIVE; vp->v_iflag &= ~VI_ACTIVE; if (active) { mtx_lock(&vnode_free_list_mtx); TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist); mp->mnt_activevnodelistsize--; mtx_unlock(&vnode_free_list_mtx); } vp->v_mount = NULL; VI_UNLOCK(vp); VNASSERT(mp->mnt_nvnodelistsize > 0, vp, ("bad mount point vnode list size")); TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); mp->mnt_nvnodelistsize--; MNT_REL(mp); MNT_IUNLOCK(mp); } static void insmntque_stddtr(struct vnode *vp, void *dtr_arg) { vp->v_data = NULL; vp->v_op = &dead_vnodeops; vgone(vp); vput(vp); } /* * Insert into list of vnodes for the new mount point, if available. */ int insmntque1(struct vnode *vp, struct mount *mp, void (*dtr)(struct vnode *, void *), void *dtr_arg) { KASSERT(vp->v_mount == NULL, ("insmntque: vnode already on per mount vnode list")); VNASSERT(mp != NULL, vp, ("Don't call insmntque(foo, NULL)")); ASSERT_VOP_ELOCKED(vp, "insmntque: non-locked vp"); /* * We acquire the vnode interlock early to ensure that the * vnode cannot be recycled by another process releasing a * holdcnt on it before we get it on both the vnode list * and the active vnode list. The mount mutex protects only * manipulation of the vnode list and the vnode freelist * mutex protects only manipulation of the active vnode list. * Hence the need to hold the vnode interlock throughout. 
*/ MNT_ILOCK(mp); VI_LOCK(vp); if (((mp->mnt_kern_flag & MNTK_NOINSMNTQ) != 0 && ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0 || mp->mnt_nvnodelistsize == 0)) && (vp->v_vflag & VV_FORCEINSMQ) == 0) { VI_UNLOCK(vp); MNT_IUNLOCK(mp); if (dtr != NULL) dtr(vp, dtr_arg); return (EBUSY); } vp->v_mount = mp; MNT_REF(mp); TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); VNASSERT(mp->mnt_nvnodelistsize >= 0, vp, ("neg mount point vnode list size")); mp->mnt_nvnodelistsize++; KASSERT((vp->v_iflag & VI_ACTIVE) == 0, ("Activating already active vnode")); vp->v_iflag |= VI_ACTIVE; mtx_lock(&vnode_free_list_mtx); TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist); mp->mnt_activevnodelistsize++; mtx_unlock(&vnode_free_list_mtx); VI_UNLOCK(vp); MNT_IUNLOCK(mp); return (0); } int insmntque(struct vnode *vp, struct mount *mp) { return (insmntque1(vp, mp, insmntque_stddtr, NULL)); } /* * Flush out and invalidate all buffers associated with a bufobj * Called with the underlying object locked. */ int bufobj_invalbuf(struct bufobj *bo, int flags, int slpflag, int slptimeo) { int error; BO_LOCK(bo); if (flags & V_SAVE) { error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { BO_UNLOCK(bo); return (error); } if (bo->bo_dirty.bv_cnt > 0) { BO_UNLOCK(bo); if ((error = BO_SYNC(bo, MNT_WAIT)) != 0) return (error); /* * XXX We could save a lock/unlock if this was only * enabled under INVARIANTS */ BO_LOCK(bo); if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) panic("vinvalbuf: dirty bufs"); } } /* * If you alter this loop please notice that interlock is dropped and * reacquired in flushbuflist. Special care is needed to ensure that * no race conditions occur from this. */ do { error = flushbuflist(&bo->bo_clean, flags, bo, slpflag, slptimeo); if (error == 0 && !(flags & V_CLEANONLY)) error = flushbuflist(&bo->bo_dirty, flags, bo, slpflag, slptimeo); if (error != 0 && error != EAGAIN) { BO_UNLOCK(bo); return (error); } } while (error != 0); /* * Wait for I/O to complete. XXX needs cleaning up. The vnode can * have write I/O in-progress but if there is a VM object then the * VM object can also have read-I/O in-progress. */ do { bufobj_wwait(bo, 0, 0); BO_UNLOCK(bo); if (bo->bo_object != NULL) { VM_OBJECT_WLOCK(bo->bo_object); vm_object_pip_wait(bo->bo_object, "bovlbx"); VM_OBJECT_WUNLOCK(bo->bo_object); } BO_LOCK(bo); } while (bo->bo_numoutput > 0); BO_UNLOCK(bo); /* * Destroy the copy in the VM cache, too. */ if (bo->bo_object != NULL && (flags & (V_ALT | V_NORMAL | V_CLEANONLY)) == 0) { VM_OBJECT_WLOCK(bo->bo_object); vm_object_page_remove(bo->bo_object, 0, 0, (flags & V_SAVE) ? OBJPR_CLEANONLY : 0); VM_OBJECT_WUNLOCK(bo->bo_object); } #ifdef INVARIANTS BO_LOCK(bo); if ((flags & (V_ALT | V_NORMAL | V_CLEANONLY)) == 0 && (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) panic("vinvalbuf: flush failed"); BO_UNLOCK(bo); #endif return (0); } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(struct vnode *vp, int flags, int slpflag, int slptimeo) { CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags); ASSERT_VOP_LOCKED(vp, "vinvalbuf"); if (vp->v_object != NULL && vp->v_object->handle != vp) return (0); return (bufobj_invalbuf(&vp->v_bufobj, flags, slpflag, slptimeo)); } /* * Flush out buffers on the specified list. 
* */ static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo) { struct buf *bp, *nbp; int retval, error; daddr_t lblkno; b_xflags_t xflags; ASSERT_BO_WLOCKED(bo); retval = 0; TAILQ_FOREACH_SAFE(bp, &bufv->bv_hd, b_bobufs, nbp) { if (((flags & V_NORMAL) && (bp->b_xflags & BX_ALTDATA)) || ((flags & V_ALT) && (bp->b_xflags & BX_ALTDATA) == 0)) { continue; } lblkno = 0; xflags = 0; if (nbp != NULL) { lblkno = nbp->b_lblkno; xflags = nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN); } retval = EAGAIN; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo), "flushbuf", slpflag, slptimeo); if (error) { BO_LOCK(bo); return (error != ENOLCK ? error : EAGAIN); } KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. */ if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && (flags & V_SAVE)) { bremfree(bp); bp->b_flags |= B_ASYNC; bwrite(bp); BO_LOCK(bo); return (EAGAIN); /* XXX: why not loop ? */ } bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); BO_LOCK(bo); nbp = gbincore(bo, lblkno); if (nbp == NULL || (nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) != xflags) break; /* nbp invalid */ } return (retval); } int bnoreuselist(struct bufv *bufv, struct bufobj *bo, daddr_t startn, daddr_t endn) { struct buf *bp; int error; daddr_t lblkno; ASSERT_BO_LOCKED(bo); - for (lblkno = startn;; lblkno++) { + for (lblkno = startn;;) { +again: bp = BUF_PCTRIE_LOOKUP_GE(&bufv->bv_root, lblkno); if (bp == NULL || bp->b_lblkno >= endn) break; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo), "brlsfl", 0, 0); if (error != 0) { BO_RLOCK(bo); - return (error != ENOLCK ? error : EAGAIN); + if (error == ENOLCK) + goto again; + return (error); } KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); + lblkno = bp->b_lblkno + 1; if ((bp->b_flags & B_MANAGED) == 0) bremfree(bp); bp->b_flags |= B_RELBUF; /* * In the VMIO case, use the B_NOREUSE flag to hint that the * pages backing each buffer in the range are unlikely to be * reused. Dirty buffers will have the hint applied once * they've been written. */ if (bp->b_vp->v_object != NULL) bp->b_flags |= B_NOREUSE; brelse(bp); BO_RLOCK(bo); } return (0); } /* * Truncate a file's buffer and pages to a specified length. This * is in lieu of the old vinvalbuf mechanism, which performed unneeded * sync activity. */ int vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length, int blksize) { struct buf *bp, *nbp; int anyfreed; int trunclbn; struct bufobj *bo; CTR5(KTR_VFS, "%s: vp %p with cred %p and block %d:%ju", __func__, vp, cred, blksize, (uintmax_t)length); /* * Round up to the *next* lbn. 
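 *
 * For example, with a blksize of 4096: a length of 8192 yields a
 * trunclbn of 2, so blocks 0 and 1 are kept and everything from block 2
 * on is flushed, while a length of 8193 rounds up to a trunclbn of 3 so
 * the partially valid block 2 is preserved as well.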
*/ trunclbn = (length + blksize - 1) / blksize; ASSERT_VOP_LOCKED(vp, "vtruncbuf"); restart: bo = &vp->v_bufobj; BO_LOCK(bo); anyfreed = 1; for (;anyfreed;) { anyfreed = 0; TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < trunclbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) goto restart; bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; BO_LOCK(bo); if (nbp != NULL && (((nbp->b_xflags & BX_VNCLEAN) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI))) { BO_UNLOCK(bo); goto restart; } } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < trunclbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) goto restart; bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; BO_LOCK(bo); if (nbp != NULL && (((nbp->b_xflags & BX_VNDIRTY) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI) == 0)) { BO_UNLOCK(bo); goto restart; } } } if (length > 0) { restartsync: TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno > 0) continue; /* * Since we hold the vnode lock this should only * fail if we're racing with the buf daemon. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { goto restart; } VNASSERT((bp->b_flags & B_DELWRI), vp, ("buf(%p) on dirty queue without DELWRI", bp)); bremfree(bp); bawrite(bp); BO_LOCK(bo); goto restartsync; } } bufobj_wwait(bo, 0, 0); BO_UNLOCK(bo); vnode_pager_setsize(vp, length); return (0); } static void buf_vlist_remove(struct buf *bp) { struct bufv *bv; KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); ASSERT_BO_WLOCKED(bp->b_bufobj); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) != (BX_VNDIRTY|BX_VNCLEAN), ("buf_vlist_remove: Buf %p is on two lists", bp)); if (bp->b_xflags & BX_VNDIRTY) bv = &bp->b_bufobj->bo_dirty; else bv = &bp->b_bufobj->bo_clean; BUF_PCTRIE_REMOVE(&bv->bv_root, bp->b_lblkno); TAILQ_REMOVE(&bv->bv_hd, bp, b_bobufs); bv->bv_cnt--; bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN); } /* * Add the buffer to the sorted clean or dirty block list. * * NOTE: xflags is passed as a constant, optimizing this inline function! */ static void buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags) { struct bufv *bv; struct buf *n; int error; ASSERT_BO_WLOCKED(bo); KASSERT((xflags & BX_VNDIRTY) == 0 || (bo->bo_flag & BO_DEAD) == 0, ("dead bo %p", bo)); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags)); bp->b_xflags |= xflags; if (xflags & BX_VNDIRTY) bv = &bo->bo_dirty; else bv = &bo->bo_clean; /* * Keep the list ordered. Optimize empty list insertion. Assume * we tend to grow at the tail so lookup_le should usually be cheaper * than _ge. */ if (bv->bv_cnt == 0 || bp->b_lblkno > TAILQ_LAST(&bv->bv_hd, buflists)->b_lblkno) TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs); else if ((n = BUF_PCTRIE_LOOKUP_LE(&bv->bv_root, bp->b_lblkno)) == NULL) TAILQ_INSERT_HEAD(&bv->bv_hd, bp, b_bobufs); else TAILQ_INSERT_AFTER(&bv->bv_hd, n, bp, b_bobufs); error = BUF_PCTRIE_INSERT(&bv->bv_root, bp); if (error) panic("buf_vlist_add: Preallocated nodes insufficient."); bv->bv_cnt++; } /* * Look up a buffer using the buffer tries. 
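 *
 * Callers must hold the bufobj lock; the usual revalidation pattern,
 * similar to the one in flushbuflist() above, is (sketch):
 *
 *	BO_LOCK(bo);
 *	bp = gbincore(bo, lblkno);
 *	if (bp != NULL)
 *		... bp is the in-core buffer at lblkno ...
 *	BO_UNLOCK(bo);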
*/ struct buf * gbincore(struct bufobj *bo, daddr_t lblkno) { struct buf *bp; ASSERT_BO_LOCKED(bo); bp = BUF_PCTRIE_LOOKUP(&bo->bo_clean.bv_root, lblkno); if (bp != NULL) return (bp); return BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno); } /* * Associate a buffer with a vnode. */ void bgetvp(struct vnode *vp, struct buf *bp) { struct bufobj *bo; bo = &vp->v_bufobj; ASSERT_BO_WLOCKED(bo); VNASSERT(bp->b_vp == NULL, bp->b_vp, ("bgetvp: not free")); CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp->b_flags); VNASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, vp, ("bgetvp: bp already attached! %p", bp)); vhold(vp); bp->b_vp = vp; bp->b_bufobj = bo; /* * Insert onto list for new vnode. */ buf_vlist_add(bp, bo, BX_VNCLEAN); } /* * Disassociate a buffer from a vnode. */ void brelvp(struct buf *bp) { struct bufobj *bo; struct vnode *vp; CTR3(KTR_BUF, "brelvp(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); /* * Delete from old vnode list, if on one. */ vp = bp->b_vp; /* XXX */ bo = bp->b_bufobj; BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("brelvp: Buffer %p not on queue.", bp); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { bo->bo_flag &= ~BO_ONWORKLST; mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); } bp->b_vp = NULL; bp->b_bufobj = NULL; BO_UNLOCK(bo); vdrop(vp); } /* * Add an item to the syncer work queue. */ static void vn_syncer_add_to_worklist(struct bufobj *bo, int delay) { int slot; ASSERT_BO_WLOCKED(bo); mtx_lock(&sync_mtx); if (bo->bo_flag & BO_ONWORKLST) LIST_REMOVE(bo, bo_synclist); else { bo->bo_flag |= BO_ONWORKLST; syncer_worklist_len++; } if (delay > syncer_maxdelay - 2) delay = syncer_maxdelay - 2; slot = (syncer_delayno + delay) & syncer_mask; LIST_INSERT_HEAD(&syncer_workitem_pending[slot], bo, bo_synclist); mtx_unlock(&sync_mtx); } static int sysctl_vfs_worklist_len(SYSCTL_HANDLER_ARGS) { int error, len; mtx_lock(&sync_mtx); len = syncer_worklist_len - sync_vnode_count; mtx_unlock(&sync_mtx); error = SYSCTL_OUT(req, &len, sizeof(len)); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, worklist_len, CTLTYPE_INT | CTLFLAG_RD, NULL, 0, sysctl_vfs_worklist_len, "I", "Syncer thread worklist length"); static struct proc *updateproc; static void sched_sync(void); static struct kproc_desc up_kp = { "syncer", sched_sync, &updateproc }; SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp); static int sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td) { struct vnode *vp; struct mount *mp; *bo = LIST_FIRST(slp); if (*bo == NULL) return (0); vp = (*bo)->__bo_vnode; /* XXX */ if (VOP_ISLOCKED(vp) != 0 || VI_TRYLOCK(vp) == 0) return (1); /* * We use vhold in case the vnode does not * successfully sync. vhold prevents the vnode from * going away when we unlock the sync_mtx so that * we can acquire the vnode interlock. */ vholdl(vp); mtx_unlock(&sync_mtx); VI_UNLOCK(vp); if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { vdrop(vp); mtx_lock(&sync_mtx); return (*bo == LIST_FIRST(slp)); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); (void) VOP_FSYNC(vp, MNT_LAZY, td); VOP_UNLOCK(vp, 0); vn_finished_write(mp); BO_LOCK(*bo); if (((*bo)->bo_flag & BO_ONWORKLST) != 0) { /* * Put us back on the worklist. The worklist * routine will remove us from our current * position and then add us back in at a later * position. 
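 *
 * The slot chosen by vn_syncer_add_to_worklist() is simply
 * (syncer_delayno + delay) & syncer_mask, i.e. "delay" seconds ahead of
 * the slot currently being drained, modulo the table size.  For
 * instance, with syncer_delayno at 10, a delay of 30 and a 64-slot
 * table (syncer_mask of 63), the bufobj lands in slot
 * (10 + 30) & 63 = 40 and is revisited roughly 30 seconds later.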
*/ vn_syncer_add_to_worklist(*bo, syncdelay); } BO_UNLOCK(*bo); vdrop(vp); mtx_lock(&sync_mtx); return (0); } static int first_printf = 1; /* * System filesystem synchronizer daemon. */ static void sched_sync(void) { struct synclist *next, *slp; struct bufobj *bo; long starttime; struct thread *td = curthread; int last_work_seen; int net_worklist_len; int syncer_final_iter; int error; last_work_seen = 0; syncer_final_iter = 0; syncer_state = SYNCER_RUNNING; starttime = time_uptime; td->td_pflags |= TDP_NORUNNINGBUF; EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc, SHUTDOWN_PRI_LAST); mtx_lock(&sync_mtx); for (;;) { if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter == 0) { mtx_unlock(&sync_mtx); kproc_suspend_check(td->td_proc); mtx_lock(&sync_mtx); } net_worklist_len = syncer_worklist_len - sync_vnode_count; if (syncer_state != SYNCER_RUNNING && starttime != time_uptime) { if (first_printf) { printf("\nSyncing disks, vnodes remaining..."); first_printf = 0; } printf("%d ", net_worklist_len); } starttime = time_uptime; /* * Push files whose dirty time has expired. Be careful * of interrupt race on slp queue. * * Skip over empty worklist slots when shutting down. */ do { slp = &syncer_workitem_pending[syncer_delayno]; syncer_delayno += 1; if (syncer_delayno == syncer_maxdelay) syncer_delayno = 0; next = &syncer_workitem_pending[syncer_delayno]; /* * If the worklist has wrapped since the * it was emptied of all but syncer vnodes, * switch to the FINAL_DELAY state and run * for one more second. */ if (syncer_state == SYNCER_SHUTTING_DOWN && net_worklist_len == 0 && last_work_seen == syncer_delayno) { syncer_state = SYNCER_FINAL_DELAY; syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP; } } while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) && syncer_worklist_len > 0); /* * Keep track of the last time there was anything * on the worklist other than syncer vnodes. * Return to the SHUTTING_DOWN state if any * new work appears. */ if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING) last_work_seen = syncer_delayno; if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY) syncer_state = SYNCER_SHUTTING_DOWN; while (!LIST_EMPTY(slp)) { error = sync_vnode(slp, &bo, td); if (error == 1) { LIST_REMOVE(bo, bo_synclist); LIST_INSERT_HEAD(next, bo, bo_synclist); continue; } if (first_printf == 0) { /* * Drop the sync mutex, because some watchdog * drivers need to sleep while patting */ mtx_unlock(&sync_mtx); wdog_kern_pat(WD_LASTVAL); mtx_lock(&sync_mtx); } } if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0) syncer_final_iter--; /* * The variable rushjob allows the kernel to speed up the * processing of the filesystem syncer process. A rushjob * value of N tells the filesystem syncer to process the next * N seconds worth of work on its queue ASAP. Currently rushjob * is used by the soft update code to speed up the filesystem * syncer process when the incore state is getting so far * ahead of the disk that the kernel memory pool is being * threatened with exhaustion. */ if (rushjob > 0) { rushjob -= 1; continue; } /* * Just sleep for a short period of time between * iterations when shutting down to allow some I/O * to happen. * * If it has taken us less than a second to process the * current work, then wait. Otherwise start right over * again. We can still lose time if any single round * takes more than two seconds, but it does not really * matter as we are just trying to generally pace the * filesystem activity. 
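 *
 * Concretely, the code below sleeps a full second (hz ticks) when a
 * normal pass completed within the second it started, sleeps only
 * hz / SYNCER_SHUTDOWN_SPEEDUP ticks while shutting down, and does not
 * sleep at all when a running pass already took a second or more.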
*/ if (syncer_state != SYNCER_RUNNING || time_uptime == starttime) { thread_lock(td); sched_prio(td, PPAUSE); thread_unlock(td); } if (syncer_state != SYNCER_RUNNING) cv_timedwait(&sync_wakeup, &sync_mtx, hz / SYNCER_SHUTDOWN_SPEEDUP); else if (time_uptime == starttime) cv_timedwait(&sync_wakeup, &sync_mtx, hz); } } /* * Request the syncer daemon to speed up its work. * We never push it to speed up more than half of its * normal turn time, otherwise it could take over the cpu. */ int speedup_syncer(void) { int ret = 0; mtx_lock(&sync_mtx); if (rushjob < syncdelay / 2) { rushjob += 1; stat_rush_requests += 1; ret = 1; } mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); return (ret); } /* * Tell the syncer to speed up its work and run though its work * list several times, then tell it to shut down. */ static void syncer_shutdown(void *arg, int howto) { if (howto & RB_NOSYNC) return; mtx_lock(&sync_mtx); syncer_state = SYNCER_SHUTTING_DOWN; rushjob = 0; mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); kproc_shutdown(arg, howto); } void syncer_suspend(void) { syncer_shutdown(updateproc, 0); } void syncer_resume(void) { mtx_lock(&sync_mtx); first_printf = 1; syncer_state = SYNCER_RUNNING; mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); kproc_resume(updateproc); } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(struct buf *bp) { struct vnode *vp; struct bufobj *bo; int delay; #ifdef INVARIANTS struct bufv *bv; #endif vp = bp->b_vp; bo = bp->b_bufobj; ++reassignbufcalls; CTR3(KTR_BUF, "reassignbuf(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); /* * B_PAGING flagged buffers cannot be reassigned because their vp * is not fully linked in. */ if (bp->b_flags & B_PAGING) panic("cannot reassign paging buffer"); /* * Delete from old vnode list, if on one. */ BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("reassignbuf: Buffer %p not on queue.", bp); /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { if ((bo->bo_flag & BO_ONWORKLST) == 0) { switch (vp->v_type) { case VDIR: delay = dirdelay; break; case VCHR: delay = metadelay; break; default: delay = filedelay; } vn_syncer_add_to_worklist(bo, delay); } buf_vlist_add(bp, bo, BX_VNDIRTY); } else { buf_vlist_add(bp, bo, BX_VNCLEAN); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); bo->bo_flag &= ~BO_ONWORKLST; } } #ifdef INVARIANTS bv = &bo->bo_clean; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bv = &bo->bo_dirty; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); #endif BO_UNLOCK(bo); } /* * A temporary hack until refcount_* APIs are sorted out. 
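 *
 * The two helpers below implement "acquire unless the count is zero"
 * and "release unless this is the last reference" as compare-and-swap
 * loops.  The transitions they refuse to perform are exactly the
 * 0->1 and 1->0 cases that must be done under the vnode interlock.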
*/ static __inline int vfs_refcount_acquire_if_not_zero(volatile u_int *count) { u_int old; for (;;) { old = *count; if (old == 0) return (0); if (atomic_cmpset_int(count, old, old + 1)) return (1); } } static __inline int vfs_refcount_release_if_not_last(volatile u_int *count) { u_int old; for (;;) { old = *count; if (old == 1) return (0); if (atomic_cmpset_int(count, old, old - 1)) return (1); } } static void v_init_counters(struct vnode *vp) { VNASSERT(vp->v_type == VNON && vp->v_data == NULL && vp->v_iflag == 0, vp, ("%s called for an initialized vnode", __FUNCTION__)); ASSERT_VI_UNLOCKED(vp, __FUNCTION__); refcount_init(&vp->v_holdcnt, 1); refcount_init(&vp->v_usecount, 1); } /* * Increment the use and hold counts on the vnode, taking care to reference * the driver's usecount if this is a chardev. The _vhold() will remove * the vnode from the free list if it is presently free. */ static void v_incr_usecount(struct vnode *vp) { ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (vp->v_type == VCHR) { VI_LOCK(vp); _vhold(vp, true); if (vp->v_iflag & VI_OWEINACT) { VNASSERT(vp->v_usecount == 0, vp, ("vnode with usecount and VI_OWEINACT set")); vp->v_iflag &= ~VI_OWEINACT; } refcount_acquire(&vp->v_usecount); v_incr_devcount(vp); VI_UNLOCK(vp); return; } _vhold(vp, false); if (vfs_refcount_acquire_if_not_zero(&vp->v_usecount)) { VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, ("vnode with usecount and VI_OWEINACT set")); } else { VI_LOCK(vp); if (vp->v_iflag & VI_OWEINACT) vp->v_iflag &= ~VI_OWEINACT; refcount_acquire(&vp->v_usecount); VI_UNLOCK(vp); } } /* * Increment si_usecount of the associated device, if any. */ static void v_incr_devcount(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __FUNCTION__); if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount++; dev_unlock(); } } /* * Decrement si_usecount of the associated device, if any. */ static void v_decr_devcount(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __FUNCTION__); if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount--; dev_unlock(); } } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. VI_DOOMED is set if the vnode * is being destroyed. Only callers who specify LK_RETRY will * see doomed vnodes. If inactive processing was delayed in * vput try to do it here. * * Notes on lockless counter manipulation: * _vhold, vputx and other routines make various decisions based * on either holdcnt or usecount being 0. As long as either contuner * is not transitioning 0->1 nor 1->0, the manipulation can be done * with atomic operations. Otherwise the interlock is taken. 
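 *
 * Put differently: bumping a counter that is already nonzero, say
 * holdcnt going from 3 to 4, needs only an atomic op.  The 0->1
 * transition must take the interlock because it also pulls the vnode
 * off the free list, and the 1->0 transition must take it because it
 * may put the vnode back on the free list or free it outright.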
*/ int vget(struct vnode *vp, int flags, struct thread *td) { int error, oweinact; VNASSERT((flags & LK_TYPE_MASK) != 0, vp, ("vget: invalid lock operation")); if ((flags & LK_INTERLOCK) != 0) ASSERT_VI_LOCKED(vp, __func__); else ASSERT_VI_UNLOCKED(vp, __func__); if ((flags & LK_VNHELD) != 0) VNASSERT((vp->v_holdcnt > 0), vp, ("vget: LK_VNHELD passed but vnode not held")); CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags); if ((flags & LK_VNHELD) == 0) _vhold(vp, (flags & LK_INTERLOCK) != 0); if ((error = vn_lock(vp, flags)) != 0) { vdrop(vp); CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__, vp); return (error); } if (vp->v_iflag & VI_DOOMED && (flags & LK_RETRY) == 0) panic("vget: vn_lock failed to return ENOENT\n"); /* * We don't guarantee that any particular close will * trigger inactive processing so just make a best effort * here at preventing a reference to a removed file. If * we don't succeed no harm is done. * * Upgrade our holdcnt to a usecount. */ if (vp->v_type != VCHR && vfs_refcount_acquire_if_not_zero(&vp->v_usecount)) { VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, ("vnode with usecount and VI_OWEINACT set")); } else { VI_LOCK(vp); if ((vp->v_iflag & VI_OWEINACT) == 0) { oweinact = 0; } else { oweinact = 1; vp->v_iflag &= ~VI_OWEINACT; } refcount_acquire(&vp->v_usecount); v_incr_devcount(vp); if (oweinact && VOP_ISLOCKED(vp) == LK_EXCLUSIVE && (flags & LK_NOWAIT) == 0) vinactive(vp, td); VI_UNLOCK(vp); } return (0); } /* * Increase the reference count of a vnode. */ void vref(struct vnode *vp) { CTR2(KTR_VFS, "%s: vp %p", __func__, vp); v_incr_usecount(vp); } /* * Return reference count of a vnode. * * The results of this call are only guaranteed when some mechanism is used to * stop other processes from gaining references to the vnode. This may be the * case if the caller holds the only reference. This is also useful when stale * data is acceptable as race conditions may be accounted for by some other * means. */ int vrefcnt(struct vnode *vp) { return (vp->v_usecount); } #define VPUTX_VRELE 1 #define VPUTX_VPUT 2 #define VPUTX_VUNREF 3 /* * Decrement the use and hold counts for a vnode. * * See an explanation near vget() as to why atomic operation is safe. */ static void vputx(struct vnode *vp, int func) { int error; KASSERT(vp != NULL, ("vputx: null vp")); if (func == VPUTX_VUNREF) ASSERT_VOP_LOCKED(vp, "vunref"); else if (func == VPUTX_VPUT) ASSERT_VOP_LOCKED(vp, "vput"); else KASSERT(func == VPUTX_VRELE, ("vputx: wrong func")); ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (vp->v_type != VCHR && vfs_refcount_release_if_not_last(&vp->v_usecount)) { if (func == VPUTX_VPUT) VOP_UNLOCK(vp, 0); vdrop(vp); return; } VI_LOCK(vp); /* * We want to hold the vnode until the inactive finishes to * prevent vgone() races. We drop the use count here and the * hold count below when we're done. */ if (!refcount_release(&vp->v_usecount) || (vp->v_iflag & VI_DOINGINACT)) { if (func == VPUTX_VPUT) VOP_UNLOCK(vp, 0); v_decr_devcount(vp); vdropl(vp); return; } v_decr_devcount(vp); error = 0; if (vp->v_usecount != 0) { vprint("vputx: usecount not zero", vp); panic("vputx: usecount not zero"); } CTR2(KTR_VFS, "%s: return vnode %p to the freelist", __func__, vp); /* * We must call VOP_INACTIVE with the node locked. Mark * as VI_DOINGINACT to avoid recursion. 
*/ vp->v_iflag |= VI_OWEINACT; switch (func) { case VPUTX_VRELE: error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK); VI_LOCK(vp); break; case VPUTX_VPUT: if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { error = VOP_LOCK(vp, LK_UPGRADE | LK_INTERLOCK | LK_NOWAIT); VI_LOCK(vp); } break; case VPUTX_VUNREF: if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { error = VOP_LOCK(vp, LK_TRYUPGRADE | LK_INTERLOCK); VI_LOCK(vp); } break; } VNASSERT(vp->v_usecount == 0 || (vp->v_iflag & VI_OWEINACT) == 0, vp, ("vnode with usecount and VI_OWEINACT set")); if (error == 0) { if (vp->v_iflag & VI_OWEINACT) vinactive(vp, curthread); if (func != VPUTX_VUNREF) VOP_UNLOCK(vp, 0); } vdropl(vp); } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist. */ void vrele(struct vnode *vp) { vputx(vp, VPUTX_VRELE); } /* * Release an already locked vnode. This give the same effects as * unlock+vrele(), but takes less time and avoids releasing and * re-aquiring the lock (as vrele() acquires the lock internally.) */ void vput(struct vnode *vp) { vputx(vp, VPUTX_VPUT); } /* * Release an exclusively locked vnode. Do not unlock the vnode lock. */ void vunref(struct vnode *vp) { vputx(vp, VPUTX_VUNREF); } /* * Increase the hold count and activate if this is the first reference. */ void _vhold(struct vnode *vp, bool locked) { struct mount *mp; if (locked) ASSERT_VI_LOCKED(vp, __func__); else ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (!locked && vfs_refcount_acquire_if_not_zero(&vp->v_holdcnt)) { VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("_vhold: vnode with holdcnt is free")); return; } if (!locked) VI_LOCK(vp); if ((vp->v_iflag & VI_FREE) == 0) { refcount_acquire(&vp->v_holdcnt); if (!locked) VI_UNLOCK(vp); return; } VNASSERT(vp->v_holdcnt == 0, vp, ("%s: wrong hold count", __func__)); VNASSERT(vp->v_op != NULL, vp, ("%s: vnode already reclaimed.", __func__)); /* * Remove a vnode from the free list, mark it as in use, * and put it on the active list. */ mtx_lock(&vnode_free_list_mtx); TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist); freevnodes--; vp->v_iflag &= ~VI_FREE; KASSERT((vp->v_iflag & VI_ACTIVE) == 0, ("Activating already active vnode")); vp->v_iflag |= VI_ACTIVE; mp = vp->v_mount; TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist); mp->mnt_activevnodelistsize++; mtx_unlock(&vnode_free_list_mtx); refcount_acquire(&vp->v_holdcnt); if (!locked) VI_UNLOCK(vp); } /* * Drop the hold count of the vnode. If this is the last reference to * the vnode we place it on the free list unless it has been vgone'd * (marked VI_DOOMED) in which case we will free it. * * Because the vnode vm object keeps a hold reference on the vnode if * there is at least one resident non-cached page, the vnode cannot * leave the active list without the page cleanup done. */ void _vdrop(struct vnode *vp, bool locked) { struct bufobj *bo; struct mount *mp; int active; if (locked) ASSERT_VI_LOCKED(vp, __func__); else ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if ((int)vp->v_holdcnt <= 0) panic("vdrop: holdcnt %d", vp->v_holdcnt); if (vfs_refcount_release_if_not_last(&vp->v_holdcnt)) { if (locked) VI_UNLOCK(vp); return; } if (!locked) VI_LOCK(vp); if (refcount_release(&vp->v_holdcnt) == 0) { VI_UNLOCK(vp); return; } if ((vp->v_iflag & VI_DOOMED) == 0) { /* * Mark a vnode as free: remove it from its active list * and put it up for recycling on the freelist. 
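 *
 * The usual hold/drop pairing, as in sync_vnode() earlier in this
 * file, brackets any window where all vnode locks are dropped (sketch):
 *
 *	vholdl(vp);		(vp cannot be recycled now)
 *	VI_UNLOCK(vp);
 *	... sleep, do I/O, reacquire locks ...
 *	vdrop(vp);		(may return vp to the free list)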
*/ VNASSERT(vp->v_op != NULL, vp, ("vdropl: vnode already reclaimed.")); VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("vnode already free")); VNASSERT(vp->v_holdcnt == 0, vp, ("vdropl: freeing when we shouldn't")); active = vp->v_iflag & VI_ACTIVE; if ((vp->v_iflag & VI_OWEINACT) == 0) { vp->v_iflag &= ~VI_ACTIVE; mp = vp->v_mount; mtx_lock(&vnode_free_list_mtx); if (active) { TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist); mp->mnt_activevnodelistsize--; } TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist); freevnodes++; vp->v_iflag |= VI_FREE; mtx_unlock(&vnode_free_list_mtx); } else { atomic_add_long(&free_owe_inact, 1); } VI_UNLOCK(vp); return; } /* * The vnode has been marked for destruction, so free it. * * The vnode will be returned to the zone where it will * normally remain until it is needed for another vnode. We * need to cleanup (or verify that the cleanup has already * been done) any residual data left from its current use * so as not to contaminate the freshly allocated vnode. */ CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp); atomic_subtract_long(&numvnodes, 1); bo = &vp->v_bufobj; VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("cleaned vnode still on the free list.")); VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count")); VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count")); VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count")); VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's")); VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0")); VNASSERT(pctrie_is_empty(&bo->bo_clean.bv_root), vp, ("clean blk trie not empty")); VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0")); VNASSERT(pctrie_is_empty(&bo->bo_dirty.bv_root), vp, ("dirty blk trie not empty")); VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst")); VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src")); VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for ..")); VNASSERT(TAILQ_EMPTY(&vp->v_rl.rl_waiters), vp, ("Dangling rangelock waiters")); VI_UNLOCK(vp); #ifdef MAC mac_vnode_destroy(vp); #endif if (vp->v_pollinfo != NULL) { destroy_vpollinfo(vp->v_pollinfo); vp->v_pollinfo = NULL; } #ifdef INVARIANTS /* XXX Elsewhere we detect an already freed vnode via NULL v_op. */ vp->v_op = NULL; #endif bzero(&vp->v_un, sizeof(vp->v_un)); vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; vp->v_iflag = 0; vp->v_vflag = 0; bo->bo_flag = 0; uma_zfree(vnode_zone, vp); } /* * Call VOP_INACTIVE on the vnode and manage the DOINGINACT and OWEINACT * flags. DOINGINACT prevents us from recursing in calls to vinactive. * OWEINACT tracks whether a vnode missed a call to inactive due to a * failed lock upgrade. */ void vinactive(struct vnode *vp, struct thread *td) { struct vm_object *obj; ASSERT_VOP_ELOCKED(vp, "vinactive"); ASSERT_VI_LOCKED(vp, "vinactive"); VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp, ("vinactive: recursed on VI_DOINGINACT")); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); vp->v_iflag |= VI_DOINGINACT; vp->v_iflag &= ~VI_OWEINACT; VI_UNLOCK(vp); /* * Before moving off the active list, we must be sure that any * modified pages are converted into the vnode's dirty * buffers, since these will no longer be checked once the * vnode is on the inactive list. * * The write-out of the dirty pages is asynchronous. At the * point that VOP_INACTIVE() is called, there could still be * pending I/O and dirty pages in the object. 
*/ obj = vp->v_object; if (obj != NULL && (obj->flags & OBJ_MIGHTBEDIRTY) != 0) { VM_OBJECT_WLOCK(obj); vm_object_page_clean(obj, 0, 0, OBJPC_NOSYNC); VM_OBJECT_WUNLOCK(obj); } VOP_INACTIVE(vp, td); VI_LOCK(vp); VNASSERT(vp->v_iflag & VI_DOINGINACT, vp, ("vinactive: lost VI_DOINGINACT")); vp->v_iflag &= ~VI_DOINGINACT; } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If FORCECLOSE is not specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If FORCECLOSE is specified, detach any active vnodes * that are found. * * If WRITECLOSE is set, only flush out regular file vnodes open for * writing. * * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped. * * `rootrefs' specifies the base reference count for the root vnode * of this filesystem. The root vnode is considered busy if its * v_usecount exceeds this value. On a successful return, vflush(, td) * will call vrele() on the root vnode exactly rootrefs times. * If the SKIPSYSTEM or WRITECLOSE flags are specified, rootrefs must * be zero. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "Print out busy vnodes"); #endif int vflush(struct mount *mp, int rootrefs, int flags, struct thread *td) { struct vnode *vp, *mvp, *rootvp = NULL; struct vattr vattr; int busy = 0, error; CTR4(KTR_VFS, "%s: mp %p with rootrefs %d and flags %d", __func__, mp, rootrefs, flags); if (rootrefs > 0) { KASSERT((flags & (SKIPSYSTEM | WRITECLOSE)) == 0, ("vflush: bad args")); /* * Get the filesystem root vnode. We can vput() it * immediately, since with rootrefs > 0, it won't go away. */ if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rootvp)) != 0) { CTR2(KTR_VFS, "%s: vfs_root lookup failed with %d", __func__, error); return (error); } vput(rootvp); } loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { vholdl(vp); error = vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE); if (error) { vdrop(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } /* * Skip over a vnodes marked VV_SYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) { VOP_UNLOCK(vp, 0); vdrop(vp); continue; } /* * If WRITECLOSE is set, flush out unlinked but still open * files (even if open only for reading) and regular file * vnodes open for writing. */ if (flags & WRITECLOSE) { if (vp->v_object != NULL) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_WUNLOCK(vp->v_object); } error = VOP_FSYNC(vp, MNT_WAIT, td); if (error != 0) { VOP_UNLOCK(vp, 0); vdrop(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } error = VOP_GETATTR(vp, &vattr, td->td_ucred); VI_LOCK(vp); if ((vp->v_type == VNON || (error == 0 && vattr.va_nlink > 0)) && (vp->v_writecount == 0 || vp->v_type != VREG)) { VOP_UNLOCK(vp, 0); vdropl(vp); continue; } } else VI_LOCK(vp); /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. * * If FORCECLOSE is set, forcibly close the vnode. */ if (vp->v_usecount == 0 || (flags & FORCECLOSE)) { vgonel(vp); } else { busy++; #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif } VOP_UNLOCK(vp, 0); vdropl(vp); } if (rootrefs > 0 && (flags & FORCECLOSE) == 0) { /* * If just the root vnode is busy, and if its refcount * is equal to `rootrefs', then go ahead and kill it. 
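 *
 * For example, a filesystem whose unmount routine keeps one cached
 * reference on its root vnode would typically call
 * vflush(mp, 1, flags, td): that single busy reference is tolerated,
 * the root vnode is reclaimed here, and vflush() vrele()s it exactly
 * once before returning.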
*/ VI_LOCK(rootvp); KASSERT(busy > 0, ("vflush: not busy")); VNASSERT(rootvp->v_usecount >= rootrefs, rootvp, ("vflush: usecount %d < rootrefs %d", rootvp->v_usecount, rootrefs)); if (busy == 1 && rootvp->v_usecount == rootrefs) { VOP_LOCK(rootvp, LK_EXCLUSIVE|LK_INTERLOCK); vgone(rootvp); VOP_UNLOCK(rootvp, 0); busy = 0; } else VI_UNLOCK(rootvp); } if (busy) { CTR2(KTR_VFS, "%s: failing as %d vnodes are busy", __func__, busy); return (EBUSY); } for (; rootrefs > 0; rootrefs--) vrele(rootvp); return (0); } /* * Recycle an unused vnode to the front of the free list. */ int vrecycle(struct vnode *vp) { int recycled; ASSERT_VOP_ELOCKED(vp, "vrecycle"); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); recycled = 0; VI_LOCK(vp); if (vp->v_usecount == 0) { recycled = 1; vgonel(vp); } VI_UNLOCK(vp); return (recycled); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(struct vnode *vp) { VI_LOCK(vp); vgonel(vp); VI_UNLOCK(vp); } static void notify_lowervp_vfs_dummy(struct mount *mp __unused, struct vnode *lowervp __unused) { } /* * Notify upper mounts about reclaimed or unlinked vnode. */ void vfs_notify_upper(struct vnode *vp, int event) { static struct vfsops vgonel_vfsops = { .vfs_reclaim_lowervp = notify_lowervp_vfs_dummy, .vfs_unlink_lowervp = notify_lowervp_vfs_dummy, }; struct mount *mp, *ump, *mmp; mp = vp->v_mount; if (mp == NULL) return; MNT_ILOCK(mp); if (TAILQ_EMPTY(&mp->mnt_uppers)) goto unlock; MNT_IUNLOCK(mp); mmp = malloc(sizeof(struct mount), M_TEMP, M_WAITOK | M_ZERO); mmp->mnt_op = &vgonel_vfsops; mmp->mnt_kern_flag |= MNTK_MARKER; MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_VGONE_UPPER; for (ump = TAILQ_FIRST(&mp->mnt_uppers); ump != NULL;) { if ((ump->mnt_kern_flag & MNTK_MARKER) != 0) { ump = TAILQ_NEXT(ump, mnt_upper_link); continue; } TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link); MNT_IUNLOCK(mp); switch (event) { case VFS_NOTIFY_UPPER_RECLAIM: VFS_RECLAIM_LOWERVP(ump, vp); break; case VFS_NOTIFY_UPPER_UNLINK: VFS_UNLINK_LOWERVP(ump, vp); break; default: KASSERT(0, ("invalid event %d", event)); break; } MNT_ILOCK(mp); ump = TAILQ_NEXT(mmp, mnt_upper_link); TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link); } free(mmp, M_TEMP); mp->mnt_kern_flag &= ~MNTK_VGONE_UPPER; if ((mp->mnt_kern_flag & MNTK_VGONE_WAITER) != 0) { mp->mnt_kern_flag &= ~MNTK_VGONE_WAITER; wakeup(&mp->mnt_uppers); } unlock: MNT_IUNLOCK(mp); } /* * vgone, with the vp interlock held. */ static void vgonel(struct vnode *vp) { struct thread *td; int oweinact; int active; struct mount *mp; ASSERT_VOP_ELOCKED(vp, "vgonel"); ASSERT_VI_LOCKED(vp, "vgonel"); VNASSERT(vp->v_holdcnt, vp, ("vgonel: vp %p has no reference.", vp)); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); td = curthread; /* * Don't vgonel if we're already doomed. */ if (vp->v_iflag & VI_DOOMED) return; vp->v_iflag |= VI_DOOMED; /* * Check to see if the vnode is in use. If so, we have to call * VOP_CLOSE() and VOP_INACTIVE(). */ active = vp->v_usecount; oweinact = (vp->v_iflag & VI_OWEINACT); VI_UNLOCK(vp); vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM); /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. */ if (active) VOP_CLOSE(vp, FNONBLOCK, NOCRED, td); if (oweinact || active) { VI_LOCK(vp); if ((vp->v_iflag & VI_DOINGINACT) == 0) vinactive(vp, td); VI_UNLOCK(vp); } if (vp->v_type == VSOCK) vfs_unp_reclaim(vp); /* * Clean out any buffers associated with the vnode. * If the flush fails, just toss the buffers. 
*/ mp = NULL; if (!TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd)) (void) vn_start_secondary_write(vp, &mp, V_WAIT); if (vinvalbuf(vp, V_SAVE, 0, 0) != 0) { while (vinvalbuf(vp, 0, 0, 0) != 0) ; } BO_LOCK(&vp->v_bufobj); KASSERT(TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd) && vp->v_bufobj.bo_dirty.bv_cnt == 0 && TAILQ_EMPTY(&vp->v_bufobj.bo_clean.bv_hd) && vp->v_bufobj.bo_clean.bv_cnt == 0, ("vp %p bufobj not invalidated", vp)); vp->v_bufobj.bo_flag |= BO_DEAD; BO_UNLOCK(&vp->v_bufobj); /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, td)) panic("vgone: cannot reclaim"); if (mp != NULL) vn_finished_secondary_write(mp); VNASSERT(vp->v_object == NULL, vp, ("vop_reclaim left v_object vp=%p, tag=%s", vp, vp->v_tag)); /* * Clear the advisory locks and wake up waiting threads. */ (void)VOP_ADVLOCKPURGE(vp); vp->v_lockf = NULL; /* * Delete from old mount point vnode list. */ delmntque(vp); cache_purge(vp); /* * Done with purge, reset to the standard lock and invalidate * the vnode. */ VI_LOCK(vp); vp->v_vnlock = &vp->v_lock; vp->v_op = &dead_vnodeops; vp->v_tag = "none"; vp->v_type = VBAD; } /* * Calculate the total number of references to a special device. */ int vcount(struct vnode *vp) { int count; dev_lock(); count = vp->v_rdev->si_usecount; dev_unlock(); return (count); } /* * Same as above, but using the struct cdev *as argument */ int count_dev(struct cdev *dev) { int count; dev_lock(); count = dev->si_usecount; dev_unlock(); return(count); } /* * Print out a description of a vnode. */ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD", "VMARKER"}; void vn_printf(struct vnode *vp, const char *fmt, ...) { va_list ap; char buf[256], buf2[16]; u_long flags; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("%p: ", (void *)vp); printf("tag %s, type %s\n", vp->v_tag, typename[vp->v_type]); printf(" usecount %d, writecount %d, refcount %d mountedhere %p\n", vp->v_usecount, vp->v_writecount, vp->v_holdcnt, vp->v_mountedhere); buf[0] = '\0'; buf[1] = '\0'; if (vp->v_vflag & VV_ROOT) strlcat(buf, "|VV_ROOT", sizeof(buf)); if (vp->v_vflag & VV_ISTTY) strlcat(buf, "|VV_ISTTY", sizeof(buf)); if (vp->v_vflag & VV_NOSYNC) strlcat(buf, "|VV_NOSYNC", sizeof(buf)); if (vp->v_vflag & VV_ETERNALDEV) strlcat(buf, "|VV_ETERNALDEV", sizeof(buf)); if (vp->v_vflag & VV_CACHEDLABEL) strlcat(buf, "|VV_CACHEDLABEL", sizeof(buf)); if (vp->v_vflag & VV_TEXT) strlcat(buf, "|VV_TEXT", sizeof(buf)); if (vp->v_vflag & VV_COPYONWRITE) strlcat(buf, "|VV_COPYONWRITE", sizeof(buf)); if (vp->v_vflag & VV_SYSTEM) strlcat(buf, "|VV_SYSTEM", sizeof(buf)); if (vp->v_vflag & VV_PROCDEP) strlcat(buf, "|VV_PROCDEP", sizeof(buf)); if (vp->v_vflag & VV_NOKNOTE) strlcat(buf, "|VV_NOKNOTE", sizeof(buf)); if (vp->v_vflag & VV_DELETED) strlcat(buf, "|VV_DELETED", sizeof(buf)); if (vp->v_vflag & VV_MD) strlcat(buf, "|VV_MD", sizeof(buf)); if (vp->v_vflag & VV_FORCEINSMQ) strlcat(buf, "|VV_FORCEINSMQ", sizeof(buf)); flags = vp->v_vflag & ~(VV_ROOT | VV_ISTTY | VV_NOSYNC | VV_ETERNALDEV | VV_CACHEDLABEL | VV_TEXT | VV_COPYONWRITE | VV_SYSTEM | VV_PROCDEP | VV_NOKNOTE | VV_DELETED | VV_MD | VV_FORCEINSMQ); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VV(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } if (vp->v_iflag & VI_MOUNT) strlcat(buf, "|VI_MOUNT", sizeof(buf)); if (vp->v_iflag & VI_DOOMED) strlcat(buf, "|VI_DOOMED", sizeof(buf)); if (vp->v_iflag & VI_FREE) strlcat(buf, "|VI_FREE", sizeof(buf)); if (vp->v_iflag & VI_ACTIVE) strlcat(buf, "|VI_ACTIVE", sizeof(buf)); if (vp->v_iflag & 
VI_DOINGINACT) strlcat(buf, "|VI_DOINGINACT", sizeof(buf)); if (vp->v_iflag & VI_OWEINACT) strlcat(buf, "|VI_OWEINACT", sizeof(buf)); flags = vp->v_iflag & ~(VI_MOUNT | VI_DOOMED | VI_FREE | VI_ACTIVE | VI_DOINGINACT | VI_OWEINACT); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VI(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } printf(" flags (%s)\n", buf + 1); if (mtx_owned(VI_MTX(vp))) printf(" VI_LOCKed"); if (vp->v_object != NULL) printf(" v_object %p ref %d pages %d " "cleanbuf %d dirtybuf %d\n", vp->v_object, vp->v_object->ref_count, vp->v_object->resident_page_count, vp->v_bufobj.bo_clean.bv_cnt, vp->v_bufobj.bo_dirty.bv_cnt); printf(" "); lockmgr_printinfo(vp->v_vnlock); if (vp->v_data != NULL) VOP_PRINT(vp); } #ifdef DDB /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ DB_SHOW_COMMAND(lockedvnods, lockedvnodes) { struct mount *mp; struct vnode *vp; /* * Note: because this is DDB, we can't obey the locking semantics * for these structures, which means we could catch an inconsistent * state and dereference a nasty pointer. Not much to be done * about that. */ db_printf("Locked vnodes\n"); TAILQ_FOREACH(mp, &mountlist, mnt_list) { TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && VOP_ISLOCKED(vp)) vprint("", vp); } } } /* * Show details about the given vnode. */ DB_SHOW_COMMAND(vnode, db_show_vnode) { struct vnode *vp; if (!have_addr) return; vp = (struct vnode *)addr; vn_printf(vp, "vnode "); } /* * Show details about the given mount point. */ DB_SHOW_COMMAND(mount, db_show_mount) { struct mount *mp; struct vfsopt *opt; struct statfs *sp; struct vnode *vp; char buf[512]; uint64_t mflags; u_int flags; if (!have_addr) { /* No address given, print short info about all mount points. 
*/ TAILQ_FOREACH(mp, &mountlist, mnt_list) { db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename); if (db_pager_quit) break; } db_printf("\nMore info: show mount \n"); return; } mp = (struct mount *)addr; db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename); buf[0] = '\0'; mflags = mp->mnt_flag; #define MNT_FLAG(flag) do { \ if (mflags & (flag)) { \ if (buf[0] != '\0') \ strlcat(buf, ", ", sizeof(buf)); \ strlcat(buf, (#flag) + 4, sizeof(buf)); \ mflags &= ~(flag); \ } \ } while (0) MNT_FLAG(MNT_RDONLY); MNT_FLAG(MNT_SYNCHRONOUS); MNT_FLAG(MNT_NOEXEC); MNT_FLAG(MNT_NOSUID); MNT_FLAG(MNT_NFS4ACLS); MNT_FLAG(MNT_UNION); MNT_FLAG(MNT_ASYNC); MNT_FLAG(MNT_SUIDDIR); MNT_FLAG(MNT_SOFTDEP); MNT_FLAG(MNT_NOSYMFOLLOW); MNT_FLAG(MNT_GJOURNAL); MNT_FLAG(MNT_MULTILABEL); MNT_FLAG(MNT_ACLS); MNT_FLAG(MNT_NOATIME); MNT_FLAG(MNT_NOCLUSTERR); MNT_FLAG(MNT_NOCLUSTERW); MNT_FLAG(MNT_SUJ); MNT_FLAG(MNT_EXRDONLY); MNT_FLAG(MNT_EXPORTED); MNT_FLAG(MNT_DEFEXPORTED); MNT_FLAG(MNT_EXPORTANON); MNT_FLAG(MNT_EXKERB); MNT_FLAG(MNT_EXPUBLIC); MNT_FLAG(MNT_LOCAL); MNT_FLAG(MNT_QUOTA); MNT_FLAG(MNT_ROOTFS); MNT_FLAG(MNT_USER); MNT_FLAG(MNT_IGNORE); MNT_FLAG(MNT_UPDATE); MNT_FLAG(MNT_DELEXPORT); MNT_FLAG(MNT_RELOAD); MNT_FLAG(MNT_FORCE); MNT_FLAG(MNT_SNAPSHOT); MNT_FLAG(MNT_BYFSID); #undef MNT_FLAG if (mflags != 0) { if (buf[0] != '\0') strlcat(buf, ", ", sizeof(buf)); snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "0x%016jx", mflags); } db_printf(" mnt_flag = %s\n", buf); buf[0] = '\0'; flags = mp->mnt_kern_flag; #define MNT_KERN_FLAG(flag) do { \ if (flags & (flag)) { \ if (buf[0] != '\0') \ strlcat(buf, ", ", sizeof(buf)); \ strlcat(buf, (#flag) + 5, sizeof(buf)); \ flags &= ~(flag); \ } \ } while (0) MNT_KERN_FLAG(MNTK_UNMOUNTF); MNT_KERN_FLAG(MNTK_ASYNC); MNT_KERN_FLAG(MNTK_SOFTDEP); MNT_KERN_FLAG(MNTK_NOINSMNTQ); MNT_KERN_FLAG(MNTK_DRAINING); MNT_KERN_FLAG(MNTK_REFEXPIRE); MNT_KERN_FLAG(MNTK_EXTENDED_SHARED); MNT_KERN_FLAG(MNTK_SHARED_WRITES); MNT_KERN_FLAG(MNTK_NO_IOPF); MNT_KERN_FLAG(MNTK_VGONE_UPPER); MNT_KERN_FLAG(MNTK_VGONE_WAITER); MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT); MNT_KERN_FLAG(MNTK_MARKER); MNT_KERN_FLAG(MNTK_USES_BCACHE); MNT_KERN_FLAG(MNTK_NOASYNC); MNT_KERN_FLAG(MNTK_UNMOUNT); MNT_KERN_FLAG(MNTK_MWAIT); MNT_KERN_FLAG(MNTK_SUSPEND); MNT_KERN_FLAG(MNTK_SUSPEND2); MNT_KERN_FLAG(MNTK_SUSPENDED); MNT_KERN_FLAG(MNTK_LOOKUP_SHARED); MNT_KERN_FLAG(MNTK_NOKNOTE); #undef MNT_KERN_FLAG if (flags != 0) { if (buf[0] != '\0') strlcat(buf, ", ", sizeof(buf)); snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "0x%08x", flags); } db_printf(" mnt_kern_flag = %s\n", buf); db_printf(" mnt_opt = "); opt = TAILQ_FIRST(mp->mnt_opt); if (opt != NULL) { db_printf("%s", opt->name); opt = TAILQ_NEXT(opt, link); while (opt != NULL) { db_printf(", %s", opt->name); opt = TAILQ_NEXT(opt, link); } } db_printf("\n"); sp = &mp->mnt_stat; db_printf(" mnt_stat = { version=%u type=%u flags=0x%016jx " "bsize=%ju iosize=%ju blocks=%ju bfree=%ju bavail=%jd files=%ju " "ffree=%jd syncwrites=%ju asyncwrites=%ju syncreads=%ju " "asyncreads=%ju namemax=%u owner=%u fsid=[%d, %d] }\n", (u_int)sp->f_version, (u_int)sp->f_type, (uintmax_t)sp->f_flags, (uintmax_t)sp->f_bsize, (uintmax_t)sp->f_iosize, (uintmax_t)sp->f_blocks, (uintmax_t)sp->f_bfree, (intmax_t)sp->f_bavail, (uintmax_t)sp->f_files, (intmax_t)sp->f_ffree, (uintmax_t)sp->f_syncwrites, (uintmax_t)sp->f_asyncwrites, (uintmax_t)sp->f_syncreads, 
(uintmax_t)sp->f_asyncreads, (u_int)sp->f_namemax, (u_int)sp->f_owner, (int)sp->f_fsid.val[0], (int)sp->f_fsid.val[1]); db_printf(" mnt_cred = { uid=%u ruid=%u", (u_int)mp->mnt_cred->cr_uid, (u_int)mp->mnt_cred->cr_ruid); if (jailed(mp->mnt_cred)) db_printf(", jail=%d", mp->mnt_cred->cr_prison->pr_id); db_printf(" }\n"); db_printf(" mnt_ref = %d\n", mp->mnt_ref); db_printf(" mnt_gen = %d\n", mp->mnt_gen); db_printf(" mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize); db_printf(" mnt_activevnodelistsize = %d\n", mp->mnt_activevnodelistsize); db_printf(" mnt_writeopcount = %d\n", mp->mnt_writeopcount); db_printf(" mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen); db_printf(" mnt_iosize_max = %d\n", mp->mnt_iosize_max); db_printf(" mnt_hashseed = %u\n", mp->mnt_hashseed); db_printf(" mnt_lockref = %d\n", mp->mnt_lockref); db_printf(" mnt_secondary_writes = %d\n", mp->mnt_secondary_writes); db_printf(" mnt_secondary_accwrites = %d\n", mp->mnt_secondary_accwrites); db_printf(" mnt_gjprovider = %s\n", mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL"); db_printf("\n\nList of active vnodes\n"); TAILQ_FOREACH(vp, &mp->mnt_activevnodelist, v_actfreelist) { if (vp->v_type != VMARKER) { vn_printf(vp, "vnode "); if (db_pager_quit) break; } } db_printf("\n\nList of inactive vnodes\n"); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && (vp->v_iflag & VI_ACTIVE) == 0) { vn_printf(vp, "vnode "); if (db_pager_quit) break; } } } #endif /* DDB */ /* * Fill in a struct xvfsconf based on a struct vfsconf. */ static int vfsconf2x(struct sysctl_req *req, struct vfsconf *vfsp) { struct xvfsconf xvfsp; bzero(&xvfsp, sizeof(xvfsp)); strcpy(xvfsp.vfc_name, vfsp->vfc_name); xvfsp.vfc_typenum = vfsp->vfc_typenum; xvfsp.vfc_refcount = vfsp->vfc_refcount; xvfsp.vfc_flags = vfsp->vfc_flags; /* * These are unused in userland, we keep them * to not break binary compatibility. */ xvfsp.vfc_vfsops = NULL; xvfsp.vfc_next = NULL; return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp))); } #ifdef COMPAT_FREEBSD32 struct xvfsconf32 { uint32_t vfc_vfsops; char vfc_name[MFSNAMELEN]; int32_t vfc_typenum; int32_t vfc_refcount; int32_t vfc_flags; uint32_t vfc_next; }; static int vfsconf2x32(struct sysctl_req *req, struct vfsconf *vfsp) { struct xvfsconf32 xvfsp; strcpy(xvfsp.vfc_name, vfsp->vfc_name); xvfsp.vfc_typenum = vfsp->vfc_typenum; xvfsp.vfc_refcount = vfsp->vfc_refcount; xvfsp.vfc_flags = vfsp->vfc_flags; xvfsp.vfc_vfsops = 0; xvfsp.vfc_next = 0; return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp))); } #endif /* * Top level filesystem related information gathering. */ static int sysctl_vfs_conflist(SYSCTL_HANDLER_ARGS) { struct vfsconf *vfsp; int error; error = 0; vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { #ifdef COMPAT_FREEBSD32 if (req->flags & SCTL_MASK32) error = vfsconf2x32(req, vfsp); else #endif error = vfsconf2x(req, vfsp); if (error) break; } vfsconf_sunlock(); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, conflist, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_vfs_conflist, "S,xvfsconf", "List of all configured filesystems"); #ifndef BURN_BRIDGES static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS); static int vfs_sysctl(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; log(LOG_WARNING, "userland calling deprecated sysctl, " "please rebuild world\n"); #if 1 || defined(COMPAT_PRELITE2) /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. 
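 *
 * This handler only exists for old binaries; current userland should
 * use the vfs.conflist OID defined above, e.g. (sketch):
 *
 *	size_t len;
 *	sysctlbyname("vfs.conflist", NULL, &len, NULL, 0);
 *	buf = malloc(len);
 *	sysctlbyname("vfs.conflist", buf, &len, NULL, 0);
 *
 * after which buf holds an array of struct xvfsconf entries.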
*/ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { if (vfsp->vfc_typenum == name[2]) break; } vfsconf_sunlock(); if (vfsp == NULL) return (EOPNOTSUPP); #ifdef COMPAT_FREEBSD32 if (req->flags & SCTL_MASK32) return (vfsconf2x32(req, vfsp)); else #endif return (vfsconf2x(req, vfsp)); } return (EOPNOTSUPP); } static SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, vfs_sysctl, "Generic filesystem"); #if 1 || defined(COMPAT_PRELITE2) static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS) { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { bzero(&ovfs, sizeof(ovfs)); ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error != 0) { vfsconf_sunlock(); return (error); } } vfsconf_sunlock(); return (0); } #endif /* 1 || COMPAT_PRELITE2 */ #endif /* !BURN_BRIDGES */ #define KINFO_VNODESLOP 10 #ifdef notyet /* * Dump vnode list (via sysctl). */ /* ARGSUSED */ static int sysctl_vnode(SYSCTL_HANDLER_ARGS) { struct xvnode *xvn; struct mount *mp; struct vnode *vp; int error, len, n; /* * Stale numvnodes access is not fatal here. */ req->lock = 0; len = (numvnodes + KINFO_VNODESLOP) * sizeof *xvn; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, len)); error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); xvn = malloc(len, M_TEMP, M_ZERO | M_WAITOK); n = 0; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) continue; MNT_ILOCK(mp); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (n == len) break; vref(vp); xvn[n].xv_size = sizeof *xvn; xvn[n].xv_vnode = vp; xvn[n].xv_id = 0; /* XXX compat */ #define XV_COPY(field) xvn[n].xv_##field = vp->v_##field XV_COPY(usecount); XV_COPY(writecount); XV_COPY(holdcnt); XV_COPY(mount); XV_COPY(numoutput); XV_COPY(type); #undef XV_COPY xvn[n].xv_flag = vp->v_vflag; switch (vp->v_type) { case VREG: case VDIR: case VLNK: break; case VBLK: case VCHR: if (vp->v_rdev == NULL) { vrele(vp); continue; } xvn[n].xv_dev = dev2udev(vp->v_rdev); break; case VSOCK: xvn[n].xv_socket = vp->v_socket; break; case VFIFO: xvn[n].xv_fifo = vp->v_fifoinfo; break; case VNON: case VBAD: default: /* shouldn't happen? */ vrele(vp); continue; } vrele(vp); ++n; } MNT_IUNLOCK(mp); mtx_lock(&mountlist_mtx); vfs_unbusy(mp); if (n == len) break; } mtx_unlock(&mountlist_mtx); error = SYSCTL_OUT(req, xvn, n * sizeof *xvn); free(xvn, M_TEMP); return (error); } SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_vnode, "S,xvnode", ""); #endif static void unmount_or_warn(struct mount *mp) { int error; error = dounmount(mp, MNT_FORCE, curthread); if (error != 0) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. 
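 *
 * For example, with / mounted first and /usr mounted on top of it, the
 * reverse traversal unmounts /usr before /.  The one exception, handled
 * below, is the root device filesystem: unmounting it before / would
 * prevent a clean unmount of /, so it is deferred to the very end.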
*/ void vfs_unmountall(void) { struct mount *mp, *tmp; CTR1(KTR_VFS, "%s: unmounting all filesystems", __func__); /* * Since this only runs when rebooting, it is not interlocked. */ TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, tmp) { vfs_ref(mp); /* * Forcibly unmounting "/dev" before "/" would prevent clean * unmount of the latter. */ if (mp == rootdevmp) continue; unmount_or_warn(mp); } if (rootdevmp != NULL) unmount_or_warn(rootdevmp); } /* * perform msync on all vnodes under a mount point * the mount point must be locked. */ void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *mvp; struct vm_object *obj; CTR2(KTR_VFS, "%s: mp %p", __func__, mp); MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) { obj = vp->v_object; if (obj != NULL && (obj->flags & OBJ_MIGHTBEDIRTY) != 0 && (flags == MNT_WAIT || VOP_ISLOCKED(vp) == 0)) { if (!vget(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, curthread)) { if (vp->v_vflag & VV_NOSYNC) { /* unlinked */ vput(vp); continue; } obj = vp->v_object; if (obj != NULL) { VM_OBJECT_WLOCK(obj); vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC); VM_OBJECT_WUNLOCK(obj); } vput(vp); } } else VI_UNLOCK(vp); } } static void destroy_vpollinfo_free(struct vpollinfo *vi) { knlist_destroy(&vi->vpi_selinfo.si_note); mtx_destroy(&vi->vpi_lock); uma_zfree(vnodepoll_zone, vi); } static void destroy_vpollinfo(struct vpollinfo *vi) { knlist_clear(&vi->vpi_selinfo.si_note, 1); seldrain(&vi->vpi_selinfo); destroy_vpollinfo_free(vi); } /* * Initalize per-vnode helper structure to hold poll-related state. */ void v_addpollinfo(struct vnode *vp) { struct vpollinfo *vi; if (vp->v_pollinfo != NULL) return; vi = uma_zalloc(vnodepoll_zone, M_WAITOK | M_ZERO); mtx_init(&vi->vpi_lock, "vnode pollinfo", NULL, MTX_DEF); knlist_init(&vi->vpi_selinfo.si_note, vp, vfs_knllock, vfs_knlunlock, vfs_knl_assert_locked, vfs_knl_assert_unlocked); VI_LOCK(vp); if (vp->v_pollinfo != NULL) { VI_UNLOCK(vp); destroy_vpollinfo_free(vi); return; } vp->v_pollinfo = vi; VI_UNLOCK(vp); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.) */ int vn_pollrecord(struct vnode *vp, struct thread *td, int events) { v_addpollinfo(vp); mtx_lock(&vp->v_pollinfo->vpi_lock); if (vp->v_pollinfo->vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * which presumably had requested them * (otherwise they would never have been * recorded). */ events &= vp->v_pollinfo->vpi_revents; vp->v_pollinfo->vpi_revents &= ~events; mtx_unlock(&vp->v_pollinfo->vpi_lock); return (events); } vp->v_pollinfo->vpi_events |= events; selrecord(td, &vp->v_pollinfo->vpi_selinfo); mtx_unlock(&vp->v_pollinfo->vpi_lock); return (0); } /* * Routine to create and manage a filesystem syncer vnode. 
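 *
 * Each mount has at most one such vnode (mp->mnt_syncer).  Its only job
 * is to sit on the syncer worklist so that sync_fsync() below runs
 * vfs_msync() and VFS_SYNC(mp, MNT_LAZY) roughly every syncdelay
 * seconds, pushing out delayed writes for the whole filesystem.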
*/ #define sync_close ((int (*)(struct vop_close_args *))nullop) static int sync_fsync(struct vop_fsync_args *); static int sync_inactive(struct vop_inactive_args *); static int sync_reclaim(struct vop_reclaim_args *); static struct vop_vector sync_vnodeops = { .vop_bypass = VOP_EOPNOTSUPP, .vop_close = sync_close, /* close */ .vop_fsync = sync_fsync, /* fsync */ .vop_inactive = sync_inactive, /* inactive */ .vop_reclaim = sync_reclaim, /* reclaim */ .vop_lock1 = vop_stdlock, /* lock */ .vop_unlock = vop_stdunlock, /* unlock */ .vop_islocked = vop_stdislocked, /* islocked */ }; /* * Create a new filesystem syncer vnode for the specified mount point. */ void vfs_allocate_syncvnode(struct mount *mp) { struct vnode *vp; struct bufobj *bo; static long start, incr, next; int error; /* Allocate a new vnode */ error = getnewvnode("syncer", mp, &sync_vnodeops, &vp); if (error != 0) panic("vfs_allocate_syncvnode: getnewvnode() failed"); vp->v_type = VNON; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vp->v_vflag |= VV_FORCEINSMQ; error = insmntque(vp, mp); if (error != 0) panic("vfs_allocate_syncvnode: insmntque() failed"); vp->v_vflag &= ~VV_FORCEINSMQ; VOP_UNLOCK(vp, 0); /* * Place the vnode onto the syncer worklist. We attempt to * scatter them about on the list so that they will go off * at evenly distributed times even if all the filesystems * are mounted at once. */ next += incr; if (next == 0 || next > syncer_maxdelay) { start /= 2; incr /= 2; if (start == 0) { start = syncer_maxdelay / 2; incr = syncer_maxdelay; } next = start; } bo = &vp->v_bufobj; BO_LOCK(bo); vn_syncer_add_to_worklist(bo, syncdelay > 0 ? next % syncdelay : 0); /* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. */ mtx_lock(&sync_mtx); sync_vnode_count++; if (mp->mnt_syncer == NULL) { mp->mnt_syncer = vp; vp = NULL; } mtx_unlock(&sync_mtx); BO_UNLOCK(bo); if (vp != NULL) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vgone(vp); vput(vp); } } void vfs_deallocate_syncvnode(struct mount *mp) { struct vnode *vp; mtx_lock(&sync_mtx); vp = mp->mnt_syncer; if (vp != NULL) mp->mnt_syncer = NULL; mtx_unlock(&sync_mtx); if (vp != NULL) vrele(vp); } /* * Do a lazy sync of the filesystem. */ static int sync_fsync(struct vop_fsync_args *ap) { struct vnode *syncvp = ap->a_vp; struct mount *mp = syncvp->v_mount; int error, save; struct bufobj *bo; /* * We only need to do something if this is a lazy evaluation. */ if (ap->a_waitfor != MNT_LAZY) return (0); /* * Move ourselves to the back of the sync list. */ bo = &syncvp->v_bufobj; BO_LOCK(bo); vn_syncer_add_to_worklist(bo, syncdelay); BO_UNLOCK(bo); /* * Walk the list of vnodes pushing all that are dirty and * not already on the sync list. */ if (vfs_busy(mp, MBF_NOWAIT) != 0) return (0); if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) { vfs_unbusy(mp); return (0); } save = curthread_pflags_set(TDP_SYNCIO); vfs_msync(mp, MNT_NOWAIT); error = VFS_SYNC(mp, MNT_LAZY); curthread_pflags_restore(save); vn_finished_write(mp); vfs_unbusy(mp); return (error); } /* * The syncer vnode is no referenced. */ static int sync_inactive(struct vop_inactive_args *ap) { vgone(ap->a_vp); return (0); } /* * The syncer vnode is no longer needed and is being decommissioned. * * Modifications to the worklist must be protected by sync_mtx. 
*/ static int sync_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; struct bufobj *bo; bo = &vp->v_bufobj; BO_LOCK(bo); mtx_lock(&sync_mtx); if (vp->v_mount->mnt_syncer == vp) vp->v_mount->mnt_syncer = NULL; if (bo->bo_flag & BO_ONWORKLST) { LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; sync_vnode_count--; bo->bo_flag &= ~BO_ONWORKLST; } mtx_unlock(&sync_mtx); BO_UNLOCK(bo); return (0); } /* * Check if vnode represents a disk device */ int vn_isdisk(struct vnode *vp, int *errp) { int error; if (vp->v_type != VCHR) { error = ENOTBLK; goto out; } error = 0; dev_lock(); if (vp->v_rdev == NULL) error = ENXIO; else if (vp->v_rdev->si_devsw == NULL) error = ENXIO; else if (!(vp->v_rdev->si_devsw->d_flags & D_DISK)) error = ENOTBLK; dev_unlock(); out: if (errp != NULL) *errp = error; return (error == 0); } /* * Common filesystem object access control check routine. Accepts a * vnode's type, "mode", uid and gid, requested access mode, credentials, * and optional call-by-reference privused argument allowing vaccess() * to indicate to the caller whether privilege was used to satisfy the * request (obsoleted). Returns 0 on success, or an errno on failure. */ int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid, accmode_t accmode, struct ucred *cred, int *privused) { accmode_t dac_granted; accmode_t priv_granted; KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0, ("invalid bit in accmode")); KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE), ("VAPPEND without VWRITE")); /* * Look for a normal, non-privileged way to access the file/directory * as requested. If it exists, go with that. */ if (privused != NULL) *privused = 0; dac_granted = 0; /* Check the owner. */ if (cred->cr_uid == file_uid) { dac_granted |= VADMIN; if (file_mode & S_IXUSR) dac_granted |= VEXEC; if (file_mode & S_IRUSR) dac_granted |= VREAD; if (file_mode & S_IWUSR) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); goto privcheck; } /* Otherwise, check the groups (first match) */ if (groupmember(file_gid, cred)) { if (file_mode & S_IXGRP) dac_granted |= VEXEC; if (file_mode & S_IRGRP) dac_granted |= VREAD; if (file_mode & S_IWGRP) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); goto privcheck; } /* Otherwise, check everyone else. */ if (file_mode & S_IXOTH) dac_granted |= VEXEC; if (file_mode & S_IROTH) dac_granted |= VREAD; if (file_mode & S_IWOTH) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); privcheck: /* * Build a privilege mask to determine if the set of privileges * satisfies the requirements when combined with the granted mask * from above. For each privilege, if the privilege is required, * bitwise or the request type onto the priv_granted mask. */ priv_granted = 0; if (type == VDIR) { /* * For directories, use PRIV_VFS_LOOKUP to satisfy VEXEC * requests, instead of PRIV_VFS_EXEC. */ if ((accmode & VEXEC) && ((dac_granted & VEXEC) == 0) && !priv_check_cred(cred, PRIV_VFS_LOOKUP, 0)) priv_granted |= VEXEC; } else { /* * Ensure that at least one execute bit is on. Otherwise, * a privileged user will always succeed, and we don't want * this to happen unless the file really is executable. 
*/ if ((accmode & VEXEC) && ((dac_granted & VEXEC) == 0) && (file_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0 && !priv_check_cred(cred, PRIV_VFS_EXEC, 0)) priv_granted |= VEXEC; } if ((accmode & VREAD) && ((dac_granted & VREAD) == 0) && !priv_check_cred(cred, PRIV_VFS_READ, 0)) priv_granted |= VREAD; if ((accmode & VWRITE) && ((dac_granted & VWRITE) == 0) && !priv_check_cred(cred, PRIV_VFS_WRITE, 0)) priv_granted |= (VWRITE | VAPPEND); if ((accmode & VADMIN) && ((dac_granted & VADMIN) == 0) && !priv_check_cred(cred, PRIV_VFS_ADMIN, 0)) priv_granted |= VADMIN; if ((accmode & (priv_granted | dac_granted)) == accmode) { /* XXX audit: privilege used */ if (privused != NULL) *privused = 1; return (0); } return ((accmode & VADMIN) ? EPERM : EACCES); } /* * Credential check based on process requesting service, and per-attribute * permissions. */ int extattr_check_cred(struct vnode *vp, int attrnamespace, struct ucred *cred, struct thread *td, accmode_t accmode) { /* * Kernel-invoked always succeeds. */ if (cred == NOCRED) return (0); /* * Do not allow privileged processes in jail to directly manipulate * system attributes. */ switch (attrnamespace) { case EXTATTR_NAMESPACE_SYSTEM: /* Potentially should be: return (EPERM); */ return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM, 0)); case EXTATTR_NAMESPACE_USER: return (VOP_ACCESS(vp, accmode, cred, td)); default: return (EPERM); } } #ifdef DEBUG_VFS_LOCKS /* * This only exists to supress warnings from unlocked specfs accesses. It is * no longer ok to have an unlocked VFS. */ #define IGNORE_LOCK(vp) (panicstr != NULL || (vp) == NULL || \ (vp)->v_type == VCHR || (vp)->v_type == VBAD) int vfs_badlock_ddb = 1; /* Drop into debugger on violation. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_ddb, CTLFLAG_RW, &vfs_badlock_ddb, 0, "Drop into debugger on lock violation"); int vfs_badlock_mutex = 1; /* Check for interlock across VOPs. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_mutex, CTLFLAG_RW, &vfs_badlock_mutex, 0, "Check for interlock across VOPs"); int vfs_badlock_print = 1; /* Print lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_print, CTLFLAG_RW, &vfs_badlock_print, 0, "Print lock violations"); #ifdef KDB int vfs_badlock_backtrace = 1; /* Print backtrace at lock violations. 
*/ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_backtrace, CTLFLAG_RW, &vfs_badlock_backtrace, 0, "Print backtrace at lock violations"); #endif static void vfs_badlock(const char *msg, const char *str, struct vnode *vp) { #ifdef KDB if (vfs_badlock_backtrace) kdb_backtrace(); #endif if (vfs_badlock_print) printf("%s: %p %s\n", str, (void *)vp, msg); if (vfs_badlock_ddb) kdb_enter(KDB_WHY_VFSLOCK, "lock violation"); } void assert_vi_locked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && !mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is not locked but should be", str, vp); } void assert_vi_unlocked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is locked but should not be", str, vp); } void assert_vop_locked(struct vnode *vp, const char *str) { int locked; if (!IGNORE_LOCK(vp)) { locked = VOP_ISLOCKED(vp); if (locked == 0 || locked == LK_EXCLOTHER) vfs_badlock("is not locked but should be", str, vp); } } void assert_vop_unlocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) vfs_badlock("is locked but should not be", str, vp); } void assert_vop_elocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_EXCLUSIVE) vfs_badlock("is not exclusive locked but should be", str, vp); } #if 0 void assert_vop_elocked_other(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_EXCLOTHER) vfs_badlock("is not exclusive locked by another thread", str, vp); } void assert_vop_slocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_SHARED) vfs_badlock("is not locked shared but should be", str, vp); } #endif /* 0 */ #endif /* DEBUG_VFS_LOCKS */ void vop_rename_fail(struct vop_rename_args *ap) { if (ap->a_tvp != NULL) vput(ap->a_tvp); if (ap->a_tdvp == ap->a_tvp) vrele(ap->a_tdvp); else vput(ap->a_tdvp); vrele(ap->a_fdvp); vrele(ap->a_fvp); } void vop_rename_pre(void *ap) { struct vop_rename_args *a = ap; #ifdef DEBUG_VFS_LOCKS if (a->a_tvp) ASSERT_VI_UNLOCKED(a->a_tvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_tdvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fdvp, "VOP_RENAME"); /* Check the source (from). */ if (a->a_tdvp->v_vnlock != a->a_fdvp->v_vnlock && (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fdvp->v_vnlock)) ASSERT_VOP_UNLOCKED(a->a_fdvp, "vop_rename: fdvp locked"); if (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fvp->v_vnlock) ASSERT_VOP_UNLOCKED(a->a_fvp, "vop_rename: fvp locked"); /* Check the target. */ if (a->a_tvp) ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); #endif if (a->a_tdvp != a->a_fdvp) vhold(a->a_fdvp); if (a->a_tvp != a->a_fvp) vhold(a->a_fvp); vhold(a->a_tdvp); if (a->a_tvp) vhold(a->a_tvp); } void vop_strategy_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_strategy_args *a; struct buf *bp; a = ap; bp = a->a_bp; /* * Cluster ops lock their component buffers but not the IO container. 
*/ if ((bp->b_flags & B_CLUSTER) != 0) return; if (panicstr == NULL && !BUF_ISLOCKED(bp)) { if (vfs_badlock_print) printf( "VOP_STRATEGY: bp is not locked but should be\n"); if (vfs_badlock_ddb) kdb_enter(KDB_WHY_VFSLOCK, "lock violation"); } #endif } void vop_lock_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_lock1_args *a = ap; if ((a->a_flags & LK_INTERLOCK) == 0) ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); else ASSERT_VI_LOCKED(a->a_vp, "VOP_LOCK"); #endif } void vop_lock_post(void *ap, int rc) { #ifdef DEBUG_VFS_LOCKS struct vop_lock1_args *a = ap; ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); if (rc == 0 && (a->a_flags & LK_EXCLOTHER) == 0) ASSERT_VOP_LOCKED(a->a_vp, "VOP_LOCK"); #endif } void vop_unlock_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_unlock_args *a = ap; if (a->a_flags & LK_INTERLOCK) ASSERT_VI_LOCKED(a->a_vp, "VOP_UNLOCK"); ASSERT_VOP_LOCKED(a->a_vp, "VOP_UNLOCK"); #endif } void vop_unlock_post(void *ap, int rc) { #ifdef DEBUG_VFS_LOCKS struct vop_unlock_args *a = ap; if (a->a_flags & LK_INTERLOCK) ASSERT_VI_UNLOCKED(a->a_vp, "VOP_UNLOCK"); #endif } void vop_create_post(void *ap, int rc) { struct vop_create_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_deleteextattr_post(void *ap, int rc) { struct vop_deleteextattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_link_post(void *ap, int rc) { struct vop_link_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_LINK); VFS_KNOTE_LOCKED(a->a_tdvp, NOTE_WRITE); } } void vop_mkdir_post(void *ap, int rc) { struct vop_mkdir_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); } void vop_mknod_post(void *ap, int rc) { struct vop_mknod_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_reclaim_post(void *ap, int rc) { struct vop_reclaim_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_REVOKE); } void vop_remove_post(void *ap, int rc) { struct vop_remove_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_rename_post(void *ap, int rc) { struct vop_rename_args *a = ap; if (!rc) { VFS_KNOTE_UNLOCKED(a->a_fdvp, NOTE_WRITE); VFS_KNOTE_UNLOCKED(a->a_tdvp, NOTE_WRITE); VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME); if (a->a_tvp) VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE); } if (a->a_tdvp != a->a_fdvp) vdrop(a->a_fdvp); if (a->a_tvp != a->a_fvp) vdrop(a->a_fvp); vdrop(a->a_tdvp); if (a->a_tvp) vdrop(a->a_tvp); } void vop_rmdir_post(void *ap, int rc) { struct vop_rmdir_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_setattr_post(void *ap, int rc) { struct vop_setattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_setextattr_post(void *ap, int rc) { struct vop_setextattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_symlink_post(void *ap, int rc) { struct vop_symlink_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } static struct knlist fs_knlist; static void vfs_event_init(void *arg) { knlist_init_mtx(&fs_knlist, NULL); } /* XXX - correct order? 
*/ SYSINIT(vfs_knlist, SI_SUB_VFS, SI_ORDER_ANY, vfs_event_init, NULL); void vfs_event_signal(fsid_t *fsid, uint32_t event, intptr_t data __unused) { KNOTE_UNLOCKED(&fs_knlist, event); } static int filt_fsattach(struct knote *kn); static void filt_fsdetach(struct knote *kn); static int filt_fsevent(struct knote *kn, long hint); struct filterops fs_filtops = { .f_isfd = 0, .f_attach = filt_fsattach, .f_detach = filt_fsdetach, .f_event = filt_fsevent }; static int filt_fsattach(struct knote *kn) { kn->kn_flags |= EV_CLEAR; knlist_add(&fs_knlist, kn, 0); return (0); } static void filt_fsdetach(struct knote *kn) { knlist_remove(&fs_knlist, kn, 0); } static int filt_fsevent(struct knote *kn, long hint) { kn->kn_fflags |= hint; return (kn->kn_fflags != 0); } static int sysctl_vfs_ctl(SYSCTL_HANDLER_ARGS) { struct vfsidctl vc; int error; struct mount *mp; error = SYSCTL_IN(req, &vc, sizeof(vc)); if (error) return (error); if (vc.vc_vers != VFS_CTL_VERS1) return (EINVAL); mp = vfs_getvfs(&vc.vc_fsid); if (mp == NULL) return (ENOENT); /* ensure that a specific sysctl goes to the right filesystem. */ if (strcmp(vc.vc_fstypename, "*") != 0 && strcmp(vc.vc_fstypename, mp->mnt_vfc->vfc_name) != 0) { vfs_rel(mp); return (EINVAL); } VCTLTOREQ(&vc, req); error = VFS_SYSCTL(mp, vc.vc_op, req); vfs_rel(mp); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, ctl, CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0, sysctl_vfs_ctl, "", "Sysctl by fsid"); /* * Function to initialize a va_filerev field sensibly. * XXX: Wouldn't a random number make a lot more sense ?? */ u_quad_t init_va_filerev(void) { struct bintime bt; getbinuptime(&bt); return (((u_quad_t)bt.sec << 32LL) | (bt.frac >> 32LL)); } static int filt_vfsread(struct knote *kn, long hint); static int filt_vfswrite(struct knote *kn, long hint); static int filt_vfsvnode(struct knote *kn, long hint); static void filt_vfsdetach(struct knote *kn); static struct filterops vfsread_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsread }; static struct filterops vfswrite_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfswrite }; static struct filterops vfsvnode_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsvnode }; static void vfs_knllock(void *arg) { struct vnode *vp = arg; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } static void vfs_knlunlock(void *arg) { struct vnode *vp = arg; VOP_UNLOCK(vp, 0); } static void vfs_knl_assert_locked(void *arg) { #ifdef DEBUG_VFS_LOCKS struct vnode *vp = arg; ASSERT_VOP_LOCKED(vp, "vfs_knl_assert_locked"); #endif } static void vfs_knl_assert_unlocked(void *arg) { #ifdef DEBUG_VFS_LOCKS struct vnode *vp = arg; ASSERT_VOP_UNLOCKED(vp, "vfs_knl_assert_unlocked"); #endif } int vfs_kqfilter(struct vop_kqfilter_args *ap) { struct vnode *vp = ap->a_vp; struct knote *kn = ap->a_kn; struct knlist *knl; switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &vfsread_filtops; break; case EVFILT_WRITE: kn->kn_fop = &vfswrite_filtops; break; case EVFILT_VNODE: kn->kn_fop = &vfsvnode_filtops; break; default: return (EINVAL); } kn->kn_hook = (caddr_t)vp; v_addpollinfo(vp); if (vp->v_pollinfo == NULL) return (ENOMEM); knl = &vp->v_pollinfo->vpi_selinfo.si_note; vhold(vp); knlist_add(knl, kn, 0); return (0); } /* * Detach knote from vnode */ static void filt_vfsdetach(struct knote *kn) { struct vnode *vp = (struct vnode *)kn->kn_hook; KASSERT(vp->v_pollinfo != NULL, ("Missing v_pollinfo")); knlist_remove(&vp->v_pollinfo->vpi_selinfo.si_note, kn, 0); vdrop(vp); } /*ARGSUSED*/ static int 
filt_vfsread(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; struct vattr va; int res; /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. */ if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) { VI_LOCK(vp); kn->kn_flags |= (EV_EOF | EV_ONESHOT); VI_UNLOCK(vp); return (1); } if (VOP_GETATTR(vp, &va, curthread->td_ucred)) return (0); VI_LOCK(vp); kn->kn_data = va.va_size - kn->kn_fp->f_offset; res = (kn->kn_sfflags & NOTE_FILE_POLL) != 0 || kn->kn_data != 0; VI_UNLOCK(vp); return (res); } /*ARGSUSED*/ static int filt_vfswrite(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; VI_LOCK(vp); /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. */ if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) kn->kn_flags |= (EV_EOF | EV_ONESHOT); kn->kn_data = 0; VI_UNLOCK(vp); return (1); } static int filt_vfsvnode(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; int res; VI_LOCK(vp); if (kn->kn_sfflags & hint) kn->kn_fflags |= hint; if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) { kn->kn_flags |= EV_EOF; VI_UNLOCK(vp); return (1); } res = (kn->kn_fflags != 0); VI_UNLOCK(vp); return (res); } int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off) { int error; if (dp->d_reclen > ap->a_uio->uio_resid) return (ENAMETOOLONG); error = uiomove(dp, dp->d_reclen, ap->a_uio); if (error) { if (ap->a_ncookies != NULL) { if (ap->a_cookies != NULL) free(ap->a_cookies, M_TEMP); ap->a_cookies = NULL; *ap->a_ncookies = 0; } return (error); } if (ap->a_ncookies == NULL) return (0); KASSERT(ap->a_cookies, ("NULL ap->a_cookies value with non-NULL ap->a_ncookies!")); *ap->a_cookies = realloc(*ap->a_cookies, (*ap->a_ncookies + 1) * sizeof(u_long), M_TEMP, M_WAITOK | M_ZERO); (*ap->a_cookies)[*ap->a_ncookies] = off; return (0); } /* * Mark for update the access time of the file if the filesystem * supports VOP_MARKATIME. This functionality is used by execve and * mmap, so we want to avoid the I/O implied by directly setting * va_atime for the sake of efficiency. */ void vfs_mark_atime(struct vnode *vp, struct ucred *cred) { struct mount *mp; mp = vp->v_mount; ASSERT_VOP_LOCKED(vp, "vfs_mark_atime"); if (mp != NULL && (mp->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) (void)VOP_MARKATIME(vp); } /* * The purpose of this routine is to remove granularity from accmode_t, * reducing it into standard unix access bits - VEXEC, VREAD, VWRITE, * VADMIN and VAPPEND. * * If it returns 0, the caller is supposed to continue with the usual * access checks using 'accmode' as modified by this routine. If it * returns nonzero value, the caller is supposed to return that value * as errno. * * Note that after this routine runs, accmode may be zero. */ int vfs_unixify_accmode(accmode_t *accmode) { /* * There is no way to specify explicit "deny" rule using * file mode or POSIX.1e ACLs. */ if (*accmode & VEXPLICIT_DENY) { *accmode = 0; return (0); } /* * None of these can be translated into usual access bits. * Also, the common case for NFSv4 ACLs is to not contain * either of these bits. Caller should check for VWRITE * on the containing directory instead. */ if (*accmode & (VDELETE_CHILD | VDELETE)) return (EPERM); if (*accmode & VADMIN_PERMS) { *accmode &= ~VADMIN_PERMS; *accmode |= VADMIN; } /* * There is no way to deny VREAD_ATTRIBUTES, VREAD_ACL * or VSYNCHRONIZE using file mode or POSIX.1e ACL. 
*/ *accmode &= ~(VSTAT_PERMS | VSYNCHRONIZE); return (0); } /* * These are helper functions for filesystems to traverse all * their vnodes. See MNT_VNODE_FOREACH_ALL() in sys/mount.h. * * This interface replaces MNT_VNODE_FOREACH. */ MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker"); struct vnode * __mnt_vnode_next_all(struct vnode **mvp, struct mount *mp) { struct vnode *vp; if (should_yield()) kern_yield(PRI_USER); MNT_ILOCK(mp); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); vp = TAILQ_NEXT(*mvp, v_nmntvnodes); while (vp != NULL && (vp->v_type == VMARKER || (vp->v_iflag & VI_DOOMED) != 0)) vp = TAILQ_NEXT(vp, v_nmntvnodes); /* Check if we are done */ if (vp == NULL) { __mnt_vnode_markerfree_all(mvp, mp); /* MNT_IUNLOCK(mp); -- done in above function */ mtx_assert(MNT_MTX(mp), MA_NOTOWNED); return (NULL); } TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); VI_LOCK(vp); MNT_IUNLOCK(mp); return (vp); } struct vnode * __mnt_vnode_first_all(struct vnode **mvp, struct mount *mp) { struct vnode *vp; *mvp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO); MNT_ILOCK(mp); MNT_REF(mp); (*mvp)->v_type = VMARKER; vp = TAILQ_FIRST(&mp->mnt_nvnodelist); while (vp != NULL && (vp->v_type == VMARKER || (vp->v_iflag & VI_DOOMED) != 0)) vp = TAILQ_NEXT(vp, v_nmntvnodes); /* Check if we are done */ if (vp == NULL) { MNT_REL(mp); MNT_IUNLOCK(mp); free(*mvp, M_VNODE_MARKER); *mvp = NULL; return (NULL); } (*mvp)->v_mount = mp; TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); VI_LOCK(vp); MNT_IUNLOCK(mp); return (vp); } void __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp) { if (*mvp == NULL) { MNT_IUNLOCK(mp); return; } mtx_assert(MNT_MTX(mp), MA_OWNED); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); MNT_REL(mp); MNT_IUNLOCK(mp); free(*mvp, M_VNODE_MARKER); *mvp = NULL; } /* * These are helper functions for filesystems to traverse their * active vnodes. 
See MNT_VNODE_FOREACH_ACTIVE() in sys/mount.h */ static void mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp) { KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); MNT_ILOCK(mp); MNT_REL(mp); MNT_IUNLOCK(mp); free(*mvp, M_VNODE_MARKER); *mvp = NULL; } static struct vnode * mnt_vnode_next_active(struct vnode **mvp, struct mount *mp) { struct vnode *vp, *nvp; mtx_assert(&vnode_free_list_mtx, MA_OWNED); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); restart: vp = TAILQ_NEXT(*mvp, v_actfreelist); TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist); while (vp != NULL) { if (vp->v_type == VMARKER) { vp = TAILQ_NEXT(vp, v_actfreelist); continue; } if (!VI_TRYLOCK(vp)) { if (mp_ncpus == 1 || should_yield()) { TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist); mtx_unlock(&vnode_free_list_mtx); pause("vnacti", 1); mtx_lock(&vnode_free_list_mtx); goto restart; } continue; } KASSERT(vp->v_type != VMARKER, ("locked marker %p", vp)); KASSERT(vp->v_mount == mp || vp->v_mount == NULL, ("alien vnode on the active list %p %p", vp, mp)); if (vp->v_mount == mp && (vp->v_iflag & VI_DOOMED) == 0) break; nvp = TAILQ_NEXT(vp, v_actfreelist); VI_UNLOCK(vp); vp = nvp; } /* Check if we are done */ if (vp == NULL) { mtx_unlock(&vnode_free_list_mtx); mnt_vnode_markerfree_active(mvp, mp); return (NULL); } TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist); mtx_unlock(&vnode_free_list_mtx); ASSERT_VI_LOCKED(vp, "active iter"); KASSERT((vp->v_iflag & VI_ACTIVE) != 0, ("Non-active vp %p", vp)); return (vp); } struct vnode * __mnt_vnode_next_active(struct vnode **mvp, struct mount *mp) { if (should_yield()) kern_yield(PRI_USER); mtx_lock(&vnode_free_list_mtx); return (mnt_vnode_next_active(mvp, mp)); } struct vnode * __mnt_vnode_first_active(struct vnode **mvp, struct mount *mp) { struct vnode *vp; *mvp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO); MNT_ILOCK(mp); MNT_REF(mp); MNT_IUNLOCK(mp); (*mvp)->v_type = VMARKER; (*mvp)->v_mount = mp; mtx_lock(&vnode_free_list_mtx); vp = TAILQ_FIRST(&mp->mnt_activevnodelist); if (vp == NULL) { mtx_unlock(&vnode_free_list_mtx); mnt_vnode_markerfree_active(mvp, mp); return (NULL); } TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist); return (mnt_vnode_next_active(mvp, mp)); } void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp) { if (*mvp == NULL) return; mtx_lock(&vnode_free_list_mtx); TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist); mtx_unlock(&vnode_free_list_mtx); mnt_vnode_markerfree_active(mvp, mp); } Index: projects/release-pkg/sys =================================================================== --- projects/release-pkg/sys (revision 293224) +++ projects/release-pkg/sys (revision 293225) Property changes on: projects/release-pkg/sys ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys:r293196-293224 Index: projects/release-pkg/usr.sbin/bsdinstall/scripts/auto =================================================================== --- projects/release-pkg/usr.sbin/bsdinstall/scripts/auto (revision 293224) +++ projects/release-pkg/usr.sbin/bsdinstall/scripts/auto (revision 293225) @@ -1,414 +1,470 @@ #!/bin/sh #- # Copyright (c) 2011 Nathan Whitehorn # Copyright (c) 2013 Devin Teske # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. 
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # ############################################################ INCLUDES BSDCFG_SHARE="/usr/share/bsdconfig" . $BSDCFG_SHARE/common.subr || exit 1 f_include $BSDCFG_SHARE/dialog.subr ############################################################ FUNCTIONS error() { local msg if [ -n "$1" ]; then msg="$1\n\n" fi test -n "$DISTDIR_IS_UNIONFS" && umount -f $BSDINSTALL_DISTDIR test -f $PATH_FSTAB && bsdinstall umount dialog --backtitle "FreeBSD Installer" --title "Abort" \ --no-label "Exit" --yes-label "Restart" --yesno \ "${msg}An installation step has been aborted. Would you like to restart the installation or exit the installer?" 0 0 if [ $? -ne 0 ]; then exit 1 else exec $0 fi } hline_arrows_tab_enter="Press arrows, TAB or ENTER" msg_gpt_active_fix="Your hardware is known to have issues booting in CSM/Legacy/BIOS mode from GPT partitions that are not set active. Would you like the installer to apply this workaround for you?" msg_lenovo_fix="Your model of Lenovo is known to have a BIOS bug that prevents it booting from GPT partitions without UEFI. Would you like the installer to apply a workaround for you?" 
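# The msg_*_fix strings above are presented by dialog_workaround() (defined
# below) and are only offered when the smbios.* values probed in the MAIN
# section match a known-broken model.  As a rough, abbreviated sketch,
# covering one more affected machine only needs an extra pattern in the
# matching case statement further down (the added model name here is purely
# hypothetical, and the real entries also set ZFSBOOT_PARTITION_SCHEME):
#
#	"Dell Inc.")
#		case "$sys_model" in
#		"Latitude E7440"|"Latitude E7240"|"Some New Model")
#			dialog_workaround "$msg_gpt_active_fix"
#			[ $? -eq $DIALOG_OK ] && export WORKAROUND_GPTACTIVE=1
#			;;
#		esac
#		;;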
msg_no="NO" msg_yes="YES" # dialog_workaround # # Ask the user if they wish to apply a workaround # dialog_workaround() { local passed_msg="$1" local title="$DIALOG_TITLE" local btitle="$DIALOG_BACKTITLE" local prompt # Calculated below local hline="$hline_arrows_tab_enter" local height=8 width=50 prefix=" " local plen=${#prefix} list= line= local max_width=$(( $width - 3 - $plen )) local yes no defaultno extra_args format if [ "$USE_XDIALOG" ]; then yes=ok no=cancel defaultno=default-no extra_args="--wrap --left" format="$passed_msg" else yes=yes no=no defaultno=defaultno extra_args="--cr-wrap" format="$passed_msg" fi # Add height for Xdialog(1) [ "$USE_XDIALOG" ] && height=$(( $height + $height / 5 + 3 )) prompt=$( printf "$format" ) f_dprintf "%s: Workaround prompt" "$0" $DIALOG \ --title "$title" \ --backtitle "$btitle" \ --hline "$hline" \ --$yes-label "$msg_yes" \ --$no-label "$msg_no" \ $extra_args \ --yesno "$prompt" $height $width } ############################################################ MAIN f_dprintf "Began Installation at %s" "$( date )" rm -rf $BSDINSTALL_TMPETC mkdir $BSDINSTALL_TMPETC trap true SIGINT # This section is optional bsdinstall keymap trap error SIGINT # Catch cntrl-C here bsdinstall hostname || error "Set hostname failed" export DISTRIBUTIONS="base.txz kernel.txz" if [ -f $BSDINSTALL_DISTDIR/MANIFEST ]; then - DISTMENU=`awk -F'\t' '!/^(kernel|base)/{print $4,$5,$6}' $BSDINSTALL_DISTDIR/MANIFEST` + DISTMENU=`awk -F'\t' '!/^(kernel\.txz|base\.txz)/{print $1,$5,$6}' $BSDINSTALL_DISTDIR/MANIFEST` + DISTMENU="$(echo ${DISTMENU} | sed -E 's/\.txz//g')" exec 3>&1 EXTRA_DISTS=$( eval dialog \ --backtitle \"FreeBSD Installer\" \ --title \"Distribution Select\" --nocancel --separate-output \ --checklist \"Choose optional system components to install:\" \ 0 0 0 $DISTMENU \ 2>&1 1>&3 ) for dist in $EXTRA_DISTS; do export DISTRIBUTIONS="$DISTRIBUTIONS $dist.txz" done fi +LOCAL_DISTRIBUTIONS="MANIFEST" FETCH_DISTRIBUTIONS="" for dist in $DISTRIBUTIONS; do if [ ! -f $BSDINSTALL_DISTDIR/$dist ]; then FETCH_DISTRIBUTIONS="$FETCH_DISTRIBUTIONS $dist" + else + LOCAL_DISTRIBUTIONS="$LOCAL_DISTRIBUTIONS $dist" fi done +LOCAL_DISTRIBUTIONS=`echo $LOCAL_DISTRIBUTIONS` # Trim white space FETCH_DISTRIBUTIONS=`echo $FETCH_DISTRIBUTIONS` # Trim white space if [ -n "$FETCH_DISTRIBUTIONS" -a -n "$BSDINSTALL_CONFIGCURRENT" ]; then - dialog --backtitle "FreeBSD Installer" --title "Network Installation" --msgbox "No installation files were found on the boot volume. The next few screens will allow you to configure networking so that they can be downloaded from the Internet." 0 0 + dialog --backtitle "FreeBSD Installer" --title "Network Installation" --msgbox "Some installation files were not found on the boot volume. The next few screens will allow you to configure networking so that they can be downloaded from the Internet." 0 0 bsdinstall netconfig || error NETCONFIG_DONE=yes fi if [ -n "$FETCH_DISTRIBUTIONS" ]; then exec 3>&1 BSDINSTALL_DISTSITE=$(`dirname $0`/mirrorselect 2>&1 1>&3) MIRROR_BUTTON=$? 
exec 3>&- test $MIRROR_BUTTON -eq 0 || error "No mirror selected" export BSDINSTALL_DISTSITE fi rm -f $PATH_FSTAB touch $PATH_FSTAB # # Try to detect known broken platforms and apply their workarounds # if f_interactive; then sys_maker=$( kenv -q smbios.system.maker ) f_dprintf "smbios.system.maker=[%s]" "$sys_maker" sys_model=$( kenv -q smbios.system.product ) f_dprintf "smbios.system.product=[%s]" "$sys_model" sys_version=$( kenv -q smbios.system.version ) f_dprintf "smbios.system.version=[%s]" "$sys_version" sys_mb_maker=$( kenv -q smbios.planar.maker ) f_dprintf "smbios.planar.maker=[%s]" "$sys_mb_maker" sys_mb_product=$( kenv -q smbios.planar.product ) f_dprintf "smbios.planar.product=[%s]" "$sys_mb_product" # # Laptop Models # case "$sys_maker" in "LENOVO") case "$sys_version" in "ThinkPad X220"|"ThinkPad T420"|"ThinkPad T520") dialog_workaround "$msg_lenovo_fix" retval=$? f_dprintf "lenovofix_prompt=[%s]" "$retval" if [ $retval -eq $DIALOG_OK ]; then export ZFSBOOT_PARTITION_SCHEME="GPT + Lenovo Fix" export WORKAROUND_LENOVO=1 fi ;; esac ;; "Dell Inc.") case "$sys_model" in "Latitude E7440"|"Latitude E7240") dialog_workaround "$msg_gpt_active_fix" retval=$? f_dprintf "gpt_active_fix_prompt=[%s]" "$retval" if [ $retval -eq $DIALOG_OK ]; then export ZFSBOOT_PARTITION_SCHEME="GPT + Active" export WORKAROUND_GPTACTIVE=1 fi ;; esac ;; "Hewlett-Packard") case "$sys_model" in "HP ProBook 4330s") dialog_workaround "$msg_gpt_active_fix" retval=$? f_dprintf "gpt_active_fix_prompt=[%s]" "$retval" if [ $retval -eq $DIALOG_OK ]; then export ZFSBOOT_PARTITION_SCHEME="GPT + Active" export WORKAROUND_GPTACTIVE=1 fi ;; esac ;; esac # # Motherboard Models # case "$sys_mb_maker" in "Intel Corporation") case "$sys_mb_product" in "DP965LT"|"D510MO") dialog_workaround "$msg_gpt_active_fix" retval=$? f_dprintf "gpt_active_fix_prompt=[%s]" "$retval" if [ $retval -eq $DIALOG_OK ]; then export ZFSBOOT_PARTITION_SCHEME="GPT + Active" export WORKAROUND_GPTACTIVE=1 fi ;; esac ;; "Acer") case "$sys_mb_product" in "Veriton M6630G") dialog_workaround "$msg_gpt_active_fix" retval=$? f_dprintf "gpt_active_fix_prompt=[%s]" "$retval" if [ $retval -eq $DIALOG_OK ]; then export ZFSBOOT_PARTITION_SCHEME="GPT + Active" export WORKAROUND_GPTACTIVE=1 fi ;; esac ;; esac fi PMODES="\ \"Auto (UFS)\" \"Guided Disk Setup\" \ Manual \"Manual Disk Setup (experts)\" \ Shell \"Open a shell and partition by hand\"" CURARCH=$( uname -m ) case $CURARCH in amd64|i386) # Booting ZFS Supported PMODES="$PMODES \"Auto (ZFS)\" \"Guided Root-on-ZFS\"" ;; *) # Booting ZFS Unspported ;; esac exec 3>&1 PARTMODE=`echo $PMODES | xargs dialog --backtitle "FreeBSD Installer" \ --title "Partitioning" \ --menu "How would you like to partition your disk?" \ 0 0 0 2>&1 1>&3` || exit 1 exec 3>&- case "$PARTMODE" in "Auto (UFS)") # Guided bsdinstall autopart || error "Partitioning error" bsdinstall mount || error "Failed to mount filesystem" ;; "Shell") # Shell clear echo "Use this shell to set up partitions for the new system. When finished, mount the system at $BSDINSTALL_CHROOT and place an fstab file for the new system at $PATH_FSTAB. Then type 'exit'. You can also enter the partition editor at any time by entering 'bsdinstall partedit'." 
sh 2>&1 ;; "Manual") # Manual if f_isset debugFile; then # Give partedit the path to our logfile so it can append BSDINSTALL_LOG="${debugFile#+}" bsdinstall partedit || error "Partitioning error" else bsdinstall partedit || error "Partitioning error" fi bsdinstall mount || error "Failed to mount filesystem" ;; "Auto (ZFS)") # ZFS bsdinstall zfsboot || error "ZFS setup failed" bsdinstall mount || error "Failed to mount filesystem" ;; *) error "Unknown partitioning mode" ;; esac if [ ! -z "$FETCH_DISTRIBUTIONS" ]; then ALL_DISTRIBUTIONS="$DISTRIBUTIONS" + WANT_DEBUG= # Download to a directory in the new system as scratch space BSDINSTALL_FETCHDEST="$BSDINSTALL_CHROOT/usr/freebsd-dist" mkdir -p "$BSDINSTALL_FETCHDEST" || error "Could not create directory $BSDINSTALL_FETCHDEST" export DISTRIBUTIONS="$FETCH_DISTRIBUTIONS" # Try to use any existing distfiles if [ -d $BSDINSTALL_DISTDIR ]; then DISTDIR_IS_UNIONFS=1 mount_nullfs -o union "$BSDINSTALL_FETCHDEST" "$BSDINSTALL_DISTDIR" else - export DISTRIBUTIONS="$ALL_DISTRIBUTIONS" + export DISTRIBUTIONS="$FETCH_DISTRIBUTIONS" export BSDINSTALL_DISTDIR="$BSDINSTALL_FETCHDEST" fi export FTP_PASSIVE_MODE=YES - bsdinstall distfetch || error "Failed to fetch distribution" + # Iterate through the distribution list and set a flag if debugging + # distributions have been selected. + for _DISTRIBUTION in $DISTRIBUTIONS; do + case $_DISTRIBUTION in + *-dbg.*) + [ -e $BSDINSTALL_DISTDIR/$_DISTRIBUTION ] \ + && continue + WANT_DEBUG=1 + DEBUG_LIST="\n$DEBUG_LIST\n$_DISTRIBUTION" + ;; + *) + ;; + esac + done + + # Fetch the distributions. + bsdinstall distfetch + rc=$? + + if [ $rc -ne 0 ]; then + # If unable to fetch the remote distributions, recommend + # deselecting the debugging distributions, and retrying the + # installation, since failure to fetch *-dbg.txz should not + # be considered a fatal installation error. + msg="Failed to fetch remote distribution" + if [ ! -z "$WANT_DEBUG" ]; then + # Trim leading and trailing newlines. + DEBUG_LIST="${DEBUG_LIST%%\n}" + DEBUG_LIST="${DEBUG_LIST##\n}" + msg="$msg\n\nPlease deselect the following distributions" + msg="$msg and retry the installation:" + msg="$msg\n$DEBUG_LIST" + fi + error "$msg" + fi export DISTRIBUTIONS="$ALL_DISTRIBUTIONS" +fi + +if [ ! -z "$LOCAL_DISTRIBUTIONS" ]; then + # Download to a directory in the new system as scratch space + BSDINSTALL_FETCHDEST="$BSDINSTALL_CHROOT/usr/freebsd-dist" + mkdir -p "$BSDINSTALL_FETCHDEST" || error "Could not create directory $BSDINSTALL_FETCHDEST" + # Try to use any existing distfiles + if [ -d $BSDINSTALL_DISTDIR ]; then + DISTDIR_IS_UNIONFS=1 + mount_nullfs -o union "$BSDINSTALL_FETCHDEST" "$BSDINSTALL_DISTDIR" + export BSDINSTALL_DISTDIR="$BSDINSTALL_FETCHDEST" + fi + env DISTRIBUTIONS="$LOCAL_DISTRIBUTIONS" \ + BSDINSTALL_DISTSITE="file:///usr/freebsd-dist" \ + bsdinstall distfetch || \ + error "Failed to fetch distribution from local media" fi bsdinstall checksum || error "Distribution checksum failed" bsdinstall distextract || error "Distribution extract failed" bsdinstall rootpass || error "Could not set root password" trap true SIGINT # This section is optional if [ "$NETCONFIG_DONE" != yes ]; then bsdinstall netconfig # Don't check for errors -- the user may cancel fi bsdinstall time bsdinstall services dialog --backtitle "FreeBSD Installer" --title "Add User Accounts" --yesno \ "Would you like to add users to the installed system now?" 
0 0 && \ bsdinstall adduser finalconfig() { exec 3>&1 REVISIT=$(dialog --backtitle "FreeBSD Installer" \ --title "Final Configuration" --no-cancel --menu \ "Setup of your FreeBSD system is nearly complete. You can now modify your configuration choices. After this screen, you will have an opportunity to make more complex changes using a shell." 0 0 0 \ "Exit" "Apply configuration and exit installer" \ "Add User" "Add a user to the system" \ "Root Password" "Change root password" \ "Hostname" "Set system hostname" \ "Network" "Networking configuration" \ "Services" "Set daemons to run on startup" \ "Time Zone" "Set system timezone" \ "Handbook" "Install FreeBSD Handbook (requires network)" 2>&1 1>&3) exec 3>&- case "$REVISIT" in "Add User") bsdinstall adduser finalconfig ;; "Root Password") bsdinstall rootpass finalconfig ;; "Hostname") bsdinstall hostname finalconfig ;; "Network") bsdinstall netconfig finalconfig ;; "Services") bsdinstall services finalconfig ;; "Time Zone") bsdinstall time finalconfig ;; "Handbook") bsdinstall docsinstall finalconfig ;; esac } # Allow user to change his mind finalconfig trap error SIGINT # SIGINT is bad again bsdinstall config || error "Failed to save config" if [ ! -z "$BSDINSTALL_FETCHDEST" ]; then [ "$BSDINSTALL_FETCHDEST" != "$BSDINSTALL_DISTDIR" ] && \ umount "$BSDINSTALL_DISTDIR" rm -rf "$BSDINSTALL_FETCHDEST" fi dialog --backtitle "FreeBSD Installer" --title "Manual Configuration" \ --default-button no --yesno \ "The installation is now finished. Before exiting the installer, would you like to open a shell in the new system to make any final manual modifications?" 0 0 if [ $? -eq 0 ]; then clear mount -t devfs devfs "$BSDINSTALL_CHROOT/dev" echo This shell is operating in a chroot in the new system. \ When finished making configuration changes, type \"exit\". chroot "$BSDINSTALL_CHROOT" /bin/sh 2>&1 fi bsdinstall entropy bsdinstall umount f_dprintf "Installation Completed at %s" "$( date )" ################################################################################ # END ################################################################################ Index: projects/release-pkg/usr.sbin/cron/crontab/crontab.5 =================================================================== --- projects/release-pkg/usr.sbin/cron/crontab/crontab.5 (revision 293224) +++ projects/release-pkg/usr.sbin/cron/crontab/crontab.5 (revision 293225) @@ -1,323 +1,323 @@ .\"/* Copyright 1988,1990,1993,1994 by Paul Vixie .\" * All rights reserved .\" * .\" * Distribute freely, except: don't remove my name from the source or .\" * documentation (don't take credit for my work), mark your changes (don't .\" * get me blamed for your possible bugs), don't alter or remove this .\" * notice. May be sold if buildable source is provided to buyer. No .\" * warrantee of any kind, express or implied, is included with this .\" * software; use at your own risk, responsibility for damages (if any) to .\" * anyone resulting from the use of this software rests entirely with the .\" * user. .\" * .\" * Send bug reports, bug fixes, enhancements, requests, flames, etc., and .\" * I'll try to keep a version up to date. I can be reached as follows: .\" * Paul Vixie uunet!decwrl!vixie!paul .\" */ .\" .\" $FreeBSD$ .\" -.Dd April 28, 2012 +.Dd January 5, 2016 .Dt CRONTAB 5 .Os .Sh NAME .Nm crontab .Nd tables for driving cron .Sh DESCRIPTION A .Nm file contains instructions to the .Xr cron 8 daemon of the general form: ``run this command at this time on this date''. 
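For example, an entry such as the following (purely illustrative)
.Bd -literal -offset indent
0	3	*	*	*	/usr/libexec/locate.updatedb
.Ed
.Pp
would run the named command at 3:00 am every day;
the meaning of each field is described in detail below.
.Pp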
Each user has their own crontab, and commands in any given crontab will be executed as the user who owns the crontab. Uucp and News will usually have their own crontabs, eliminating the need for explicitly running .Xr su 1 as part of a cron command. .Pp Blank lines and leading spaces and tabs are ignored. Lines whose first non-space character is a pound-sign (#) are comments, and are ignored. Note that comments are not allowed on the same line as cron commands, since they will be taken to be part of the command. Similarly, comments are not allowed on the same line as environment variable settings. .Pp An active line in a crontab will be either an environment setting or a cron command. An environment setting is of the form, .Bd -literal name = value .Ed .Pp where the spaces around the equal-sign (=) are optional, and any subsequent non-leading spaces in .Em value will be part of the value assigned to .Em name . The .Em value string may be placed in quotes (single or double, but matching) to preserve leading or trailing blanks. The .Em name string may also be placed in quote (single or double, but matching) to preserve leading, trailing or inner blanks. .Pp Several environment variables are set up automatically by the .Xr cron 8 daemon. .Ev SHELL is set to .Pa /bin/sh , .Ev PATH is set to -.Pa /usr/bin:/bin , +.Pa /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin , and .Ev LOGNAME and .Ev HOME are set from the .Pa /etc/passwd line of the crontab's owner. .Ev HOME , .Ev PATH and .Ev SHELL may be overridden by settings in the crontab; .Ev LOGNAME may not. .Pp (Another note: the .Ev LOGNAME variable is sometimes called .Ev USER on .Bx systems... On these systems, .Ev USER will be set also). .Pp In addition to .Ev LOGNAME , .Ev HOME , .Ev PATH , and .Ev SHELL , .Xr cron 8 will look at .Ev MAILTO if it has any reason to send mail as a result of running commands in ``this'' crontab. If .Ev MAILTO is defined (and non-empty), mail is sent to the user so named. .Ev MAILTO may also be used to direct mail to multiple recipients by separating recipient users with a comma. If .Ev MAILTO is defined but empty (MAILTO=""), no mail will be sent. Otherwise mail is sent to the owner of the crontab. This option is useful if you decide on .Pa /bin/mail instead of .Pa /usr/lib/sendmail as your mailer when you install cron -- .Pa /bin/mail does not do aliasing, and UUCP usually does not read its mail. .Pp The format of a cron command is very much the V7 standard, with a number of upward-compatible extensions. Each line has five time and date fields, followed by a user name (with optional ``:'' and ``/'' suffixes) if this is the system crontab file, followed by a command. Commands are executed by .Xr cron 8 when the minute, hour, and month of year fields match the current time, .Em and when at least one of the two day fields (day of month, or day of week) matches the current time (see ``Note'' below). .Xr cron 8 examines cron entries once every minute. The time and date fields are: .Bd -literal -offset indent field allowed values ----- -------------- minute 0-59 hour 0-23 day of month 1-31 month 1-12 (or names, see below) day of week 0-7 (0 or 7 is Sun, or use names) .Ed .Pp A field may be an asterisk (*), which always stands for ``first\-last''. .Pp Ranges of numbers are allowed. Ranges are two numbers separated with a hyphen. The specified range is inclusive. For example, 8-11 for an ``hours'' entry specifies execution at hours 8, 9, 10 and 11. .Pp Lists are allowed. 
A list is a set of numbers (or ranges) separated by commas. Examples: ``1,2,5,9'', ``0-4,8-12''. .Pp Step values can be used in conjunction with ranges. Following a range with ``/'' specifies skips of the number's value through the range. For example, ``0-23/2'' can be used in the hours field to specify command execution every other hour (the alternative in the V7 standard is ``0,2,4,6,8,10,12,14,16,18,20,22''). Steps are also permitted after an asterisk, so if you want to say ``every two hours'', just use ``*/2''. .Pp Names can also be used for the ``month'' and ``day of week'' fields. Use the first three letters of the particular day or month (case does not matter). Ranges or lists of names are not allowed. .Pp The ``sixth'' field (the rest of the line) specifies the command to be run. The entire command portion of the line, up to a newline or % character, will be executed by .Pa /bin/sh or by the shell specified in the .Ev SHELL variable of the cronfile. Percent-signs (%) in the command, unless escaped with backslash (\\), will be changed into newline characters, and all data after the first % will be sent to the command as standard input. .Pp Note: The day of a command's execution can be specified by two fields \(em day of month, and day of week. If both fields are restricted (ie, are not *), the command will be run when .Em either field matches the current time. For example, ``30 4 1,15 * 5'' would cause a command to be run at 4:30 am on the 1st and 15th of each month, plus every Friday. .Pp Instead of the first five fields, one of eight special strings may appear: .Bd -literal -offset indent string meaning ------ ------- @reboot Run once, at startup of cron. @yearly Run once a year, "0 0 1 1 *". @annually (same as @yearly) @monthly Run once a month, "0 0 1 * *". @weekly Run once a week, "0 0 * * 0". @daily Run once a day, "0 0 * * *". @midnight (same as @daily) @hourly Run once an hour, "0 * * * *". @every_minute Run once a minute, "*/1 * * * *". @every_second Run once a second. .Ed .Sh EXAMPLE CRON FILE .Bd -literal # use /bin/sh to run commands, overriding the default set by cron SHELL=/bin/sh # mail any output to `paul', no matter whose crontab this is MAILTO=paul # # run five minutes after midnight, every day 5 0 * * * $HOME/bin/daily.job >> $HOME/tmp/out 2>&1 # run at 2:15pm on the first of every month -- output mailed to paul 15 14 1 * * $HOME/bin/monthly # run at 10 pm on weekdays, annoy Joe 0 22 * * 1-5 mail -s "It's 10pm" joe%Joe,%%Where are your kids?% 23 0-23/2 * * * echo "run 23 minutes after midn, 2am, 4am ..., everyday" 5 4 * * sun echo "run at 5 after 4 every sunday" .Ed .Sh SEE ALSO .Xr crontab 1 , .Xr cron 8 .Sh EXTENSIONS When specifying day of week, both day 0 and day 7 will be considered Sunday. .Bx and .Tn ATT seem to disagree about this. .Pp Lists and ranges are allowed to co-exist in the same field. "1-3,7-9" would be rejected by .Tn ATT or .Bx cron -- they want to see "1-3" or "7,8,9" ONLY. .Pp Ranges can include "steps", so "1-9/2" is the same as "1,3,5,7,9". .Pp Names of months or days of the week can be specified by name. .Pp Environment variables can be set in the crontab. In .Bx or .Tn ATT , the environment handed to child processes is basically the one from .Pa /etc/rc . .Pp Command output is mailed to the crontab owner .No ( Bx cannot do this), can be mailed to a person other than the crontab owner (SysV cannot do this), or the feature can be turned off and no mail will be sent at all (SysV cannot do this either). 
.Pp All of the .Sq @ commands that can appear in place of the first five fields are extensions. .Sh AUTHORS .An Paul Vixie Aq Mt paul@vix.com .Sh BUGS If you are in one of the 70-odd countries that observe Daylight Savings Time, jobs scheduled during the rollback or advance may be affected if .Xr cron 8 is not started with the .Fl s flag. In general, it is not a good idea to schedule jobs during this period if .Xr cron 8 is not started with the .Fl s flag, which is enabled by default. See .Xr cron 8 for more details. .Pp For US timezones (except parts of AZ and HI) the time shift occurs at 2AM local time. For others, the output of the .Xr zdump 8 program's verbose .Fl ( v ) option can be used to determine the moment of time shift. Index: projects/release-pkg =================================================================== --- projects/release-pkg (revision 293224) +++ projects/release-pkg (revision 293225) Property changes on: projects/release-pkg ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r293196-293224